|
{ |
|
"best_metric": 0.5579236745834351, |
|
"best_model_checkpoint": "deberta-v3-large-finetuned-squadv2/checkpoint-3620", |
|
"epoch": 2.550269740068661, |
|
"eval_steps": 20, |
|
"global_step": 5200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.0000000000000002e-07, |
|
"loss": 6.0843, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.0000000000000003e-07, |
|
"loss": 6.0629, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 6.000000000000001e-07, |
|
"loss": 6.0196, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 8.000000000000001e-07, |
|
"loss": 5.9309, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 5.768, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.2000000000000002e-06, |
|
"loss": 5.4721, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.3900000000000002e-06, |
|
"loss": 5.1112, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.5900000000000002e-06, |
|
"loss": 4.6482, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.79e-06, |
|
"loss": 4.2226, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.9900000000000004e-06, |
|
"loss": 3.7989, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.19e-06, |
|
"loss": 3.4023, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.39e-06, |
|
"loss": 3.0793, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.59e-06, |
|
"loss": 2.5037, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.7900000000000004e-06, |
|
"loss": 1.9528, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.99e-06, |
|
"loss": 1.736, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 3.1900000000000004e-06, |
|
"loss": 1.5742, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 3.3900000000000006e-06, |
|
"loss": 1.4485, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 3.58e-06, |
|
"loss": 1.4103, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.7800000000000002e-06, |
|
"loss": 1.3205, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.980000000000001e-06, |
|
"loss": 1.2793, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.18e-06, |
|
"loss": 1.1896, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.38e-06, |
|
"loss": 1.1228, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.57e-06, |
|
"loss": 1.0661, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.77e-06, |
|
"loss": 1.0459, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.970000000000001e-06, |
|
"loss": 1.0062, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 5.1700000000000005e-06, |
|
"loss": 0.9441, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 5.370000000000001e-06, |
|
"loss": 0.9871, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 5.570000000000001e-06, |
|
"loss": 0.9291, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 5.77e-06, |
|
"loss": 0.9635, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 5.9700000000000004e-06, |
|
"loss": 0.9874, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 6.17e-06, |
|
"loss": 0.8922, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 6.370000000000001e-06, |
|
"loss": 0.9304, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 6.570000000000001e-06, |
|
"loss": 0.8742, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 6.770000000000001e-06, |
|
"loss": 0.8423, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 6.97e-06, |
|
"loss": 0.8286, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 7.17e-06, |
|
"loss": 0.8192, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 7.370000000000001e-06, |
|
"loss": 0.8451, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 7.57e-06, |
|
"loss": 0.7954, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 7.77e-06, |
|
"loss": 0.8322, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 7.970000000000002e-06, |
|
"loss": 0.8318, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 8.17e-06, |
|
"loss": 0.8396, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 8.370000000000001e-06, |
|
"loss": 0.7922, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 8.570000000000001e-06, |
|
"loss": 0.7931, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 8.77e-06, |
|
"loss": 0.7942, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 8.97e-06, |
|
"loss": 0.8017, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 9.17e-06, |
|
"loss": 0.7346, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 9.370000000000002e-06, |
|
"loss": 0.6792, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 9.57e-06, |
|
"loss": 0.8069, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 9.770000000000001e-06, |
|
"loss": 0.8058, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 9.970000000000001e-06, |
|
"loss": 0.8398, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 9.95952380952381e-06, |
|
"loss": 0.8148, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 9.911904761904762e-06, |
|
"loss": 0.7888, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.864285714285715e-06, |
|
"loss": 0.7423, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.816666666666667e-06, |
|
"loss": 0.7855, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.76904761904762e-06, |
|
"loss": 0.7303, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.721428571428573e-06, |
|
"loss": 0.7156, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 9.673809523809525e-06, |
|
"loss": 0.7542, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 9.626190476190477e-06, |
|
"loss": 0.723, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 9.57857142857143e-06, |
|
"loss": 0.7025, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 9.530952380952381e-06, |
|
"loss": 0.7755, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 9.483333333333335e-06, |
|
"loss": 0.7225, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 9.435714285714286e-06, |
|
"loss": 0.7389, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 9.388095238095238e-06, |
|
"loss": 0.7083, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 9.340476190476191e-06, |
|
"loss": 0.6863, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 9.292857142857144e-06, |
|
"loss": 0.7167, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 9.245238095238096e-06, |
|
"loss": 0.7006, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 9.197619047619049e-06, |
|
"loss": 0.7058, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.15e-06, |
|
"loss": 0.7068, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.102380952380954e-06, |
|
"loss": 0.713, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.054761904761905e-06, |
|
"loss": 0.743, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 9.007142857142857e-06, |
|
"loss": 0.6588, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 8.95952380952381e-06, |
|
"loss": 0.7087, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 8.911904761904763e-06, |
|
"loss": 0.7172, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 8.864285714285715e-06, |
|
"loss": 0.6935, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 8.816666666666668e-06, |
|
"loss": 0.7049, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.76904761904762e-06, |
|
"loss": 0.6749, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.721428571428571e-06, |
|
"loss": 0.6773, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 8.673809523809524e-06, |
|
"loss": 0.7028, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 8.626190476190478e-06, |
|
"loss": 0.6754, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 8.57857142857143e-06, |
|
"loss": 0.744, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 8.530952380952382e-06, |
|
"loss": 0.6175, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 8.483333333333334e-06, |
|
"loss": 0.6788, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 8.435714285714286e-06, |
|
"loss": 0.6728, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 8.388095238095239e-06, |
|
"loss": 0.7451, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.34047619047619e-06, |
|
"loss": 0.6392, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.292857142857144e-06, |
|
"loss": 0.6522, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 8.245238095238097e-06, |
|
"loss": 0.6665, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 8.197619047619048e-06, |
|
"loss": 0.6178, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 8.15e-06, |
|
"loss": 0.6699, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 8.102380952380953e-06, |
|
"loss": 0.6543, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 8.054761904761905e-06, |
|
"loss": 0.6724, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 8.007142857142858e-06, |
|
"loss": 0.7194, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 7.959523809523811e-06, |
|
"loss": 0.6695, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 7.911904761904763e-06, |
|
"loss": 0.6542, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.864285714285716e-06, |
|
"loss": 0.6986, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 7.816666666666667e-06, |
|
"loss": 0.6527, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 7.769047619047619e-06, |
|
"loss": 0.6036, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 7.721428571428572e-06, |
|
"loss": 0.6309, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 7.673809523809524e-06, |
|
"loss": 0.6647, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 7.626190476190477e-06, |
|
"loss": 0.6625, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 7.5785714285714295e-06, |
|
"loss": 0.673, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 7.530952380952381e-06, |
|
"loss": 0.6615, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 7.483333333333333e-06, |
|
"loss": 0.5771, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 7.435714285714286e-06, |
|
"loss": 0.5824, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 7.388095238095239e-06, |
|
"loss": 0.5038, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 7.3404761904761914e-06, |
|
"loss": 0.5524, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 7.292857142857144e-06, |
|
"loss": 0.5903, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 7.245238095238096e-06, |
|
"loss": 0.609, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 7.197619047619048e-06, |
|
"loss": 0.5303, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 7.15e-06, |
|
"loss": 0.5637, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 7.1023809523809525e-06, |
|
"loss": 0.5513, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 7.054761904761906e-06, |
|
"loss": 0.5648, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 7.007142857142858e-06, |
|
"loss": 0.5907, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 6.9595238095238106e-06, |
|
"loss": 0.5791, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 6.911904761904763e-06, |
|
"loss": 0.5485, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 6.8642857142857145e-06, |
|
"loss": 0.5279, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 6.816666666666667e-06, |
|
"loss": 0.5283, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 6.769047619047619e-06, |
|
"loss": 0.5294, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 6.721428571428572e-06, |
|
"loss": 0.4647, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 6.673809523809525e-06, |
|
"loss": 0.5571, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 6.626190476190477e-06, |
|
"loss": 0.5188, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 6.578571428571429e-06, |
|
"loss": 0.5099, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 6.530952380952381e-06, |
|
"loss": 0.5371, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 6.483333333333334e-06, |
|
"loss": 0.55, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 6.435714285714286e-06, |
|
"loss": 0.5546, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 6.3880952380952384e-06, |
|
"loss": 0.5465, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 6.340476190476192e-06, |
|
"loss": 0.4944, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 6.292857142857144e-06, |
|
"loss": 0.5248, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 6.245238095238096e-06, |
|
"loss": 0.5585, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 6.197619047619048e-06, |
|
"loss": 0.5562, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 6.15e-06, |
|
"loss": 0.5402, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 6.102380952380953e-06, |
|
"loss": 0.5674, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 6.054761904761905e-06, |
|
"loss": 0.5641, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 6.0071428571428584e-06, |
|
"loss": 0.4678, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 5.959523809523809e-06, |
|
"loss": 0.5162, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 5.911904761904762e-06, |
|
"loss": 0.5826, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 5.864285714285715e-06, |
|
"loss": 0.518, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 5.816666666666667e-06, |
|
"loss": 0.512, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 5.7690476190476196e-06, |
|
"loss": 0.5848, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 5.721428571428572e-06, |
|
"loss": 0.526, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 5.673809523809525e-06, |
|
"loss": 0.5419, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 5.626190476190476e-06, |
|
"loss": 0.5642, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 5.578571428571429e-06, |
|
"loss": 0.5126, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 5.5309523809523815e-06, |
|
"loss": 0.5687, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 5.483333333333334e-06, |
|
"loss": 0.5498, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 5.435714285714286e-06, |
|
"loss": 0.5213, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 5.388095238095239e-06, |
|
"loss": 0.575, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 5.34047619047619e-06, |
|
"loss": 0.5524, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 5.292857142857143e-06, |
|
"loss": 0.5304, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 5.245238095238096e-06, |
|
"loss": 0.4706, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 5.197619047619048e-06, |
|
"loss": 0.5716, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 5.150000000000001e-06, |
|
"loss": 0.4976, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 5.102380952380953e-06, |
|
"loss": 0.5022, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 5.0547619047619055e-06, |
|
"loss": 0.556, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 5.007142857142857e-06, |
|
"loss": 0.5023, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 4.95952380952381e-06, |
|
"loss": 0.5613, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 4.911904761904762e-06, |
|
"loss": 0.6031, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 4.864285714285715e-06, |
|
"loss": 0.5346, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 4.816666666666667e-06, |
|
"loss": 0.5665, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 4.769047619047619e-06, |
|
"loss": 0.5293, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"eval_loss": 0.5739259123802185, |
|
"eval_runtime": 277.0719, |
|
"eval_samples_per_second": 43.133, |
|
"eval_steps_per_second": 5.392, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 4.721428571428572e-06, |
|
"loss": 0.5106, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"eval_loss": 0.5783331990242004, |
|
"eval_runtime": 276.9686, |
|
"eval_samples_per_second": 43.149, |
|
"eval_steps_per_second": 5.394, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 4.673809523809525e-06, |
|
"loss": 0.5338, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"eval_loss": 0.5718061327934265, |
|
"eval_runtime": 276.8816, |
|
"eval_samples_per_second": 43.163, |
|
"eval_steps_per_second": 5.396, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 4.626190476190476e-06, |
|
"loss": 0.5128, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_loss": 0.5826650261878967, |
|
"eval_runtime": 276.9662, |
|
"eval_samples_per_second": 43.15, |
|
"eval_steps_per_second": 5.394, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 4.5785714285714285e-06, |
|
"loss": 0.5205, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"eval_loss": 0.6044849157333374, |
|
"eval_runtime": 276.8959, |
|
"eval_samples_per_second": 43.161, |
|
"eval_steps_per_second": 5.396, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 4.530952380952382e-06, |
|
"loss": 0.5114, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"eval_loss": 0.5880448818206787, |
|
"eval_runtime": 276.8767, |
|
"eval_samples_per_second": 43.164, |
|
"eval_steps_per_second": 5.396, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 4.483333333333333e-06, |
|
"loss": 0.5072, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"eval_loss": 0.5788173079490662, |
|
"eval_runtime": 276.8866, |
|
"eval_samples_per_second": 43.162, |
|
"eval_steps_per_second": 5.396, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 4.435714285714286e-06, |
|
"loss": 0.5512, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"eval_loss": 0.5863245725631714, |
|
"eval_runtime": 276.8934, |
|
"eval_samples_per_second": 43.161, |
|
"eval_steps_per_second": 5.396, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 4.388095238095238e-06, |
|
"loss": 0.4723, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"eval_loss": 0.5898299813270569, |
|
"eval_runtime": 276.9062, |
|
"eval_samples_per_second": 43.159, |
|
"eval_steps_per_second": 5.395, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 4.340476190476191e-06, |
|
"loss": 0.5011, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"eval_loss": 0.5917273163795471, |
|
"eval_runtime": 276.8734, |
|
"eval_samples_per_second": 43.164, |
|
"eval_steps_per_second": 5.396, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 4.292857142857143e-06, |
|
"loss": 0.5419, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"eval_loss": 0.6026594042778015, |
|
"eval_runtime": 276.8544, |
|
"eval_samples_per_second": 43.167, |
|
"eval_steps_per_second": 5.396, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 4.245238095238095e-06, |
|
"loss": 0.5425, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"eval_loss": 0.5699217915534973, |
|
"eval_runtime": 276.8159, |
|
"eval_samples_per_second": 43.173, |
|
"eval_steps_per_second": 5.397, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 4.1976190476190485e-06, |
|
"loss": 0.5703, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"eval_loss": 0.5897491574287415, |
|
"eval_runtime": 276.8228, |
|
"eval_samples_per_second": 43.172, |
|
"eval_steps_per_second": 5.397, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 4.15e-06, |
|
"loss": 0.4646, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_loss": 0.5916581153869629, |
|
"eval_runtime": 276.8047, |
|
"eval_samples_per_second": 43.175, |
|
"eval_steps_per_second": 5.397, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 4.1023809523809525e-06, |
|
"loss": 0.4652, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"eval_loss": 0.5745313167572021, |
|
"eval_runtime": 276.8115, |
|
"eval_samples_per_second": 43.174, |
|
"eval_steps_per_second": 5.397, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 4.054761904761905e-06, |
|
"loss": 0.5323, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"eval_loss": 0.5859553217887878, |
|
"eval_runtime": 276.8471, |
|
"eval_samples_per_second": 43.168, |
|
"eval_steps_per_second": 5.396, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 4.007142857142857e-06, |
|
"loss": 0.5129, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"eval_loss": 0.5655719637870789, |
|
"eval_runtime": 276.8529, |
|
"eval_samples_per_second": 43.167, |
|
"eval_steps_per_second": 5.396, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 3.95952380952381e-06, |
|
"loss": 0.5441, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"eval_loss": 0.5642224550247192, |
|
"eval_runtime": 276.8896, |
|
"eval_samples_per_second": 43.162, |
|
"eval_steps_per_second": 5.396, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 3.911904761904762e-06, |
|
"loss": 0.5624, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_loss": 0.5872688293457031, |
|
"eval_runtime": 276.8743, |
|
"eval_samples_per_second": 43.164, |
|
"eval_steps_per_second": 5.396, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 3.864285714285715e-06, |
|
"loss": 0.4645, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"eval_loss": 0.5890788435935974, |
|
"eval_runtime": 276.8469, |
|
"eval_samples_per_second": 43.168, |
|
"eval_steps_per_second": 5.396, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 3.816666666666667e-06, |
|
"loss": 0.5577, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"eval_loss": 0.5816096663475037, |
|
"eval_runtime": 276.8584, |
|
"eval_samples_per_second": 43.166, |
|
"eval_steps_per_second": 5.396, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 3.7690476190476192e-06, |
|
"loss": 0.5199, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"eval_loss": 0.5579236745834351, |
|
"eval_runtime": 276.8666, |
|
"eval_samples_per_second": 43.165, |
|
"eval_steps_per_second": 5.396, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 3.721428571428572e-06, |
|
"loss": 0.5061, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"eval_loss": 0.5837463140487671, |
|
"eval_runtime": 276.8995, |
|
"eval_samples_per_second": 43.16, |
|
"eval_steps_per_second": 5.395, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 3.673809523809524e-06, |
|
"loss": 0.484, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"eval_loss": 0.5721494555473328, |
|
"eval_runtime": 277.0285, |
|
"eval_samples_per_second": 43.14, |
|
"eval_steps_per_second": 5.393, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 3.6261904761904764e-06, |
|
"loss": 0.5095, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_loss": 0.5820609927177429, |
|
"eval_runtime": 277.1157, |
|
"eval_samples_per_second": 43.126, |
|
"eval_steps_per_second": 5.391, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 3.5785714285714292e-06, |
|
"loss": 0.5342, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"eval_loss": 0.5602211356163025, |
|
"eval_runtime": 277.1392, |
|
"eval_samples_per_second": 43.123, |
|
"eval_steps_per_second": 5.391, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 3.530952380952381e-06, |
|
"loss": 0.5435, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"eval_loss": 0.5910717248916626, |
|
"eval_runtime": 277.1237, |
|
"eval_samples_per_second": 43.125, |
|
"eval_steps_per_second": 5.391, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 3.4833333333333336e-06, |
|
"loss": 0.5288, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"eval_loss": 0.5647350549697876, |
|
"eval_runtime": 277.1519, |
|
"eval_samples_per_second": 43.121, |
|
"eval_steps_per_second": 5.391, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 3.435714285714286e-06, |
|
"loss": 0.5476, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"eval_loss": 0.5733036398887634, |
|
"eval_runtime": 277.012, |
|
"eval_samples_per_second": 43.143, |
|
"eval_steps_per_second": 5.393, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 3.388095238095238e-06, |
|
"loss": 0.5199, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"eval_loss": 0.5674840807914734, |
|
"eval_runtime": 276.9911, |
|
"eval_samples_per_second": 43.146, |
|
"eval_steps_per_second": 5.394, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 3.3404761904761908e-06, |
|
"loss": 0.5067, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_loss": 0.5838811993598938, |
|
"eval_runtime": 276.9655, |
|
"eval_samples_per_second": 43.15, |
|
"eval_steps_per_second": 5.394, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 3.292857142857143e-06, |
|
"loss": 0.5418, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"eval_loss": 0.5757073163986206, |
|
"eval_runtime": 277.0012, |
|
"eval_samples_per_second": 43.144, |
|
"eval_steps_per_second": 5.393, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 3.2452380952380955e-06, |
|
"loss": 0.4965, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"eval_loss": 0.5763747692108154, |
|
"eval_runtime": 276.9993, |
|
"eval_samples_per_second": 43.145, |
|
"eval_steps_per_second": 5.394, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 3.197619047619048e-06, |
|
"loss": 0.5273, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_loss": 0.5905867218971252, |
|
"eval_runtime": 276.9687, |
|
"eval_samples_per_second": 43.149, |
|
"eval_steps_per_second": 5.394, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 3.1500000000000003e-06, |
|
"loss": 0.5808, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_loss": 0.5761615633964539, |
|
"eval_runtime": 276.9979, |
|
"eval_samples_per_second": 43.145, |
|
"eval_steps_per_second": 5.394, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 3.1023809523809527e-06, |
|
"loss": 0.5161, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"eval_loss": 0.5611954927444458, |
|
"eval_runtime": 276.9358, |
|
"eval_samples_per_second": 43.154, |
|
"eval_steps_per_second": 5.395, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 3.0547619047619047e-06, |
|
"loss": 0.4863, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"eval_loss": 0.5804067254066467, |
|
"eval_runtime": 276.9544, |
|
"eval_samples_per_second": 43.152, |
|
"eval_steps_per_second": 5.394, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 3.0071428571428575e-06, |
|
"loss": 0.4827, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"eval_loss": 0.584104597568512, |
|
"eval_runtime": 276.9708, |
|
"eval_samples_per_second": 43.149, |
|
"eval_steps_per_second": 5.394, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 2.95952380952381e-06, |
|
"loss": 0.4643, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"eval_loss": 0.5822347402572632, |
|
"eval_runtime": 276.9742, |
|
"eval_samples_per_second": 43.148, |
|
"eval_steps_per_second": 5.394, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 2.911904761904762e-06, |
|
"loss": 0.5029, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"eval_loss": 0.6052400469779968, |
|
"eval_runtime": 276.9537, |
|
"eval_samples_per_second": 43.152, |
|
"eval_steps_per_second": 5.394, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 2.8642857142857143e-06, |
|
"loss": 0.509, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"eval_loss": 0.5799996852874756, |
|
"eval_runtime": 276.9867, |
|
"eval_samples_per_second": 43.146, |
|
"eval_steps_per_second": 5.394, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 2.816666666666667e-06, |
|
"loss": 0.5382, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_loss": 0.5645180940628052, |
|
"eval_runtime": 277.0026, |
|
"eval_samples_per_second": 43.144, |
|
"eval_steps_per_second": 5.393, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 2.7690476190476195e-06, |
|
"loss": 0.469, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"eval_loss": 0.5685124397277832, |
|
"eval_runtime": 276.9786, |
|
"eval_samples_per_second": 43.148, |
|
"eval_steps_per_second": 5.394, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 2.7214285714285714e-06, |
|
"loss": 0.5032, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_loss": 0.5778502225875854, |
|
"eval_runtime": 277.0026, |
|
"eval_samples_per_second": 43.144, |
|
"eval_steps_per_second": 5.393, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 2.6738095238095243e-06, |
|
"loss": 0.5171, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.5685559511184692, |
|
"eval_runtime": 277.0003, |
|
"eval_samples_per_second": 43.144, |
|
"eval_steps_per_second": 5.393, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 2.6261904761904767e-06, |
|
"loss": 0.3938, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"eval_loss": 0.5889346599578857, |
|
"eval_runtime": 277.0343, |
|
"eval_samples_per_second": 43.139, |
|
"eval_steps_per_second": 5.393, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 2.5785714285714286e-06, |
|
"loss": 0.4321, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"eval_loss": 0.6039115190505981, |
|
"eval_runtime": 276.9943, |
|
"eval_samples_per_second": 43.145, |
|
"eval_steps_per_second": 5.394, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 2.530952380952381e-06, |
|
"loss": 0.4185, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"eval_loss": 0.599577009677887, |
|
"eval_runtime": 277.0077, |
|
"eval_samples_per_second": 43.143, |
|
"eval_steps_per_second": 5.393, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 2.4833333333333334e-06, |
|
"loss": 0.4782, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"eval_loss": 0.580022931098938, |
|
"eval_runtime": 277.0163, |
|
"eval_samples_per_second": 43.142, |
|
"eval_steps_per_second": 5.393, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 2.435714285714286e-06, |
|
"loss": 0.424, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"eval_loss": 0.6373934745788574, |
|
"eval_runtime": 277.0011, |
|
"eval_samples_per_second": 43.144, |
|
"eval_steps_per_second": 5.393, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 2.388095238095238e-06, |
|
"loss": 0.3766, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"eval_loss": 0.6096173524856567, |
|
"eval_runtime": 276.9796, |
|
"eval_samples_per_second": 43.148, |
|
"eval_steps_per_second": 5.394, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 2.3404761904761906e-06, |
|
"loss": 0.415, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"eval_loss": 0.6220654249191284, |
|
"eval_runtime": 277.0595, |
|
"eval_samples_per_second": 43.135, |
|
"eval_steps_per_second": 5.392, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 2.292857142857143e-06, |
|
"loss": 0.4352, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"eval_loss": 0.615013599395752, |
|
"eval_runtime": 277.0601, |
|
"eval_samples_per_second": 43.135, |
|
"eval_steps_per_second": 5.392, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 2.2452380952380954e-06, |
|
"loss": 0.4336, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"eval_loss": 0.6055351495742798, |
|
"eval_runtime": 277.0825, |
|
"eval_samples_per_second": 43.132, |
|
"eval_steps_per_second": 5.392, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 2.1976190476190478e-06, |
|
"loss": 0.4289, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"eval_loss": 0.6138429641723633, |
|
"eval_runtime": 277.0898, |
|
"eval_samples_per_second": 43.13, |
|
"eval_steps_per_second": 5.392, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 2.15e-06, |
|
"loss": 0.4433, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"eval_loss": 0.5946049094200134, |
|
"eval_runtime": 277.0747, |
|
"eval_samples_per_second": 43.133, |
|
"eval_steps_per_second": 5.392, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 2.1023809523809526e-06, |
|
"loss": 0.4478, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"eval_loss": 0.611806333065033, |
|
"eval_runtime": 277.0164, |
|
"eval_samples_per_second": 43.142, |
|
"eval_steps_per_second": 5.393, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 2.054761904761905e-06, |
|
"loss": 0.4787, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"eval_loss": 0.5969259738922119, |
|
"eval_runtime": 277.0423, |
|
"eval_samples_per_second": 43.138, |
|
"eval_steps_per_second": 5.393, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 2.0071428571428573e-06, |
|
"loss": 0.4432, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"eval_loss": 0.6047642230987549, |
|
"eval_runtime": 277.0355, |
|
"eval_samples_per_second": 43.139, |
|
"eval_steps_per_second": 5.393, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.9595238095238097e-06, |
|
"loss": 0.4319, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"eval_loss": 0.5948361158370972, |
|
"eval_runtime": 277.1353, |
|
"eval_samples_per_second": 43.123, |
|
"eval_steps_per_second": 5.391, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.911904761904762e-06, |
|
"loss": 0.3939, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"eval_loss": 0.6115566492080688, |
|
"eval_runtime": 277.1102, |
|
"eval_samples_per_second": 43.127, |
|
"eval_steps_per_second": 5.391, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.8642857142857143e-06, |
|
"loss": 0.3921, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"eval_loss": 0.608245849609375, |
|
"eval_runtime": 277.1416, |
|
"eval_samples_per_second": 43.122, |
|
"eval_steps_per_second": 5.391, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.816666666666667e-06, |
|
"loss": 0.4381, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"eval_loss": 0.6282362937927246, |
|
"eval_runtime": 277.0787, |
|
"eval_samples_per_second": 43.132, |
|
"eval_steps_per_second": 5.392, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.769047619047619e-06, |
|
"loss": 0.4461, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"eval_loss": 0.6083888411521912, |
|
"eval_runtime": 277.0933, |
|
"eval_samples_per_second": 43.13, |
|
"eval_steps_per_second": 5.392, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.7214285714285717e-06, |
|
"loss": 0.4012, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"eval_loss": 0.6091529726982117, |
|
"eval_runtime": 277.0771, |
|
"eval_samples_per_second": 43.132, |
|
"eval_steps_per_second": 5.392, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 1.6738095238095239e-06, |
|
"loss": 0.3849, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"eval_loss": 0.6152328848838806, |
|
"eval_runtime": 277.2346, |
|
"eval_samples_per_second": 43.108, |
|
"eval_steps_per_second": 5.389, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.6261904761904763e-06, |
|
"loss": 0.4178, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"eval_loss": 0.6003779172897339, |
|
"eval_runtime": 277.2625, |
|
"eval_samples_per_second": 43.104, |
|
"eval_steps_per_second": 5.388, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.5785714285714287e-06, |
|
"loss": 0.4163, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"eval_loss": 0.6059258580207825, |
|
"eval_runtime": 277.3365, |
|
"eval_samples_per_second": 43.092, |
|
"eval_steps_per_second": 5.387, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.530952380952381e-06, |
|
"loss": 0.4006, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"eval_loss": 0.6115380525588989, |
|
"eval_runtime": 277.2201, |
|
"eval_samples_per_second": 43.11, |
|
"eval_steps_per_second": 5.389, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.4833333333333337e-06, |
|
"loss": 0.4225, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"eval_loss": 0.6130145192146301, |
|
"eval_runtime": 277.4178, |
|
"eval_samples_per_second": 43.079, |
|
"eval_steps_per_second": 5.385, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.4357142857142859e-06, |
|
"loss": 0.4008, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"eval_loss": 0.6094552278518677, |
|
"eval_runtime": 277.5161, |
|
"eval_samples_per_second": 43.064, |
|
"eval_steps_per_second": 5.383, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.388095238095238e-06, |
|
"loss": 0.4706, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"eval_loss": 0.6135911345481873, |
|
"eval_runtime": 277.6184, |
|
"eval_samples_per_second": 43.048, |
|
"eval_steps_per_second": 5.381, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.3404761904761906e-06, |
|
"loss": 0.3902, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"eval_loss": 0.6103312373161316, |
|
"eval_runtime": 277.532, |
|
"eval_samples_per_second": 43.062, |
|
"eval_steps_per_second": 5.383, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.2928571428571428e-06, |
|
"loss": 0.4048, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"eval_loss": 0.608475923538208, |
|
"eval_runtime": 277.3473, |
|
"eval_samples_per_second": 43.09, |
|
"eval_steps_per_second": 5.387, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.2452380952380954e-06, |
|
"loss": 0.4411, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"eval_loss": 0.6138780117034912, |
|
"eval_runtime": 277.4196, |
|
"eval_samples_per_second": 43.079, |
|
"eval_steps_per_second": 5.385, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.1976190476190478e-06, |
|
"loss": 0.403, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"eval_loss": 0.6047297120094299, |
|
"eval_runtime": 277.4316, |
|
"eval_samples_per_second": 43.077, |
|
"eval_steps_per_second": 5.385, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.1500000000000002e-06, |
|
"loss": 0.4799, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"eval_loss": 0.6043194532394409, |
|
"eval_runtime": 277.378, |
|
"eval_samples_per_second": 43.086, |
|
"eval_steps_per_second": 5.386, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.1023809523809524e-06, |
|
"loss": 0.4316, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"eval_loss": 0.5959681868553162, |
|
"eval_runtime": 277.4868, |
|
"eval_samples_per_second": 43.069, |
|
"eval_steps_per_second": 5.384, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.0547619047619048e-06, |
|
"loss": 0.4198, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"eval_loss": 0.6030734181404114, |
|
"eval_runtime": 277.3901, |
|
"eval_samples_per_second": 43.084, |
|
"eval_steps_per_second": 5.386, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.0071428571428572e-06, |
|
"loss": 0.4254, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"eval_loss": 0.60329669713974, |
|
"eval_runtime": 277.3302, |
|
"eval_samples_per_second": 43.093, |
|
"eval_steps_per_second": 5.387, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 9.595238095238096e-07, |
|
"loss": 0.387, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"eval_loss": 0.611955463886261, |
|
"eval_runtime": 277.4816, |
|
"eval_samples_per_second": 43.07, |
|
"eval_steps_per_second": 5.384, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 9.119047619047621e-07, |
|
"loss": 0.3882, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"eval_loss": 0.612755298614502, |
|
"eval_runtime": 277.4062, |
|
"eval_samples_per_second": 43.081, |
|
"eval_steps_per_second": 5.386, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 8.642857142857144e-07, |
|
"loss": 0.4307, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"eval_loss": 0.6149932742118835, |
|
"eval_runtime": 277.4451, |
|
"eval_samples_per_second": 43.075, |
|
"eval_steps_per_second": 5.385, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 8.166666666666668e-07, |
|
"loss": 0.434, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"eval_loss": 0.6077226400375366, |
|
"eval_runtime": 277.2986, |
|
"eval_samples_per_second": 43.098, |
|
"eval_steps_per_second": 5.388, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 7.690476190476191e-07, |
|
"loss": 0.4225, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"eval_loss": 0.6070570945739746, |
|
"eval_runtime": 277.4566, |
|
"eval_samples_per_second": 43.073, |
|
"eval_steps_per_second": 5.385, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 7.214285714285715e-07, |
|
"loss": 0.4134, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_loss": 0.6035702228546143, |
|
"eval_runtime": 277.4001, |
|
"eval_samples_per_second": 43.082, |
|
"eval_steps_per_second": 5.386, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 6.738095238095238e-07, |
|
"loss": 0.3846, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"eval_loss": 0.612420380115509, |
|
"eval_runtime": 277.4205, |
|
"eval_samples_per_second": 43.079, |
|
"eval_steps_per_second": 5.385, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 6.261904761904762e-07, |
|
"loss": 0.3943, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"eval_loss": 0.6291103959083557, |
|
"eval_runtime": 277.2666, |
|
"eval_samples_per_second": 43.103, |
|
"eval_steps_per_second": 5.388, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 5.785714285714286e-07, |
|
"loss": 0.4455, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"eval_loss": 0.6184937953948975, |
|
"eval_runtime": 277.2863, |
|
"eval_samples_per_second": 43.1, |
|
"eval_steps_per_second": 5.388, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 5.30952380952381e-07, |
|
"loss": 0.4104, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"eval_loss": 0.6063624620437622, |
|
"eval_runtime": 277.3406, |
|
"eval_samples_per_second": 43.091, |
|
"eval_steps_per_second": 5.387, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 4.833333333333334e-07, |
|
"loss": 0.4158, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"eval_loss": 0.6095247268676758, |
|
"eval_runtime": 277.4398, |
|
"eval_samples_per_second": 43.076, |
|
"eval_steps_per_second": 5.385, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 4.357142857142858e-07, |
|
"loss": 0.4135, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"eval_loss": 0.6154703497886658, |
|
"eval_runtime": 277.3849, |
|
"eval_samples_per_second": 43.085, |
|
"eval_steps_per_second": 5.386, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 3.8809523809523813e-07, |
|
"loss": 0.3789, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"eval_loss": 0.6208740472793579, |
|
"eval_runtime": 277.33, |
|
"eval_samples_per_second": 43.093, |
|
"eval_steps_per_second": 5.387, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 3.404761904761905e-07, |
|
"loss": 0.418, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"eval_loss": 0.6106104850769043, |
|
"eval_runtime": 277.377, |
|
"eval_samples_per_second": 43.086, |
|
"eval_steps_per_second": 5.386, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 2.9285714285714287e-07, |
|
"loss": 0.3931, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"eval_loss": 0.604749858379364, |
|
"eval_runtime": 277.377, |
|
"eval_samples_per_second": 43.086, |
|
"eval_steps_per_second": 5.386, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 2.4523809523809526e-07, |
|
"loss": 0.4289, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"eval_loss": 0.6055382490158081, |
|
"eval_runtime": 277.4628, |
|
"eval_samples_per_second": 43.072, |
|
"eval_steps_per_second": 5.385, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 1.9761904761904763e-07, |
|
"loss": 0.4051, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"eval_loss": 0.6083624958992004, |
|
"eval_runtime": 277.3533, |
|
"eval_samples_per_second": 43.089, |
|
"eval_steps_per_second": 5.387, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 1.5000000000000002e-07, |
|
"loss": 0.4217, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"eval_loss": 0.611778736114502, |
|
"eval_runtime": 277.4092, |
|
"eval_samples_per_second": 43.081, |
|
"eval_steps_per_second": 5.386, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 1.023809523809524e-07, |
|
"loss": 0.3843, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"eval_loss": 0.613944411277771, |
|
"eval_runtime": 277.3668, |
|
"eval_samples_per_second": 43.087, |
|
"eval_steps_per_second": 5.386, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 5.4761904761904766e-08, |
|
"loss": 0.4435, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"eval_loss": 0.6125811338424683, |
|
"eval_runtime": 277.4188, |
|
"eval_samples_per_second": 43.079, |
|
"eval_steps_per_second": 5.385, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 7.142857142857144e-09, |
|
"loss": 0.4274, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"eval_loss": 0.6120193600654602, |
|
"eval_runtime": 277.4157, |
|
"eval_samples_per_second": 43.08, |
|
"eval_steps_per_second": 5.385, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"step": 5200, |
|
"total_flos": 3.09073923350188e+17, |
|
"train_loss": 0.820626282783655, |
|
"train_runtime": 52836.9017, |
|
"train_samples_per_second": 6.299, |
|
"train_steps_per_second": 0.098 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 5200, |
|
"num_train_epochs": 3, |
|
"save_steps": 20, |
|
"total_flos": 3.09073923350188e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|