|
{ |
|
"best_metric": 2.000014066696167, |
|
"best_model_checkpoint": "./model_tweets_2020_Q2_full/checkpoint-2400000", |
|
"epoch": 2.618552224296455, |
|
"eval_steps": 8000, |
|
"global_step": 2400000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"eval_loss": 2.1043460369110107, |
|
"eval_runtime": 841.4118, |
|
"eval_samples_per_second": 917.292, |
|
"eval_steps_per_second": 57.331, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.0726666666666665e-07, |
|
"loss": 2.2608, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_loss": 2.093374013900757, |
|
"eval_runtime": 839.5651, |
|
"eval_samples_per_second": 919.309, |
|
"eval_steps_per_second": 57.457, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_loss": 2.0861904621124268, |
|
"eval_runtime": 840.327, |
|
"eval_samples_per_second": 918.476, |
|
"eval_steps_per_second": 57.405, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.0453333333333336e-07, |
|
"loss": 2.2409, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_loss": 2.080547332763672, |
|
"eval_runtime": 841.1888, |
|
"eval_samples_per_second": 917.535, |
|
"eval_steps_per_second": 57.346, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_loss": 2.079263210296631, |
|
"eval_runtime": 841.5773, |
|
"eval_samples_per_second": 917.111, |
|
"eval_steps_per_second": 57.32, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.018e-07, |
|
"loss": 2.2278, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_loss": 2.071790933609009, |
|
"eval_runtime": 841.5993, |
|
"eval_samples_per_second": 917.087, |
|
"eval_steps_per_second": 57.318, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_loss": 2.0752639770507812, |
|
"eval_runtime": 841.4195, |
|
"eval_samples_per_second": 917.283, |
|
"eval_steps_per_second": 57.33, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.9906666666666667e-07, |
|
"loss": 2.2059, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 2.0668046474456787, |
|
"eval_runtime": 844.6529, |
|
"eval_samples_per_second": 913.772, |
|
"eval_steps_per_second": 57.111, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_loss": 2.0657169818878174, |
|
"eval_runtime": 844.5291, |
|
"eval_samples_per_second": 913.906, |
|
"eval_steps_per_second": 57.119, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 3.963333333333333e-07, |
|
"loss": 2.1997, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_loss": 2.062004566192627, |
|
"eval_runtime": 845.6772, |
|
"eval_samples_per_second": 912.665, |
|
"eval_steps_per_second": 57.042, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 2.0553247928619385, |
|
"eval_runtime": 846.4058, |
|
"eval_samples_per_second": 911.879, |
|
"eval_steps_per_second": 56.993, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 3.936e-07, |
|
"loss": 2.1988, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 2.0569465160369873, |
|
"eval_runtime": 841.5807, |
|
"eval_samples_per_second": 917.108, |
|
"eval_steps_per_second": 57.32, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_loss": 2.052541971206665, |
|
"eval_runtime": 845.3721, |
|
"eval_samples_per_second": 912.994, |
|
"eval_steps_per_second": 57.062, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 3.908666666666667e-07, |
|
"loss": 2.1861, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_loss": 2.05564284324646, |
|
"eval_runtime": 847.9385, |
|
"eval_samples_per_second": 910.231, |
|
"eval_steps_per_second": 56.89, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 2.04929256439209, |
|
"eval_runtime": 842.77, |
|
"eval_samples_per_second": 915.813, |
|
"eval_steps_per_second": 57.239, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 3.8813333333333334e-07, |
|
"loss": 2.1823, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 2.0508854389190674, |
|
"eval_runtime": 846.1188, |
|
"eval_samples_per_second": 912.189, |
|
"eval_steps_per_second": 57.012, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 2.0460989475250244, |
|
"eval_runtime": 845.0745, |
|
"eval_samples_per_second": 913.316, |
|
"eval_steps_per_second": 57.083, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 3.854e-07, |
|
"loss": 2.1851, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 2.0476059913635254, |
|
"eval_runtime": 844.5648, |
|
"eval_samples_per_second": 913.867, |
|
"eval_steps_per_second": 57.117, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 2.04502010345459, |
|
"eval_runtime": 845.4437, |
|
"eval_samples_per_second": 912.917, |
|
"eval_steps_per_second": 57.058, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 3.8266666666666665e-07, |
|
"loss": 2.1862, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 2.046872854232788, |
|
"eval_runtime": 843.2469, |
|
"eval_samples_per_second": 915.295, |
|
"eval_steps_per_second": 57.206, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_loss": 2.0441744327545166, |
|
"eval_runtime": 845.6614, |
|
"eval_samples_per_second": 912.682, |
|
"eval_steps_per_second": 57.043, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.799333333333333e-07, |
|
"loss": 2.1741, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_loss": 2.0456435680389404, |
|
"eval_runtime": 847.979, |
|
"eval_samples_per_second": 910.188, |
|
"eval_steps_per_second": 56.887, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 2.0441741943359375, |
|
"eval_runtime": 846.0243, |
|
"eval_samples_per_second": 912.291, |
|
"eval_steps_per_second": 57.018, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.772e-07, |
|
"loss": 2.181, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_loss": 2.040196418762207, |
|
"eval_runtime": 851.9304, |
|
"eval_samples_per_second": 905.966, |
|
"eval_steps_per_second": 56.623, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 2.0422918796539307, |
|
"eval_runtime": 847.5458, |
|
"eval_samples_per_second": 910.653, |
|
"eval_steps_per_second": 56.916, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.7446666666666667e-07, |
|
"loss": 2.1692, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_loss": 2.041342258453369, |
|
"eval_runtime": 847.0952, |
|
"eval_samples_per_second": 911.137, |
|
"eval_steps_per_second": 56.946, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 2.0448198318481445, |
|
"eval_runtime": 846.694, |
|
"eval_samples_per_second": 911.569, |
|
"eval_steps_per_second": 56.973, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.7173333333333333e-07, |
|
"loss": 2.1678, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 2.0417792797088623, |
|
"eval_runtime": 846.2034, |
|
"eval_samples_per_second": 912.098, |
|
"eval_steps_per_second": 57.006, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_loss": 2.041692018508911, |
|
"eval_runtime": 848.6147, |
|
"eval_samples_per_second": 909.506, |
|
"eval_steps_per_second": 56.844, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.69e-07, |
|
"loss": 2.1756, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_loss": 2.034193754196167, |
|
"eval_runtime": 847.0585, |
|
"eval_samples_per_second": 911.177, |
|
"eval_steps_per_second": 56.949, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_loss": 2.037684202194214, |
|
"eval_runtime": 846.2239, |
|
"eval_samples_per_second": 912.075, |
|
"eval_steps_per_second": 57.005, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.6626666666666664e-07, |
|
"loss": 2.1752, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_loss": 2.0381019115448, |
|
"eval_runtime": 846.7119, |
|
"eval_samples_per_second": 911.55, |
|
"eval_steps_per_second": 56.972, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 2.035405158996582, |
|
"eval_runtime": 851.4653, |
|
"eval_samples_per_second": 906.461, |
|
"eval_steps_per_second": 56.654, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.6353333333333335e-07, |
|
"loss": 2.1673, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_loss": 2.038097858428955, |
|
"eval_runtime": 846.955, |
|
"eval_samples_per_second": 911.288, |
|
"eval_steps_per_second": 56.956, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_loss": 2.0375349521636963, |
|
"eval_runtime": 846.7581, |
|
"eval_samples_per_second": 911.5, |
|
"eval_steps_per_second": 56.969, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.608e-07, |
|
"loss": 2.1585, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_loss": 2.033590078353882, |
|
"eval_runtime": 848.1336, |
|
"eval_samples_per_second": 910.022, |
|
"eval_steps_per_second": 56.877, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_loss": 2.0344314575195312, |
|
"eval_runtime": 847.2304, |
|
"eval_samples_per_second": 910.992, |
|
"eval_steps_per_second": 56.937, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.5806666666666666e-07, |
|
"loss": 2.1703, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_loss": 2.034810781478882, |
|
"eval_runtime": 846.3544, |
|
"eval_samples_per_second": 911.935, |
|
"eval_steps_per_second": 56.996, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 2.0329954624176025, |
|
"eval_runtime": 847.3997, |
|
"eval_samples_per_second": 910.81, |
|
"eval_steps_per_second": 56.926, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.553333333333333e-07, |
|
"loss": 2.1667, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_loss": 2.0352213382720947, |
|
"eval_runtime": 846.3586, |
|
"eval_samples_per_second": 911.93, |
|
"eval_steps_per_second": 56.996, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 2.0359089374542236, |
|
"eval_runtime": 848.8487, |
|
"eval_samples_per_second": 909.255, |
|
"eval_steps_per_second": 56.829, |
|
"step": 328000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.5259999999999997e-07, |
|
"loss": 2.1649, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_loss": 2.031733512878418, |
|
"eval_runtime": 848.6246, |
|
"eval_samples_per_second": 909.495, |
|
"eval_steps_per_second": 56.844, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_loss": 2.0314059257507324, |
|
"eval_runtime": 851.4761, |
|
"eval_samples_per_second": 906.449, |
|
"eval_steps_per_second": 56.653, |
|
"step": 344000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.498666666666667e-07, |
|
"loss": 2.1564, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_loss": 2.030597686767578, |
|
"eval_runtime": 850.4287, |
|
"eval_samples_per_second": 907.566, |
|
"eval_steps_per_second": 56.723, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_loss": 2.029878616333008, |
|
"eval_runtime": 850.6967, |
|
"eval_samples_per_second": 907.28, |
|
"eval_steps_per_second": 56.705, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.4713333333333333e-07, |
|
"loss": 2.161, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 2.0317320823669434, |
|
"eval_runtime": 851.1347, |
|
"eval_samples_per_second": 906.813, |
|
"eval_steps_per_second": 56.676, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_loss": 2.032505989074707, |
|
"eval_runtime": 854.4271, |
|
"eval_samples_per_second": 903.319, |
|
"eval_steps_per_second": 56.458, |
|
"step": 376000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.444e-07, |
|
"loss": 2.1551, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_loss": 2.0273916721343994, |
|
"eval_runtime": 850.6274, |
|
"eval_samples_per_second": 907.354, |
|
"eval_steps_per_second": 56.71, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_loss": 2.0281741619110107, |
|
"eval_runtime": 850.1523, |
|
"eval_samples_per_second": 907.861, |
|
"eval_steps_per_second": 56.742, |
|
"step": 392000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.416666666666667e-07, |
|
"loss": 2.1602, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 2.0300543308258057, |
|
"eval_runtime": 852.8839, |
|
"eval_samples_per_second": 904.953, |
|
"eval_steps_per_second": 56.56, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_loss": 2.0302786827087402, |
|
"eval_runtime": 854.7636, |
|
"eval_samples_per_second": 902.963, |
|
"eval_steps_per_second": 56.435, |
|
"step": 408000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3893333333333335e-07, |
|
"loss": 2.1581, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_loss": 2.026031732559204, |
|
"eval_runtime": 852.2087, |
|
"eval_samples_per_second": 905.67, |
|
"eval_steps_per_second": 56.605, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_loss": 2.0248208045959473, |
|
"eval_runtime": 850.4117, |
|
"eval_samples_per_second": 907.584, |
|
"eval_steps_per_second": 56.724, |
|
"step": 424000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.3619999999999995e-07, |
|
"loss": 2.1494, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_loss": 2.026501178741455, |
|
"eval_runtime": 848.7671, |
|
"eval_samples_per_second": 909.343, |
|
"eval_steps_per_second": 56.834, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_loss": 2.0246880054473877, |
|
"eval_runtime": 849.7267, |
|
"eval_samples_per_second": 908.316, |
|
"eval_steps_per_second": 56.77, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.3346666666666666e-07, |
|
"loss": 2.1508, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_loss": 2.0231027603149414, |
|
"eval_runtime": 849.0484, |
|
"eval_samples_per_second": 909.041, |
|
"eval_steps_per_second": 56.815, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_loss": 2.0276315212249756, |
|
"eval_runtime": 849.4168, |
|
"eval_samples_per_second": 908.647, |
|
"eval_steps_per_second": 56.791, |
|
"step": 456000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 3.307333333333333e-07, |
|
"loss": 2.153, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 2.0275754928588867, |
|
"eval_runtime": 848.4629, |
|
"eval_samples_per_second": 909.669, |
|
"eval_steps_per_second": 56.855, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 2.0241763591766357, |
|
"eval_runtime": 849.6091, |
|
"eval_samples_per_second": 908.441, |
|
"eval_steps_per_second": 56.778, |
|
"step": 472000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 3.28e-07, |
|
"loss": 2.1489, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_loss": 2.0259480476379395, |
|
"eval_runtime": 849.4664, |
|
"eval_samples_per_second": 908.594, |
|
"eval_steps_per_second": 56.787, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_loss": 2.025740623474121, |
|
"eval_runtime": 850.1732, |
|
"eval_samples_per_second": 907.839, |
|
"eval_steps_per_second": 56.74, |
|
"step": 488000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 3.252666666666667e-07, |
|
"loss": 2.1468, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_loss": 2.027461528778076, |
|
"eval_runtime": 850.2923, |
|
"eval_samples_per_second": 907.711, |
|
"eval_steps_per_second": 56.732, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_loss": 2.030271053314209, |
|
"eval_runtime": 851.4114, |
|
"eval_samples_per_second": 906.518, |
|
"eval_steps_per_second": 56.658, |
|
"step": 504000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 3.2253333333333334e-07, |
|
"loss": 2.1446, |
|
"step": 512000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_loss": 2.0248193740844727, |
|
"eval_runtime": 852.1182, |
|
"eval_samples_per_second": 905.766, |
|
"eval_steps_per_second": 56.611, |
|
"step": 512000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_loss": 2.0285604000091553, |
|
"eval_runtime": 849.8013, |
|
"eval_samples_per_second": 908.236, |
|
"eval_steps_per_second": 56.765, |
|
"step": 520000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 3.198e-07, |
|
"loss": 2.1409, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 2.0211498737335205, |
|
"eval_runtime": 855.0597, |
|
"eval_samples_per_second": 902.65, |
|
"eval_steps_per_second": 56.416, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 2.0204012393951416, |
|
"eval_runtime": 856.0145, |
|
"eval_samples_per_second": 901.644, |
|
"eval_steps_per_second": 56.353, |
|
"step": 536000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 3.1706666666666665e-07, |
|
"loss": 2.1536, |
|
"step": 544000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_loss": 2.0198850631713867, |
|
"eval_runtime": 856.7067, |
|
"eval_samples_per_second": 900.915, |
|
"eval_steps_per_second": 56.307, |
|
"step": 544000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_loss": 2.0281307697296143, |
|
"eval_runtime": 867.0343, |
|
"eval_samples_per_second": 890.184, |
|
"eval_steps_per_second": 55.637, |
|
"step": 552000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.1433333333333336e-07, |
|
"loss": 2.1416, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_loss": 2.0237483978271484, |
|
"eval_runtime": 866.1166, |
|
"eval_samples_per_second": 891.127, |
|
"eval_steps_per_second": 55.696, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_loss": 2.0231337547302246, |
|
"eval_runtime": 863.3507, |
|
"eval_samples_per_second": 893.982, |
|
"eval_steps_per_second": 55.874, |
|
"step": 568000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.116e-07, |
|
"loss": 2.1502, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_loss": 2.0205323696136475, |
|
"eval_runtime": 857.8171, |
|
"eval_samples_per_second": 899.749, |
|
"eval_steps_per_second": 56.235, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_loss": 2.021655559539795, |
|
"eval_runtime": 853.6943, |
|
"eval_samples_per_second": 904.094, |
|
"eval_steps_per_second": 56.506, |
|
"step": 584000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.0886666666666667e-07, |
|
"loss": 2.1424, |
|
"step": 592000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_loss": 2.024162769317627, |
|
"eval_runtime": 861.2895, |
|
"eval_samples_per_second": 896.121, |
|
"eval_steps_per_second": 56.008, |
|
"step": 592000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_loss": 2.0237643718719482, |
|
"eval_runtime": 859.5317, |
|
"eval_samples_per_second": 897.954, |
|
"eval_steps_per_second": 56.122, |
|
"step": 600000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.061333333333333e-07, |
|
"loss": 2.1469, |
|
"step": 608000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 2.0191547870635986, |
|
"eval_runtime": 855.9495, |
|
"eval_samples_per_second": 901.712, |
|
"eval_steps_per_second": 56.357, |
|
"step": 608000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_loss": 2.024866819381714, |
|
"eval_runtime": 857.0469, |
|
"eval_samples_per_second": 900.557, |
|
"eval_steps_per_second": 56.285, |
|
"step": 616000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.034e-07, |
|
"loss": 2.145, |
|
"step": 624000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_loss": 2.0195770263671875, |
|
"eval_runtime": 858.8544, |
|
"eval_samples_per_second": 898.662, |
|
"eval_steps_per_second": 56.167, |
|
"step": 624000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_loss": 2.022365093231201, |
|
"eval_runtime": 854.0414, |
|
"eval_samples_per_second": 903.727, |
|
"eval_steps_per_second": 56.483, |
|
"step": 632000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.0066666666666663e-07, |
|
"loss": 2.1503, |
|
"step": 640000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_loss": 2.0216493606567383, |
|
"eval_runtime": 854.8203, |
|
"eval_samples_per_second": 902.903, |
|
"eval_steps_per_second": 56.432, |
|
"step": 640000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_loss": 2.022836208343506, |
|
"eval_runtime": 857.6145, |
|
"eval_samples_per_second": 899.962, |
|
"eval_steps_per_second": 56.248, |
|
"step": 648000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 2.9793333333333334e-07, |
|
"loss": 2.1355, |
|
"step": 656000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_loss": 2.019666910171509, |
|
"eval_runtime": 859.7029, |
|
"eval_samples_per_second": 897.775, |
|
"eval_steps_per_second": 56.111, |
|
"step": 656000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_loss": 2.0240182876586914, |
|
"eval_runtime": 858.0715, |
|
"eval_samples_per_second": 899.482, |
|
"eval_steps_per_second": 56.218, |
|
"step": 664000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 2.952e-07, |
|
"loss": 2.1392, |
|
"step": 672000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_loss": 2.0232093334198, |
|
"eval_runtime": 856.593, |
|
"eval_samples_per_second": 901.035, |
|
"eval_steps_per_second": 56.315, |
|
"step": 672000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_loss": 2.020932912826538, |
|
"eval_runtime": 858.8309, |
|
"eval_samples_per_second": 898.687, |
|
"eval_steps_per_second": 56.168, |
|
"step": 680000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 2.9246666666666665e-07, |
|
"loss": 2.1378, |
|
"step": 688000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_loss": 2.0219063758850098, |
|
"eval_runtime": 860.0126, |
|
"eval_samples_per_second": 897.452, |
|
"eval_steps_per_second": 56.091, |
|
"step": 688000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_loss": 2.019192695617676, |
|
"eval_runtime": 861.8149, |
|
"eval_samples_per_second": 895.575, |
|
"eval_steps_per_second": 55.974, |
|
"step": 696000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.897333333333333e-07, |
|
"loss": 2.1446, |
|
"step": 704000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_loss": 2.0194740295410156, |
|
"eval_runtime": 857.8914, |
|
"eval_samples_per_second": 899.671, |
|
"eval_steps_per_second": 56.23, |
|
"step": 704000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_loss": 2.01971173286438, |
|
"eval_runtime": 857.8638, |
|
"eval_samples_per_second": 899.7, |
|
"eval_steps_per_second": 56.232, |
|
"step": 712000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 2.8699999999999996e-07, |
|
"loss": 2.1351, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_loss": 2.0183634757995605, |
|
"eval_runtime": 857.8713, |
|
"eval_samples_per_second": 899.692, |
|
"eval_steps_per_second": 56.231, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_loss": 2.0162270069122314, |
|
"eval_runtime": 857.9238, |
|
"eval_samples_per_second": 899.637, |
|
"eval_steps_per_second": 56.228, |
|
"step": 728000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 2.8426666666666667e-07, |
|
"loss": 2.1437, |
|
"step": 736000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 2.015068531036377, |
|
"eval_runtime": 857.7851, |
|
"eval_samples_per_second": 899.783, |
|
"eval_steps_per_second": 56.237, |
|
"step": 736000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_loss": 2.0202245712280273, |
|
"eval_runtime": 857.6732, |
|
"eval_samples_per_second": 899.9, |
|
"eval_steps_per_second": 56.244, |
|
"step": 744000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 2.815333333333333e-07, |
|
"loss": 2.1249, |
|
"step": 752000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_loss": 2.0169003009796143, |
|
"eval_runtime": 860.8823, |
|
"eval_samples_per_second": 896.545, |
|
"eval_steps_per_second": 56.034, |
|
"step": 752000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_loss": 2.018857002258301, |
|
"eval_runtime": 856.9399, |
|
"eval_samples_per_second": 900.67, |
|
"eval_steps_per_second": 56.292, |
|
"step": 760000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 2.7880000000000003e-07, |
|
"loss": 2.1355, |
|
"step": 768000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_loss": 2.022115707397461, |
|
"eval_runtime": 860.0914, |
|
"eval_samples_per_second": 897.37, |
|
"eval_steps_per_second": 56.086, |
|
"step": 768000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_loss": 2.0194284915924072, |
|
"eval_runtime": 858.1451, |
|
"eval_samples_per_second": 899.405, |
|
"eval_steps_per_second": 56.213, |
|
"step": 776000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.7606666666666664e-07, |
|
"loss": 2.1387, |
|
"step": 784000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_loss": 2.018942356109619, |
|
"eval_runtime": 862.7177, |
|
"eval_samples_per_second": 894.638, |
|
"eval_steps_per_second": 55.915, |
|
"step": 784000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_loss": 2.016535520553589, |
|
"eval_runtime": 858.1148, |
|
"eval_samples_per_second": 899.437, |
|
"eval_steps_per_second": 56.215, |
|
"step": 792000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.733333333333333e-07, |
|
"loss": 2.1334, |
|
"step": 800000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_loss": 2.0169451236724854, |
|
"eval_runtime": 860.3041, |
|
"eval_samples_per_second": 897.148, |
|
"eval_steps_per_second": 56.072, |
|
"step": 800000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_loss": 2.0188918113708496, |
|
"eval_runtime": 861.004, |
|
"eval_samples_per_second": 896.419, |
|
"eval_steps_per_second": 56.026, |
|
"step": 808000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 2.706e-07, |
|
"loss": 2.137, |
|
"step": 816000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_loss": 2.016237258911133, |
|
"eval_runtime": 862.2544, |
|
"eval_samples_per_second": 895.119, |
|
"eval_steps_per_second": 55.945, |
|
"step": 816000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_loss": 2.0168325901031494, |
|
"eval_runtime": 860.8877, |
|
"eval_samples_per_second": 896.54, |
|
"eval_steps_per_second": 56.034, |
|
"step": 824000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.6786666666666666e-07, |
|
"loss": 2.1331, |
|
"step": 832000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_loss": 2.0192737579345703, |
|
"eval_runtime": 859.4597, |
|
"eval_samples_per_second": 898.029, |
|
"eval_steps_per_second": 56.127, |
|
"step": 832000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_loss": 2.016619920730591, |
|
"eval_runtime": 863.1851, |
|
"eval_samples_per_second": 894.153, |
|
"eval_steps_per_second": 55.885, |
|
"step": 840000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.651333333333333e-07, |
|
"loss": 2.1293, |
|
"step": 848000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_loss": 2.013720989227295, |
|
"eval_runtime": 863.4541, |
|
"eval_samples_per_second": 893.875, |
|
"eval_steps_per_second": 55.867, |
|
"step": 848000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_loss": 2.018291711807251, |
|
"eval_runtime": 877.742, |
|
"eval_samples_per_second": 879.324, |
|
"eval_steps_per_second": 54.958, |
|
"step": 856000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.624e-07, |
|
"loss": 2.1358, |
|
"step": 864000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_loss": 2.018421173095703, |
|
"eval_runtime": 873.6563, |
|
"eval_samples_per_second": 883.437, |
|
"eval_steps_per_second": 55.215, |
|
"step": 864000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_loss": 2.017104387283325, |
|
"eval_runtime": 874.261, |
|
"eval_samples_per_second": 882.826, |
|
"eval_steps_per_second": 55.177, |
|
"step": 872000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.596666666666667e-07, |
|
"loss": 2.1296, |
|
"step": 880000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_loss": 2.0179190635681152, |
|
"eval_runtime": 874.7051, |
|
"eval_samples_per_second": 882.377, |
|
"eval_steps_per_second": 55.149, |
|
"step": 880000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_loss": 2.015188455581665, |
|
"eval_runtime": 875.6595, |
|
"eval_samples_per_second": 881.416, |
|
"eval_steps_per_second": 55.089, |
|
"step": 888000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 2.5693333333333333e-07, |
|
"loss": 2.1319, |
|
"step": 896000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_loss": 2.0173678398132324, |
|
"eval_runtime": 877.4749, |
|
"eval_samples_per_second": 879.592, |
|
"eval_steps_per_second": 54.975, |
|
"step": 896000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_loss": 2.020580291748047, |
|
"eval_runtime": 874.219, |
|
"eval_samples_per_second": 882.868, |
|
"eval_steps_per_second": 55.18, |
|
"step": 904000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.542e-07, |
|
"loss": 2.1344, |
|
"step": 912000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 2.0178616046905518, |
|
"eval_runtime": 871.4372, |
|
"eval_samples_per_second": 885.686, |
|
"eval_steps_per_second": 55.356, |
|
"step": 912000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 2.0153729915618896, |
|
"eval_runtime": 874.8229, |
|
"eval_samples_per_second": 882.259, |
|
"eval_steps_per_second": 55.141, |
|
"step": 920000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 2.5146666666666664e-07, |
|
"loss": 2.1352, |
|
"step": 928000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"eval_loss": 2.018483877182007, |
|
"eval_runtime": 876.0163, |
|
"eval_samples_per_second": 881.057, |
|
"eval_steps_per_second": 55.066, |
|
"step": 928000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_loss": 2.016976833343506, |
|
"eval_runtime": 878.2619, |
|
"eval_samples_per_second": 878.804, |
|
"eval_steps_per_second": 54.926, |
|
"step": 936000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 2.4873333333333335e-07, |
|
"loss": 2.1336, |
|
"step": 944000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"eval_loss": 2.016388416290283, |
|
"eval_runtime": 877.6593, |
|
"eval_samples_per_second": 879.407, |
|
"eval_steps_per_second": 54.963, |
|
"step": 944000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"eval_loss": 2.013742208480835, |
|
"eval_runtime": 871.0407, |
|
"eval_samples_per_second": 886.09, |
|
"eval_steps_per_second": 55.381, |
|
"step": 952000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 2.46e-07, |
|
"loss": 2.1315, |
|
"step": 960000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"eval_loss": 2.0176327228546143, |
|
"eval_runtime": 877.004, |
|
"eval_samples_per_second": 880.064, |
|
"eval_steps_per_second": 55.004, |
|
"step": 960000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"eval_loss": 2.0155346393585205, |
|
"eval_runtime": 872.5922, |
|
"eval_samples_per_second": 884.514, |
|
"eval_steps_per_second": 55.282, |
|
"step": 968000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 2.4326666666666666e-07, |
|
"loss": 2.1255, |
|
"step": 976000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"eval_loss": 2.014533281326294, |
|
"eval_runtime": 871.4139, |
|
"eval_samples_per_second": 885.71, |
|
"eval_steps_per_second": 55.357, |
|
"step": 976000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_loss": 2.023314952850342, |
|
"eval_runtime": 879.3224, |
|
"eval_samples_per_second": 877.744, |
|
"eval_steps_per_second": 54.859, |
|
"step": 984000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 2.405333333333333e-07, |
|
"loss": 2.1249, |
|
"step": 992000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"eval_loss": 2.0147762298583984, |
|
"eval_runtime": 866.8225, |
|
"eval_samples_per_second": 890.401, |
|
"eval_steps_per_second": 55.65, |
|
"step": 992000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"eval_loss": 2.016249895095825, |
|
"eval_runtime": 867.6683, |
|
"eval_samples_per_second": 889.533, |
|
"eval_steps_per_second": 55.596, |
|
"step": 1000000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 2.3779999999999997e-07, |
|
"loss": 2.123, |
|
"step": 1008000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_loss": 2.017381191253662, |
|
"eval_runtime": 868.2141, |
|
"eval_samples_per_second": 888.974, |
|
"eval_steps_per_second": 55.561, |
|
"step": 1008000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_loss": 2.015009880065918, |
|
"eval_runtime": 865.5792, |
|
"eval_samples_per_second": 891.68, |
|
"eval_steps_per_second": 55.73, |
|
"step": 1016000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 2.3506666666666668e-07, |
|
"loss": 2.1263, |
|
"step": 1024000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"eval_loss": 2.0160863399505615, |
|
"eval_runtime": 869.2474, |
|
"eval_samples_per_second": 887.917, |
|
"eval_steps_per_second": 55.495, |
|
"step": 1024000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"eval_loss": 2.0128889083862305, |
|
"eval_runtime": 866.9502, |
|
"eval_samples_per_second": 890.27, |
|
"eval_steps_per_second": 55.642, |
|
"step": 1032000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 2.3233333333333334e-07, |
|
"loss": 2.1232, |
|
"step": 1040000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"eval_loss": 2.0166754722595215, |
|
"eval_runtime": 901.7962, |
|
"eval_samples_per_second": 855.87, |
|
"eval_steps_per_second": 53.492, |
|
"step": 1040000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_loss": 2.012477397918701, |
|
"eval_runtime": 911.6669, |
|
"eval_samples_per_second": 846.603, |
|
"eval_steps_per_second": 52.913, |
|
"step": 1048000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 2.2960000000000002e-07, |
|
"loss": 2.1168, |
|
"step": 1056000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_loss": 2.0113391876220703, |
|
"eval_runtime": 912.2557, |
|
"eval_samples_per_second": 846.057, |
|
"eval_steps_per_second": 52.879, |
|
"step": 1056000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"eval_loss": 2.013575792312622, |
|
"eval_runtime": 901.3301, |
|
"eval_samples_per_second": 856.312, |
|
"eval_steps_per_second": 53.52, |
|
"step": 1064000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 2.2686666666666667e-07, |
|
"loss": 2.1307, |
|
"step": 1072000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_loss": 2.014338254928589, |
|
"eval_runtime": 891.2807, |
|
"eval_samples_per_second": 865.967, |
|
"eval_steps_per_second": 54.123, |
|
"step": 1072000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"eval_loss": 2.0166401863098145, |
|
"eval_runtime": 886.4005, |
|
"eval_samples_per_second": 870.735, |
|
"eval_steps_per_second": 54.421, |
|
"step": 1080000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 2.2413333333333333e-07, |
|
"loss": 2.1336, |
|
"step": 1088000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"eval_loss": 2.0103185176849365, |
|
"eval_runtime": 886.4458, |
|
"eval_samples_per_second": 870.691, |
|
"eval_steps_per_second": 54.418, |
|
"step": 1088000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_loss": 2.0129764080047607, |
|
"eval_runtime": 890.355, |
|
"eval_samples_per_second": 866.868, |
|
"eval_steps_per_second": 54.18, |
|
"step": 1096000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 2.214e-07, |
|
"loss": 2.1227, |
|
"step": 1104000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_loss": 2.012451648712158, |
|
"eval_runtime": 895.3428, |
|
"eval_samples_per_second": 862.039, |
|
"eval_steps_per_second": 53.878, |
|
"step": 1104000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"eval_loss": 2.0183231830596924, |
|
"eval_runtime": 888.3913, |
|
"eval_samples_per_second": 868.784, |
|
"eval_steps_per_second": 54.299, |
|
"step": 1112000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 2.1866666666666667e-07, |
|
"loss": 2.1223, |
|
"step": 1120000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"eval_loss": 2.014848470687866, |
|
"eval_runtime": 889.5583, |
|
"eval_samples_per_second": 867.644, |
|
"eval_steps_per_second": 54.228, |
|
"step": 1120000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"eval_loss": 2.0147109031677246, |
|
"eval_runtime": 884.3146, |
|
"eval_samples_per_second": 872.789, |
|
"eval_steps_per_second": 54.55, |
|
"step": 1128000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 2.1593333333333332e-07, |
|
"loss": 2.1289, |
|
"step": 1136000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"eval_loss": 2.0108699798583984, |
|
"eval_runtime": 888.3584, |
|
"eval_samples_per_second": 868.816, |
|
"eval_steps_per_second": 54.301, |
|
"step": 1136000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"eval_loss": 2.0163819789886475, |
|
"eval_runtime": 887.4195, |
|
"eval_samples_per_second": 869.735, |
|
"eval_steps_per_second": 54.359, |
|
"step": 1144000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 2.132e-07, |
|
"loss": 2.1278, |
|
"step": 1152000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_loss": 2.0163345336914062, |
|
"eval_runtime": 886.1604, |
|
"eval_samples_per_second": 870.971, |
|
"eval_steps_per_second": 54.436, |
|
"step": 1152000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"eval_loss": 2.012103319168091, |
|
"eval_runtime": 889.5174, |
|
"eval_samples_per_second": 867.684, |
|
"eval_steps_per_second": 54.231, |
|
"step": 1160000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 2.1046666666666666e-07, |
|
"loss": 2.1261, |
|
"step": 1168000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"eval_loss": 2.011343240737915, |
|
"eval_runtime": 890.9332, |
|
"eval_samples_per_second": 866.305, |
|
"eval_steps_per_second": 54.144, |
|
"step": 1168000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"eval_loss": 2.0137104988098145, |
|
"eval_runtime": 883.4659, |
|
"eval_samples_per_second": 873.627, |
|
"eval_steps_per_second": 54.602, |
|
"step": 1176000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 2.0773333333333334e-07, |
|
"loss": 2.126, |
|
"step": 1184000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"eval_loss": 2.015174627304077, |
|
"eval_runtime": 885.9678, |
|
"eval_samples_per_second": 871.16, |
|
"eval_steps_per_second": 54.448, |
|
"step": 1184000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_loss": 2.010411500930786, |
|
"eval_runtime": 888.6748, |
|
"eval_samples_per_second": 868.507, |
|
"eval_steps_per_second": 54.282, |
|
"step": 1192000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 2.05e-07, |
|
"loss": 2.1235, |
|
"step": 1200000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_loss": 2.013165235519409, |
|
"eval_runtime": 888.6503, |
|
"eval_samples_per_second": 868.531, |
|
"eval_steps_per_second": 54.283, |
|
"step": 1200000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"eval_loss": 2.0113847255706787, |
|
"eval_runtime": 884.261, |
|
"eval_samples_per_second": 872.842, |
|
"eval_steps_per_second": 54.553, |
|
"step": 1208000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 2.0226666666666668e-07, |
|
"loss": 2.1229, |
|
"step": 1216000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"eval_loss": 2.010532855987549, |
|
"eval_runtime": 887.5065, |
|
"eval_samples_per_second": 869.65, |
|
"eval_steps_per_second": 54.353, |
|
"step": 1216000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"eval_loss": 2.0130858421325684, |
|
"eval_runtime": 881.1399, |
|
"eval_samples_per_second": 875.934, |
|
"eval_steps_per_second": 54.746, |
|
"step": 1224000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 1.9953333333333333e-07, |
|
"loss": 2.1213, |
|
"step": 1232000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"eval_loss": 2.0141072273254395, |
|
"eval_runtime": 882.2467, |
|
"eval_samples_per_second": 874.835, |
|
"eval_steps_per_second": 54.677, |
|
"step": 1232000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_loss": 2.010868549346924, |
|
"eval_runtime": 881.7078, |
|
"eval_samples_per_second": 875.369, |
|
"eval_steps_per_second": 54.711, |
|
"step": 1240000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 1.968e-07, |
|
"loss": 2.1185, |
|
"step": 1248000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"eval_loss": 2.0129363536834717, |
|
"eval_runtime": 886.2455, |
|
"eval_samples_per_second": 870.887, |
|
"eval_steps_per_second": 54.431, |
|
"step": 1248000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_loss": 2.011003017425537, |
|
"eval_runtime": 888.1974, |
|
"eval_samples_per_second": 868.974, |
|
"eval_steps_per_second": 54.311, |
|
"step": 1256000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 1.9406666666666667e-07, |
|
"loss": 2.131, |
|
"step": 1264000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"eval_loss": 2.01228928565979, |
|
"eval_runtime": 884.9282, |
|
"eval_samples_per_second": 872.184, |
|
"eval_steps_per_second": 54.512, |
|
"step": 1264000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"eval_loss": 2.0104737281799316, |
|
"eval_runtime": 881.1611, |
|
"eval_samples_per_second": 875.912, |
|
"eval_steps_per_second": 54.745, |
|
"step": 1272000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 1.9133333333333333e-07, |
|
"loss": 2.1141, |
|
"step": 1280000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_loss": 2.010425090789795, |
|
"eval_runtime": 882.3806, |
|
"eval_samples_per_second": 874.702, |
|
"eval_steps_per_second": 54.669, |
|
"step": 1280000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"eval_loss": 2.015007734298706, |
|
"eval_runtime": 879.3909, |
|
"eval_samples_per_second": 877.676, |
|
"eval_steps_per_second": 54.855, |
|
"step": 1288000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 1.886e-07, |
|
"loss": 2.1219, |
|
"step": 1296000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"eval_loss": 2.0161073207855225, |
|
"eval_runtime": 879.4904, |
|
"eval_samples_per_second": 877.576, |
|
"eval_steps_per_second": 54.849, |
|
"step": 1296000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"eval_loss": 2.00930118560791, |
|
"eval_runtime": 882.5935, |
|
"eval_samples_per_second": 874.491, |
|
"eval_steps_per_second": 54.656, |
|
"step": 1304000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.8586666666666666e-07, |
|
"loss": 2.1203, |
|
"step": 1312000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"eval_loss": 2.0104291439056396, |
|
"eval_runtime": 882.9969, |
|
"eval_samples_per_second": 874.091, |
|
"eval_steps_per_second": 54.631, |
|
"step": 1312000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_loss": 2.0144429206848145, |
|
"eval_runtime": 878.5955, |
|
"eval_samples_per_second": 878.47, |
|
"eval_steps_per_second": 54.905, |
|
"step": 1320000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 1.8313333333333332e-07, |
|
"loss": 2.1264, |
|
"step": 1328000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"eval_loss": 2.0084986686706543, |
|
"eval_runtime": 878.8817, |
|
"eval_samples_per_second": 878.184, |
|
"eval_steps_per_second": 54.887, |
|
"step": 1328000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"eval_loss": 2.0118672847747803, |
|
"eval_runtime": 880.8514, |
|
"eval_samples_per_second": 876.22, |
|
"eval_steps_per_second": 54.764, |
|
"step": 1336000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 1.804e-07, |
|
"loss": 2.1194, |
|
"step": 1344000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"eval_loss": 2.011784076690674, |
|
"eval_runtime": 878.874, |
|
"eval_samples_per_second": 878.192, |
|
"eval_steps_per_second": 54.887, |
|
"step": 1344000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_loss": 2.0109827518463135, |
|
"eval_runtime": 893.715, |
|
"eval_samples_per_second": 863.609, |
|
"eval_steps_per_second": 53.976, |
|
"step": 1352000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 1.7766666666666666e-07, |
|
"loss": 2.117, |
|
"step": 1360000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_loss": 2.014660596847534, |
|
"eval_runtime": 915.8924, |
|
"eval_samples_per_second": 842.697, |
|
"eval_steps_per_second": 52.669, |
|
"step": 1360000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"eval_loss": 2.013535261154175, |
|
"eval_runtime": 909.1816, |
|
"eval_samples_per_second": 848.917, |
|
"eval_steps_per_second": 53.058, |
|
"step": 1368000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 1.7493333333333334e-07, |
|
"loss": 2.1311, |
|
"step": 1376000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_loss": 2.0076611042022705, |
|
"eval_runtime": 909.3083, |
|
"eval_samples_per_second": 848.799, |
|
"eval_steps_per_second": 53.05, |
|
"step": 1376000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"eval_loss": 2.006574869155884, |
|
"eval_runtime": 904.8344, |
|
"eval_samples_per_second": 852.996, |
|
"eval_steps_per_second": 53.313, |
|
"step": 1384000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 1.722e-07, |
|
"loss": 2.1215, |
|
"step": 1392000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_loss": 2.008929967880249, |
|
"eval_runtime": 903.4488, |
|
"eval_samples_per_second": 854.304, |
|
"eval_steps_per_second": 53.394, |
|
"step": 1392000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_loss": 2.0118260383605957, |
|
"eval_runtime": 913.6278, |
|
"eval_samples_per_second": 844.786, |
|
"eval_steps_per_second": 52.799, |
|
"step": 1400000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 1.6946666666666668e-07, |
|
"loss": 2.1185, |
|
"step": 1408000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"eval_loss": 2.0105414390563965, |
|
"eval_runtime": 907.6551, |
|
"eval_samples_per_second": 850.345, |
|
"eval_steps_per_second": 53.147, |
|
"step": 1408000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"eval_loss": 2.012268304824829, |
|
"eval_runtime": 903.9952, |
|
"eval_samples_per_second": 853.788, |
|
"eval_steps_per_second": 53.362, |
|
"step": 1416000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 1.6673333333333333e-07, |
|
"loss": 2.1284, |
|
"step": 1424000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_loss": 2.0133912563323975, |
|
"eval_runtime": 910.6028, |
|
"eval_samples_per_second": 847.592, |
|
"eval_steps_per_second": 52.975, |
|
"step": 1424000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"eval_loss": 2.009307861328125, |
|
"eval_runtime": 904.2587, |
|
"eval_samples_per_second": 853.539, |
|
"eval_steps_per_second": 53.346, |
|
"step": 1432000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 1.64e-07, |
|
"loss": 2.1174, |
|
"step": 1440000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"eval_loss": 2.0101728439331055, |
|
"eval_runtime": 912.2693, |
|
"eval_samples_per_second": 846.044, |
|
"eval_steps_per_second": 52.878, |
|
"step": 1440000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"eval_loss": 2.00759220123291, |
|
"eval_runtime": 910.2393, |
|
"eval_samples_per_second": 847.931, |
|
"eval_steps_per_second": 52.996, |
|
"step": 1448000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 1.6126666666666667e-07, |
|
"loss": 2.1108, |
|
"step": 1456000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"eval_loss": 2.00740909576416, |
|
"eval_runtime": 914.6796, |
|
"eval_samples_per_second": 843.815, |
|
"eval_steps_per_second": 52.739, |
|
"step": 1456000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_loss": 2.007056474685669, |
|
"eval_runtime": 908.0025, |
|
"eval_samples_per_second": 850.02, |
|
"eval_steps_per_second": 53.127, |
|
"step": 1464000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 1.5853333333333332e-07, |
|
"loss": 2.1252, |
|
"step": 1472000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"eval_loss": 2.0092082023620605, |
|
"eval_runtime": 905.6872, |
|
"eval_samples_per_second": 852.193, |
|
"eval_steps_per_second": 53.262, |
|
"step": 1472000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"eval_loss": 2.007967233657837, |
|
"eval_runtime": 910.9272, |
|
"eval_samples_per_second": 847.291, |
|
"eval_steps_per_second": 52.956, |
|
"step": 1480000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 1.558e-07, |
|
"loss": 2.121, |
|
"step": 1488000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"eval_loss": 2.0052874088287354, |
|
"eval_runtime": 908.8472, |
|
"eval_samples_per_second": 849.23, |
|
"eval_steps_per_second": 53.077, |
|
"step": 1488000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"eval_loss": 2.0071661472320557, |
|
"eval_runtime": 907.693, |
|
"eval_samples_per_second": 850.31, |
|
"eval_steps_per_second": 53.145, |
|
"step": 1496000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 1.5306666666666666e-07, |
|
"loss": 2.1178, |
|
"step": 1504000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"eval_loss": 2.0059070587158203, |
|
"eval_runtime": 908.356, |
|
"eval_samples_per_second": 849.689, |
|
"eval_steps_per_second": 53.106, |
|
"step": 1504000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"eval_loss": 2.00836443901062, |
|
"eval_runtime": 908.0246, |
|
"eval_samples_per_second": 849.999, |
|
"eval_steps_per_second": 53.125, |
|
"step": 1512000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 1.5033333333333332e-07, |
|
"loss": 2.1154, |
|
"step": 1520000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"eval_loss": 2.0105550289154053, |
|
"eval_runtime": 903.6608, |
|
"eval_samples_per_second": 854.104, |
|
"eval_steps_per_second": 53.382, |
|
"step": 1520000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"eval_loss": 2.0116729736328125, |
|
"eval_runtime": 909.1515, |
|
"eval_samples_per_second": 848.945, |
|
"eval_steps_per_second": 53.059, |
|
"step": 1528000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 1.476e-07, |
|
"loss": 2.1214, |
|
"step": 1536000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"eval_loss": 2.006955146789551, |
|
"eval_runtime": 907.2355, |
|
"eval_samples_per_second": 850.738, |
|
"eval_steps_per_second": 53.171, |
|
"step": 1536000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"eval_loss": 2.0078775882720947, |
|
"eval_runtime": 908.5609, |
|
"eval_samples_per_second": 849.497, |
|
"eval_steps_per_second": 53.094, |
|
"step": 1544000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 1.4486666666666665e-07, |
|
"loss": 2.1175, |
|
"step": 1552000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"eval_loss": 2.0101876258850098, |
|
"eval_runtime": 901.3076, |
|
"eval_samples_per_second": 856.334, |
|
"eval_steps_per_second": 53.521, |
|
"step": 1552000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_loss": 2.009697675704956, |
|
"eval_runtime": 906.1011, |
|
"eval_samples_per_second": 851.803, |
|
"eval_steps_per_second": 53.238, |
|
"step": 1560000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 1.4213333333333334e-07, |
|
"loss": 2.1206, |
|
"step": 1568000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"eval_loss": 2.0092358589172363, |
|
"eval_runtime": 901.2376, |
|
"eval_samples_per_second": 856.4, |
|
"eval_steps_per_second": 53.525, |
|
"step": 1568000 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"eval_loss": 2.005527973175049, |
|
"eval_runtime": 896.3075, |
|
"eval_samples_per_second": 861.111, |
|
"eval_steps_per_second": 53.82, |
|
"step": 1576000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 1.3940000000000002e-07, |
|
"loss": 2.1302, |
|
"step": 1584000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"eval_loss": 2.008502244949341, |
|
"eval_runtime": 899.3251, |
|
"eval_samples_per_second": 858.221, |
|
"eval_steps_per_second": 53.639, |
|
"step": 1584000 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"eval_loss": 2.0109806060791016, |
|
"eval_runtime": 906.7205, |
|
"eval_samples_per_second": 851.222, |
|
"eval_steps_per_second": 53.202, |
|
"step": 1592000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 1.3666666666666665e-07, |
|
"loss": 2.1177, |
|
"step": 1600000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_loss": 2.006521701812744, |
|
"eval_runtime": 898.4764, |
|
"eval_samples_per_second": 859.032, |
|
"eval_steps_per_second": 53.69, |
|
"step": 1600000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_loss": 2.0131704807281494, |
|
"eval_runtime": 906.0839, |
|
"eval_samples_per_second": 851.82, |
|
"eval_steps_per_second": 53.239, |
|
"step": 1608000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 1.3393333333333333e-07, |
|
"loss": 2.1101, |
|
"step": 1616000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"eval_loss": 2.0085511207580566, |
|
"eval_runtime": 896.2709, |
|
"eval_samples_per_second": 861.146, |
|
"eval_steps_per_second": 53.822, |
|
"step": 1616000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"eval_loss": 2.0077245235443115, |
|
"eval_runtime": 897.3988, |
|
"eval_samples_per_second": 860.064, |
|
"eval_steps_per_second": 53.754, |
|
"step": 1624000 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 1.312e-07, |
|
"loss": 2.1194, |
|
"step": 1632000 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"eval_loss": 2.008148431777954, |
|
"eval_runtime": 896.5575, |
|
"eval_samples_per_second": 860.871, |
|
"eval_steps_per_second": 53.805, |
|
"step": 1632000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"eval_loss": 2.008798122406006, |
|
"eval_runtime": 897.2787, |
|
"eval_samples_per_second": 860.179, |
|
"eval_steps_per_second": 53.761, |
|
"step": 1640000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 1.2846666666666667e-07, |
|
"loss": 2.1167, |
|
"step": 1648000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_loss": 2.002239942550659, |
|
"eval_runtime": 893.5655, |
|
"eval_samples_per_second": 863.753, |
|
"eval_steps_per_second": 53.985, |
|
"step": 1648000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"eval_loss": 2.007662296295166, |
|
"eval_runtime": 895.7141, |
|
"eval_samples_per_second": 861.681, |
|
"eval_steps_per_second": 53.855, |
|
"step": 1656000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.2573333333333332e-07, |
|
"loss": 2.1083, |
|
"step": 1664000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"eval_loss": 2.0065953731536865, |
|
"eval_runtime": 890.9713, |
|
"eval_samples_per_second": 866.268, |
|
"eval_steps_per_second": 54.142, |
|
"step": 1664000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"eval_loss": 2.0137040615081787, |
|
"eval_runtime": 885.7627, |
|
"eval_samples_per_second": 871.362, |
|
"eval_steps_per_second": 54.46, |
|
"step": 1672000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.23e-07, |
|
"loss": 2.1232, |
|
"step": 1680000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"eval_loss": 2.0067014694213867, |
|
"eval_runtime": 890.51, |
|
"eval_samples_per_second": 866.717, |
|
"eval_steps_per_second": 54.17, |
|
"step": 1680000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"eval_loss": 2.0039150714874268, |
|
"eval_runtime": 889.3586, |
|
"eval_samples_per_second": 867.839, |
|
"eval_steps_per_second": 54.24, |
|
"step": 1688000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.2026666666666666e-07, |
|
"loss": 2.1212, |
|
"step": 1696000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"eval_loss": 2.008970022201538, |
|
"eval_runtime": 893.785, |
|
"eval_samples_per_second": 863.541, |
|
"eval_steps_per_second": 53.972, |
|
"step": 1696000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_loss": 2.0079498291015625, |
|
"eval_runtime": 882.5613, |
|
"eval_samples_per_second": 874.523, |
|
"eval_steps_per_second": 54.658, |
|
"step": 1704000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.1753333333333334e-07, |
|
"loss": 2.1246, |
|
"step": 1712000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"eval_loss": 2.0082814693450928, |
|
"eval_runtime": 886.133, |
|
"eval_samples_per_second": 870.998, |
|
"eval_steps_per_second": 54.438, |
|
"step": 1712000 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"eval_loss": 2.003898859024048, |
|
"eval_runtime": 887.1853, |
|
"eval_samples_per_second": 869.965, |
|
"eval_steps_per_second": 54.373, |
|
"step": 1720000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.1480000000000001e-07, |
|
"loss": 2.1129, |
|
"step": 1728000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_loss": 2.0069074630737305, |
|
"eval_runtime": 891.3907, |
|
"eval_samples_per_second": 865.86, |
|
"eval_steps_per_second": 54.117, |
|
"step": 1728000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_loss": 2.007922410964966, |
|
"eval_runtime": 884.1175, |
|
"eval_samples_per_second": 872.984, |
|
"eval_steps_per_second": 54.562, |
|
"step": 1736000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.1206666666666666e-07, |
|
"loss": 2.1209, |
|
"step": 1744000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_loss": 2.00584077835083, |
|
"eval_runtime": 888.6359, |
|
"eval_samples_per_second": 868.545, |
|
"eval_steps_per_second": 54.284, |
|
"step": 1744000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"eval_loss": 2.0071957111358643, |
|
"eval_runtime": 891.8674, |
|
"eval_samples_per_second": 865.398, |
|
"eval_steps_per_second": 54.088, |
|
"step": 1752000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.0933333333333333e-07, |
|
"loss": 2.1209, |
|
"step": 1760000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"eval_loss": 2.0067615509033203, |
|
"eval_runtime": 884.8141, |
|
"eval_samples_per_second": 872.296, |
|
"eval_steps_per_second": 54.519, |
|
"step": 1760000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"eval_loss": 2.0078628063201904, |
|
"eval_runtime": 888.3025, |
|
"eval_samples_per_second": 868.871, |
|
"eval_steps_per_second": 54.305, |
|
"step": 1768000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.066e-07, |
|
"loss": 2.1184, |
|
"step": 1776000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"eval_loss": 2.0036442279815674, |
|
"eval_runtime": 887.5766, |
|
"eval_samples_per_second": 869.581, |
|
"eval_steps_per_second": 54.349, |
|
"step": 1776000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"eval_loss": 2.0064985752105713, |
|
"eval_runtime": 890.3705, |
|
"eval_samples_per_second": 866.853, |
|
"eval_steps_per_second": 54.179, |
|
"step": 1784000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.0386666666666667e-07, |
|
"loss": 2.1065, |
|
"step": 1792000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"eval_loss": 2.007737159729004, |
|
"eval_runtime": 889.1985, |
|
"eval_samples_per_second": 867.995, |
|
"eval_steps_per_second": 54.25, |
|
"step": 1792000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"eval_loss": 2.006197452545166, |
|
"eval_runtime": 889.8901, |
|
"eval_samples_per_second": 867.321, |
|
"eval_steps_per_second": 54.208, |
|
"step": 1800000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.0113333333333334e-07, |
|
"loss": 2.109, |
|
"step": 1808000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_loss": 2.0090434551239014, |
|
"eval_runtime": 888.3297, |
|
"eval_samples_per_second": 868.844, |
|
"eval_steps_per_second": 54.303, |
|
"step": 1808000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"eval_loss": 2.012356758117676, |
|
"eval_runtime": 893.3256, |
|
"eval_samples_per_second": 863.985, |
|
"eval_steps_per_second": 53.999, |
|
"step": 1816000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 9.84e-08, |
|
"loss": 2.1081, |
|
"step": 1824000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_loss": 2.0065596103668213, |
|
"eval_runtime": 893.6122, |
|
"eval_samples_per_second": 863.708, |
|
"eval_steps_per_second": 53.982, |
|
"step": 1824000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 2.008080005645752, |
|
"eval_runtime": 891.4247, |
|
"eval_samples_per_second": 865.828, |
|
"eval_steps_per_second": 54.115, |
|
"step": 1832000 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 9.566666666666666e-08, |
|
"loss": 2.1151, |
|
"step": 1840000 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"eval_loss": 2.008512258529663, |
|
"eval_runtime": 884.9554, |
|
"eval_samples_per_second": 872.157, |
|
"eval_steps_per_second": 54.51, |
|
"step": 1840000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"eval_loss": 2.0054173469543457, |
|
"eval_runtime": 886.9049, |
|
"eval_samples_per_second": 870.24, |
|
"eval_steps_per_second": 54.39, |
|
"step": 1848000 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 9.293333333333333e-08, |
|
"loss": 2.1178, |
|
"step": 1856000 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"eval_loss": 2.005777359008789, |
|
"eval_runtime": 886.5315, |
|
"eval_samples_per_second": 870.606, |
|
"eval_steps_per_second": 54.413, |
|
"step": 1856000 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"eval_loss": 2.0048415660858154, |
|
"eval_runtime": 893.5519, |
|
"eval_samples_per_second": 863.766, |
|
"eval_steps_per_second": 53.986, |
|
"step": 1864000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 9.02e-08, |
|
"loss": 2.1035, |
|
"step": 1872000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"eval_loss": 2.004007339477539, |
|
"eval_runtime": 890.5358, |
|
"eval_samples_per_second": 866.692, |
|
"eval_steps_per_second": 54.169, |
|
"step": 1872000 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"eval_loss": 2.0059244632720947, |
|
"eval_runtime": 887.0437, |
|
"eval_samples_per_second": 870.104, |
|
"eval_steps_per_second": 54.382, |
|
"step": 1880000 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 8.746666666666667e-08, |
|
"loss": 2.1197, |
|
"step": 1888000 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"eval_loss": 2.0071017742156982, |
|
"eval_runtime": 889.191, |
|
"eval_samples_per_second": 868.003, |
|
"eval_steps_per_second": 54.25, |
|
"step": 1888000 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"eval_loss": 2.005682945251465, |
|
"eval_runtime": 888.8818, |
|
"eval_samples_per_second": 868.304, |
|
"eval_steps_per_second": 54.269, |
|
"step": 1896000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 8.473333333333334e-08, |
|
"loss": 2.1143, |
|
"step": 1904000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"eval_loss": 2.005943536758423, |
|
"eval_runtime": 884.5437, |
|
"eval_samples_per_second": 872.563, |
|
"eval_steps_per_second": 54.535, |
|
"step": 1904000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"eval_loss": 2.0042991638183594, |
|
"eval_runtime": 884.1715, |
|
"eval_samples_per_second": 872.93, |
|
"eval_steps_per_second": 54.558, |
|
"step": 1912000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 8.2e-08, |
|
"loss": 2.1082, |
|
"step": 1920000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"eval_loss": 2.0067648887634277, |
|
"eval_runtime": 885.4828, |
|
"eval_samples_per_second": 871.637, |
|
"eval_steps_per_second": 54.478, |
|
"step": 1920000 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"eval_loss": 2.0057313442230225, |
|
"eval_runtime": 887.8665, |
|
"eval_samples_per_second": 869.297, |
|
"eval_steps_per_second": 54.331, |
|
"step": 1928000 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 7.926666666666666e-08, |
|
"loss": 2.1202, |
|
"step": 1936000 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"eval_loss": 2.007241725921631, |
|
"eval_runtime": 885.5971, |
|
"eval_samples_per_second": 871.525, |
|
"eval_steps_per_second": 54.471, |
|
"step": 1936000 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"eval_loss": 2.0057430267333984, |
|
"eval_runtime": 888.4045, |
|
"eval_samples_per_second": 868.771, |
|
"eval_steps_per_second": 54.298, |
|
"step": 1944000 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 7.653333333333333e-08, |
|
"loss": 2.1138, |
|
"step": 1952000 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"eval_loss": 2.0051097869873047, |
|
"eval_runtime": 889.7536, |
|
"eval_samples_per_second": 867.454, |
|
"eval_steps_per_second": 54.216, |
|
"step": 1952000 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"eval_loss": 2.008528709411621, |
|
"eval_runtime": 887.8548, |
|
"eval_samples_per_second": 869.309, |
|
"eval_steps_per_second": 54.332, |
|
"step": 1960000 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 7.38e-08, |
|
"loss": 2.1082, |
|
"step": 1968000 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"eval_loss": 2.007629871368408, |
|
"eval_runtime": 886.2101, |
|
"eval_samples_per_second": 870.922, |
|
"eval_steps_per_second": 54.433, |
|
"step": 1968000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"eval_loss": 2.0076658725738525, |
|
"eval_runtime": 886.4111, |
|
"eval_samples_per_second": 870.725, |
|
"eval_steps_per_second": 54.421, |
|
"step": 1976000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 7.106666666666667e-08, |
|
"loss": 2.1084, |
|
"step": 1984000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"eval_loss": 2.001997470855713, |
|
"eval_runtime": 885.1567, |
|
"eval_samples_per_second": 871.959, |
|
"eval_steps_per_second": 54.498, |
|
"step": 1984000 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"eval_loss": 2.005009651184082, |
|
"eval_runtime": 889.5629, |
|
"eval_samples_per_second": 867.64, |
|
"eval_steps_per_second": 54.228, |
|
"step": 1992000 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 6.833333333333332e-08, |
|
"loss": 2.1151, |
|
"step": 2000000 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"eval_loss": 2.0065817832946777, |
|
"eval_runtime": 885.7641, |
|
"eval_samples_per_second": 871.361, |
|
"eval_steps_per_second": 54.46, |
|
"step": 2000000 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"eval_loss": 2.003136396408081, |
|
"eval_runtime": 886.578, |
|
"eval_samples_per_second": 870.561, |
|
"eval_steps_per_second": 54.41, |
|
"step": 2008000 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 6.56e-08, |
|
"loss": 2.1141, |
|
"step": 2016000 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"eval_loss": 2.0128238201141357, |
|
"eval_runtime": 891.0219, |
|
"eval_samples_per_second": 866.219, |
|
"eval_steps_per_second": 54.139, |
|
"step": 2016000 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"eval_loss": 2.0021839141845703, |
|
"eval_runtime": 895.8435, |
|
"eval_samples_per_second": 861.557, |
|
"eval_steps_per_second": 53.848, |
|
"step": 2024000 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 6.286666666666666e-08, |
|
"loss": 2.1129, |
|
"step": 2032000 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"eval_loss": 2.0065131187438965, |
|
"eval_runtime": 890.2528, |
|
"eval_samples_per_second": 866.967, |
|
"eval_steps_per_second": 54.186, |
|
"step": 2032000 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"eval_loss": 2.005363941192627, |
|
"eval_runtime": 890.9681, |
|
"eval_samples_per_second": 866.271, |
|
"eval_steps_per_second": 54.142, |
|
"step": 2040000 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 6.013333333333333e-08, |
|
"loss": 2.1164, |
|
"step": 2048000 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"eval_loss": 2.0038933753967285, |
|
"eval_runtime": 892.3995, |
|
"eval_samples_per_second": 864.882, |
|
"eval_steps_per_second": 54.055, |
|
"step": 2048000 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"eval_loss": 2.003117561340332, |
|
"eval_runtime": 894.495, |
|
"eval_samples_per_second": 862.856, |
|
"eval_steps_per_second": 53.929, |
|
"step": 2056000 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 5.7400000000000004e-08, |
|
"loss": 2.1121, |
|
"step": 2064000 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"eval_loss": 2.0101029872894287, |
|
"eval_runtime": 886.6646, |
|
"eval_samples_per_second": 870.476, |
|
"eval_steps_per_second": 54.405, |
|
"step": 2064000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"eval_loss": 2.0098650455474854, |
|
"eval_runtime": 887.3882, |
|
"eval_samples_per_second": 869.766, |
|
"eval_steps_per_second": 54.361, |
|
"step": 2072000 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 5.4666666666666666e-08, |
|
"loss": 2.1071, |
|
"step": 2080000 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"eval_loss": 2.0041701793670654, |
|
"eval_runtime": 891.5578, |
|
"eval_samples_per_second": 865.698, |
|
"eval_steps_per_second": 54.106, |
|
"step": 2080000 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"eval_loss": 2.0030367374420166, |
|
"eval_runtime": 886.7055, |
|
"eval_samples_per_second": 870.436, |
|
"eval_steps_per_second": 54.403, |
|
"step": 2088000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 5.1933333333333335e-08, |
|
"loss": 2.1094, |
|
"step": 2096000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"eval_loss": 2.00482439994812, |
|
"eval_runtime": 887.8886, |
|
"eval_samples_per_second": 869.276, |
|
"eval_steps_per_second": 54.33, |
|
"step": 2096000 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"eval_loss": 2.004595994949341, |
|
"eval_runtime": 887.4455, |
|
"eval_samples_per_second": 869.71, |
|
"eval_steps_per_second": 54.357, |
|
"step": 2104000 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 4.92e-08, |
|
"loss": 2.1017, |
|
"step": 2112000 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"eval_loss": 2.0038633346557617, |
|
"eval_runtime": 888.4121, |
|
"eval_samples_per_second": 868.764, |
|
"eval_steps_per_second": 54.298, |
|
"step": 2112000 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"eval_loss": 2.0011472702026367, |
|
"eval_runtime": 889.7748, |
|
"eval_samples_per_second": 867.433, |
|
"eval_steps_per_second": 54.215, |
|
"step": 2120000 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 4.6466666666666666e-08, |
|
"loss": 2.1124, |
|
"step": 2128000 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"eval_loss": 2.007091522216797, |
|
"eval_runtime": 892.2658, |
|
"eval_samples_per_second": 865.011, |
|
"eval_steps_per_second": 54.063, |
|
"step": 2128000 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"eval_loss": 2.0060718059539795, |
|
"eval_runtime": 887.502, |
|
"eval_samples_per_second": 869.654, |
|
"eval_steps_per_second": 54.354, |
|
"step": 2136000 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 4.3733333333333335e-08, |
|
"loss": 2.1064, |
|
"step": 2144000 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"eval_loss": 2.0040297508239746, |
|
"eval_runtime": 888.8512, |
|
"eval_samples_per_second": 868.334, |
|
"eval_steps_per_second": 54.271, |
|
"step": 2144000 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"eval_loss": 2.007528066635132, |
|
"eval_runtime": 895.8909, |
|
"eval_samples_per_second": 861.511, |
|
"eval_steps_per_second": 53.845, |
|
"step": 2152000 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 4.1e-08, |
|
"loss": 2.115, |
|
"step": 2160000 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"eval_loss": 2.0025811195373535, |
|
"eval_runtime": 894.6822, |
|
"eval_samples_per_second": 862.675, |
|
"eval_steps_per_second": 53.917, |
|
"step": 2160000 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"eval_loss": 2.006788492202759, |
|
"eval_runtime": 885.9111, |
|
"eval_samples_per_second": 871.216, |
|
"eval_steps_per_second": 54.451, |
|
"step": 2168000 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 3.8266666666666665e-08, |
|
"loss": 2.114, |
|
"step": 2176000 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"eval_loss": 2.006558418273926, |
|
"eval_runtime": 889.8092, |
|
"eval_samples_per_second": 867.399, |
|
"eval_steps_per_second": 54.213, |
|
"step": 2176000 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"eval_loss": 2.0079538822174072, |
|
"eval_runtime": 889.2248, |
|
"eval_samples_per_second": 867.97, |
|
"eval_steps_per_second": 54.248, |
|
"step": 2184000 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 3.5533333333333334e-08, |
|
"loss": 2.1171, |
|
"step": 2192000 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"eval_loss": 2.0031957626342773, |
|
"eval_runtime": 891.062, |
|
"eval_samples_per_second": 866.18, |
|
"eval_steps_per_second": 54.137, |
|
"step": 2192000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_loss": 2.0036396980285645, |
|
"eval_runtime": 889.4858, |
|
"eval_samples_per_second": 867.715, |
|
"eval_steps_per_second": 54.232, |
|
"step": 2200000 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 3.28e-08, |
|
"loss": 2.1119, |
|
"step": 2208000 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"eval_loss": 2.004848003387451, |
|
"eval_runtime": 890.2659, |
|
"eval_samples_per_second": 866.954, |
|
"eval_steps_per_second": 54.185, |
|
"step": 2208000 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"eval_loss": 2.0058629512786865, |
|
"eval_runtime": 890.6135, |
|
"eval_samples_per_second": 866.616, |
|
"eval_steps_per_second": 54.164, |
|
"step": 2216000 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 3.0066666666666665e-08, |
|
"loss": 2.1097, |
|
"step": 2224000 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"eval_loss": 2.005845546722412, |
|
"eval_runtime": 889.9256, |
|
"eval_samples_per_second": 867.286, |
|
"eval_steps_per_second": 54.206, |
|
"step": 2224000 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"eval_loss": 2.004934310913086, |
|
"eval_runtime": 893.1468, |
|
"eval_samples_per_second": 864.158, |
|
"eval_steps_per_second": 54.01, |
|
"step": 2232000 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 2.7333333333333333e-08, |
|
"loss": 2.1091, |
|
"step": 2240000 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"eval_loss": 2.005760669708252, |
|
"eval_runtime": 893.6832, |
|
"eval_samples_per_second": 863.639, |
|
"eval_steps_per_second": 53.978, |
|
"step": 2240000 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"eval_loss": 2.0032405853271484, |
|
"eval_runtime": 894.8171, |
|
"eval_samples_per_second": 862.545, |
|
"eval_steps_per_second": 53.909, |
|
"step": 2248000 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 2.46e-08, |
|
"loss": 2.1107, |
|
"step": 2256000 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"eval_loss": 2.00769305229187, |
|
"eval_runtime": 893.4774, |
|
"eval_samples_per_second": 863.838, |
|
"eval_steps_per_second": 53.99, |
|
"step": 2256000 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"eval_loss": 2.0032243728637695, |
|
"eval_runtime": 893.6019, |
|
"eval_samples_per_second": 863.718, |
|
"eval_steps_per_second": 53.983, |
|
"step": 2264000 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 2.1866666666666667e-08, |
|
"loss": 2.1126, |
|
"step": 2272000 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"eval_loss": 2.0055274963378906, |
|
"eval_runtime": 891.7304, |
|
"eval_samples_per_second": 865.531, |
|
"eval_steps_per_second": 54.096, |
|
"step": 2272000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"eval_loss": 2.002612590789795, |
|
"eval_runtime": 892.1014, |
|
"eval_samples_per_second": 865.171, |
|
"eval_steps_per_second": 54.073, |
|
"step": 2280000 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 1.9133333333333333e-08, |
|
"loss": 2.1173, |
|
"step": 2288000 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"eval_loss": 2.0062429904937744, |
|
"eval_runtime": 891.9249, |
|
"eval_samples_per_second": 865.342, |
|
"eval_steps_per_second": 54.084, |
|
"step": 2288000 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"eval_loss": 2.003859043121338, |
|
"eval_runtime": 892.8008, |
|
"eval_samples_per_second": 864.493, |
|
"eval_steps_per_second": 54.031, |
|
"step": 2296000 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 1.64e-08, |
|
"loss": 2.114, |
|
"step": 2304000 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"eval_loss": 2.006359100341797, |
|
"eval_runtime": 891.1547, |
|
"eval_samples_per_second": 866.09, |
|
"eval_steps_per_second": 54.131, |
|
"step": 2304000 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"eval_loss": 2.0113308429718018, |
|
"eval_runtime": 890.136, |
|
"eval_samples_per_second": 867.081, |
|
"eval_steps_per_second": 54.193, |
|
"step": 2312000 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 1.3666666666666667e-08, |
|
"loss": 2.1131, |
|
"step": 2320000 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"eval_loss": 2.0065314769744873, |
|
"eval_runtime": 890.6924, |
|
"eval_samples_per_second": 866.539, |
|
"eval_steps_per_second": 54.159, |
|
"step": 2320000 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"eval_loss": 2.0098392963409424, |
|
"eval_runtime": 892.2668, |
|
"eval_samples_per_second": 865.01, |
|
"eval_steps_per_second": 54.063, |
|
"step": 2328000 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 1.0933333333333334e-08, |
|
"loss": 2.1045, |
|
"step": 2336000 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"eval_loss": 2.0060501098632812, |
|
"eval_runtime": 891.9301, |
|
"eval_samples_per_second": 865.337, |
|
"eval_steps_per_second": 54.084, |
|
"step": 2336000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"eval_loss": 2.006572961807251, |
|
"eval_runtime": 894.7549, |
|
"eval_samples_per_second": 862.605, |
|
"eval_steps_per_second": 53.913, |
|
"step": 2344000 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 8.2e-09, |
|
"loss": 2.1144, |
|
"step": 2352000 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"eval_loss": 2.006028175354004, |
|
"eval_runtime": 899.347, |
|
"eval_samples_per_second": 858.2, |
|
"eval_steps_per_second": 53.638, |
|
"step": 2352000 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"eval_loss": 2.00589656829834, |
|
"eval_runtime": 893.5452, |
|
"eval_samples_per_second": 863.773, |
|
"eval_steps_per_second": 53.986, |
|
"step": 2360000 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 5.466666666666667e-09, |
|
"loss": 2.1086, |
|
"step": 2368000 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"eval_loss": 2.0038540363311768, |
|
"eval_runtime": 893.2561, |
|
"eval_samples_per_second": 864.052, |
|
"eval_steps_per_second": 54.004, |
|
"step": 2368000 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"eval_loss": 2.0076115131378174, |
|
"eval_runtime": 895.0756, |
|
"eval_samples_per_second": 862.296, |
|
"eval_steps_per_second": 53.894, |
|
"step": 2376000 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 2.7333333333333334e-09, |
|
"loss": 2.1058, |
|
"step": 2384000 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"eval_loss": 2.0035552978515625, |
|
"eval_runtime": 895.3228, |
|
"eval_samples_per_second": 862.058, |
|
"eval_steps_per_second": 53.879, |
|
"step": 2384000 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"eval_loss": 2.0077223777770996, |
|
"eval_runtime": 896.1834, |
|
"eval_samples_per_second": 861.23, |
|
"eval_steps_per_second": 53.827, |
|
"step": 2392000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 0.0, |
|
"loss": 2.1112, |
|
"step": 2400000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"eval_loss": 2.000014066696167, |
|
"eval_runtime": 893.9091, |
|
"eval_samples_per_second": 863.421, |
|
"eval_steps_per_second": 53.964, |
|
"step": 2400000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"step": 2400000, |
|
"total_flos": 7.571300080769916e+17, |
|
"train_loss": 2.133689431966146, |
|
"train_runtime": 416842.919, |
|
"train_samples_per_second": 92.121, |
|
"train_steps_per_second": 5.758 |
|
} |
|
], |
|
"logging_steps": 16000, |
|
"max_steps": 2400000, |
|
"num_train_epochs": 3, |
|
"save_steps": 32000, |
|
"total_flos": 7.571300080769916e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|