|
{ |
|
"best_metric": 2.2190773487091064, |
|
"best_model_checkpoint": "./model_tweets_2020_Q1_25/checkpoint-1952000", |
|
"epoch": 6.556194784547049, |
|
"eval_steps": 8000, |
|
"global_step": 2400000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"eval_loss": 2.5883126258850098, |
|
"eval_runtime": 332.1552, |
|
"eval_samples_per_second": 928.078, |
|
"eval_steps_per_second": 58.006, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.0726666666666665e-07, |
|
"loss": 2.7561, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_loss": 2.4957592487335205, |
|
"eval_runtime": 332.3107, |
|
"eval_samples_per_second": 927.644, |
|
"eval_steps_per_second": 57.979, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 2.437556266784668, |
|
"eval_runtime": 332.4781, |
|
"eval_samples_per_second": 927.177, |
|
"eval_steps_per_second": 57.95, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.0453333333333336e-07, |
|
"loss": 2.531, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_loss": 2.409024477005005, |
|
"eval_runtime": 332.2129, |
|
"eval_samples_per_second": 927.917, |
|
"eval_steps_per_second": 57.996, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_loss": 2.3791210651397705, |
|
"eval_runtime": 331.9964, |
|
"eval_samples_per_second": 928.522, |
|
"eval_steps_per_second": 58.034, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.018e-07, |
|
"loss": 2.4627, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 2.3666348457336426, |
|
"eval_runtime": 332.5374, |
|
"eval_samples_per_second": 927.011, |
|
"eval_steps_per_second": 57.939, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 2.3457391262054443, |
|
"eval_runtime": 333.7346, |
|
"eval_samples_per_second": 923.686, |
|
"eval_steps_per_second": 57.732, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 3.9906666666666667e-07, |
|
"loss": 2.4252, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 2.337965726852417, |
|
"eval_runtime": 331.9355, |
|
"eval_samples_per_second": 928.693, |
|
"eval_steps_per_second": 58.044, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 2.329805374145508, |
|
"eval_runtime": 333.3329, |
|
"eval_samples_per_second": 924.799, |
|
"eval_steps_per_second": 57.801, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.963333333333333e-07, |
|
"loss": 2.4061, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 2.325295925140381, |
|
"eval_runtime": 334.0512, |
|
"eval_samples_per_second": 922.811, |
|
"eval_steps_per_second": 57.677, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 2.317714214324951, |
|
"eval_runtime": 334.2839, |
|
"eval_samples_per_second": 922.168, |
|
"eval_steps_per_second": 57.637, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.936e-07, |
|
"loss": 2.395, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_loss": 2.3130738735198975, |
|
"eval_runtime": 334.6418, |
|
"eval_samples_per_second": 921.182, |
|
"eval_steps_per_second": 57.575, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_loss": 2.305846691131592, |
|
"eval_runtime": 333.8278, |
|
"eval_samples_per_second": 923.428, |
|
"eval_steps_per_second": 57.715, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.908666666666667e-07, |
|
"loss": 2.3843, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_loss": 2.3009700775146484, |
|
"eval_runtime": 334.6239, |
|
"eval_samples_per_second": 921.231, |
|
"eval_steps_per_second": 57.578, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_loss": 2.2925422191619873, |
|
"eval_runtime": 335.4315, |
|
"eval_samples_per_second": 919.013, |
|
"eval_steps_per_second": 57.439, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.8813333333333334e-07, |
|
"loss": 2.3738, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_loss": 2.2916228771209717, |
|
"eval_runtime": 334.3802, |
|
"eval_samples_per_second": 921.903, |
|
"eval_steps_per_second": 57.62, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_loss": 2.29465389251709, |
|
"eval_runtime": 333.899, |
|
"eval_samples_per_second": 923.231, |
|
"eval_steps_per_second": 57.703, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.854e-07, |
|
"loss": 2.3686, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_loss": 2.2834742069244385, |
|
"eval_runtime": 334.2854, |
|
"eval_samples_per_second": 922.164, |
|
"eval_steps_per_second": 57.636, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_loss": 2.2863762378692627, |
|
"eval_runtime": 334.9469, |
|
"eval_samples_per_second": 920.343, |
|
"eval_steps_per_second": 57.523, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.8266666666666665e-07, |
|
"loss": 2.3615, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 2.283402919769287, |
|
"eval_runtime": 340.7251, |
|
"eval_samples_per_second": 904.735, |
|
"eval_steps_per_second": 56.547, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_loss": 2.276758909225464, |
|
"eval_runtime": 337.1601, |
|
"eval_samples_per_second": 914.301, |
|
"eval_steps_per_second": 57.145, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.799333333333333e-07, |
|
"loss": 2.3515, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_loss": 2.280259847640991, |
|
"eval_runtime": 339.3606, |
|
"eval_samples_per_second": 908.373, |
|
"eval_steps_per_second": 56.774, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_loss": 2.280385732650757, |
|
"eval_runtime": 337.3298, |
|
"eval_samples_per_second": 913.841, |
|
"eval_steps_per_second": 57.116, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 3.772e-07, |
|
"loss": 2.3508, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_loss": 2.275428295135498, |
|
"eval_runtime": 335.956, |
|
"eval_samples_per_second": 917.578, |
|
"eval_steps_per_second": 57.35, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_loss": 2.2767016887664795, |
|
"eval_runtime": 335.4697, |
|
"eval_samples_per_second": 918.909, |
|
"eval_steps_per_second": 57.433, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 3.7446666666666667e-07, |
|
"loss": 2.35, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_loss": 2.2741713523864746, |
|
"eval_runtime": 335.6132, |
|
"eval_samples_per_second": 918.516, |
|
"eval_steps_per_second": 57.408, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_loss": 2.272230386734009, |
|
"eval_runtime": 334.8971, |
|
"eval_samples_per_second": 920.48, |
|
"eval_steps_per_second": 57.531, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.7173333333333333e-07, |
|
"loss": 2.3385, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_loss": 2.266052484512329, |
|
"eval_runtime": 336.4044, |
|
"eval_samples_per_second": 916.356, |
|
"eval_steps_per_second": 57.273, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_loss": 2.2705941200256348, |
|
"eval_runtime": 333.5697, |
|
"eval_samples_per_second": 924.143, |
|
"eval_steps_per_second": 57.76, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.69e-07, |
|
"loss": 2.3393, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 2.2633328437805176, |
|
"eval_runtime": 337.2689, |
|
"eval_samples_per_second": 914.007, |
|
"eval_steps_per_second": 57.127, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_loss": 2.26476788520813, |
|
"eval_runtime": 335.5887, |
|
"eval_samples_per_second": 918.583, |
|
"eval_steps_per_second": 57.413, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.6626666666666664e-07, |
|
"loss": 2.3392, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_loss": 2.2656445503234863, |
|
"eval_runtime": 336.8746, |
|
"eval_samples_per_second": 915.076, |
|
"eval_steps_per_second": 57.193, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_loss": 2.2660086154937744, |
|
"eval_runtime": 335.5863, |
|
"eval_samples_per_second": 918.589, |
|
"eval_steps_per_second": 57.413, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.6353333333333335e-07, |
|
"loss": 2.3336, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_loss": 2.265676736831665, |
|
"eval_runtime": 336.9338, |
|
"eval_samples_per_second": 914.916, |
|
"eval_steps_per_second": 57.183, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_loss": 2.2604660987854004, |
|
"eval_runtime": 334.7066, |
|
"eval_samples_per_second": 921.004, |
|
"eval_steps_per_second": 57.564, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.608e-07, |
|
"loss": 2.3324, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_loss": 2.261540651321411, |
|
"eval_runtime": 336.5364, |
|
"eval_samples_per_second": 915.996, |
|
"eval_steps_per_second": 57.251, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_loss": 2.255148410797119, |
|
"eval_runtime": 334.2192, |
|
"eval_samples_per_second": 922.347, |
|
"eval_steps_per_second": 57.648, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.5806666666666666e-07, |
|
"loss": 2.3312, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_loss": 2.2581000328063965, |
|
"eval_runtime": 334.0976, |
|
"eval_samples_per_second": 922.683, |
|
"eval_steps_per_second": 57.669, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_loss": 2.262571334838867, |
|
"eval_runtime": 333.6502, |
|
"eval_samples_per_second": 923.92, |
|
"eval_steps_per_second": 57.746, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.553333333333333e-07, |
|
"loss": 2.3352, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_loss": 2.2576231956481934, |
|
"eval_runtime": 334.3477, |
|
"eval_samples_per_second": 921.992, |
|
"eval_steps_per_second": 57.626, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_loss": 2.2552905082702637, |
|
"eval_runtime": 334.1252, |
|
"eval_samples_per_second": 922.606, |
|
"eval_steps_per_second": 57.664, |
|
"step": 328000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.5259999999999997e-07, |
|
"loss": 2.3287, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_loss": 2.259079694747925, |
|
"eval_runtime": 336.4319, |
|
"eval_samples_per_second": 916.281, |
|
"eval_steps_per_second": 57.269, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_loss": 2.2557525634765625, |
|
"eval_runtime": 334.0372, |
|
"eval_samples_per_second": 922.849, |
|
"eval_steps_per_second": 57.679, |
|
"step": 344000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.498666666666667e-07, |
|
"loss": 2.321, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_loss": 2.260307788848877, |
|
"eval_runtime": 335.3355, |
|
"eval_samples_per_second": 919.276, |
|
"eval_steps_per_second": 57.456, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_loss": 2.256932258605957, |
|
"eval_runtime": 334.5306, |
|
"eval_samples_per_second": 921.488, |
|
"eval_steps_per_second": 57.594, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.4713333333333333e-07, |
|
"loss": 2.3278, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"eval_loss": 2.254404067993164, |
|
"eval_runtime": 335.0696, |
|
"eval_samples_per_second": 920.006, |
|
"eval_steps_per_second": 57.501, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"eval_loss": 2.260352373123169, |
|
"eval_runtime": 334.4596, |
|
"eval_samples_per_second": 921.684, |
|
"eval_steps_per_second": 57.606, |
|
"step": 376000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.444e-07, |
|
"loss": 2.319, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"eval_loss": 2.25346040725708, |
|
"eval_runtime": 334.8356, |
|
"eval_samples_per_second": 920.649, |
|
"eval_steps_per_second": 57.542, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_loss": 2.241976499557495, |
|
"eval_runtime": 335.4873, |
|
"eval_samples_per_second": 918.86, |
|
"eval_steps_per_second": 57.43, |
|
"step": 392000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.416666666666667e-07, |
|
"loss": 2.3151, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"eval_loss": 2.2583224773406982, |
|
"eval_runtime": 335.4778, |
|
"eval_samples_per_second": 918.887, |
|
"eval_steps_per_second": 57.432, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_loss": 2.2534608840942383, |
|
"eval_runtime": 335.9955, |
|
"eval_samples_per_second": 917.471, |
|
"eval_steps_per_second": 57.343, |
|
"step": 408000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.3893333333333335e-07, |
|
"loss": 2.3144, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_loss": 2.258167266845703, |
|
"eval_runtime": 335.4405, |
|
"eval_samples_per_second": 918.989, |
|
"eval_steps_per_second": 57.438, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"eval_loss": 2.249640464782715, |
|
"eval_runtime": 334.4567, |
|
"eval_samples_per_second": 921.692, |
|
"eval_steps_per_second": 57.607, |
|
"step": 424000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.3619999999999995e-07, |
|
"loss": 2.3191, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"eval_loss": 2.2531871795654297, |
|
"eval_runtime": 335.2874, |
|
"eval_samples_per_second": 919.408, |
|
"eval_steps_per_second": 57.464, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_loss": 2.2515294551849365, |
|
"eval_runtime": 335.0984, |
|
"eval_samples_per_second": 919.927, |
|
"eval_steps_per_second": 57.497, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.3346666666666666e-07, |
|
"loss": 2.3168, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"eval_loss": 2.2500967979431152, |
|
"eval_runtime": 336.248, |
|
"eval_samples_per_second": 916.782, |
|
"eval_steps_per_second": 57.3, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"eval_loss": 2.245288848876953, |
|
"eval_runtime": 337.1188, |
|
"eval_samples_per_second": 914.414, |
|
"eval_steps_per_second": 57.152, |
|
"step": 456000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3.307333333333333e-07, |
|
"loss": 2.3156, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"eval_loss": 2.2444746494293213, |
|
"eval_runtime": 335.5009, |
|
"eval_samples_per_second": 918.823, |
|
"eval_steps_per_second": 57.428, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"eval_loss": 2.2484536170959473, |
|
"eval_runtime": 334.7608, |
|
"eval_samples_per_second": 920.855, |
|
"eval_steps_per_second": 57.555, |
|
"step": 472000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.28e-07, |
|
"loss": 2.3178, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_loss": 2.249347448348999, |
|
"eval_runtime": 335.2634, |
|
"eval_samples_per_second": 919.474, |
|
"eval_steps_per_second": 57.468, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"eval_loss": 2.244321346282959, |
|
"eval_runtime": 335.8842, |
|
"eval_samples_per_second": 917.775, |
|
"eval_steps_per_second": 57.362, |
|
"step": 488000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.252666666666667e-07, |
|
"loss": 2.3113, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_loss": 2.2493255138397217, |
|
"eval_runtime": 336.0104, |
|
"eval_samples_per_second": 917.43, |
|
"eval_steps_per_second": 57.34, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"eval_loss": 2.2493276596069336, |
|
"eval_runtime": 336.0104, |
|
"eval_samples_per_second": 917.43, |
|
"eval_steps_per_second": 57.34, |
|
"step": 504000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 3.2253333333333334e-07, |
|
"loss": 2.3116, |
|
"step": 512000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_loss": 2.2483131885528564, |
|
"eval_runtime": 335.3926, |
|
"eval_samples_per_second": 919.12, |
|
"eval_steps_per_second": 57.446, |
|
"step": 512000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"eval_loss": 2.245854616165161, |
|
"eval_runtime": 335.4507, |
|
"eval_samples_per_second": 918.961, |
|
"eval_steps_per_second": 57.436, |
|
"step": 520000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 3.198e-07, |
|
"loss": 2.3166, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_loss": 2.248081922531128, |
|
"eval_runtime": 336.9072, |
|
"eval_samples_per_second": 914.988, |
|
"eval_steps_per_second": 57.188, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"eval_loss": 2.254246711730957, |
|
"eval_runtime": 335.1811, |
|
"eval_samples_per_second": 919.7, |
|
"eval_steps_per_second": 57.482, |
|
"step": 536000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 3.1706666666666665e-07, |
|
"loss": 2.3158, |
|
"step": 544000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"eval_loss": 2.244293451309204, |
|
"eval_runtime": 336.0363, |
|
"eval_samples_per_second": 917.359, |
|
"eval_steps_per_second": 57.336, |
|
"step": 544000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"eval_loss": 2.240211009979248, |
|
"eval_runtime": 335.2536, |
|
"eval_samples_per_second": 919.501, |
|
"eval_steps_per_second": 57.47, |
|
"step": 552000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 3.1433333333333336e-07, |
|
"loss": 2.3148, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_loss": 2.2449421882629395, |
|
"eval_runtime": 335.5679, |
|
"eval_samples_per_second": 918.64, |
|
"eval_steps_per_second": 57.416, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_loss": 2.2415246963500977, |
|
"eval_runtime": 337.8499, |
|
"eval_samples_per_second": 912.435, |
|
"eval_steps_per_second": 57.028, |
|
"step": 568000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 3.116e-07, |
|
"loss": 2.3145, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"eval_loss": 2.2471208572387695, |
|
"eval_runtime": 338.2882, |
|
"eval_samples_per_second": 911.253, |
|
"eval_steps_per_second": 56.954, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_loss": 2.2469303607940674, |
|
"eval_runtime": 338.2218, |
|
"eval_samples_per_second": 911.431, |
|
"eval_steps_per_second": 56.966, |
|
"step": 584000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 3.0886666666666667e-07, |
|
"loss": 2.3119, |
|
"step": 592000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"eval_loss": 2.244541645050049, |
|
"eval_runtime": 336.9016, |
|
"eval_samples_per_second": 915.003, |
|
"eval_steps_per_second": 57.189, |
|
"step": 592000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"eval_loss": 2.248655080795288, |
|
"eval_runtime": 335.5748, |
|
"eval_samples_per_second": 918.621, |
|
"eval_steps_per_second": 57.415, |
|
"step": 600000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 3.061333333333333e-07, |
|
"loss": 2.3045, |
|
"step": 608000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"eval_loss": 2.2455668449401855, |
|
"eval_runtime": 335.9119, |
|
"eval_samples_per_second": 917.699, |
|
"eval_steps_per_second": 57.357, |
|
"step": 608000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"eval_loss": 2.246622085571289, |
|
"eval_runtime": 335.9733, |
|
"eval_samples_per_second": 917.531, |
|
"eval_steps_per_second": 57.347, |
|
"step": 616000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 3.034e-07, |
|
"loss": 2.3046, |
|
"step": 624000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_loss": 2.2357234954833984, |
|
"eval_runtime": 337.5898, |
|
"eval_samples_per_second": 913.138, |
|
"eval_steps_per_second": 57.072, |
|
"step": 624000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"eval_loss": 2.244824171066284, |
|
"eval_runtime": 336.5413, |
|
"eval_samples_per_second": 915.983, |
|
"eval_steps_per_second": 57.25, |
|
"step": 632000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 3.0066666666666663e-07, |
|
"loss": 2.3083, |
|
"step": 640000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_loss": 2.2381463050842285, |
|
"eval_runtime": 337.7594, |
|
"eval_samples_per_second": 912.679, |
|
"eval_steps_per_second": 57.044, |
|
"step": 640000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"eval_loss": 2.24391770362854, |
|
"eval_runtime": 337.5805, |
|
"eval_samples_per_second": 913.163, |
|
"eval_steps_per_second": 57.074, |
|
"step": 648000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.9793333333333334e-07, |
|
"loss": 2.3065, |
|
"step": 656000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"eval_loss": 2.2402257919311523, |
|
"eval_runtime": 336.8532, |
|
"eval_samples_per_second": 915.135, |
|
"eval_steps_per_second": 57.197, |
|
"step": 656000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"eval_loss": 2.2439496517181396, |
|
"eval_runtime": 337.4274, |
|
"eval_samples_per_second": 913.577, |
|
"eval_steps_per_second": 57.1, |
|
"step": 664000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.952e-07, |
|
"loss": 2.307, |
|
"step": 672000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"eval_loss": 2.2409019470214844, |
|
"eval_runtime": 336.4075, |
|
"eval_samples_per_second": 916.347, |
|
"eval_steps_per_second": 57.273, |
|
"step": 672000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_loss": 2.2425954341888428, |
|
"eval_runtime": 336.4355, |
|
"eval_samples_per_second": 916.271, |
|
"eval_steps_per_second": 57.268, |
|
"step": 680000 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 2.9246666666666665e-07, |
|
"loss": 2.3026, |
|
"step": 688000 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"eval_loss": 2.2386796474456787, |
|
"eval_runtime": 336.9441, |
|
"eval_samples_per_second": 914.888, |
|
"eval_steps_per_second": 57.182, |
|
"step": 688000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_loss": 2.235677719116211, |
|
"eval_runtime": 337.5327, |
|
"eval_samples_per_second": 913.292, |
|
"eval_steps_per_second": 57.082, |
|
"step": 696000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 2.897333333333333e-07, |
|
"loss": 2.2949, |
|
"step": 704000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"eval_loss": 2.237877607345581, |
|
"eval_runtime": 336.876, |
|
"eval_samples_per_second": 915.073, |
|
"eval_steps_per_second": 57.193, |
|
"step": 704000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"eval_loss": 2.2408130168914795, |
|
"eval_runtime": 338.2422, |
|
"eval_samples_per_second": 911.376, |
|
"eval_steps_per_second": 56.962, |
|
"step": 712000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 2.8699999999999996e-07, |
|
"loss": 2.2951, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_loss": 2.2431986331939697, |
|
"eval_runtime": 337.6174, |
|
"eval_samples_per_second": 913.063, |
|
"eval_steps_per_second": 57.068, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_loss": 2.244434118270874, |
|
"eval_runtime": 337.68, |
|
"eval_samples_per_second": 912.894, |
|
"eval_steps_per_second": 57.057, |
|
"step": 728000 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 2.8426666666666667e-07, |
|
"loss": 2.3011, |
|
"step": 736000 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"eval_loss": 2.2381932735443115, |
|
"eval_runtime": 336.7238, |
|
"eval_samples_per_second": 915.486, |
|
"eval_steps_per_second": 57.219, |
|
"step": 736000 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"eval_loss": 2.2391436100006104, |
|
"eval_runtime": 337.1171, |
|
"eval_samples_per_second": 914.418, |
|
"eval_steps_per_second": 57.152, |
|
"step": 744000 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 2.815333333333333e-07, |
|
"loss": 2.3017, |
|
"step": 752000 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"eval_loss": 2.236323833465576, |
|
"eval_runtime": 338.3716, |
|
"eval_samples_per_second": 911.028, |
|
"eval_steps_per_second": 56.94, |
|
"step": 752000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"eval_loss": 2.2444024085998535, |
|
"eval_runtime": 337.1979, |
|
"eval_samples_per_second": 914.199, |
|
"eval_steps_per_second": 57.139, |
|
"step": 760000 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 2.7880000000000003e-07, |
|
"loss": 2.2978, |
|
"step": 768000 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"eval_loss": 2.2370431423187256, |
|
"eval_runtime": 338.942, |
|
"eval_samples_per_second": 909.495, |
|
"eval_steps_per_second": 56.845, |
|
"step": 768000 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"eval_loss": 2.2350406646728516, |
|
"eval_runtime": 337.2566, |
|
"eval_samples_per_second": 914.04, |
|
"eval_steps_per_second": 57.129, |
|
"step": 776000 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 2.7606666666666664e-07, |
|
"loss": 2.2961, |
|
"step": 784000 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"eval_loss": 2.234744071960449, |
|
"eval_runtime": 338.2021, |
|
"eval_samples_per_second": 911.485, |
|
"eval_steps_per_second": 56.969, |
|
"step": 784000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"eval_loss": 2.238616704940796, |
|
"eval_runtime": 338.0434, |
|
"eval_samples_per_second": 911.913, |
|
"eval_steps_per_second": 56.996, |
|
"step": 792000 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 2.733333333333333e-07, |
|
"loss": 2.2968, |
|
"step": 800000 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"eval_loss": 2.2322075366973877, |
|
"eval_runtime": 337.896, |
|
"eval_samples_per_second": 912.31, |
|
"eval_steps_per_second": 57.021, |
|
"step": 800000 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"eval_loss": 2.240255117416382, |
|
"eval_runtime": 338.7394, |
|
"eval_samples_per_second": 910.039, |
|
"eval_steps_per_second": 56.879, |
|
"step": 808000 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 2.706e-07, |
|
"loss": 2.2962, |
|
"step": 816000 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"eval_loss": 2.2347311973571777, |
|
"eval_runtime": 339.8784, |
|
"eval_samples_per_second": 906.989, |
|
"eval_steps_per_second": 56.688, |
|
"step": 816000 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"eval_loss": 2.239776372909546, |
|
"eval_runtime": 338.4053, |
|
"eval_samples_per_second": 910.937, |
|
"eval_steps_per_second": 56.935, |
|
"step": 824000 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 2.6786666666666666e-07, |
|
"loss": 2.2984, |
|
"step": 832000 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"eval_loss": 2.235778570175171, |
|
"eval_runtime": 338.4942, |
|
"eval_samples_per_second": 910.698, |
|
"eval_steps_per_second": 56.92, |
|
"step": 832000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"eval_loss": 2.2412359714508057, |
|
"eval_runtime": 338.0942, |
|
"eval_samples_per_second": 911.775, |
|
"eval_steps_per_second": 56.987, |
|
"step": 840000 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 2.651333333333333e-07, |
|
"loss": 2.3029, |
|
"step": 848000 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"eval_loss": 2.238647937774658, |
|
"eval_runtime": 338.342, |
|
"eval_samples_per_second": 911.108, |
|
"eval_steps_per_second": 56.945, |
|
"step": 848000 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"eval_loss": 2.234633445739746, |
|
"eval_runtime": 339.1684, |
|
"eval_samples_per_second": 908.888, |
|
"eval_steps_per_second": 56.807, |
|
"step": 856000 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 2.624e-07, |
|
"loss": 2.2985, |
|
"step": 864000 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"eval_loss": 2.2323224544525146, |
|
"eval_runtime": 340.1121, |
|
"eval_samples_per_second": 906.366, |
|
"eval_steps_per_second": 56.649, |
|
"step": 864000 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"eval_loss": 2.2386910915374756, |
|
"eval_runtime": 338.6678, |
|
"eval_samples_per_second": 910.231, |
|
"eval_steps_per_second": 56.891, |
|
"step": 872000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 2.596666666666667e-07, |
|
"loss": 2.2922, |
|
"step": 880000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_loss": 2.230320453643799, |
|
"eval_runtime": 338.2571, |
|
"eval_samples_per_second": 911.336, |
|
"eval_steps_per_second": 56.96, |
|
"step": 880000 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"eval_loss": 2.232644557952881, |
|
"eval_runtime": 338.4677, |
|
"eval_samples_per_second": 910.769, |
|
"eval_steps_per_second": 56.924, |
|
"step": 888000 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 2.5693333333333333e-07, |
|
"loss": 2.2967, |
|
"step": 896000 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"eval_loss": 2.2422056198120117, |
|
"eval_runtime": 338.7421, |
|
"eval_samples_per_second": 910.031, |
|
"eval_steps_per_second": 56.878, |
|
"step": 896000 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"eval_loss": 2.235010862350464, |
|
"eval_runtime": 339.4694, |
|
"eval_samples_per_second": 908.082, |
|
"eval_steps_per_second": 56.756, |
|
"step": 904000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 2.542e-07, |
|
"loss": 2.2917, |
|
"step": 912000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"eval_loss": 2.2299275398254395, |
|
"eval_runtime": 339.4601, |
|
"eval_samples_per_second": 908.107, |
|
"eval_steps_per_second": 56.758, |
|
"step": 912000 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"eval_loss": 2.2307727336883545, |
|
"eval_runtime": 339.5941, |
|
"eval_samples_per_second": 907.748, |
|
"eval_steps_per_second": 56.735, |
|
"step": 920000 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 2.5146666666666664e-07, |
|
"loss": 2.2912, |
|
"step": 928000 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"eval_loss": 2.23453688621521, |
|
"eval_runtime": 339.4986, |
|
"eval_samples_per_second": 908.004, |
|
"eval_steps_per_second": 56.751, |
|
"step": 928000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"eval_loss": 2.2263941764831543, |
|
"eval_runtime": 340.6179, |
|
"eval_samples_per_second": 905.02, |
|
"eval_steps_per_second": 56.565, |
|
"step": 936000 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 2.4873333333333335e-07, |
|
"loss": 2.2887, |
|
"step": 944000 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"eval_loss": 2.236109972000122, |
|
"eval_runtime": 339.6326, |
|
"eval_samples_per_second": 907.646, |
|
"eval_steps_per_second": 56.729, |
|
"step": 944000 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"eval_loss": 2.2318520545959473, |
|
"eval_runtime": 342.6146, |
|
"eval_samples_per_second": 899.746, |
|
"eval_steps_per_second": 56.235, |
|
"step": 952000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 2.46e-07, |
|
"loss": 2.2956, |
|
"step": 960000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"eval_loss": 2.2339940071105957, |
|
"eval_runtime": 342.333, |
|
"eval_samples_per_second": 900.486, |
|
"eval_steps_per_second": 56.281, |
|
"step": 960000 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"eval_loss": 2.235605478286743, |
|
"eval_runtime": 342.7919, |
|
"eval_samples_per_second": 899.28, |
|
"eval_steps_per_second": 56.206, |
|
"step": 968000 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 2.4326666666666666e-07, |
|
"loss": 2.2927, |
|
"step": 976000 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"eval_loss": 2.2365500926971436, |
|
"eval_runtime": 342.6264, |
|
"eval_samples_per_second": 899.715, |
|
"eval_steps_per_second": 56.233, |
|
"step": 976000 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"eval_loss": 2.2334821224212646, |
|
"eval_runtime": 342.0197, |
|
"eval_samples_per_second": 901.311, |
|
"eval_steps_per_second": 56.333, |
|
"step": 984000 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 2.405333333333333e-07, |
|
"loss": 2.2872, |
|
"step": 992000 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"eval_loss": 2.232952833175659, |
|
"eval_runtime": 343.3686, |
|
"eval_samples_per_second": 897.77, |
|
"eval_steps_per_second": 56.112, |
|
"step": 992000 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"eval_loss": 2.225148916244507, |
|
"eval_runtime": 342.4862, |
|
"eval_samples_per_second": 900.083, |
|
"eval_steps_per_second": 56.256, |
|
"step": 1000000 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 2.3779999999999997e-07, |
|
"loss": 2.2936, |
|
"step": 1008000 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"eval_loss": 2.232741594314575, |
|
"eval_runtime": 342.7157, |
|
"eval_samples_per_second": 899.48, |
|
"eval_steps_per_second": 56.219, |
|
"step": 1008000 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"eval_loss": 2.2326343059539795, |
|
"eval_runtime": 344.8133, |
|
"eval_samples_per_second": 894.008, |
|
"eval_steps_per_second": 55.877, |
|
"step": 1016000 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 2.3506666666666668e-07, |
|
"loss": 2.2899, |
|
"step": 1024000 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"eval_loss": 2.2306628227233887, |
|
"eval_runtime": 344.8753, |
|
"eval_samples_per_second": 893.848, |
|
"eval_steps_per_second": 55.867, |
|
"step": 1024000 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"eval_loss": 2.2291181087493896, |
|
"eval_runtime": 343.9896, |
|
"eval_samples_per_second": 896.149, |
|
"eval_steps_per_second": 56.01, |
|
"step": 1032000 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.3233333333333334e-07, |
|
"loss": 2.2931, |
|
"step": 1040000 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"eval_loss": 2.228482723236084, |
|
"eval_runtime": 343.2535, |
|
"eval_samples_per_second": 898.071, |
|
"eval_steps_per_second": 56.131, |
|
"step": 1040000 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"eval_loss": 2.2326762676239014, |
|
"eval_runtime": 340.5017, |
|
"eval_samples_per_second": 905.329, |
|
"eval_steps_per_second": 56.584, |
|
"step": 1048000 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.2960000000000002e-07, |
|
"loss": 2.3042, |
|
"step": 1056000 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"eval_loss": 2.2366533279418945, |
|
"eval_runtime": 342.3036, |
|
"eval_samples_per_second": 900.563, |
|
"eval_steps_per_second": 56.286, |
|
"step": 1056000 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"eval_loss": 2.2344892024993896, |
|
"eval_runtime": 341.9516, |
|
"eval_samples_per_second": 901.49, |
|
"eval_steps_per_second": 56.344, |
|
"step": 1064000 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 2.2686666666666667e-07, |
|
"loss": 2.2864, |
|
"step": 1072000 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"eval_loss": 2.2267308235168457, |
|
"eval_runtime": 341.6099, |
|
"eval_samples_per_second": 902.392, |
|
"eval_steps_per_second": 56.401, |
|
"step": 1072000 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"eval_loss": 2.2342631816864014, |
|
"eval_runtime": 343.6113, |
|
"eval_samples_per_second": 897.136, |
|
"eval_steps_per_second": 56.072, |
|
"step": 1080000 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 2.2413333333333333e-07, |
|
"loss": 2.2933, |
|
"step": 1088000 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"eval_loss": 2.235445261001587, |
|
"eval_runtime": 342.1009, |
|
"eval_samples_per_second": 901.097, |
|
"eval_steps_per_second": 56.32, |
|
"step": 1088000 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_loss": 2.226022243499756, |
|
"eval_runtime": 343.4712, |
|
"eval_samples_per_second": 897.502, |
|
"eval_steps_per_second": 56.095, |
|
"step": 1096000 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 2.214e-07, |
|
"loss": 2.2909, |
|
"step": 1104000 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"eval_loss": 2.2340822219848633, |
|
"eval_runtime": 342.2127, |
|
"eval_samples_per_second": 900.802, |
|
"eval_steps_per_second": 56.301, |
|
"step": 1104000 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"eval_loss": 2.2265801429748535, |
|
"eval_runtime": 344.5846, |
|
"eval_samples_per_second": 894.602, |
|
"eval_steps_per_second": 55.914, |
|
"step": 1112000 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 2.1866666666666667e-07, |
|
"loss": 2.2889, |
|
"step": 1120000 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"eval_loss": 2.225277900695801, |
|
"eval_runtime": 343.7811, |
|
"eval_samples_per_second": 896.693, |
|
"eval_steps_per_second": 56.044, |
|
"step": 1120000 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"eval_loss": 2.225517511367798, |
|
"eval_runtime": 344.8771, |
|
"eval_samples_per_second": 893.843, |
|
"eval_steps_per_second": 55.866, |
|
"step": 1128000 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 2.1593333333333332e-07, |
|
"loss": 2.292, |
|
"step": 1136000 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"eval_loss": 2.219359874725342, |
|
"eval_runtime": 342.5656, |
|
"eval_samples_per_second": 899.874, |
|
"eval_steps_per_second": 56.243, |
|
"step": 1136000 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"eval_loss": 2.2318532466888428, |
|
"eval_runtime": 342.1989, |
|
"eval_samples_per_second": 900.839, |
|
"eval_steps_per_second": 56.304, |
|
"step": 1144000 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 2.132e-07, |
|
"loss": 2.282, |
|
"step": 1152000 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"eval_loss": 2.2221035957336426, |
|
"eval_runtime": 342.4265, |
|
"eval_samples_per_second": 900.24, |
|
"eval_steps_per_second": 56.266, |
|
"step": 1152000 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"eval_loss": 2.2272608280181885, |
|
"eval_runtime": 341.618, |
|
"eval_samples_per_second": 902.371, |
|
"eval_steps_per_second": 56.399, |
|
"step": 1160000 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 2.1046666666666666e-07, |
|
"loss": 2.2827, |
|
"step": 1168000 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"eval_loss": 2.2295727729797363, |
|
"eval_runtime": 344.8334, |
|
"eval_samples_per_second": 893.956, |
|
"eval_steps_per_second": 55.873, |
|
"step": 1168000 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"eval_loss": 2.2331955432891846, |
|
"eval_runtime": 343.112, |
|
"eval_samples_per_second": 898.441, |
|
"eval_steps_per_second": 56.154, |
|
"step": 1176000 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 2.0773333333333334e-07, |
|
"loss": 2.2937, |
|
"step": 1184000 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"eval_loss": 2.230241298675537, |
|
"eval_runtime": 342.0014, |
|
"eval_samples_per_second": 901.359, |
|
"eval_steps_per_second": 56.336, |
|
"step": 1184000 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"eval_loss": 2.2262063026428223, |
|
"eval_runtime": 344.0166, |
|
"eval_samples_per_second": 896.079, |
|
"eval_steps_per_second": 56.006, |
|
"step": 1192000 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 2.05e-07, |
|
"loss": 2.2845, |
|
"step": 1200000 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"eval_loss": 2.231752872467041, |
|
"eval_runtime": 342.9925, |
|
"eval_samples_per_second": 898.754, |
|
"eval_steps_per_second": 56.173, |
|
"step": 1200000 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"eval_loss": 2.229050636291504, |
|
"eval_runtime": 342.7011, |
|
"eval_samples_per_second": 899.519, |
|
"eval_steps_per_second": 56.221, |
|
"step": 1208000 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 2.0226666666666668e-07, |
|
"loss": 2.284, |
|
"step": 1216000 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"eval_loss": 2.232661008834839, |
|
"eval_runtime": 343.2876, |
|
"eval_samples_per_second": 897.982, |
|
"eval_steps_per_second": 56.125, |
|
"step": 1216000 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"eval_loss": 2.230750560760498, |
|
"eval_runtime": 343.2951, |
|
"eval_samples_per_second": 897.962, |
|
"eval_steps_per_second": 56.124, |
|
"step": 1224000 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 1.9953333333333333e-07, |
|
"loss": 2.2923, |
|
"step": 1232000 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"eval_loss": 2.226369857788086, |
|
"eval_runtime": 343.209, |
|
"eval_samples_per_second": 898.188, |
|
"eval_steps_per_second": 56.138, |
|
"step": 1232000 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"eval_loss": 2.2390074729919434, |
|
"eval_runtime": 342.5512, |
|
"eval_samples_per_second": 899.912, |
|
"eval_steps_per_second": 56.246, |
|
"step": 1240000 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 1.968e-07, |
|
"loss": 2.2859, |
|
"step": 1248000 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"eval_loss": 2.2309505939483643, |
|
"eval_runtime": 343.4164, |
|
"eval_samples_per_second": 897.645, |
|
"eval_steps_per_second": 56.104, |
|
"step": 1248000 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"eval_loss": 2.22867751121521, |
|
"eval_runtime": 342.6067, |
|
"eval_samples_per_second": 899.766, |
|
"eval_steps_per_second": 56.236, |
|
"step": 1256000 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 1.9406666666666667e-07, |
|
"loss": 2.2879, |
|
"step": 1264000 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"eval_loss": 2.228405714035034, |
|
"eval_runtime": 345.1005, |
|
"eval_samples_per_second": 893.264, |
|
"eval_steps_per_second": 55.83, |
|
"step": 1264000 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"eval_loss": 2.2228379249572754, |
|
"eval_runtime": 343.2335, |
|
"eval_samples_per_second": 898.123, |
|
"eval_steps_per_second": 56.134, |
|
"step": 1272000 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 1.9133333333333333e-07, |
|
"loss": 2.292, |
|
"step": 1280000 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"eval_loss": 2.2295541763305664, |
|
"eval_runtime": 343.6906, |
|
"eval_samples_per_second": 896.929, |
|
"eval_steps_per_second": 56.059, |
|
"step": 1280000 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"eval_loss": 2.232851982116699, |
|
"eval_runtime": 343.5216, |
|
"eval_samples_per_second": 897.37, |
|
"eval_steps_per_second": 56.087, |
|
"step": 1288000 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 1.886e-07, |
|
"loss": 2.2827, |
|
"step": 1296000 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"eval_loss": 2.226313352584839, |
|
"eval_runtime": 343.9653, |
|
"eval_samples_per_second": 896.213, |
|
"eval_steps_per_second": 56.014, |
|
"step": 1296000 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"eval_loss": 2.2323992252349854, |
|
"eval_runtime": 345.0144, |
|
"eval_samples_per_second": 893.487, |
|
"eval_steps_per_second": 55.844, |
|
"step": 1304000 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 1.8586666666666666e-07, |
|
"loss": 2.2829, |
|
"step": 1312000 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"eval_loss": 2.2231664657592773, |
|
"eval_runtime": 343.513, |
|
"eval_samples_per_second": 897.393, |
|
"eval_steps_per_second": 56.088, |
|
"step": 1312000 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"eval_loss": 2.2273144721984863, |
|
"eval_runtime": 344.0809, |
|
"eval_samples_per_second": 895.911, |
|
"eval_steps_per_second": 55.996, |
|
"step": 1320000 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 1.8313333333333332e-07, |
|
"loss": 2.2863, |
|
"step": 1328000 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"eval_loss": 2.2296173572540283, |
|
"eval_runtime": 344.7931, |
|
"eval_samples_per_second": 894.061, |
|
"eval_steps_per_second": 55.88, |
|
"step": 1328000 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"eval_loss": 2.2293524742126465, |
|
"eval_runtime": 343.7387, |
|
"eval_samples_per_second": 896.803, |
|
"eval_steps_per_second": 56.051, |
|
"step": 1336000 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 1.804e-07, |
|
"loss": 2.2796, |
|
"step": 1344000 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"eval_loss": 2.228300094604492, |
|
"eval_runtime": 345.0604, |
|
"eval_samples_per_second": 893.368, |
|
"eval_steps_per_second": 55.837, |
|
"step": 1344000 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"eval_loss": 2.2279834747314453, |
|
"eval_runtime": 343.4098, |
|
"eval_samples_per_second": 897.662, |
|
"eval_steps_per_second": 56.105, |
|
"step": 1352000 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 1.7766666666666666e-07, |
|
"loss": 2.2835, |
|
"step": 1360000 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"eval_loss": 2.226436138153076, |
|
"eval_runtime": 344.8134, |
|
"eval_samples_per_second": 894.008, |
|
"eval_steps_per_second": 55.877, |
|
"step": 1360000 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"eval_loss": 2.222442150115967, |
|
"eval_runtime": 344.7708, |
|
"eval_samples_per_second": 894.119, |
|
"eval_steps_per_second": 55.883, |
|
"step": 1368000 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 1.7493333333333334e-07, |
|
"loss": 2.2875, |
|
"step": 1376000 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"eval_loss": 2.2218754291534424, |
|
"eval_runtime": 346.0197, |
|
"eval_samples_per_second": 890.891, |
|
"eval_steps_per_second": 55.682, |
|
"step": 1376000 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"eval_loss": 2.224281072616577, |
|
"eval_runtime": 344.7226, |
|
"eval_samples_per_second": 894.244, |
|
"eval_steps_per_second": 55.891, |
|
"step": 1384000 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 1.722e-07, |
|
"loss": 2.2792, |
|
"step": 1392000 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"eval_loss": 2.232009172439575, |
|
"eval_runtime": 344.233, |
|
"eval_samples_per_second": 895.516, |
|
"eval_steps_per_second": 55.971, |
|
"step": 1392000 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"eval_loss": 2.227288246154785, |
|
"eval_runtime": 344.3453, |
|
"eval_samples_per_second": 895.223, |
|
"eval_steps_per_second": 55.953, |
|
"step": 1400000 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 1.6946666666666668e-07, |
|
"loss": 2.2932, |
|
"step": 1408000 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"eval_loss": 2.225741386413574, |
|
"eval_runtime": 343.9791, |
|
"eval_samples_per_second": 896.177, |
|
"eval_steps_per_second": 56.012, |
|
"step": 1408000 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"eval_loss": 2.235980749130249, |
|
"eval_runtime": 344.5804, |
|
"eval_samples_per_second": 894.613, |
|
"eval_steps_per_second": 55.914, |
|
"step": 1416000 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 1.6673333333333333e-07, |
|
"loss": 2.2899, |
|
"step": 1424000 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"eval_loss": 2.227717399597168, |
|
"eval_runtime": 345.0207, |
|
"eval_samples_per_second": 893.471, |
|
"eval_steps_per_second": 55.843, |
|
"step": 1424000 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"eval_loss": 2.227459192276001, |
|
"eval_runtime": 345.1331, |
|
"eval_samples_per_second": 893.18, |
|
"eval_steps_per_second": 55.825, |
|
"step": 1432000 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 1.64e-07, |
|
"loss": 2.2859, |
|
"step": 1440000 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"eval_loss": 2.228656530380249, |
|
"eval_runtime": 345.5823, |
|
"eval_samples_per_second": 892.019, |
|
"eval_steps_per_second": 55.752, |
|
"step": 1440000 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"eval_loss": 2.2210566997528076, |
|
"eval_runtime": 345.2225, |
|
"eval_samples_per_second": 892.949, |
|
"eval_steps_per_second": 55.81, |
|
"step": 1448000 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 1.6126666666666667e-07, |
|
"loss": 2.2876, |
|
"step": 1456000 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"eval_loss": 2.2235565185546875, |
|
"eval_runtime": 345.2552, |
|
"eval_samples_per_second": 892.864, |
|
"eval_steps_per_second": 55.805, |
|
"step": 1456000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 2.2287678718566895, |
|
"eval_runtime": 345.4865, |
|
"eval_samples_per_second": 892.266, |
|
"eval_steps_per_second": 55.768, |
|
"step": 1464000 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 1.5853333333333332e-07, |
|
"loss": 2.2879, |
|
"step": 1472000 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"eval_loss": 2.2225778102874756, |
|
"eval_runtime": 346.081, |
|
"eval_samples_per_second": 890.734, |
|
"eval_steps_per_second": 55.672, |
|
"step": 1472000 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"eval_loss": 2.2241647243499756, |
|
"eval_runtime": 345.7366, |
|
"eval_samples_per_second": 891.621, |
|
"eval_steps_per_second": 55.727, |
|
"step": 1480000 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 1.558e-07, |
|
"loss": 2.282, |
|
"step": 1488000 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"eval_loss": 2.2286031246185303, |
|
"eval_runtime": 345.8095, |
|
"eval_samples_per_second": 891.433, |
|
"eval_steps_per_second": 55.716, |
|
"step": 1488000 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"eval_loss": 2.220984697341919, |
|
"eval_runtime": 346.175, |
|
"eval_samples_per_second": 890.492, |
|
"eval_steps_per_second": 55.657, |
|
"step": 1496000 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 1.5306666666666666e-07, |
|
"loss": 2.2828, |
|
"step": 1504000 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"eval_loss": 2.2303643226623535, |
|
"eval_runtime": 345.9183, |
|
"eval_samples_per_second": 891.153, |
|
"eval_steps_per_second": 55.698, |
|
"step": 1504000 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"eval_loss": 2.2310214042663574, |
|
"eval_runtime": 345.0795, |
|
"eval_samples_per_second": 893.319, |
|
"eval_steps_per_second": 55.834, |
|
"step": 1512000 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 1.5033333333333332e-07, |
|
"loss": 2.2765, |
|
"step": 1520000 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"eval_loss": 2.229534387588501, |
|
"eval_runtime": 346.2314, |
|
"eval_samples_per_second": 890.347, |
|
"eval_steps_per_second": 55.648, |
|
"step": 1520000 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"eval_loss": 2.2276086807250977, |
|
"eval_runtime": 345.5194, |
|
"eval_samples_per_second": 892.181, |
|
"eval_steps_per_second": 55.762, |
|
"step": 1528000 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 1.476e-07, |
|
"loss": 2.2839, |
|
"step": 1536000 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"eval_loss": 2.226030111312866, |
|
"eval_runtime": 345.3689, |
|
"eval_samples_per_second": 892.57, |
|
"eval_steps_per_second": 55.787, |
|
"step": 1536000 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"eval_loss": 2.225531578063965, |
|
"eval_runtime": 346.1485, |
|
"eval_samples_per_second": 890.56, |
|
"eval_steps_per_second": 55.661, |
|
"step": 1544000 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 1.4486666666666665e-07, |
|
"loss": 2.2845, |
|
"step": 1552000 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"eval_loss": 2.2199981212615967, |
|
"eval_runtime": 345.8554, |
|
"eval_samples_per_second": 891.315, |
|
"eval_steps_per_second": 55.708, |
|
"step": 1552000 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"eval_loss": 2.222754955291748, |
|
"eval_runtime": 346.6039, |
|
"eval_samples_per_second": 889.39, |
|
"eval_steps_per_second": 55.588, |
|
"step": 1560000 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 1.4213333333333334e-07, |
|
"loss": 2.2816, |
|
"step": 1568000 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"eval_loss": 2.2322280406951904, |
|
"eval_runtime": 346.0535, |
|
"eval_samples_per_second": 890.804, |
|
"eval_steps_per_second": 55.676, |
|
"step": 1568000 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"eval_loss": 2.225015163421631, |
|
"eval_runtime": 345.7846, |
|
"eval_samples_per_second": 891.497, |
|
"eval_steps_per_second": 55.72, |
|
"step": 1576000 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 1.3940000000000002e-07, |
|
"loss": 2.2965, |
|
"step": 1584000 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"eval_loss": 2.2242190837860107, |
|
"eval_runtime": 348.9458, |
|
"eval_samples_per_second": 883.421, |
|
"eval_steps_per_second": 55.215, |
|
"step": 1584000 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"eval_loss": 2.22951340675354, |
|
"eval_runtime": 346.387, |
|
"eval_samples_per_second": 889.947, |
|
"eval_steps_per_second": 55.623, |
|
"step": 1592000 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 1.3666666666666665e-07, |
|
"loss": 2.2806, |
|
"step": 1600000 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"eval_loss": 2.219784736633301, |
|
"eval_runtime": 346.2214, |
|
"eval_samples_per_second": 890.372, |
|
"eval_steps_per_second": 55.649, |
|
"step": 1600000 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"eval_loss": 2.230062961578369, |
|
"eval_runtime": 346.9441, |
|
"eval_samples_per_second": 888.518, |
|
"eval_steps_per_second": 55.533, |
|
"step": 1608000 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 1.3393333333333333e-07, |
|
"loss": 2.2868, |
|
"step": 1616000 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"eval_loss": 2.2308502197265625, |
|
"eval_runtime": 348.3618, |
|
"eval_samples_per_second": 884.902, |
|
"eval_steps_per_second": 55.307, |
|
"step": 1616000 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"eval_loss": 2.226969003677368, |
|
"eval_runtime": 347.2742, |
|
"eval_samples_per_second": 887.673, |
|
"eval_steps_per_second": 55.481, |
|
"step": 1624000 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 1.312e-07, |
|
"loss": 2.2907, |
|
"step": 1632000 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"eval_loss": 2.2290947437286377, |
|
"eval_runtime": 346.9393, |
|
"eval_samples_per_second": 888.53, |
|
"eval_steps_per_second": 55.534, |
|
"step": 1632000 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"eval_loss": 2.226861000061035, |
|
"eval_runtime": 347.5547, |
|
"eval_samples_per_second": 886.957, |
|
"eval_steps_per_second": 55.436, |
|
"step": 1640000 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 1.2846666666666667e-07, |
|
"loss": 2.2809, |
|
"step": 1648000 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"eval_loss": 2.2260689735412598, |
|
"eval_runtime": 347.4948, |
|
"eval_samples_per_second": 887.11, |
|
"eval_steps_per_second": 55.445, |
|
"step": 1648000 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"eval_loss": 2.231820583343506, |
|
"eval_runtime": 348.114, |
|
"eval_samples_per_second": 885.532, |
|
"eval_steps_per_second": 55.347, |
|
"step": 1656000 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 1.2573333333333332e-07, |
|
"loss": 2.2876, |
|
"step": 1664000 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"eval_loss": 2.22523832321167, |
|
"eval_runtime": 347.061, |
|
"eval_samples_per_second": 888.219, |
|
"eval_steps_per_second": 55.515, |
|
"step": 1664000 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"eval_loss": 2.2248425483703613, |
|
"eval_runtime": 347.102, |
|
"eval_samples_per_second": 888.114, |
|
"eval_steps_per_second": 55.508, |
|
"step": 1672000 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 1.23e-07, |
|
"loss": 2.2844, |
|
"step": 1680000 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"eval_loss": 2.222309112548828, |
|
"eval_runtime": 347.6799, |
|
"eval_samples_per_second": 886.637, |
|
"eval_steps_per_second": 55.416, |
|
"step": 1680000 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"eval_loss": 2.2250306606292725, |
|
"eval_runtime": 348.9099, |
|
"eval_samples_per_second": 883.512, |
|
"eval_steps_per_second": 55.221, |
|
"step": 1688000 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 1.2026666666666666e-07, |
|
"loss": 2.2841, |
|
"step": 1696000 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"eval_loss": 2.227815866470337, |
|
"eval_runtime": 347.3551, |
|
"eval_samples_per_second": 887.466, |
|
"eval_steps_per_second": 55.468, |
|
"step": 1696000 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"eval_loss": 2.222553014755249, |
|
"eval_runtime": 347.2892, |
|
"eval_samples_per_second": 887.635, |
|
"eval_steps_per_second": 55.478, |
|
"step": 1704000 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"learning_rate": 1.1753333333333334e-07, |
|
"loss": 2.2851, |
|
"step": 1712000 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"eval_loss": 2.2273736000061035, |
|
"eval_runtime": 347.2652, |
|
"eval_samples_per_second": 887.696, |
|
"eval_steps_per_second": 55.482, |
|
"step": 1712000 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"eval_loss": 2.2246508598327637, |
|
"eval_runtime": 348.4684, |
|
"eval_samples_per_second": 884.631, |
|
"eval_steps_per_second": 55.291, |
|
"step": 1720000 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 1.1480000000000001e-07, |
|
"loss": 2.2863, |
|
"step": 1728000 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"eval_loss": 2.223870277404785, |
|
"eval_runtime": 347.9398, |
|
"eval_samples_per_second": 885.975, |
|
"eval_steps_per_second": 55.375, |
|
"step": 1728000 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"eval_loss": 2.2227413654327393, |
|
"eval_runtime": 350.0067, |
|
"eval_samples_per_second": 880.743, |
|
"eval_steps_per_second": 55.048, |
|
"step": 1736000 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 1.1206666666666666e-07, |
|
"loss": 2.2788, |
|
"step": 1744000 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"eval_loss": 2.223409414291382, |
|
"eval_runtime": 350.9466, |
|
"eval_samples_per_second": 878.384, |
|
"eval_steps_per_second": 54.9, |
|
"step": 1744000 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"eval_loss": 2.2293310165405273, |
|
"eval_runtime": 350.7999, |
|
"eval_samples_per_second": 878.752, |
|
"eval_steps_per_second": 54.923, |
|
"step": 1752000 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 1.0933333333333333e-07, |
|
"loss": 2.2849, |
|
"step": 1760000 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"eval_loss": 2.2198665142059326, |
|
"eval_runtime": 350.5794, |
|
"eval_samples_per_second": 879.304, |
|
"eval_steps_per_second": 54.958, |
|
"step": 1760000 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"eval_loss": 2.2308552265167236, |
|
"eval_runtime": 351.9489, |
|
"eval_samples_per_second": 875.883, |
|
"eval_steps_per_second": 54.744, |
|
"step": 1768000 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 1.066e-07, |
|
"loss": 2.2826, |
|
"step": 1776000 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"eval_loss": 2.223459243774414, |
|
"eval_runtime": 352.3884, |
|
"eval_samples_per_second": 874.79, |
|
"eval_steps_per_second": 54.675, |
|
"step": 1776000 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"eval_loss": 2.2291903495788574, |
|
"eval_runtime": 350.0779, |
|
"eval_samples_per_second": 880.564, |
|
"eval_steps_per_second": 55.036, |
|
"step": 1784000 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 1.0386666666666667e-07, |
|
"loss": 2.2809, |
|
"step": 1792000 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"eval_loss": 2.224785089492798, |
|
"eval_runtime": 351.7673, |
|
"eval_samples_per_second": 876.335, |
|
"eval_steps_per_second": 54.772, |
|
"step": 1792000 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"eval_loss": 2.218683958053589, |
|
"eval_runtime": 353.6242, |
|
"eval_samples_per_second": 871.733, |
|
"eval_steps_per_second": 54.484, |
|
"step": 1800000 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 1.0113333333333334e-07, |
|
"loss": 2.2865, |
|
"step": 1808000 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"eval_loss": 2.233116388320923, |
|
"eval_runtime": 351.3999, |
|
"eval_samples_per_second": 877.251, |
|
"eval_steps_per_second": 54.829, |
|
"step": 1808000 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"eval_loss": 2.2243831157684326, |
|
"eval_runtime": 349.1059, |
|
"eval_samples_per_second": 883.016, |
|
"eval_steps_per_second": 55.19, |
|
"step": 1816000 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"learning_rate": 9.84e-08, |
|
"loss": 2.2773, |
|
"step": 1824000 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"eval_loss": 2.2246005535125732, |
|
"eval_runtime": 349.0697, |
|
"eval_samples_per_second": 883.107, |
|
"eval_steps_per_second": 55.195, |
|
"step": 1824000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 2.2314696311950684, |
|
"eval_runtime": 349.3246, |
|
"eval_samples_per_second": 882.463, |
|
"eval_steps_per_second": 55.155, |
|
"step": 1832000 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 9.566666666666666e-08, |
|
"loss": 2.2738, |
|
"step": 1840000 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"eval_loss": 2.231853485107422, |
|
"eval_runtime": 349.0971, |
|
"eval_samples_per_second": 883.038, |
|
"eval_steps_per_second": 55.191, |
|
"step": 1840000 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"eval_loss": 2.225752115249634, |
|
"eval_runtime": 350.4379, |
|
"eval_samples_per_second": 879.659, |
|
"eval_steps_per_second": 54.98, |
|
"step": 1848000 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"learning_rate": 9.293333333333333e-08, |
|
"loss": 2.2806, |
|
"step": 1856000 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"eval_loss": 2.2240936756134033, |
|
"eval_runtime": 350.1455, |
|
"eval_samples_per_second": 880.394, |
|
"eval_steps_per_second": 55.026, |
|
"step": 1856000 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"eval_loss": 2.2228317260742188, |
|
"eval_runtime": 352.1603, |
|
"eval_samples_per_second": 875.357, |
|
"eval_steps_per_second": 54.711, |
|
"step": 1864000 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 9.02e-08, |
|
"loss": 2.2822, |
|
"step": 1872000 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"eval_loss": 2.2218220233917236, |
|
"eval_runtime": 352.822, |
|
"eval_samples_per_second": 873.715, |
|
"eval_steps_per_second": 54.608, |
|
"step": 1872000 |
|
}, |
|
{ |
|
"epoch": 5.14, |
|
"eval_loss": 2.227595329284668, |
|
"eval_runtime": 350.713, |
|
"eval_samples_per_second": 878.969, |
|
"eval_steps_per_second": 54.937, |
|
"step": 1880000 |
|
}, |
|
{ |
|
"epoch": 5.16, |
|
"learning_rate": 8.746666666666667e-08, |
|
"loss": 2.2866, |
|
"step": 1888000 |
|
}, |
|
{ |
|
"epoch": 5.16, |
|
"eval_loss": 2.2233176231384277, |
|
"eval_runtime": 351.7914, |
|
"eval_samples_per_second": 876.275, |
|
"eval_steps_per_second": 54.768, |
|
"step": 1888000 |
|
}, |
|
{ |
|
"epoch": 5.18, |
|
"eval_loss": 2.2265591621398926, |
|
"eval_runtime": 350.4277, |
|
"eval_samples_per_second": 879.685, |
|
"eval_steps_per_second": 54.981, |
|
"step": 1896000 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 8.473333333333334e-08, |
|
"loss": 2.2831, |
|
"step": 1904000 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"eval_loss": 2.2230618000030518, |
|
"eval_runtime": 353.0183, |
|
"eval_samples_per_second": 873.229, |
|
"eval_steps_per_second": 54.578, |
|
"step": 1904000 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"eval_loss": 2.224078416824341, |
|
"eval_runtime": 349.9623, |
|
"eval_samples_per_second": 880.855, |
|
"eval_steps_per_second": 55.055, |
|
"step": 1912000 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 8.2e-08, |
|
"loss": 2.2875, |
|
"step": 1920000 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"eval_loss": 2.226329803466797, |
|
"eval_runtime": 352.5996, |
|
"eval_samples_per_second": 874.266, |
|
"eval_steps_per_second": 54.643, |
|
"step": 1920000 |
|
}, |
|
{ |
|
"epoch": 5.27, |
|
"eval_loss": 2.22342586517334, |
|
"eval_runtime": 350.4146, |
|
"eval_samples_per_second": 879.718, |
|
"eval_steps_per_second": 54.983, |
|
"step": 1928000 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"learning_rate": 7.926666666666666e-08, |
|
"loss": 2.2802, |
|
"step": 1936000 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"eval_loss": 2.2269349098205566, |
|
"eval_runtime": 352.2475, |
|
"eval_samples_per_second": 875.14, |
|
"eval_steps_per_second": 54.697, |
|
"step": 1936000 |
|
}, |
|
{ |
|
"epoch": 5.31, |
|
"eval_loss": 2.2252981662750244, |
|
"eval_runtime": 351.5457, |
|
"eval_samples_per_second": 876.887, |
|
"eval_steps_per_second": 54.807, |
|
"step": 1944000 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 7.653333333333333e-08, |
|
"loss": 2.2905, |
|
"step": 1952000 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"eval_loss": 2.2190773487091064, |
|
"eval_runtime": 351.1477, |
|
"eval_samples_per_second": 877.881, |
|
"eval_steps_per_second": 54.869, |
|
"step": 1952000 |
|
}, |
|
{ |
|
"epoch": 5.35, |
|
"eval_loss": 2.2216830253601074, |
|
"eval_runtime": 350.8504, |
|
"eval_samples_per_second": 878.625, |
|
"eval_steps_per_second": 54.915, |
|
"step": 1960000 |
|
}, |
|
{ |
|
"epoch": 5.38, |
|
"learning_rate": 7.38e-08, |
|
"loss": 2.282, |
|
"step": 1968000 |
|
}, |
|
{ |
|
"epoch": 5.38, |
|
"eval_loss": 2.221177577972412, |
|
"eval_runtime": 352.0571, |
|
"eval_samples_per_second": 875.614, |
|
"eval_steps_per_second": 54.727, |
|
"step": 1968000 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"eval_loss": 2.221277952194214, |
|
"eval_runtime": 350.8021, |
|
"eval_samples_per_second": 878.746, |
|
"eval_steps_per_second": 54.923, |
|
"step": 1976000 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"learning_rate": 7.106666666666667e-08, |
|
"loss": 2.2798, |
|
"step": 1984000 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"eval_loss": 2.2217955589294434, |
|
"eval_runtime": 352.0687, |
|
"eval_samples_per_second": 875.585, |
|
"eval_steps_per_second": 54.725, |
|
"step": 1984000 |
|
}, |
|
{ |
|
"epoch": 5.44, |
|
"eval_loss": 2.222245216369629, |
|
"eval_runtime": 351.0214, |
|
"eval_samples_per_second": 878.197, |
|
"eval_steps_per_second": 54.888, |
|
"step": 1992000 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"learning_rate": 6.833333333333332e-08, |
|
"loss": 2.2864, |
|
"step": 2000000 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"eval_loss": 2.2212188243865967, |
|
"eval_runtime": 351.5119, |
|
"eval_samples_per_second": 876.972, |
|
"eval_steps_per_second": 54.812, |
|
"step": 2000000 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"eval_loss": 2.228152275085449, |
|
"eval_runtime": 351.2144, |
|
"eval_samples_per_second": 877.715, |
|
"eval_steps_per_second": 54.858, |
|
"step": 2008000 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"learning_rate": 6.56e-08, |
|
"loss": 2.2867, |
|
"step": 2016000 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"eval_loss": 2.2304341793060303, |
|
"eval_runtime": 352.0733, |
|
"eval_samples_per_second": 875.573, |
|
"eval_steps_per_second": 54.724, |
|
"step": 2016000 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"eval_loss": 2.2221643924713135, |
|
"eval_runtime": 353.329, |
|
"eval_samples_per_second": 872.462, |
|
"eval_steps_per_second": 54.53, |
|
"step": 2024000 |
|
}, |
|
{ |
|
"epoch": 5.55, |
|
"learning_rate": 6.286666666666666e-08, |
|
"loss": 2.2834, |
|
"step": 2032000 |
|
}, |
|
{ |
|
"epoch": 5.55, |
|
"eval_loss": 2.2284741401672363, |
|
"eval_runtime": 352.1812, |
|
"eval_samples_per_second": 875.305, |
|
"eval_steps_per_second": 54.708, |
|
"step": 2032000 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"eval_loss": 2.222963571548462, |
|
"eval_runtime": 351.3628, |
|
"eval_samples_per_second": 877.344, |
|
"eval_steps_per_second": 54.835, |
|
"step": 2040000 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"learning_rate": 6.013333333333333e-08, |
|
"loss": 2.2851, |
|
"step": 2048000 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"eval_loss": 2.223684072494507, |
|
"eval_runtime": 351.5337, |
|
"eval_samples_per_second": 876.917, |
|
"eval_steps_per_second": 54.808, |
|
"step": 2048000 |
|
}, |
|
{ |
|
"epoch": 5.62, |
|
"eval_loss": 2.228254795074463, |
|
"eval_runtime": 352.7325, |
|
"eval_samples_per_second": 873.937, |
|
"eval_steps_per_second": 54.622, |
|
"step": 2056000 |
|
}, |
|
{ |
|
"epoch": 5.64, |
|
"learning_rate": 5.7400000000000004e-08, |
|
"loss": 2.2774, |
|
"step": 2064000 |
|
}, |
|
{ |
|
"epoch": 5.64, |
|
"eval_loss": 2.2232439517974854, |
|
"eval_runtime": 352.1455, |
|
"eval_samples_per_second": 875.394, |
|
"eval_steps_per_second": 54.713, |
|
"step": 2064000 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"eval_loss": 2.2282047271728516, |
|
"eval_runtime": 352.0221, |
|
"eval_samples_per_second": 875.701, |
|
"eval_steps_per_second": 54.732, |
|
"step": 2072000 |
|
}, |
|
{ |
|
"epoch": 5.68, |
|
"learning_rate": 5.4666666666666666e-08, |
|
"loss": 2.277, |
|
"step": 2080000 |
|
}, |
|
{ |
|
"epoch": 5.68, |
|
"eval_loss": 2.2271482944488525, |
|
"eval_runtime": 351.8672, |
|
"eval_samples_per_second": 876.086, |
|
"eval_steps_per_second": 54.756, |
|
"step": 2080000 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"eval_loss": 2.2255890369415283, |
|
"eval_runtime": 351.9475, |
|
"eval_samples_per_second": 875.886, |
|
"eval_steps_per_second": 54.744, |
|
"step": 2088000 |
|
}, |
|
{ |
|
"epoch": 5.73, |
|
"learning_rate": 5.1933333333333335e-08, |
|
"loss": 2.2868, |
|
"step": 2096000 |
|
}, |
|
{ |
|
"epoch": 5.73, |
|
"eval_loss": 2.2252378463745117, |
|
"eval_runtime": 352.5562, |
|
"eval_samples_per_second": 874.374, |
|
"eval_steps_per_second": 54.649, |
|
"step": 2096000 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"eval_loss": 2.228463888168335, |
|
"eval_runtime": 352.4037, |
|
"eval_samples_per_second": 874.753, |
|
"eval_steps_per_second": 54.673, |
|
"step": 2104000 |
|
}, |
|
{ |
|
"epoch": 5.77, |
|
"learning_rate": 4.92e-08, |
|
"loss": 2.2727, |
|
"step": 2112000 |
|
}, |
|
{ |
|
"epoch": 5.77, |
|
"eval_loss": 2.2250723838806152, |
|
"eval_runtime": 351.7999, |
|
"eval_samples_per_second": 876.254, |
|
"eval_steps_per_second": 54.767, |
|
"step": 2112000 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"eval_loss": 2.2239432334899902, |
|
"eval_runtime": 352.3889, |
|
"eval_samples_per_second": 874.789, |
|
"eval_steps_per_second": 54.675, |
|
"step": 2120000 |
|
}, |
|
{ |
|
"epoch": 5.81, |
|
"learning_rate": 4.6466666666666666e-08, |
|
"loss": 2.2803, |
|
"step": 2128000 |
|
}, |
|
{ |
|
"epoch": 5.81, |
|
"eval_loss": 2.228705883026123, |
|
"eval_runtime": 352.9086, |
|
"eval_samples_per_second": 873.501, |
|
"eval_steps_per_second": 54.595, |
|
"step": 2128000 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"eval_loss": 2.227353096008301, |
|
"eval_runtime": 353.6833, |
|
"eval_samples_per_second": 871.588, |
|
"eval_steps_per_second": 54.475, |
|
"step": 2136000 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"learning_rate": 4.3733333333333335e-08, |
|
"loss": 2.2785, |
|
"step": 2144000 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"eval_loss": 2.2227251529693604, |
|
"eval_runtime": 353.3218, |
|
"eval_samples_per_second": 872.479, |
|
"eval_steps_per_second": 54.531, |
|
"step": 2144000 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"eval_loss": 2.226724863052368, |
|
"eval_runtime": 352.6583, |
|
"eval_samples_per_second": 874.121, |
|
"eval_steps_per_second": 54.634, |
|
"step": 2152000 |
|
}, |
|
{ |
|
"epoch": 5.9, |
|
"learning_rate": 4.1e-08, |
|
"loss": 2.2829, |
|
"step": 2160000 |
|
}, |
|
{ |
|
"epoch": 5.9, |
|
"eval_loss": 2.225097894668579, |
|
"eval_runtime": 352.6716, |
|
"eval_samples_per_second": 874.088, |
|
"eval_steps_per_second": 54.632, |
|
"step": 2160000 |
|
}, |
|
{ |
|
"epoch": 5.92, |
|
"eval_loss": 2.222792387008667, |
|
"eval_runtime": 353.112, |
|
"eval_samples_per_second": 872.998, |
|
"eval_steps_per_second": 54.563, |
|
"step": 2168000 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"learning_rate": 3.8266666666666665e-08, |
|
"loss": 2.2816, |
|
"step": 2176000 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"eval_loss": 2.22352933883667, |
|
"eval_runtime": 353.4605, |
|
"eval_samples_per_second": 872.137, |
|
"eval_steps_per_second": 54.51, |
|
"step": 2176000 |
|
}, |
|
{ |
|
"epoch": 5.97, |
|
"eval_loss": 2.22891902923584, |
|
"eval_runtime": 352.6495, |
|
"eval_samples_per_second": 874.143, |
|
"eval_steps_per_second": 54.635, |
|
"step": 2184000 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"learning_rate": 3.5533333333333334e-08, |
|
"loss": 2.283, |
|
"step": 2192000 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"eval_loss": 2.2238047122955322, |
|
"eval_runtime": 353.765, |
|
"eval_samples_per_second": 871.386, |
|
"eval_steps_per_second": 54.463, |
|
"step": 2192000 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"eval_loss": 2.224536657333374, |
|
"eval_runtime": 353.3721, |
|
"eval_samples_per_second": 872.355, |
|
"eval_steps_per_second": 54.523, |
|
"step": 2200000 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"learning_rate": 3.28e-08, |
|
"loss": 2.2761, |
|
"step": 2208000 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"eval_loss": 2.2296528816223145, |
|
"eval_runtime": 353.2693, |
|
"eval_samples_per_second": 872.609, |
|
"eval_steps_per_second": 54.539, |
|
"step": 2208000 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"eval_loss": 2.230041265487671, |
|
"eval_runtime": 355.1155, |
|
"eval_samples_per_second": 868.073, |
|
"eval_steps_per_second": 54.256, |
|
"step": 2216000 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"learning_rate": 3.0066666666666665e-08, |
|
"loss": 2.2823, |
|
"step": 2224000 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"eval_loss": 2.22680401802063, |
|
"eval_runtime": 364.0284, |
|
"eval_samples_per_second": 846.818, |
|
"eval_steps_per_second": 52.927, |
|
"step": 2224000 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"eval_loss": 2.2252140045166016, |
|
"eval_runtime": 353.3986, |
|
"eval_samples_per_second": 872.29, |
|
"eval_steps_per_second": 54.519, |
|
"step": 2232000 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"learning_rate": 2.7333333333333333e-08, |
|
"loss": 2.2715, |
|
"step": 2240000 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"eval_loss": 2.2239723205566406, |
|
"eval_runtime": 353.6515, |
|
"eval_samples_per_second": 871.666, |
|
"eval_steps_per_second": 54.48, |
|
"step": 2240000 |
|
}, |
|
{ |
|
"epoch": 6.14, |
|
"eval_loss": 2.2233335971832275, |
|
"eval_runtime": 353.2896, |
|
"eval_samples_per_second": 872.559, |
|
"eval_steps_per_second": 54.536, |
|
"step": 2248000 |
|
}, |
|
{ |
|
"epoch": 6.16, |
|
"learning_rate": 2.46e-08, |
|
"loss": 2.2809, |
|
"step": 2256000 |
|
}, |
|
{ |
|
"epoch": 6.16, |
|
"eval_loss": 2.223785161972046, |
|
"eval_runtime": 354.1994, |
|
"eval_samples_per_second": 870.318, |
|
"eval_steps_per_second": 54.396, |
|
"step": 2256000 |
|
}, |
|
{ |
|
"epoch": 6.18, |
|
"eval_loss": 2.220431089401245, |
|
"eval_runtime": 353.1693, |
|
"eval_samples_per_second": 872.856, |
|
"eval_steps_per_second": 54.555, |
|
"step": 2264000 |
|
}, |
|
{ |
|
"epoch": 6.21, |
|
"learning_rate": 2.1866666666666667e-08, |
|
"loss": 2.2823, |
|
"step": 2272000 |
|
}, |
|
{ |
|
"epoch": 6.21, |
|
"eval_loss": 2.2218103408813477, |
|
"eval_runtime": 354.0959, |
|
"eval_samples_per_second": 870.572, |
|
"eval_steps_per_second": 54.412, |
|
"step": 2272000 |
|
}, |
|
{ |
|
"epoch": 6.23, |
|
"eval_loss": 2.2294769287109375, |
|
"eval_runtime": 353.4329, |
|
"eval_samples_per_second": 872.205, |
|
"eval_steps_per_second": 54.514, |
|
"step": 2280000 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"learning_rate": 1.9133333333333333e-08, |
|
"loss": 2.2848, |
|
"step": 2288000 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"eval_loss": 2.2298202514648438, |
|
"eval_runtime": 353.5589, |
|
"eval_samples_per_second": 871.894, |
|
"eval_steps_per_second": 54.494, |
|
"step": 2288000 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"eval_loss": 2.2298853397369385, |
|
"eval_runtime": 354.6654, |
|
"eval_samples_per_second": 869.174, |
|
"eval_steps_per_second": 54.324, |
|
"step": 2296000 |
|
}, |
|
{ |
|
"epoch": 6.29, |
|
"learning_rate": 1.64e-08, |
|
"loss": 2.2847, |
|
"step": 2304000 |
|
}, |
|
{ |
|
"epoch": 6.29, |
|
"eval_loss": 2.224604606628418, |
|
"eval_runtime": 354.3384, |
|
"eval_samples_per_second": 869.976, |
|
"eval_steps_per_second": 54.375, |
|
"step": 2304000 |
|
}, |
|
{ |
|
"epoch": 6.32, |
|
"eval_loss": 2.222991704940796, |
|
"eval_runtime": 353.8486, |
|
"eval_samples_per_second": 871.181, |
|
"eval_steps_per_second": 54.45, |
|
"step": 2312000 |
|
}, |
|
{ |
|
"epoch": 6.34, |
|
"learning_rate": 1.3666666666666667e-08, |
|
"loss": 2.2783, |
|
"step": 2320000 |
|
}, |
|
{ |
|
"epoch": 6.34, |
|
"eval_loss": 2.2260053157806396, |
|
"eval_runtime": 354.5313, |
|
"eval_samples_per_second": 869.503, |
|
"eval_steps_per_second": 54.345, |
|
"step": 2320000 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"eval_loss": 2.217644453048706, |
|
"eval_runtime": 355.0802, |
|
"eval_samples_per_second": 868.159, |
|
"eval_steps_per_second": 54.261, |
|
"step": 2328000 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"learning_rate": 1.0933333333333334e-08, |
|
"loss": 2.2791, |
|
"step": 2336000 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"eval_loss": 2.221074342727661, |
|
"eval_runtime": 353.9231, |
|
"eval_samples_per_second": 870.997, |
|
"eval_steps_per_second": 54.438, |
|
"step": 2336000 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"eval_loss": 2.2261996269226074, |
|
"eval_runtime": 355.1404, |
|
"eval_samples_per_second": 868.012, |
|
"eval_steps_per_second": 54.252, |
|
"step": 2344000 |
|
}, |
|
{ |
|
"epoch": 6.43, |
|
"learning_rate": 8.2e-09, |
|
"loss": 2.2797, |
|
"step": 2352000 |
|
}, |
|
{ |
|
"epoch": 6.43, |
|
"eval_loss": 2.2293312549591064, |
|
"eval_runtime": 354.4371, |
|
"eval_samples_per_second": 869.734, |
|
"eval_steps_per_second": 54.359, |
|
"step": 2352000 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"eval_loss": 2.221876859664917, |
|
"eval_runtime": 356.764, |
|
"eval_samples_per_second": 864.061, |
|
"eval_steps_per_second": 54.005, |
|
"step": 2360000 |
|
}, |
|
{ |
|
"epoch": 6.47, |
|
"learning_rate": 5.466666666666667e-09, |
|
"loss": 2.2784, |
|
"step": 2368000 |
|
}, |
|
{ |
|
"epoch": 6.47, |
|
"eval_loss": 2.2249085903167725, |
|
"eval_runtime": 354.8521, |
|
"eval_samples_per_second": 868.717, |
|
"eval_steps_per_second": 54.296, |
|
"step": 2368000 |
|
}, |
|
{ |
|
"epoch": 6.49, |
|
"eval_loss": 2.2216453552246094, |
|
"eval_runtime": 355.228, |
|
"eval_samples_per_second": 867.798, |
|
"eval_steps_per_second": 54.238, |
|
"step": 2376000 |
|
}, |
|
{ |
|
"epoch": 6.51, |
|
"learning_rate": 2.7333333333333334e-09, |
|
"loss": 2.271, |
|
"step": 2384000 |
|
}, |
|
{ |
|
"epoch": 6.51, |
|
"eval_loss": 2.225550413131714, |
|
"eval_runtime": 355.1559, |
|
"eval_samples_per_second": 867.974, |
|
"eval_steps_per_second": 54.249, |
|
"step": 2384000 |
|
}, |
|
{ |
|
"epoch": 6.53, |
|
"eval_loss": 2.2295968532562256, |
|
"eval_runtime": 355.7601, |
|
"eval_samples_per_second": 866.5, |
|
"eval_steps_per_second": 54.157, |
|
"step": 2392000 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"learning_rate": 0.0, |
|
"loss": 2.2787, |
|
"step": 2400000 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"eval_loss": 2.2274532318115234, |
|
"eval_runtime": 356.8124, |
|
"eval_samples_per_second": 863.944, |
|
"eval_steps_per_second": 53.998, |
|
"step": 2400000 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"step": 2400000, |
|
"total_flos": 7.335203818962209e+17, |
|
"train_loss": 2.304743234049479, |
|
"train_runtime": 256347.6755, |
|
"train_samples_per_second": 149.797, |
|
"train_steps_per_second": 9.362 |
|
} |
|
], |
|
"logging_steps": 16000, |
|
"max_steps": 2400000, |
|
"num_train_epochs": 7, |
|
"save_steps": 32000, |
|
"total_flos": 7.335203818962209e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|