|
{ |
|
"best_metric": 2.596395254135132, |
|
"best_model_checkpoint": "./model_tweets_2020_Q4_50/checkpoint-2112000", |
|
"epoch": 10.105348255564257, |
|
"eval_steps": 8000, |
|
"global_step": 2400000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"eval_loss": 2.9659674167633057, |
|
"eval_runtime": 427.2887, |
|
"eval_samples_per_second": 468.063, |
|
"eval_steps_per_second": 29.254, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.0726666666666665e-07, |
|
"loss": 3.1627, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 2.875443458557129, |
|
"eval_runtime": 428.9511, |
|
"eval_samples_per_second": 466.249, |
|
"eval_steps_per_second": 29.141, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 2.8262600898742676, |
|
"eval_runtime": 428.7189, |
|
"eval_samples_per_second": 466.501, |
|
"eval_steps_per_second": 29.157, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.0453333333333336e-07, |
|
"loss": 2.9611, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 2.7973239421844482, |
|
"eval_runtime": 427.7192, |
|
"eval_samples_per_second": 467.592, |
|
"eval_steps_per_second": 29.225, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 2.774101734161377, |
|
"eval_runtime": 430.9135, |
|
"eval_samples_per_second": 464.126, |
|
"eval_steps_per_second": 29.008, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.018e-07, |
|
"loss": 2.8986, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 2.757391929626465, |
|
"eval_runtime": 434.8184, |
|
"eval_samples_per_second": 459.958, |
|
"eval_steps_per_second": 28.748, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 2.741281509399414, |
|
"eval_runtime": 431.3008, |
|
"eval_samples_per_second": 463.709, |
|
"eval_steps_per_second": 28.982, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.9906666666666667e-07, |
|
"loss": 2.8726, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_loss": 2.724008798599243, |
|
"eval_runtime": 431.9972, |
|
"eval_samples_per_second": 462.961, |
|
"eval_steps_per_second": 28.935, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_loss": 2.7238657474517822, |
|
"eval_runtime": 428.6784, |
|
"eval_samples_per_second": 466.546, |
|
"eval_steps_per_second": 29.159, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.963333333333333e-07, |
|
"loss": 2.8558, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 2.7132034301757812, |
|
"eval_runtime": 428.5232, |
|
"eval_samples_per_second": 466.714, |
|
"eval_steps_per_second": 29.17, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_loss": 2.702976942062378, |
|
"eval_runtime": 434.0957, |
|
"eval_samples_per_second": 460.723, |
|
"eval_steps_per_second": 28.795, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.936e-07, |
|
"loss": 2.8459, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 2.7112441062927246, |
|
"eval_runtime": 430.6879, |
|
"eval_samples_per_second": 464.369, |
|
"eval_steps_per_second": 29.023, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 2.6918396949768066, |
|
"eval_runtime": 432.9686, |
|
"eval_samples_per_second": 461.923, |
|
"eval_steps_per_second": 28.87, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.908666666666667e-07, |
|
"loss": 2.8379, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_loss": 2.701714515686035, |
|
"eval_runtime": 431.7861, |
|
"eval_samples_per_second": 463.188, |
|
"eval_steps_per_second": 28.95, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 2.6920413970947266, |
|
"eval_runtime": 432.0079, |
|
"eval_samples_per_second": 462.95, |
|
"eval_steps_per_second": 28.935, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 3.8813333333333334e-07, |
|
"loss": 2.8265, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_loss": 2.6970582008361816, |
|
"eval_runtime": 428.3996, |
|
"eval_samples_per_second": 466.849, |
|
"eval_steps_per_second": 29.178, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_loss": 2.692416191101074, |
|
"eval_runtime": 431.1394, |
|
"eval_samples_per_second": 463.882, |
|
"eval_steps_per_second": 28.993, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.854e-07, |
|
"loss": 2.8227, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_loss": 2.69515323638916, |
|
"eval_runtime": 428.2249, |
|
"eval_samples_per_second": 467.04, |
|
"eval_steps_per_second": 29.19, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_loss": 2.681142807006836, |
|
"eval_runtime": 427.7295, |
|
"eval_samples_per_second": 467.581, |
|
"eval_steps_per_second": 29.224, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.8266666666666665e-07, |
|
"loss": 2.8209, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_loss": 2.6828596591949463, |
|
"eval_runtime": 429.1179, |
|
"eval_samples_per_second": 466.068, |
|
"eval_steps_per_second": 29.13, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_loss": 2.6882851123809814, |
|
"eval_runtime": 430.1324, |
|
"eval_samples_per_second": 464.968, |
|
"eval_steps_per_second": 29.061, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.799333333333333e-07, |
|
"loss": 2.8147, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_loss": 2.6675167083740234, |
|
"eval_runtime": 428.1706, |
|
"eval_samples_per_second": 467.099, |
|
"eval_steps_per_second": 29.194, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_loss": 2.66744327545166, |
|
"eval_runtime": 427.87, |
|
"eval_samples_per_second": 467.427, |
|
"eval_steps_per_second": 29.214, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.772e-07, |
|
"loss": 2.8077, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_loss": 2.6661171913146973, |
|
"eval_runtime": 426.1667, |
|
"eval_samples_per_second": 469.295, |
|
"eval_steps_per_second": 29.331, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_loss": 2.6773271560668945, |
|
"eval_runtime": 430.5929, |
|
"eval_samples_per_second": 464.471, |
|
"eval_steps_per_second": 29.03, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.7446666666666667e-07, |
|
"loss": 2.8058, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_loss": 2.6734445095062256, |
|
"eval_runtime": 431.7928, |
|
"eval_samples_per_second": 463.18, |
|
"eval_steps_per_second": 28.949, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_loss": 2.6741647720336914, |
|
"eval_runtime": 433.2608, |
|
"eval_samples_per_second": 461.611, |
|
"eval_steps_per_second": 28.851, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.7173333333333333e-07, |
|
"loss": 2.812, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_loss": 2.6665947437286377, |
|
"eval_runtime": 429.9196, |
|
"eval_samples_per_second": 465.199, |
|
"eval_steps_per_second": 29.075, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_loss": 2.6641786098480225, |
|
"eval_runtime": 432.0068, |
|
"eval_samples_per_second": 462.951, |
|
"eval_steps_per_second": 28.935, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.69e-07, |
|
"loss": 2.8025, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"eval_loss": 2.668074607849121, |
|
"eval_runtime": 429.4276, |
|
"eval_samples_per_second": 465.732, |
|
"eval_steps_per_second": 29.109, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"eval_loss": 2.6663010120391846, |
|
"eval_runtime": 430.5757, |
|
"eval_samples_per_second": 464.49, |
|
"eval_steps_per_second": 29.031, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.6626666666666664e-07, |
|
"loss": 2.809, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"eval_loss": 2.6645491123199463, |
|
"eval_runtime": 429.4305, |
|
"eval_samples_per_second": 465.728, |
|
"eval_steps_per_second": 29.108, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_loss": 2.6528775691986084, |
|
"eval_runtime": 430.2815, |
|
"eval_samples_per_second": 464.807, |
|
"eval_steps_per_second": 29.051, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.6353333333333335e-07, |
|
"loss": 2.8073, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_loss": 2.662346839904785, |
|
"eval_runtime": 427.2132, |
|
"eval_samples_per_second": 468.146, |
|
"eval_steps_per_second": 29.259, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"eval_loss": 2.655132293701172, |
|
"eval_runtime": 427.6981, |
|
"eval_samples_per_second": 467.615, |
|
"eval_steps_per_second": 29.226, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.608e-07, |
|
"loss": 2.8005, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"eval_loss": 2.664275646209717, |
|
"eval_runtime": 428.4044, |
|
"eval_samples_per_second": 466.844, |
|
"eval_steps_per_second": 29.178, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"eval_loss": 2.6627519130706787, |
|
"eval_runtime": 432.9315, |
|
"eval_samples_per_second": 461.962, |
|
"eval_steps_per_second": 28.873, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.5806666666666666e-07, |
|
"loss": 2.7988, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"eval_loss": 2.658301591873169, |
|
"eval_runtime": 430.042, |
|
"eval_samples_per_second": 465.066, |
|
"eval_steps_per_second": 29.067, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_loss": 2.659410238265991, |
|
"eval_runtime": 430.2191, |
|
"eval_samples_per_second": 464.875, |
|
"eval_steps_per_second": 29.055, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.553333333333333e-07, |
|
"loss": 2.7887, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_loss": 2.654367685317993, |
|
"eval_runtime": 428.3466, |
|
"eval_samples_per_second": 466.907, |
|
"eval_steps_per_second": 29.182, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"eval_loss": 2.6515774726867676, |
|
"eval_runtime": 428.1496, |
|
"eval_samples_per_second": 467.122, |
|
"eval_steps_per_second": 29.195, |
|
"step": 328000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 3.5259999999999997e-07, |
|
"loss": 2.7964, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"eval_loss": 2.655459403991699, |
|
"eval_runtime": 428.9032, |
|
"eval_samples_per_second": 466.301, |
|
"eval_steps_per_second": 29.144, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"eval_loss": 2.65506911277771, |
|
"eval_runtime": 431.185, |
|
"eval_samples_per_second": 463.833, |
|
"eval_steps_per_second": 28.99, |
|
"step": 344000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 3.498666666666667e-07, |
|
"loss": 2.7919, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_loss": 2.6507833003997803, |
|
"eval_runtime": 429.9054, |
|
"eval_samples_per_second": 465.214, |
|
"eval_steps_per_second": 29.076, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_loss": 2.648573637008667, |
|
"eval_runtime": 426.8576, |
|
"eval_samples_per_second": 468.536, |
|
"eval_steps_per_second": 29.284, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 3.4713333333333333e-07, |
|
"loss": 2.8058, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_loss": 2.648383140563965, |
|
"eval_runtime": 426.9677, |
|
"eval_samples_per_second": 468.415, |
|
"eval_steps_per_second": 29.276, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"eval_loss": 2.653191566467285, |
|
"eval_runtime": 429.3713, |
|
"eval_samples_per_second": 465.793, |
|
"eval_steps_per_second": 29.112, |
|
"step": 376000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 3.444e-07, |
|
"loss": 2.796, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"eval_loss": 2.6472740173339844, |
|
"eval_runtime": 426.8024, |
|
"eval_samples_per_second": 468.596, |
|
"eval_steps_per_second": 29.288, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"eval_loss": 2.6489272117614746, |
|
"eval_runtime": 428.8708, |
|
"eval_samples_per_second": 466.336, |
|
"eval_steps_per_second": 29.146, |
|
"step": 392000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 3.416666666666667e-07, |
|
"loss": 2.799, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"eval_loss": 2.6475629806518555, |
|
"eval_runtime": 427.9765, |
|
"eval_samples_per_second": 467.311, |
|
"eval_steps_per_second": 29.207, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"eval_loss": 2.6416804790496826, |
|
"eval_runtime": 426.9442, |
|
"eval_samples_per_second": 468.441, |
|
"eval_steps_per_second": 29.278, |
|
"step": 408000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 3.3893333333333335e-07, |
|
"loss": 2.7991, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_loss": 2.6544759273529053, |
|
"eval_runtime": 427.1377, |
|
"eval_samples_per_second": 468.228, |
|
"eval_steps_per_second": 29.265, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"eval_loss": 2.6465859413146973, |
|
"eval_runtime": 428.9124, |
|
"eval_samples_per_second": 466.291, |
|
"eval_steps_per_second": 29.143, |
|
"step": 424000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 3.3619999999999995e-07, |
|
"loss": 2.792, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"eval_loss": 2.6396875381469727, |
|
"eval_runtime": 428.572, |
|
"eval_samples_per_second": 466.661, |
|
"eval_steps_per_second": 29.167, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"eval_loss": 2.642756938934326, |
|
"eval_runtime": 428.9912, |
|
"eval_samples_per_second": 466.205, |
|
"eval_steps_per_second": 29.138, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 3.3346666666666666e-07, |
|
"loss": 2.7972, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_loss": 2.644583225250244, |
|
"eval_runtime": 427.6258, |
|
"eval_samples_per_second": 467.694, |
|
"eval_steps_per_second": 29.231, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"eval_loss": 2.6433892250061035, |
|
"eval_runtime": 429.7813, |
|
"eval_samples_per_second": 465.348, |
|
"eval_steps_per_second": 29.085, |
|
"step": 456000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 3.307333333333333e-07, |
|
"loss": 2.798, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"eval_loss": 2.648988962173462, |
|
"eval_runtime": 429.7597, |
|
"eval_samples_per_second": 465.372, |
|
"eval_steps_per_second": 29.086, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_loss": 2.6502037048339844, |
|
"eval_runtime": 430.0385, |
|
"eval_samples_per_second": 465.07, |
|
"eval_steps_per_second": 29.067, |
|
"step": 472000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 3.28e-07, |
|
"loss": 2.7914, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"eval_loss": 2.6407032012939453, |
|
"eval_runtime": 430.7536, |
|
"eval_samples_per_second": 464.298, |
|
"eval_steps_per_second": 29.019, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"eval_loss": 2.6283910274505615, |
|
"eval_runtime": 433.2696, |
|
"eval_samples_per_second": 461.602, |
|
"eval_steps_per_second": 28.85, |
|
"step": 488000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 3.252666666666667e-07, |
|
"loss": 2.7932, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"eval_loss": 2.642556667327881, |
|
"eval_runtime": 432.5252, |
|
"eval_samples_per_second": 462.396, |
|
"eval_steps_per_second": 28.9, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"eval_loss": 2.6423070430755615, |
|
"eval_runtime": 432.6328, |
|
"eval_samples_per_second": 462.281, |
|
"eval_steps_per_second": 28.893, |
|
"step": 504000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 3.2253333333333334e-07, |
|
"loss": 2.787, |
|
"step": 512000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"eval_loss": 2.638451337814331, |
|
"eval_runtime": 436.1841, |
|
"eval_samples_per_second": 458.517, |
|
"eval_steps_per_second": 28.658, |
|
"step": 512000 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"eval_loss": 2.6388065814971924, |
|
"eval_runtime": 430.5381, |
|
"eval_samples_per_second": 464.53, |
|
"eval_steps_per_second": 29.033, |
|
"step": 520000 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 3.198e-07, |
|
"loss": 2.7893, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"eval_loss": 2.642207145690918, |
|
"eval_runtime": 428.0052, |
|
"eval_samples_per_second": 467.279, |
|
"eval_steps_per_second": 29.205, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"eval_loss": 2.6409590244293213, |
|
"eval_runtime": 429.1736, |
|
"eval_samples_per_second": 466.007, |
|
"eval_steps_per_second": 29.126, |
|
"step": 536000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 3.1706666666666665e-07, |
|
"loss": 2.7889, |
|
"step": 544000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"eval_loss": 2.633716583251953, |
|
"eval_runtime": 430.2951, |
|
"eval_samples_per_second": 464.793, |
|
"eval_steps_per_second": 29.05, |
|
"step": 544000 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"eval_loss": 2.627978801727295, |
|
"eval_runtime": 437.5919, |
|
"eval_samples_per_second": 457.042, |
|
"eval_steps_per_second": 28.565, |
|
"step": 552000 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 3.1433333333333336e-07, |
|
"loss": 2.791, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"eval_loss": 2.6364176273345947, |
|
"eval_runtime": 435.9143, |
|
"eval_samples_per_second": 458.801, |
|
"eval_steps_per_second": 28.675, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"eval_loss": 2.6341359615325928, |
|
"eval_runtime": 436.6584, |
|
"eval_samples_per_second": 458.019, |
|
"eval_steps_per_second": 28.626, |
|
"step": 568000 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 3.116e-07, |
|
"loss": 2.7883, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"eval_loss": 2.6317176818847656, |
|
"eval_runtime": 433.1883, |
|
"eval_samples_per_second": 461.688, |
|
"eval_steps_per_second": 28.856, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"eval_loss": 2.6277999877929688, |
|
"eval_runtime": 430.7277, |
|
"eval_samples_per_second": 464.326, |
|
"eval_steps_per_second": 29.021, |
|
"step": 584000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 3.0886666666666667e-07, |
|
"loss": 2.7889, |
|
"step": 592000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"eval_loss": 2.635715961456299, |
|
"eval_runtime": 431.7357, |
|
"eval_samples_per_second": 463.242, |
|
"eval_steps_per_second": 28.953, |
|
"step": 592000 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"eval_loss": 2.6340713500976562, |
|
"eval_runtime": 431.0234, |
|
"eval_samples_per_second": 464.007, |
|
"eval_steps_per_second": 29.001, |
|
"step": 600000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 3.061333333333333e-07, |
|
"loss": 2.7838, |
|
"step": 608000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"eval_loss": 2.633284091949463, |
|
"eval_runtime": 430.0207, |
|
"eval_samples_per_second": 465.089, |
|
"eval_steps_per_second": 29.068, |
|
"step": 608000 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"eval_loss": 2.638205051422119, |
|
"eval_runtime": 430.4236, |
|
"eval_samples_per_second": 464.654, |
|
"eval_steps_per_second": 29.041, |
|
"step": 616000 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 3.034e-07, |
|
"loss": 2.7873, |
|
"step": 624000 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"eval_loss": 2.6274592876434326, |
|
"eval_runtime": 429.469, |
|
"eval_samples_per_second": 465.687, |
|
"eval_steps_per_second": 29.106, |
|
"step": 624000 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"eval_loss": 2.6260039806365967, |
|
"eval_runtime": 430.4416, |
|
"eval_samples_per_second": 464.634, |
|
"eval_steps_per_second": 29.04, |
|
"step": 632000 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 3.0066666666666663e-07, |
|
"loss": 2.7813, |
|
"step": 640000 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"eval_loss": 2.63725209236145, |
|
"eval_runtime": 430.3994, |
|
"eval_samples_per_second": 464.68, |
|
"eval_steps_per_second": 29.043, |
|
"step": 640000 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"eval_loss": 2.6348888874053955, |
|
"eval_runtime": 430.1402, |
|
"eval_samples_per_second": 464.96, |
|
"eval_steps_per_second": 29.06, |
|
"step": 648000 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 2.9793333333333334e-07, |
|
"loss": 2.7858, |
|
"step": 656000 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"eval_loss": 2.622281789779663, |
|
"eval_runtime": 429.8357, |
|
"eval_samples_per_second": 465.289, |
|
"eval_steps_per_second": 29.081, |
|
"step": 656000 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"eval_loss": 2.6275925636291504, |
|
"eval_runtime": 429.5654, |
|
"eval_samples_per_second": 465.582, |
|
"eval_steps_per_second": 29.099, |
|
"step": 664000 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 2.952e-07, |
|
"loss": 2.7895, |
|
"step": 672000 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"eval_loss": 2.6354682445526123, |
|
"eval_runtime": 432.8071, |
|
"eval_samples_per_second": 462.095, |
|
"eval_steps_per_second": 28.881, |
|
"step": 672000 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"eval_loss": 2.6269936561584473, |
|
"eval_runtime": 431.2646, |
|
"eval_samples_per_second": 463.748, |
|
"eval_steps_per_second": 28.985, |
|
"step": 680000 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 2.9246666666666665e-07, |
|
"loss": 2.7873, |
|
"step": 688000 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"eval_loss": 2.624408006668091, |
|
"eval_runtime": 431.1254, |
|
"eval_samples_per_second": 463.898, |
|
"eval_steps_per_second": 28.994, |
|
"step": 688000 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"eval_loss": 2.6397435665130615, |
|
"eval_runtime": 428.8853, |
|
"eval_samples_per_second": 466.32, |
|
"eval_steps_per_second": 29.145, |
|
"step": 696000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 2.897333333333333e-07, |
|
"loss": 2.7866, |
|
"step": 704000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"eval_loss": 2.6303048133850098, |
|
"eval_runtime": 428.8276, |
|
"eval_samples_per_second": 466.383, |
|
"eval_steps_per_second": 29.149, |
|
"step": 704000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 2.6166625022888184, |
|
"eval_runtime": 430.118, |
|
"eval_samples_per_second": 464.984, |
|
"eval_steps_per_second": 29.062, |
|
"step": 712000 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 2.8699999999999996e-07, |
|
"loss": 2.7865, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"eval_loss": 2.6264894008636475, |
|
"eval_runtime": 430.8627, |
|
"eval_samples_per_second": 464.18, |
|
"eval_steps_per_second": 29.012, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"eval_loss": 2.640347957611084, |
|
"eval_runtime": 433.7872, |
|
"eval_samples_per_second": 461.051, |
|
"eval_steps_per_second": 28.816, |
|
"step": 728000 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 2.8426666666666667e-07, |
|
"loss": 2.7716, |
|
"step": 736000 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"eval_loss": 2.6247291564941406, |
|
"eval_runtime": 428.0225, |
|
"eval_samples_per_second": 467.26, |
|
"eval_steps_per_second": 29.204, |
|
"step": 736000 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"eval_loss": 2.625520706176758, |
|
"eval_runtime": 430.348, |
|
"eval_samples_per_second": 464.736, |
|
"eval_steps_per_second": 29.046, |
|
"step": 744000 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 2.815333333333333e-07, |
|
"loss": 2.779, |
|
"step": 752000 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"eval_loss": 2.6315715312957764, |
|
"eval_runtime": 429.049, |
|
"eval_samples_per_second": 466.143, |
|
"eval_steps_per_second": 29.134, |
|
"step": 752000 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"eval_loss": 2.6269681453704834, |
|
"eval_runtime": 429.2822, |
|
"eval_samples_per_second": 465.889, |
|
"eval_steps_per_second": 29.118, |
|
"step": 760000 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 2.7880000000000003e-07, |
|
"loss": 2.7811, |
|
"step": 768000 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"eval_loss": 2.626842498779297, |
|
"eval_runtime": 430.4013, |
|
"eval_samples_per_second": 464.678, |
|
"eval_steps_per_second": 29.043, |
|
"step": 768000 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"eval_loss": 2.6146929264068604, |
|
"eval_runtime": 427.5353, |
|
"eval_samples_per_second": 467.793, |
|
"eval_steps_per_second": 29.237, |
|
"step": 776000 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 2.7606666666666664e-07, |
|
"loss": 2.7797, |
|
"step": 784000 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"eval_loss": 2.6270837783813477, |
|
"eval_runtime": 429.6573, |
|
"eval_samples_per_second": 465.483, |
|
"eval_steps_per_second": 29.093, |
|
"step": 784000 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"eval_loss": 2.6242685317993164, |
|
"eval_runtime": 427.6055, |
|
"eval_samples_per_second": 467.716, |
|
"eval_steps_per_second": 29.233, |
|
"step": 792000 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 2.733333333333333e-07, |
|
"loss": 2.7798, |
|
"step": 800000 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"eval_loss": 2.623974323272705, |
|
"eval_runtime": 428.5522, |
|
"eval_samples_per_second": 466.683, |
|
"eval_steps_per_second": 29.168, |
|
"step": 800000 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"eval_loss": 2.622472047805786, |
|
"eval_runtime": 434.6013, |
|
"eval_samples_per_second": 460.187, |
|
"eval_steps_per_second": 28.762, |
|
"step": 808000 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 2.706e-07, |
|
"loss": 2.7774, |
|
"step": 816000 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"eval_loss": 2.6231884956359863, |
|
"eval_runtime": 429.2208, |
|
"eval_samples_per_second": 465.956, |
|
"eval_steps_per_second": 29.123, |
|
"step": 816000 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"eval_loss": 2.6246674060821533, |
|
"eval_runtime": 427.3461, |
|
"eval_samples_per_second": 468.0, |
|
"eval_steps_per_second": 29.25, |
|
"step": 824000 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 2.6786666666666666e-07, |
|
"loss": 2.7744, |
|
"step": 832000 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"eval_loss": 2.6269562244415283, |
|
"eval_runtime": 427.4671, |
|
"eval_samples_per_second": 467.868, |
|
"eval_steps_per_second": 29.242, |
|
"step": 832000 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"eval_loss": 2.6175224781036377, |
|
"eval_runtime": 431.5837, |
|
"eval_samples_per_second": 463.405, |
|
"eval_steps_per_second": 28.963, |
|
"step": 840000 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 2.651333333333333e-07, |
|
"loss": 2.7786, |
|
"step": 848000 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"eval_loss": 2.6263880729675293, |
|
"eval_runtime": 429.7254, |
|
"eval_samples_per_second": 465.409, |
|
"eval_steps_per_second": 29.088, |
|
"step": 848000 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"eval_loss": 2.6192069053649902, |
|
"eval_runtime": 429.8114, |
|
"eval_samples_per_second": 465.316, |
|
"eval_steps_per_second": 29.083, |
|
"step": 856000 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 2.624e-07, |
|
"loss": 2.7829, |
|
"step": 864000 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"eval_loss": 2.6278185844421387, |
|
"eval_runtime": 429.9283, |
|
"eval_samples_per_second": 465.189, |
|
"eval_steps_per_second": 29.075, |
|
"step": 864000 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"eval_loss": 2.623713731765747, |
|
"eval_runtime": 439.0054, |
|
"eval_samples_per_second": 455.571, |
|
"eval_steps_per_second": 28.473, |
|
"step": 872000 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 2.596666666666667e-07, |
|
"loss": 2.776, |
|
"step": 880000 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"eval_loss": 2.6201553344726562, |
|
"eval_runtime": 437.9731, |
|
"eval_samples_per_second": 456.644, |
|
"eval_steps_per_second": 28.541, |
|
"step": 880000 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"eval_loss": 2.621598482131958, |
|
"eval_runtime": 438.0301, |
|
"eval_samples_per_second": 456.585, |
|
"eval_steps_per_second": 28.537, |
|
"step": 888000 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 2.5693333333333333e-07, |
|
"loss": 2.7797, |
|
"step": 896000 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"eval_loss": 2.617363929748535, |
|
"eval_runtime": 441.0259, |
|
"eval_samples_per_second": 453.484, |
|
"eval_steps_per_second": 28.343, |
|
"step": 896000 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"eval_loss": 2.6238625049591064, |
|
"eval_runtime": 429.8157, |
|
"eval_samples_per_second": 465.311, |
|
"eval_steps_per_second": 29.082, |
|
"step": 904000 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 2.542e-07, |
|
"loss": 2.7744, |
|
"step": 912000 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"eval_loss": 2.616323709487915, |
|
"eval_runtime": 431.7594, |
|
"eval_samples_per_second": 463.216, |
|
"eval_steps_per_second": 28.951, |
|
"step": 912000 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"eval_loss": 2.6197702884674072, |
|
"eval_runtime": 428.7684, |
|
"eval_samples_per_second": 466.448, |
|
"eval_steps_per_second": 29.153, |
|
"step": 920000 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 2.5146666666666664e-07, |
|
"loss": 2.7713, |
|
"step": 928000 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"eval_loss": 2.623600959777832, |
|
"eval_runtime": 427.7444, |
|
"eval_samples_per_second": 467.564, |
|
"eval_steps_per_second": 29.223, |
|
"step": 928000 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"eval_loss": 2.622565507888794, |
|
"eval_runtime": 440.6765, |
|
"eval_samples_per_second": 453.843, |
|
"eval_steps_per_second": 28.365, |
|
"step": 936000 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 2.4873333333333335e-07, |
|
"loss": 2.7853, |
|
"step": 944000 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"eval_loss": 2.6175239086151123, |
|
"eval_runtime": 438.6983, |
|
"eval_samples_per_second": 455.89, |
|
"eval_steps_per_second": 28.493, |
|
"step": 944000 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"eval_loss": 2.618924140930176, |
|
"eval_runtime": 435.2773, |
|
"eval_samples_per_second": 459.473, |
|
"eval_steps_per_second": 28.717, |
|
"step": 952000 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 2.46e-07, |
|
"loss": 2.7766, |
|
"step": 960000 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"eval_loss": 2.619227647781372, |
|
"eval_runtime": 436.1097, |
|
"eval_samples_per_second": 458.596, |
|
"eval_steps_per_second": 28.663, |
|
"step": 960000 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"eval_loss": 2.631781578063965, |
|
"eval_runtime": 434.7218, |
|
"eval_samples_per_second": 460.06, |
|
"eval_steps_per_second": 28.754, |
|
"step": 968000 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 2.4326666666666666e-07, |
|
"loss": 2.7851, |
|
"step": 976000 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"eval_loss": 2.6210079193115234, |
|
"eval_runtime": 434.5133, |
|
"eval_samples_per_second": 460.28, |
|
"eval_steps_per_second": 28.768, |
|
"step": 976000 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"eval_loss": 2.6172115802764893, |
|
"eval_runtime": 432.096, |
|
"eval_samples_per_second": 462.855, |
|
"eval_steps_per_second": 28.929, |
|
"step": 984000 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 2.405333333333333e-07, |
|
"loss": 2.7804, |
|
"step": 992000 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"eval_loss": 2.6199557781219482, |
|
"eval_runtime": 431.3352, |
|
"eval_samples_per_second": 463.672, |
|
"eval_steps_per_second": 28.98, |
|
"step": 992000 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"eval_loss": 2.6156609058380127, |
|
"eval_runtime": 427.025, |
|
"eval_samples_per_second": 468.352, |
|
"eval_steps_per_second": 29.272, |
|
"step": 1000000 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 2.3779999999999997e-07, |
|
"loss": 2.773, |
|
"step": 1008000 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"eval_loss": 2.60978364944458, |
|
"eval_runtime": 428.465, |
|
"eval_samples_per_second": 466.778, |
|
"eval_steps_per_second": 29.174, |
|
"step": 1008000 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"eval_loss": 2.6156139373779297, |
|
"eval_runtime": 427.2949, |
|
"eval_samples_per_second": 468.056, |
|
"eval_steps_per_second": 29.254, |
|
"step": 1016000 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 2.3506666666666668e-07, |
|
"loss": 2.7818, |
|
"step": 1024000 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"eval_loss": 2.6148924827575684, |
|
"eval_runtime": 426.9113, |
|
"eval_samples_per_second": 468.477, |
|
"eval_steps_per_second": 29.28, |
|
"step": 1024000 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"eval_loss": 2.612070083618164, |
|
"eval_runtime": 441.0262, |
|
"eval_samples_per_second": 453.483, |
|
"eval_steps_per_second": 28.343, |
|
"step": 1032000 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 2.3233333333333334e-07, |
|
"loss": 2.7736, |
|
"step": 1040000 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"eval_loss": 2.615013599395752, |
|
"eval_runtime": 435.5985, |
|
"eval_samples_per_second": 459.134, |
|
"eval_steps_per_second": 28.696, |
|
"step": 1040000 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"eval_loss": 2.6156115531921387, |
|
"eval_runtime": 436.5917, |
|
"eval_samples_per_second": 458.089, |
|
"eval_steps_per_second": 28.631, |
|
"step": 1048000 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 2.2960000000000002e-07, |
|
"loss": 2.7761, |
|
"step": 1056000 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"eval_loss": 2.6170592308044434, |
|
"eval_runtime": 437.6326, |
|
"eval_samples_per_second": 457.0, |
|
"eval_steps_per_second": 28.563, |
|
"step": 1056000 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"eval_loss": 2.6124441623687744, |
|
"eval_runtime": 430.5486, |
|
"eval_samples_per_second": 464.519, |
|
"eval_steps_per_second": 29.033, |
|
"step": 1064000 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 2.2686666666666667e-07, |
|
"loss": 2.7789, |
|
"step": 1072000 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"eval_loss": 2.6276962757110596, |
|
"eval_runtime": 428.8138, |
|
"eval_samples_per_second": 466.398, |
|
"eval_steps_per_second": 29.15, |
|
"step": 1072000 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"eval_loss": 2.6138463020324707, |
|
"eval_runtime": 431.2439, |
|
"eval_samples_per_second": 463.77, |
|
"eval_steps_per_second": 28.986, |
|
"step": 1080000 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 2.2413333333333333e-07, |
|
"loss": 2.7744, |
|
"step": 1088000 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"eval_loss": 2.6080663204193115, |
|
"eval_runtime": 429.0451, |
|
"eval_samples_per_second": 466.147, |
|
"eval_steps_per_second": 29.134, |
|
"step": 1088000 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"eval_loss": 2.620119571685791, |
|
"eval_runtime": 435.3098, |
|
"eval_samples_per_second": 459.438, |
|
"eval_steps_per_second": 28.715, |
|
"step": 1096000 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 2.214e-07, |
|
"loss": 2.77, |
|
"step": 1104000 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"eval_loss": 2.6170663833618164, |
|
"eval_runtime": 430.7548, |
|
"eval_samples_per_second": 464.297, |
|
"eval_steps_per_second": 29.019, |
|
"step": 1104000 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"eval_loss": 2.609856128692627, |
|
"eval_runtime": 430.7439, |
|
"eval_samples_per_second": 464.308, |
|
"eval_steps_per_second": 29.02, |
|
"step": 1112000 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 2.1866666666666667e-07, |
|
"loss": 2.772, |
|
"step": 1120000 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"eval_loss": 2.614119052886963, |
|
"eval_runtime": 430.1555, |
|
"eval_samples_per_second": 464.944, |
|
"eval_steps_per_second": 29.059, |
|
"step": 1120000 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"eval_loss": 2.6174395084381104, |
|
"eval_runtime": 430.2468, |
|
"eval_samples_per_second": 464.845, |
|
"eval_steps_per_second": 29.053, |
|
"step": 1128000 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 2.1593333333333332e-07, |
|
"loss": 2.7709, |
|
"step": 1136000 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"eval_loss": 2.6200435161590576, |
|
"eval_runtime": 429.4394, |
|
"eval_samples_per_second": 465.719, |
|
"eval_steps_per_second": 29.108, |
|
"step": 1136000 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"eval_loss": 2.6149799823760986, |
|
"eval_runtime": 429.9204, |
|
"eval_samples_per_second": 465.198, |
|
"eval_steps_per_second": 29.075, |
|
"step": 1144000 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 2.132e-07, |
|
"loss": 2.7724, |
|
"step": 1152000 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"eval_loss": 2.6041531562805176, |
|
"eval_runtime": 433.3404, |
|
"eval_samples_per_second": 461.526, |
|
"eval_steps_per_second": 28.846, |
|
"step": 1152000 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"eval_loss": 2.615821361541748, |
|
"eval_runtime": 427.3883, |
|
"eval_samples_per_second": 467.954, |
|
"eval_steps_per_second": 29.247, |
|
"step": 1160000 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 2.1046666666666666e-07, |
|
"loss": 2.7763, |
|
"step": 1168000 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"eval_loss": 2.616694211959839, |
|
"eval_runtime": 424.8183, |
|
"eval_samples_per_second": 470.785, |
|
"eval_steps_per_second": 29.424, |
|
"step": 1168000 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"eval_loss": 2.617375612258911, |
|
"eval_runtime": 425.4075, |
|
"eval_samples_per_second": 470.133, |
|
"eval_steps_per_second": 29.384, |
|
"step": 1176000 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 2.0773333333333334e-07, |
|
"loss": 2.7736, |
|
"step": 1184000 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"eval_loss": 2.609928846359253, |
|
"eval_runtime": 429.8799, |
|
"eval_samples_per_second": 465.242, |
|
"eval_steps_per_second": 29.078, |
|
"step": 1184000 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"eval_loss": 2.607574462890625, |
|
"eval_runtime": 427.5829, |
|
"eval_samples_per_second": 467.741, |
|
"eval_steps_per_second": 29.234, |
|
"step": 1192000 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 2.05e-07, |
|
"loss": 2.7692, |
|
"step": 1200000 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"eval_loss": 2.6088333129882812, |
|
"eval_runtime": 427.3086, |
|
"eval_samples_per_second": 468.041, |
|
"eval_steps_per_second": 29.253, |
|
"step": 1200000 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"eval_loss": 2.6174449920654297, |
|
"eval_runtime": 426.9323, |
|
"eval_samples_per_second": 468.454, |
|
"eval_steps_per_second": 29.279, |
|
"step": 1208000 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"learning_rate": 2.0226666666666668e-07, |
|
"loss": 2.7794, |
|
"step": 1216000 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"eval_loss": 2.604072332382202, |
|
"eval_runtime": 425.7765, |
|
"eval_samples_per_second": 469.725, |
|
"eval_steps_per_second": 29.358, |
|
"step": 1216000 |
|
}, |
|
{ |
|
"epoch": 5.15, |
|
"eval_loss": 2.60508394241333, |
|
"eval_runtime": 429.4647, |
|
"eval_samples_per_second": 465.691, |
|
"eval_steps_per_second": 29.106, |
|
"step": 1224000 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"learning_rate": 1.9953333333333333e-07, |
|
"loss": 2.7709, |
|
"step": 1232000 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"eval_loss": 2.6092872619628906, |
|
"eval_runtime": 431.1236, |
|
"eval_samples_per_second": 463.899, |
|
"eval_steps_per_second": 28.994, |
|
"step": 1232000 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"eval_loss": 2.6061949729919434, |
|
"eval_runtime": 430.933, |
|
"eval_samples_per_second": 464.105, |
|
"eval_steps_per_second": 29.007, |
|
"step": 1240000 |
|
}, |
|
{ |
|
"epoch": 5.25, |
|
"learning_rate": 1.968e-07, |
|
"loss": 2.7727, |
|
"step": 1248000 |
|
}, |
|
{ |
|
"epoch": 5.25, |
|
"eval_loss": 2.6051762104034424, |
|
"eval_runtime": 428.4194, |
|
"eval_samples_per_second": 466.828, |
|
"eval_steps_per_second": 29.177, |
|
"step": 1248000 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"eval_loss": 2.612610340118408, |
|
"eval_runtime": 426.271, |
|
"eval_samples_per_second": 469.18, |
|
"eval_steps_per_second": 29.324, |
|
"step": 1256000 |
|
}, |
|
{ |
|
"epoch": 5.32, |
|
"learning_rate": 1.9406666666666667e-07, |
|
"loss": 2.7686, |
|
"step": 1264000 |
|
}, |
|
{ |
|
"epoch": 5.32, |
|
"eval_loss": 2.609870433807373, |
|
"eval_runtime": 424.9717, |
|
"eval_samples_per_second": 470.615, |
|
"eval_steps_per_second": 29.414, |
|
"step": 1264000 |
|
}, |
|
{ |
|
"epoch": 5.36, |
|
"eval_loss": 2.619239568710327, |
|
"eval_runtime": 428.8052, |
|
"eval_samples_per_second": 466.408, |
|
"eval_steps_per_second": 29.151, |
|
"step": 1272000 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"learning_rate": 1.9133333333333333e-07, |
|
"loss": 2.7668, |
|
"step": 1280000 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"eval_loss": 2.616584539413452, |
|
"eval_runtime": 426.4046, |
|
"eval_samples_per_second": 469.033, |
|
"eval_steps_per_second": 29.315, |
|
"step": 1280000 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"eval_loss": 2.6041982173919678, |
|
"eval_runtime": 425.6206, |
|
"eval_samples_per_second": 469.897, |
|
"eval_steps_per_second": 29.369, |
|
"step": 1288000 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"learning_rate": 1.886e-07, |
|
"loss": 2.7777, |
|
"step": 1296000 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"eval_loss": 2.603804349899292, |
|
"eval_runtime": 425.5755, |
|
"eval_samples_per_second": 469.947, |
|
"eval_steps_per_second": 29.372, |
|
"step": 1296000 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"eval_loss": 2.6119117736816406, |
|
"eval_runtime": 426.6248, |
|
"eval_samples_per_second": 468.791, |
|
"eval_steps_per_second": 29.3, |
|
"step": 1304000 |
|
}, |
|
{ |
|
"epoch": 5.52, |
|
"learning_rate": 1.8586666666666666e-07, |
|
"loss": 2.7737, |
|
"step": 1312000 |
|
}, |
|
{ |
|
"epoch": 5.52, |
|
"eval_loss": 2.615492343902588, |
|
"eval_runtime": 426.0601, |
|
"eval_samples_per_second": 469.413, |
|
"eval_steps_per_second": 29.339, |
|
"step": 1312000 |
|
}, |
|
{ |
|
"epoch": 5.56, |
|
"eval_loss": 2.6235928535461426, |
|
"eval_runtime": 427.3368, |
|
"eval_samples_per_second": 468.01, |
|
"eval_steps_per_second": 29.251, |
|
"step": 1320000 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"learning_rate": 1.8313333333333332e-07, |
|
"loss": 2.7757, |
|
"step": 1328000 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"eval_loss": 2.6123950481414795, |
|
"eval_runtime": 427.0533, |
|
"eval_samples_per_second": 468.321, |
|
"eval_steps_per_second": 29.27, |
|
"step": 1328000 |
|
}, |
|
{ |
|
"epoch": 5.63, |
|
"eval_loss": 2.5992510318756104, |
|
"eval_runtime": 425.5905, |
|
"eval_samples_per_second": 469.931, |
|
"eval_steps_per_second": 29.371, |
|
"step": 1336000 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"learning_rate": 1.804e-07, |
|
"loss": 2.7757, |
|
"step": 1344000 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"eval_loss": 2.613180160522461, |
|
"eval_runtime": 424.8849, |
|
"eval_samples_per_second": 470.711, |
|
"eval_steps_per_second": 29.42, |
|
"step": 1344000 |
|
}, |
|
{ |
|
"epoch": 5.69, |
|
"eval_loss": 2.6062958240509033, |
|
"eval_runtime": 424.2489, |
|
"eval_samples_per_second": 471.417, |
|
"eval_steps_per_second": 29.464, |
|
"step": 1352000 |
|
}, |
|
{ |
|
"epoch": 5.73, |
|
"learning_rate": 1.7766666666666666e-07, |
|
"loss": 2.7748, |
|
"step": 1360000 |
|
}, |
|
{ |
|
"epoch": 5.73, |
|
"eval_loss": 2.612989902496338, |
|
"eval_runtime": 424.5958, |
|
"eval_samples_per_second": 471.032, |
|
"eval_steps_per_second": 29.44, |
|
"step": 1360000 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"eval_loss": 2.610032796859741, |
|
"eval_runtime": 427.5632, |
|
"eval_samples_per_second": 467.762, |
|
"eval_steps_per_second": 29.235, |
|
"step": 1368000 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"learning_rate": 1.7493333333333334e-07, |
|
"loss": 2.769, |
|
"step": 1376000 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"eval_loss": 2.602424383163452, |
|
"eval_runtime": 424.7302, |
|
"eval_samples_per_second": 470.882, |
|
"eval_steps_per_second": 29.43, |
|
"step": 1376000 |
|
}, |
|
{ |
|
"epoch": 5.83, |
|
"eval_loss": 2.6061973571777344, |
|
"eval_runtime": 428.6827, |
|
"eval_samples_per_second": 466.541, |
|
"eval_steps_per_second": 29.159, |
|
"step": 1384000 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"learning_rate": 1.722e-07, |
|
"loss": 2.7713, |
|
"step": 1392000 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"eval_loss": 2.6137661933898926, |
|
"eval_runtime": 430.6966, |
|
"eval_samples_per_second": 464.359, |
|
"eval_steps_per_second": 29.023, |
|
"step": 1392000 |
|
}, |
|
{ |
|
"epoch": 5.89, |
|
"eval_loss": 2.6025471687316895, |
|
"eval_runtime": 430.8099, |
|
"eval_samples_per_second": 464.237, |
|
"eval_steps_per_second": 29.015, |
|
"step": 1400000 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"learning_rate": 1.6946666666666668e-07, |
|
"loss": 2.7766, |
|
"step": 1408000 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"eval_loss": 2.6087875366210938, |
|
"eval_runtime": 427.6043, |
|
"eval_samples_per_second": 467.717, |
|
"eval_steps_per_second": 29.233, |
|
"step": 1408000 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"eval_loss": 2.6138193607330322, |
|
"eval_runtime": 428.3638, |
|
"eval_samples_per_second": 466.888, |
|
"eval_steps_per_second": 29.181, |
|
"step": 1416000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 1.6673333333333333e-07, |
|
"loss": 2.7727, |
|
"step": 1424000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 2.604793071746826, |
|
"eval_runtime": 426.9803, |
|
"eval_samples_per_second": 468.401, |
|
"eval_steps_per_second": 29.275, |
|
"step": 1424000 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"eval_loss": 2.606837511062622, |
|
"eval_runtime": 429.9514, |
|
"eval_samples_per_second": 465.164, |
|
"eval_steps_per_second": 29.073, |
|
"step": 1432000 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"learning_rate": 1.64e-07, |
|
"loss": 2.7737, |
|
"step": 1440000 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"eval_loss": 2.614352226257324, |
|
"eval_runtime": 427.0139, |
|
"eval_samples_per_second": 468.364, |
|
"eval_steps_per_second": 29.273, |
|
"step": 1440000 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"eval_loss": 2.6051464080810547, |
|
"eval_runtime": 430.2406, |
|
"eval_samples_per_second": 464.852, |
|
"eval_steps_per_second": 29.054, |
|
"step": 1448000 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"learning_rate": 1.6126666666666667e-07, |
|
"loss": 2.778, |
|
"step": 1456000 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"eval_loss": 2.6157853603363037, |
|
"eval_runtime": 431.1381, |
|
"eval_samples_per_second": 463.884, |
|
"eval_steps_per_second": 28.993, |
|
"step": 1456000 |
|
}, |
|
{ |
|
"epoch": 6.16, |
|
"eval_loss": 2.615216016769409, |
|
"eval_runtime": 425.9498, |
|
"eval_samples_per_second": 469.534, |
|
"eval_steps_per_second": 29.346, |
|
"step": 1464000 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"learning_rate": 1.5853333333333332e-07, |
|
"loss": 2.7767, |
|
"step": 1472000 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"eval_loss": 2.6018834114074707, |
|
"eval_runtime": 426.8729, |
|
"eval_samples_per_second": 468.519, |
|
"eval_steps_per_second": 29.283, |
|
"step": 1472000 |
|
}, |
|
{ |
|
"epoch": 6.23, |
|
"eval_loss": 2.611650228500366, |
|
"eval_runtime": 425.3068, |
|
"eval_samples_per_second": 470.244, |
|
"eval_steps_per_second": 29.391, |
|
"step": 1480000 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"learning_rate": 1.558e-07, |
|
"loss": 2.7706, |
|
"step": 1488000 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"eval_loss": 2.6065428256988525, |
|
"eval_runtime": 427.2148, |
|
"eval_samples_per_second": 468.144, |
|
"eval_steps_per_second": 29.259, |
|
"step": 1488000 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"eval_loss": 2.612178087234497, |
|
"eval_runtime": 424.5636, |
|
"eval_samples_per_second": 471.067, |
|
"eval_steps_per_second": 29.442, |
|
"step": 1496000 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"learning_rate": 1.5306666666666666e-07, |
|
"loss": 2.7775, |
|
"step": 1504000 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"eval_loss": 2.610006809234619, |
|
"eval_runtime": 424.709, |
|
"eval_samples_per_second": 470.906, |
|
"eval_steps_per_second": 29.432, |
|
"step": 1504000 |
|
}, |
|
{ |
|
"epoch": 6.37, |
|
"eval_loss": 2.6100497245788574, |
|
"eval_runtime": 424.7106, |
|
"eval_samples_per_second": 470.904, |
|
"eval_steps_per_second": 29.432, |
|
"step": 1512000 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 1.5033333333333332e-07, |
|
"loss": 2.7753, |
|
"step": 1520000 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"eval_loss": 2.605093479156494, |
|
"eval_runtime": 427.9079, |
|
"eval_samples_per_second": 467.386, |
|
"eval_steps_per_second": 29.212, |
|
"step": 1520000 |
|
}, |
|
{ |
|
"epoch": 6.43, |
|
"eval_loss": 2.6036603450775146, |
|
"eval_runtime": 425.7208, |
|
"eval_samples_per_second": 469.787, |
|
"eval_steps_per_second": 29.362, |
|
"step": 1528000 |
|
}, |
|
{ |
|
"epoch": 6.47, |
|
"learning_rate": 1.476e-07, |
|
"loss": 2.7691, |
|
"step": 1536000 |
|
}, |
|
{ |
|
"epoch": 6.47, |
|
"eval_loss": 2.6037118434906006, |
|
"eval_runtime": 425.0126, |
|
"eval_samples_per_second": 470.57, |
|
"eval_steps_per_second": 29.411, |
|
"step": 1536000 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"eval_loss": 2.599247932434082, |
|
"eval_runtime": 425.3525, |
|
"eval_samples_per_second": 470.194, |
|
"eval_steps_per_second": 29.387, |
|
"step": 1544000 |
|
}, |
|
{ |
|
"epoch": 6.53, |
|
"learning_rate": 1.4486666666666665e-07, |
|
"loss": 2.758, |
|
"step": 1552000 |
|
}, |
|
{ |
|
"epoch": 6.53, |
|
"eval_loss": 2.6080071926116943, |
|
"eval_runtime": 424.5489, |
|
"eval_samples_per_second": 471.084, |
|
"eval_steps_per_second": 29.443, |
|
"step": 1552000 |
|
}, |
|
{ |
|
"epoch": 6.57, |
|
"eval_loss": 2.6138691902160645, |
|
"eval_runtime": 427.0402, |
|
"eval_samples_per_second": 468.335, |
|
"eval_steps_per_second": 29.271, |
|
"step": 1560000 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 1.4213333333333334e-07, |
|
"loss": 2.7722, |
|
"step": 1568000 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"eval_loss": 2.6000304222106934, |
|
"eval_runtime": 426.0372, |
|
"eval_samples_per_second": 469.438, |
|
"eval_steps_per_second": 29.34, |
|
"step": 1568000 |
|
}, |
|
{ |
|
"epoch": 6.64, |
|
"eval_loss": 2.6107337474823, |
|
"eval_runtime": 428.351, |
|
"eval_samples_per_second": 466.902, |
|
"eval_steps_per_second": 29.182, |
|
"step": 1576000 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"learning_rate": 1.3940000000000002e-07, |
|
"loss": 2.7737, |
|
"step": 1584000 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"eval_loss": 2.6056902408599854, |
|
"eval_runtime": 427.5761, |
|
"eval_samples_per_second": 467.748, |
|
"eval_steps_per_second": 29.235, |
|
"step": 1584000 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"eval_loss": 2.6063265800476074, |
|
"eval_runtime": 427.3328, |
|
"eval_samples_per_second": 468.015, |
|
"eval_steps_per_second": 29.251, |
|
"step": 1592000 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"learning_rate": 1.3666666666666665e-07, |
|
"loss": 2.7722, |
|
"step": 1600000 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"eval_loss": 2.602761745452881, |
|
"eval_runtime": 427.2826, |
|
"eval_samples_per_second": 468.07, |
|
"eval_steps_per_second": 29.255, |
|
"step": 1600000 |
|
}, |
|
{ |
|
"epoch": 6.77, |
|
"eval_loss": 2.5995423793792725, |
|
"eval_runtime": 431.7911, |
|
"eval_samples_per_second": 463.182, |
|
"eval_steps_per_second": 28.949, |
|
"step": 1608000 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 1.3393333333333333e-07, |
|
"loss": 2.7659, |
|
"step": 1616000 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"eval_loss": 2.604205846786499, |
|
"eval_runtime": 430.2172, |
|
"eval_samples_per_second": 464.877, |
|
"eval_steps_per_second": 29.055, |
|
"step": 1616000 |
|
}, |
|
{ |
|
"epoch": 6.84, |
|
"eval_loss": 2.601318597793579, |
|
"eval_runtime": 426.9357, |
|
"eval_samples_per_second": 468.45, |
|
"eval_steps_per_second": 29.278, |
|
"step": 1624000 |
|
}, |
|
{ |
|
"epoch": 6.87, |
|
"learning_rate": 1.312e-07, |
|
"loss": 2.7769, |
|
"step": 1632000 |
|
}, |
|
{ |
|
"epoch": 6.87, |
|
"eval_loss": 2.60282826423645, |
|
"eval_runtime": 430.2987, |
|
"eval_samples_per_second": 464.789, |
|
"eval_steps_per_second": 29.05, |
|
"step": 1632000 |
|
}, |
|
{ |
|
"epoch": 6.91, |
|
"eval_loss": 2.608042001724243, |
|
"eval_runtime": 428.8831, |
|
"eval_samples_per_second": 466.323, |
|
"eval_steps_per_second": 29.145, |
|
"step": 1640000 |
|
}, |
|
{ |
|
"epoch": 6.94, |
|
"learning_rate": 1.2846666666666667e-07, |
|
"loss": 2.7732, |
|
"step": 1648000 |
|
}, |
|
{ |
|
"epoch": 6.94, |
|
"eval_loss": 2.5994017124176025, |
|
"eval_runtime": 427.4593, |
|
"eval_samples_per_second": 467.876, |
|
"eval_steps_per_second": 29.243, |
|
"step": 1648000 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"eval_loss": 2.6063311100006104, |
|
"eval_runtime": 427.0875, |
|
"eval_samples_per_second": 468.283, |
|
"eval_steps_per_second": 29.268, |
|
"step": 1656000 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 1.2573333333333332e-07, |
|
"loss": 2.7708, |
|
"step": 1664000 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"eval_loss": 2.6120190620422363, |
|
"eval_runtime": 426.6328, |
|
"eval_samples_per_second": 468.783, |
|
"eval_steps_per_second": 29.299, |
|
"step": 1664000 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"eval_loss": 2.602278470993042, |
|
"eval_runtime": 426.2122, |
|
"eval_samples_per_second": 469.245, |
|
"eval_steps_per_second": 29.328, |
|
"step": 1672000 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"learning_rate": 1.23e-07, |
|
"loss": 2.7614, |
|
"step": 1680000 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"eval_loss": 2.6091384887695312, |
|
"eval_runtime": 428.3432, |
|
"eval_samples_per_second": 466.911, |
|
"eval_steps_per_second": 29.182, |
|
"step": 1680000 |
|
}, |
|
{ |
|
"epoch": 7.11, |
|
"eval_loss": 2.600266218185425, |
|
"eval_runtime": 426.5859, |
|
"eval_samples_per_second": 468.834, |
|
"eval_steps_per_second": 29.302, |
|
"step": 1688000 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 1.2026666666666666e-07, |
|
"loss": 2.7655, |
|
"step": 1696000 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"eval_loss": 2.6015985012054443, |
|
"eval_runtime": 427.3154, |
|
"eval_samples_per_second": 468.034, |
|
"eval_steps_per_second": 29.252, |
|
"step": 1696000 |
|
}, |
|
{ |
|
"epoch": 7.17, |
|
"eval_loss": 2.605762481689453, |
|
"eval_runtime": 431.3039, |
|
"eval_samples_per_second": 463.706, |
|
"eval_steps_per_second": 28.982, |
|
"step": 1704000 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"learning_rate": 1.1753333333333334e-07, |
|
"loss": 2.7747, |
|
"step": 1712000 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"eval_loss": 2.6045358180999756, |
|
"eval_runtime": 430.9287, |
|
"eval_samples_per_second": 464.109, |
|
"eval_steps_per_second": 29.007, |
|
"step": 1712000 |
|
}, |
|
{ |
|
"epoch": 7.24, |
|
"eval_loss": 2.609701633453369, |
|
"eval_runtime": 430.8027, |
|
"eval_samples_per_second": 464.245, |
|
"eval_steps_per_second": 29.016, |
|
"step": 1720000 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"learning_rate": 1.1480000000000001e-07, |
|
"loss": 2.7685, |
|
"step": 1728000 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"eval_loss": 2.606764316558838, |
|
"eval_runtime": 430.5036, |
|
"eval_samples_per_second": 464.568, |
|
"eval_steps_per_second": 29.036, |
|
"step": 1728000 |
|
}, |
|
{ |
|
"epoch": 7.31, |
|
"eval_loss": 2.6037065982818604, |
|
"eval_runtime": 428.2126, |
|
"eval_samples_per_second": 467.053, |
|
"eval_steps_per_second": 29.191, |
|
"step": 1736000 |
|
}, |
|
{ |
|
"epoch": 7.34, |
|
"learning_rate": 1.1206666666666666e-07, |
|
"loss": 2.7736, |
|
"step": 1744000 |
|
}, |
|
{ |
|
"epoch": 7.34, |
|
"eval_loss": 2.612487554550171, |
|
"eval_runtime": 426.9199, |
|
"eval_samples_per_second": 468.467, |
|
"eval_steps_per_second": 29.279, |
|
"step": 1744000 |
|
}, |
|
{ |
|
"epoch": 7.38, |
|
"eval_loss": 2.6112568378448486, |
|
"eval_runtime": 424.0556, |
|
"eval_samples_per_second": 471.632, |
|
"eval_steps_per_second": 29.477, |
|
"step": 1752000 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"learning_rate": 1.0933333333333333e-07, |
|
"loss": 2.7666, |
|
"step": 1760000 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"eval_loss": 2.597158193588257, |
|
"eval_runtime": 424.796, |
|
"eval_samples_per_second": 470.81, |
|
"eval_steps_per_second": 29.426, |
|
"step": 1760000 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"eval_loss": 2.6080923080444336, |
|
"eval_runtime": 426.0498, |
|
"eval_samples_per_second": 469.424, |
|
"eval_steps_per_second": 29.339, |
|
"step": 1768000 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"learning_rate": 1.066e-07, |
|
"loss": 2.7658, |
|
"step": 1776000 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"eval_loss": 2.6089766025543213, |
|
"eval_runtime": 426.2325, |
|
"eval_samples_per_second": 469.223, |
|
"eval_steps_per_second": 29.327, |
|
"step": 1776000 |
|
}, |
|
{ |
|
"epoch": 7.51, |
|
"eval_loss": 2.6126182079315186, |
|
"eval_runtime": 425.9411, |
|
"eval_samples_per_second": 469.544, |
|
"eval_steps_per_second": 29.347, |
|
"step": 1784000 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"learning_rate": 1.0386666666666667e-07, |
|
"loss": 2.7802, |
|
"step": 1792000 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"eval_loss": 2.6020755767822266, |
|
"eval_runtime": 425.4923, |
|
"eval_samples_per_second": 470.039, |
|
"eval_steps_per_second": 29.378, |
|
"step": 1792000 |
|
}, |
|
{ |
|
"epoch": 7.58, |
|
"eval_loss": 2.608738660812378, |
|
"eval_runtime": 423.4207, |
|
"eval_samples_per_second": 472.339, |
|
"eval_steps_per_second": 29.521, |
|
"step": 1800000 |
|
}, |
|
{ |
|
"epoch": 7.61, |
|
"learning_rate": 1.0113333333333334e-07, |
|
"loss": 2.7749, |
|
"step": 1808000 |
|
}, |
|
{ |
|
"epoch": 7.61, |
|
"eval_loss": 2.5985732078552246, |
|
"eval_runtime": 427.6655, |
|
"eval_samples_per_second": 467.651, |
|
"eval_steps_per_second": 29.228, |
|
"step": 1808000 |
|
}, |
|
{ |
|
"epoch": 7.65, |
|
"eval_loss": 2.600249767303467, |
|
"eval_runtime": 425.4151, |
|
"eval_samples_per_second": 470.124, |
|
"eval_steps_per_second": 29.383, |
|
"step": 1816000 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"learning_rate": 9.84e-08, |
|
"loss": 2.7689, |
|
"step": 1824000 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"eval_loss": 2.6023120880126953, |
|
"eval_runtime": 426.7002, |
|
"eval_samples_per_second": 468.708, |
|
"eval_steps_per_second": 29.295, |
|
"step": 1824000 |
|
}, |
|
{ |
|
"epoch": 7.71, |
|
"eval_loss": 2.5969133377075195, |
|
"eval_runtime": 429.4646, |
|
"eval_samples_per_second": 465.691, |
|
"eval_steps_per_second": 29.106, |
|
"step": 1832000 |
|
}, |
|
{ |
|
"epoch": 7.75, |
|
"learning_rate": 9.566666666666666e-08, |
|
"loss": 2.7699, |
|
"step": 1840000 |
|
}, |
|
{ |
|
"epoch": 7.75, |
|
"eval_loss": 2.5974552631378174, |
|
"eval_runtime": 425.4534, |
|
"eval_samples_per_second": 470.082, |
|
"eval_steps_per_second": 29.38, |
|
"step": 1840000 |
|
}, |
|
{ |
|
"epoch": 7.78, |
|
"eval_loss": 2.606982946395874, |
|
"eval_runtime": 427.6242, |
|
"eval_samples_per_second": 467.696, |
|
"eval_steps_per_second": 29.231, |
|
"step": 1848000 |
|
}, |
|
{ |
|
"epoch": 7.81, |
|
"learning_rate": 9.293333333333333e-08, |
|
"loss": 2.7715, |
|
"step": 1856000 |
|
}, |
|
{ |
|
"epoch": 7.81, |
|
"eval_loss": 2.603482484817505, |
|
"eval_runtime": 429.5475, |
|
"eval_samples_per_second": 465.602, |
|
"eval_steps_per_second": 29.1, |
|
"step": 1856000 |
|
}, |
|
{ |
|
"epoch": 7.85, |
|
"eval_loss": 2.6048595905303955, |
|
"eval_runtime": 430.2797, |
|
"eval_samples_per_second": 464.809, |
|
"eval_steps_per_second": 29.051, |
|
"step": 1864000 |
|
}, |
|
{ |
|
"epoch": 7.88, |
|
"learning_rate": 9.02e-08, |
|
"loss": 2.7653, |
|
"step": 1872000 |
|
}, |
|
{ |
|
"epoch": 7.88, |
|
"eval_loss": 2.6129438877105713, |
|
"eval_runtime": 430.4774, |
|
"eval_samples_per_second": 464.596, |
|
"eval_steps_per_second": 29.038, |
|
"step": 1872000 |
|
}, |
|
{ |
|
"epoch": 7.92, |
|
"eval_loss": 2.602692127227783, |
|
"eval_runtime": 426.0433, |
|
"eval_samples_per_second": 469.431, |
|
"eval_steps_per_second": 29.34, |
|
"step": 1880000 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"learning_rate": 8.746666666666667e-08, |
|
"loss": 2.7729, |
|
"step": 1888000 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"eval_loss": 2.5999996662139893, |
|
"eval_runtime": 427.4645, |
|
"eval_samples_per_second": 467.87, |
|
"eval_steps_per_second": 29.242, |
|
"step": 1888000 |
|
}, |
|
{ |
|
"epoch": 7.98, |
|
"eval_loss": 2.6137943267822266, |
|
"eval_runtime": 427.597, |
|
"eval_samples_per_second": 467.726, |
|
"eval_steps_per_second": 29.233, |
|
"step": 1896000 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 8.473333333333334e-08, |
|
"loss": 2.7693, |
|
"step": 1904000 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"eval_loss": 2.6051719188690186, |
|
"eval_runtime": 428.0224, |
|
"eval_samples_per_second": 467.261, |
|
"eval_steps_per_second": 29.204, |
|
"step": 1904000 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"eval_loss": 2.6060233116149902, |
|
"eval_runtime": 430.6968, |
|
"eval_samples_per_second": 464.359, |
|
"eval_steps_per_second": 29.023, |
|
"step": 1912000 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"learning_rate": 8.2e-08, |
|
"loss": 2.7585, |
|
"step": 1920000 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"eval_loss": 2.6064672470092773, |
|
"eval_runtime": 427.6059, |
|
"eval_samples_per_second": 467.716, |
|
"eval_steps_per_second": 29.233, |
|
"step": 1920000 |
|
}, |
|
{ |
|
"epoch": 8.12, |
|
"eval_loss": 2.6105079650878906, |
|
"eval_runtime": 438.1262, |
|
"eval_samples_per_second": 456.485, |
|
"eval_steps_per_second": 28.531, |
|
"step": 1928000 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"learning_rate": 7.926666666666666e-08, |
|
"loss": 2.7652, |
|
"step": 1936000 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"eval_loss": 2.607515335083008, |
|
"eval_runtime": 438.8385, |
|
"eval_samples_per_second": 455.744, |
|
"eval_steps_per_second": 28.484, |
|
"step": 1936000 |
|
}, |
|
{ |
|
"epoch": 8.19, |
|
"eval_loss": 2.607562780380249, |
|
"eval_runtime": 434.8048, |
|
"eval_samples_per_second": 459.972, |
|
"eval_steps_per_second": 28.749, |
|
"step": 1944000 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"learning_rate": 7.653333333333333e-08, |
|
"loss": 2.7508, |
|
"step": 1952000 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"eval_loss": 2.6083250045776367, |
|
"eval_runtime": 436.0024, |
|
"eval_samples_per_second": 458.709, |
|
"eval_steps_per_second": 28.67, |
|
"step": 1952000 |
|
}, |
|
{ |
|
"epoch": 8.25, |
|
"eval_loss": 2.6111507415771484, |
|
"eval_runtime": 436.2282, |
|
"eval_samples_per_second": 458.471, |
|
"eval_steps_per_second": 28.655, |
|
"step": 1960000 |
|
}, |
|
{ |
|
"epoch": 8.29, |
|
"learning_rate": 7.38e-08, |
|
"loss": 2.7678, |
|
"step": 1968000 |
|
}, |
|
{ |
|
"epoch": 8.29, |
|
"eval_loss": 2.6018521785736084, |
|
"eval_runtime": 437.0443, |
|
"eval_samples_per_second": 457.615, |
|
"eval_steps_per_second": 28.601, |
|
"step": 1968000 |
|
}, |
|
{ |
|
"epoch": 8.32, |
|
"eval_loss": 2.602910280227661, |
|
"eval_runtime": 435.5894, |
|
"eval_samples_per_second": 459.143, |
|
"eval_steps_per_second": 28.697, |
|
"step": 1976000 |
|
}, |
|
{ |
|
"epoch": 8.35, |
|
"learning_rate": 7.106666666666667e-08, |
|
"loss": 2.7653, |
|
"step": 1984000 |
|
}, |
|
{ |
|
"epoch": 8.35, |
|
"eval_loss": 2.6087162494659424, |
|
"eval_runtime": 434.8171, |
|
"eval_samples_per_second": 459.959, |
|
"eval_steps_per_second": 28.748, |
|
"step": 1984000 |
|
}, |
|
{ |
|
"epoch": 8.39, |
|
"eval_loss": 2.606381416320801, |
|
"eval_runtime": 429.8668, |
|
"eval_samples_per_second": 465.256, |
|
"eval_steps_per_second": 29.079, |
|
"step": 1992000 |
|
}, |
|
{ |
|
"epoch": 8.42, |
|
"learning_rate": 6.833333333333332e-08, |
|
"loss": 2.7661, |
|
"step": 2000000 |
|
}, |
|
{ |
|
"epoch": 8.42, |
|
"eval_loss": 2.603147506713867, |
|
"eval_runtime": 424.748, |
|
"eval_samples_per_second": 470.863, |
|
"eval_steps_per_second": 29.429, |
|
"step": 2000000 |
|
}, |
|
{ |
|
"epoch": 8.45, |
|
"eval_loss": 2.6050961017608643, |
|
"eval_runtime": 424.2051, |
|
"eval_samples_per_second": 471.465, |
|
"eval_steps_per_second": 29.467, |
|
"step": 2008000 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"learning_rate": 6.56e-08, |
|
"loss": 2.7742, |
|
"step": 2016000 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"eval_loss": 2.6091232299804688, |
|
"eval_runtime": 425.2363, |
|
"eval_samples_per_second": 470.322, |
|
"eval_steps_per_second": 29.395, |
|
"step": 2016000 |
|
}, |
|
{ |
|
"epoch": 8.52, |
|
"eval_loss": 2.5978386402130127, |
|
"eval_runtime": 421.3464, |
|
"eval_samples_per_second": 474.664, |
|
"eval_steps_per_second": 29.667, |
|
"step": 2024000 |
|
}, |
|
{ |
|
"epoch": 8.56, |
|
"learning_rate": 6.286666666666666e-08, |
|
"loss": 2.7748, |
|
"step": 2032000 |
|
}, |
|
{ |
|
"epoch": 8.56, |
|
"eval_loss": 2.6131348609924316, |
|
"eval_runtime": 421.1531, |
|
"eval_samples_per_second": 474.882, |
|
"eval_steps_per_second": 29.68, |
|
"step": 2032000 |
|
}, |
|
{ |
|
"epoch": 8.59, |
|
"eval_loss": 2.6030309200286865, |
|
"eval_runtime": 423.5077, |
|
"eval_samples_per_second": 472.242, |
|
"eval_steps_per_second": 29.515, |
|
"step": 2040000 |
|
}, |
|
{ |
|
"epoch": 8.62, |
|
"learning_rate": 6.013333333333333e-08, |
|
"loss": 2.7706, |
|
"step": 2048000 |
|
}, |
|
{ |
|
"epoch": 8.62, |
|
"eval_loss": 2.6036195755004883, |
|
"eval_runtime": 424.684, |
|
"eval_samples_per_second": 470.934, |
|
"eval_steps_per_second": 29.434, |
|
"step": 2048000 |
|
}, |
|
{ |
|
"epoch": 8.66, |
|
"eval_loss": 2.599808692932129, |
|
"eval_runtime": 421.9265, |
|
"eval_samples_per_second": 474.011, |
|
"eval_steps_per_second": 29.626, |
|
"step": 2056000 |
|
}, |
|
{ |
|
"epoch": 8.69, |
|
"learning_rate": 5.7400000000000004e-08, |
|
"loss": 2.769, |
|
"step": 2064000 |
|
}, |
|
{ |
|
"epoch": 8.69, |
|
"eval_loss": 2.6013376712799072, |
|
"eval_runtime": 424.7682, |
|
"eval_samples_per_second": 470.84, |
|
"eval_steps_per_second": 29.428, |
|
"step": 2064000 |
|
}, |
|
{ |
|
"epoch": 8.72, |
|
"eval_loss": 2.6000382900238037, |
|
"eval_runtime": 422.3044, |
|
"eval_samples_per_second": 473.587, |
|
"eval_steps_per_second": 29.6, |
|
"step": 2072000 |
|
}, |
|
{ |
|
"epoch": 8.76, |
|
"learning_rate": 5.4666666666666666e-08, |
|
"loss": 2.7733, |
|
"step": 2080000 |
|
}, |
|
{ |
|
"epoch": 8.76, |
|
"eval_loss": 2.606200695037842, |
|
"eval_runtime": 421.4091, |
|
"eval_samples_per_second": 474.593, |
|
"eval_steps_per_second": 29.662, |
|
"step": 2080000 |
|
}, |
|
{ |
|
"epoch": 8.79, |
|
"eval_loss": 2.605668783187866, |
|
"eval_runtime": 425.3566, |
|
"eval_samples_per_second": 470.189, |
|
"eval_steps_per_second": 29.387, |
|
"step": 2088000 |
|
}, |
|
{ |
|
"epoch": 8.83, |
|
"learning_rate": 5.1933333333333335e-08, |
|
"loss": 2.7714, |
|
"step": 2096000 |
|
}, |
|
{ |
|
"epoch": 8.83, |
|
"eval_loss": 2.602085828781128, |
|
"eval_runtime": 423.0675, |
|
"eval_samples_per_second": 472.733, |
|
"eval_steps_per_second": 29.546, |
|
"step": 2096000 |
|
}, |
|
{ |
|
"epoch": 8.86, |
|
"eval_loss": 2.602846145629883, |
|
"eval_runtime": 424.1211, |
|
"eval_samples_per_second": 471.559, |
|
"eval_steps_per_second": 29.473, |
|
"step": 2104000 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"learning_rate": 4.92e-08, |
|
"loss": 2.7754, |
|
"step": 2112000 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"eval_loss": 2.596395254135132, |
|
"eval_runtime": 421.9383, |
|
"eval_samples_per_second": 473.998, |
|
"eval_steps_per_second": 29.625, |
|
"step": 2112000 |
|
}, |
|
{ |
|
"epoch": 8.93, |
|
"eval_loss": 2.601470470428467, |
|
"eval_runtime": 430.2725, |
|
"eval_samples_per_second": 464.817, |
|
"eval_steps_per_second": 29.051, |
|
"step": 2120000 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"learning_rate": 4.6466666666666666e-08, |
|
"loss": 2.7683, |
|
"step": 2128000 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"eval_loss": 2.6060473918914795, |
|
"eval_runtime": 428.5391, |
|
"eval_samples_per_second": 466.697, |
|
"eval_steps_per_second": 29.169, |
|
"step": 2128000 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"eval_loss": 2.6081697940826416, |
|
"eval_runtime": 430.2474, |
|
"eval_samples_per_second": 464.844, |
|
"eval_steps_per_second": 29.053, |
|
"step": 2136000 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 4.3733333333333335e-08, |
|
"loss": 2.7758, |
|
"step": 2144000 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"eval_loss": 2.6130032539367676, |
|
"eval_runtime": 427.9829, |
|
"eval_samples_per_second": 467.304, |
|
"eval_steps_per_second": 29.207, |
|
"step": 2144000 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"eval_loss": 2.607052803039551, |
|
"eval_runtime": 426.9186, |
|
"eval_samples_per_second": 468.469, |
|
"eval_steps_per_second": 29.28, |
|
"step": 2152000 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 4.1e-08, |
|
"loss": 2.768, |
|
"step": 2160000 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"eval_loss": 2.6140778064727783, |
|
"eval_runtime": 426.6857, |
|
"eval_samples_per_second": 468.724, |
|
"eval_steps_per_second": 29.296, |
|
"step": 2160000 |
|
}, |
|
{ |
|
"epoch": 9.13, |
|
"eval_loss": 2.600281000137329, |
|
"eval_runtime": 427.0893, |
|
"eval_samples_per_second": 468.281, |
|
"eval_steps_per_second": 29.268, |
|
"step": 2168000 |
|
}, |
|
{ |
|
"epoch": 9.16, |
|
"learning_rate": 3.8266666666666665e-08, |
|
"loss": 2.7653, |
|
"step": 2176000 |
|
}, |
|
{ |
|
"epoch": 9.16, |
|
"eval_loss": 2.5986554622650146, |
|
"eval_runtime": 425.4388, |
|
"eval_samples_per_second": 470.098, |
|
"eval_steps_per_second": 29.381, |
|
"step": 2176000 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"eval_loss": 2.6066486835479736, |
|
"eval_runtime": 425.1112, |
|
"eval_samples_per_second": 470.46, |
|
"eval_steps_per_second": 29.404, |
|
"step": 2184000 |
|
}, |
|
{ |
|
"epoch": 9.23, |
|
"learning_rate": 3.5533333333333334e-08, |
|
"loss": 2.7621, |
|
"step": 2192000 |
|
}, |
|
{ |
|
"epoch": 9.23, |
|
"eval_loss": 2.6040539741516113, |
|
"eval_runtime": 424.0828, |
|
"eval_samples_per_second": 471.601, |
|
"eval_steps_per_second": 29.475, |
|
"step": 2192000 |
|
}, |
|
{ |
|
"epoch": 9.26, |
|
"eval_loss": 2.605970859527588, |
|
"eval_runtime": 424.8928, |
|
"eval_samples_per_second": 470.702, |
|
"eval_steps_per_second": 29.419, |
|
"step": 2200000 |
|
}, |
|
{ |
|
"epoch": 9.3, |
|
"learning_rate": 3.28e-08, |
|
"loss": 2.7712, |
|
"step": 2208000 |
|
}, |
|
{ |
|
"epoch": 9.3, |
|
"eval_loss": 2.6143710613250732, |
|
"eval_runtime": 422.534, |
|
"eval_samples_per_second": 473.33, |
|
"eval_steps_per_second": 29.583, |
|
"step": 2208000 |
|
}, |
|
{ |
|
"epoch": 9.33, |
|
"eval_loss": 2.5990421772003174, |
|
"eval_runtime": 423.7702, |
|
"eval_samples_per_second": 471.949, |
|
"eval_steps_per_second": 29.497, |
|
"step": 2216000 |
|
}, |
|
{ |
|
"epoch": 9.36, |
|
"learning_rate": 3.0066666666666665e-08, |
|
"loss": 2.7718, |
|
"step": 2224000 |
|
}, |
|
{ |
|
"epoch": 9.36, |
|
"eval_loss": 2.6039345264434814, |
|
"eval_runtime": 422.8971, |
|
"eval_samples_per_second": 472.924, |
|
"eval_steps_per_second": 29.558, |
|
"step": 2224000 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"eval_loss": 2.593075752258301, |
|
"eval_runtime": 423.1257, |
|
"eval_samples_per_second": 472.668, |
|
"eval_steps_per_second": 29.542, |
|
"step": 2232000 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"learning_rate": 2.7333333333333333e-08, |
|
"loss": 2.774, |
|
"step": 2240000 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"eval_loss": 2.6128671169281006, |
|
"eval_runtime": 423.6724, |
|
"eval_samples_per_second": 472.058, |
|
"eval_steps_per_second": 29.504, |
|
"step": 2240000 |
|
}, |
|
{ |
|
"epoch": 9.47, |
|
"eval_loss": 2.6095166206359863, |
|
"eval_runtime": 427.8902, |
|
"eval_samples_per_second": 467.405, |
|
"eval_steps_per_second": 29.213, |
|
"step": 2248000 |
|
}, |
|
{ |
|
"epoch": 9.5, |
|
"learning_rate": 2.46e-08, |
|
"loss": 2.765, |
|
"step": 2256000 |
|
}, |
|
{ |
|
"epoch": 9.5, |
|
"eval_loss": 2.5932390689849854, |
|
"eval_runtime": 426.8467, |
|
"eval_samples_per_second": 468.548, |
|
"eval_steps_per_second": 29.285, |
|
"step": 2256000 |
|
}, |
|
{ |
|
"epoch": 9.53, |
|
"eval_loss": 2.6009600162506104, |
|
"eval_runtime": 424.1412, |
|
"eval_samples_per_second": 471.536, |
|
"eval_steps_per_second": 29.471, |
|
"step": 2264000 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"learning_rate": 2.1866666666666667e-08, |
|
"loss": 2.7754, |
|
"step": 2272000 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"eval_loss": 2.60778546333313, |
|
"eval_runtime": 425.6089, |
|
"eval_samples_per_second": 469.91, |
|
"eval_steps_per_second": 29.37, |
|
"step": 2272000 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"eval_loss": 2.5981459617614746, |
|
"eval_runtime": 425.6519, |
|
"eval_samples_per_second": 469.863, |
|
"eval_steps_per_second": 29.367, |
|
"step": 2280000 |
|
}, |
|
{ |
|
"epoch": 9.63, |
|
"learning_rate": 1.9133333333333333e-08, |
|
"loss": 2.771, |
|
"step": 2288000 |
|
}, |
|
{ |
|
"epoch": 9.63, |
|
"eval_loss": 2.6052143573760986, |
|
"eval_runtime": 424.5141, |
|
"eval_samples_per_second": 471.122, |
|
"eval_steps_per_second": 29.445, |
|
"step": 2288000 |
|
}, |
|
{ |
|
"epoch": 9.67, |
|
"eval_loss": 2.5944042205810547, |
|
"eval_runtime": 425.2866, |
|
"eval_samples_per_second": 470.266, |
|
"eval_steps_per_second": 29.392, |
|
"step": 2296000 |
|
}, |
|
{ |
|
"epoch": 9.7, |
|
"learning_rate": 1.64e-08, |
|
"loss": 2.7757, |
|
"step": 2304000 |
|
}, |
|
{ |
|
"epoch": 9.7, |
|
"eval_loss": 2.6045000553131104, |
|
"eval_runtime": 428.7498, |
|
"eval_samples_per_second": 466.468, |
|
"eval_steps_per_second": 29.155, |
|
"step": 2304000 |
|
}, |
|
{ |
|
"epoch": 9.73, |
|
"eval_loss": 2.5971217155456543, |
|
"eval_runtime": 432.7726, |
|
"eval_samples_per_second": 462.132, |
|
"eval_steps_per_second": 28.884, |
|
"step": 2312000 |
|
}, |
|
{ |
|
"epoch": 9.77, |
|
"learning_rate": 1.3666666666666667e-08, |
|
"loss": 2.7685, |
|
"step": 2320000 |
|
}, |
|
{ |
|
"epoch": 9.77, |
|
"eval_loss": 2.610078811645508, |
|
"eval_runtime": 432.5525, |
|
"eval_samples_per_second": 462.367, |
|
"eval_steps_per_second": 28.898, |
|
"step": 2320000 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"eval_loss": 2.596436023712158, |
|
"eval_runtime": 432.023, |
|
"eval_samples_per_second": 462.934, |
|
"eval_steps_per_second": 28.934, |
|
"step": 2328000 |
|
}, |
|
{ |
|
"epoch": 9.84, |
|
"learning_rate": 1.0933333333333334e-08, |
|
"loss": 2.7708, |
|
"step": 2336000 |
|
}, |
|
{ |
|
"epoch": 9.84, |
|
"eval_loss": 2.5973622798919678, |
|
"eval_runtime": 432.5814, |
|
"eval_samples_per_second": 462.336, |
|
"eval_steps_per_second": 28.896, |
|
"step": 2336000 |
|
}, |
|
{ |
|
"epoch": 9.87, |
|
"eval_loss": 2.5952794551849365, |
|
"eval_runtime": 427.2422, |
|
"eval_samples_per_second": 468.114, |
|
"eval_steps_per_second": 29.257, |
|
"step": 2344000 |
|
}, |
|
{ |
|
"epoch": 9.9, |
|
"learning_rate": 8.2e-09, |
|
"loss": 2.7695, |
|
"step": 2352000 |
|
}, |
|
{ |
|
"epoch": 9.9, |
|
"eval_loss": 2.598102569580078, |
|
"eval_runtime": 427.2188, |
|
"eval_samples_per_second": 468.139, |
|
"eval_steps_per_second": 29.259, |
|
"step": 2352000 |
|
}, |
|
{ |
|
"epoch": 9.94, |
|
"eval_loss": 2.6094541549682617, |
|
"eval_runtime": 427.6756, |
|
"eval_samples_per_second": 467.639, |
|
"eval_steps_per_second": 29.228, |
|
"step": 2360000 |
|
}, |
|
{ |
|
"epoch": 9.97, |
|
"learning_rate": 5.466666666666667e-09, |
|
"loss": 2.7702, |
|
"step": 2368000 |
|
}, |
|
{ |
|
"epoch": 9.97, |
|
"eval_loss": 2.6042184829711914, |
|
"eval_runtime": 426.894, |
|
"eval_samples_per_second": 468.496, |
|
"eval_steps_per_second": 29.281, |
|
"step": 2368000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 2.6094839572906494, |
|
"eval_runtime": 429.3589, |
|
"eval_samples_per_second": 465.806, |
|
"eval_steps_per_second": 29.113, |
|
"step": 2376000 |
|
}, |
|
{ |
|
"epoch": 10.04, |
|
"learning_rate": 2.7333333333333334e-09, |
|
"loss": 2.7614, |
|
"step": 2384000 |
|
}, |
|
{ |
|
"epoch": 10.04, |
|
"eval_loss": 2.6007468700408936, |
|
"eval_runtime": 426.5863, |
|
"eval_samples_per_second": 468.834, |
|
"eval_steps_per_second": 29.302, |
|
"step": 2384000 |
|
}, |
|
{ |
|
"epoch": 10.07, |
|
"eval_loss": 2.601724863052368, |
|
"eval_runtime": 428.327, |
|
"eval_samples_per_second": 466.928, |
|
"eval_steps_per_second": 29.183, |
|
"step": 2392000 |
|
}, |
|
{ |
|
"epoch": 10.11, |
|
"learning_rate": 0.0, |
|
"loss": 2.7708, |
|
"step": 2400000 |
|
}, |
|
{ |
|
"epoch": 10.11, |
|
"eval_loss": 2.611358880996704, |
|
"eval_runtime": 427.933, |
|
"eval_samples_per_second": 467.358, |
|
"eval_steps_per_second": 29.21, |
|
"step": 2400000 |
|
}, |
|
{ |
|
"epoch": 10.11, |
|
"step": 2400000, |
|
"total_flos": 7.626726685368748e+17, |
|
"train_loss": 2.7854965201822917, |
|
"train_runtime": 398406.2107, |
|
"train_samples_per_second": 96.384, |
|
"train_steps_per_second": 6.024 |
|
} |
|
], |
|
"logging_steps": 16000, |
|
"max_steps": 2400000, |
|
"num_train_epochs": 11, |
|
"save_steps": 32000, |
|
"total_flos": 7.626726685368748e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|