|
{ |
|
"best_metric": 1.3436678647994995, |
|
"best_model_checkpoint": "saves/ChineseLLaMA2-7B-Chat/lora/2023-09-07-12-02-29/checkpoint-2400", |
|
"epoch": 2.60092115957735, |
|
"eval_steps": 100, |
|
"global_step": 2400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0009999919374161553, |
|
"loss": 2.0025, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0009999677499246417, |
|
"loss": 1.7737, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0009999274383055143, |
|
"loss": 1.7391, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0009998710038588363, |
|
"loss": 1.7959, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0009997984484046375, |
|
"loss": 1.713, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0009997097742828556, |
|
"loss": 1.6441, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0009996049843532607, |
|
"loss": 1.704, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0009994840819953633, |
|
"loss": 1.6532, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0009993470711083048, |
|
"loss": 1.6791, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0009991939561107325, |
|
"loss": 1.6465, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.000999024741940656, |
|
"loss": 1.6511, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0009988394340552898, |
|
"loss": 1.6727, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0009986380384308746, |
|
"loss": 1.6653, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0009984205615624873, |
|
"loss": 1.6339, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0009981870104638294, |
|
"loss": 1.5562, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0009979373926670028, |
|
"loss": 1.6291, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0009976717162222645, |
|
"loss": 1.625, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0009973899896977695, |
|
"loss": 1.6008, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.000997092222179292, |
|
"loss": 1.6821, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0009967784232699352, |
|
"loss": 1.582, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_loss": 1.6186352968215942, |
|
"eval_runtime": 10.6735, |
|
"eval_samples_per_second": 14.054, |
|
"eval_steps_per_second": 1.78, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0009964486030898186, |
|
"loss": 1.5769, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0009961027722757538, |
|
"loss": 1.5868, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0009957409419809006, |
|
"loss": 1.5601, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.000995363123874407, |
|
"loss": 1.6061, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0009949693301410341, |
|
"loss": 1.6073, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0009945595734807615, |
|
"loss": 1.4998, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0009941338671083794, |
|
"loss": 1.5295, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0009936922247530606, |
|
"loss": 1.5418, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0009932346606579192, |
|
"loss": 1.554, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0009927611895795513, |
|
"loss": 1.5509, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0009922718267875571, |
|
"loss": 1.6123, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0009917665880640515, |
|
"loss": 1.6267, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0009912454897031524, |
|
"loss": 1.6116, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0009907085485104568, |
|
"loss": 1.5618, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0009901557818024981, |
|
"loss": 1.6085, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0009895872074061885, |
|
"loss": 1.5829, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0009890028436582426, |
|
"loss": 1.5407, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0009884027094045871, |
|
"loss": 1.5568, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0009877868239997532, |
|
"loss": 1.5831, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0009871552073062516, |
|
"loss": 1.5231, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 1.5717933177947998, |
|
"eval_runtime": 10.6708, |
|
"eval_samples_per_second": 14.057, |
|
"eval_steps_per_second": 1.781, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0009865078796939327, |
|
"loss": 1.5467, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.000985844862039329, |
|
"loss": 1.6403, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0009851661757249823, |
|
"loss": 1.5352, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0009844718426387537, |
|
"loss": 1.5616, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.000983761885173118, |
|
"loss": 1.5274, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.000983036326224442, |
|
"loss": 1.6153, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0009822951891922448, |
|
"loss": 1.5062, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0009815384979784444, |
|
"loss": 1.6038, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.000980766276986586, |
|
"loss": 1.5097, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0009799785511210557, |
|
"loss": 1.535, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.000979175345786277, |
|
"loss": 1.52, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0009783566868858912, |
|
"loss": 1.5678, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0009775226008219224, |
|
"loss": 1.5536, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0009766731144939258, |
|
"loss": 1.4826, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0009758082552981204, |
|
"loss": 1.5537, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0009749280511265056, |
|
"loss": 1.5277, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0009740325303659609, |
|
"loss": 1.5445, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.000973121721897331, |
|
"loss": 1.4944, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0009721956550944948, |
|
"loss": 1.5088, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0009712543598234172, |
|
"loss": 1.585, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_loss": 1.5345921516418457, |
|
"eval_runtime": 10.6704, |
|
"eval_samples_per_second": 14.058, |
|
"eval_steps_per_second": 1.781, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0009702978664411863, |
|
"loss": 1.5427, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0009693262057950345, |
|
"loss": 1.4475, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0009683394092213436, |
|
"loss": 1.5321, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0009673375085446339, |
|
"loss": 1.5171, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0009663205360765382, |
|
"loss": 1.5198, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00096528852461476, |
|
"loss": 1.492, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0009642415074420146, |
|
"loss": 1.5036, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0009631795183249573, |
|
"loss": 1.5134, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0009621025915130932, |
|
"loss": 1.5568, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0009610107617376733, |
|
"loss": 1.503, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0009599040642105736, |
|
"loss": 1.4584, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.000958782534623161, |
|
"loss": 1.4832, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0009576462091451406, |
|
"loss": 1.4598, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0009564951244233901, |
|
"loss": 1.5492, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.000955329317580778, |
|
"loss": 1.5145, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0009541488262149661, |
|
"loss": 1.589, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0009529536883971963, |
|
"loss": 1.6003, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0009517439426710646, |
|
"loss": 1.55, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0009505196280512762, |
|
"loss": 1.5359, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0009492807840223881, |
|
"loss": 1.4854, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_loss": 1.5193477869033813, |
|
"eval_runtime": 10.6722, |
|
"eval_samples_per_second": 14.055, |
|
"eval_steps_per_second": 1.78, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0009480274505375358, |
|
"loss": 1.4891, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0009467596680171446, |
|
"loss": 1.4719, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0009454774773476257, |
|
"loss": 1.4939, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0009441809198800587, |
|
"loss": 1.4382, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0009428700374288564, |
|
"loss": 1.4427, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0009415448722704175, |
|
"loss": 1.4767, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0009402054671417628, |
|
"loss": 1.4799, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0009388518652391571, |
|
"loss": 1.4608, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0009374841102167157, |
|
"loss": 1.4937, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0009361022461849965, |
|
"loss": 1.5468, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0009347063177095783, |
|
"loss": 1.5481, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0009332963698096223, |
|
"loss": 1.4478, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0009318724479564215, |
|
"loss": 1.4977, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0009304345980719329, |
|
"loss": 1.5091, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0009289828665272977, |
|
"loss": 1.43, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0009275173001413448, |
|
"loss": 1.4725, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0009260379461790822, |
|
"loss": 1.3741, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0009245448523501708, |
|
"loss": 1.4917, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0009230380668073877, |
|
"loss": 1.4684, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0009215176381450717, |
|
"loss": 1.5209, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_loss": 1.5050214529037476, |
|
"eval_runtime": 10.6706, |
|
"eval_samples_per_second": 14.057, |
|
"eval_steps_per_second": 1.781, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0009199836153975573, |
|
"loss": 1.4913, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0009184360480375926, |
|
"loss": 1.5377, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0009168749859747438, |
|
"loss": 1.4608, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0009153004795537861, |
|
"loss": 1.4738, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0009137125795530795, |
|
"loss": 1.4947, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0009121113371829318, |
|
"loss": 1.5267, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0009104968040839463, |
|
"loss": 1.5116, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.000908869032325357, |
|
"loss": 1.4423, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.000907228074403349, |
|
"loss": 1.4565, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.0009055739832393655, |
|
"loss": 1.4923, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.0009039068121784016, |
|
"loss": 1.4304, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.0009022266149872829, |
|
"loss": 1.4422, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.0009005334458529322, |
|
"loss": 1.522, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.0008988273593806222, |
|
"loss": 1.499, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.0008971084105922139, |
|
"loss": 1.4796, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0008953766549243818, |
|
"loss": 1.4231, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0008936321482268275, |
|
"loss": 1.462, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.0008918749467604766, |
|
"loss": 1.5191, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.0008901051071956661, |
|
"loss": 1.4845, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.0008883226866103152, |
|
"loss": 1.4652, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_loss": 1.486396074295044, |
|
"eval_runtime": 10.6718, |
|
"eval_samples_per_second": 14.056, |
|
"eval_steps_per_second": 1.78, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0008865277424880859, |
|
"loss": 1.4773, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0008847203327165278, |
|
"loss": 1.4555, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.0008829005155852125, |
|
"loss": 1.5235, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.0008810683497838525, |
|
"loss": 1.4329, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.0008792238944004096, |
|
"loss": 1.4515, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.0008773672089191885, |
|
"loss": 1.4616, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.0008754983532189185, |
|
"loss": 1.3931, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.0008736173875708229, |
|
"loss": 1.4714, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.0008717243726366746, |
|
"loss": 1.4831, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00086981936946684, |
|
"loss": 1.4928, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.0008679024394983105, |
|
"loss": 1.3735, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.0008659736445527202, |
|
"loss": 1.4587, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.0008640330468343532, |
|
"loss": 1.5138, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.0008620807089281364, |
|
"loss": 1.4625, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.0008601166937976226, |
|
"loss": 1.4173, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.000858141064782958, |
|
"loss": 1.4901, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.0008561538855988409, |
|
"loss": 1.4056, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.0008541552203324667, |
|
"loss": 1.4486, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.0008521451334414605, |
|
"loss": 1.4147, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.0008501236897517987, |
|
"loss": 1.4547, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_loss": 1.4729957580566406, |
|
"eval_runtime": 10.6704, |
|
"eval_samples_per_second": 14.058, |
|
"eval_steps_per_second": 1.781, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.000848090954455718, |
|
"loss": 1.4464, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.0008460469931096138, |
|
"loss": 1.4163, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.0008439918716319246, |
|
"loss": 1.5283, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.0008419256563010076, |
|
"loss": 1.4313, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.000839848413753, |
|
"loss": 1.3995, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.0008377602109796709, |
|
"loss": 1.4265, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0008356611153262598, |
|
"loss": 1.4426, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0008335511944893057, |
|
"loss": 1.4251, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.0008314305165144633, |
|
"loss": 1.4686, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.0008292991497943081, |
|
"loss": 1.4658, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.0008271571630661321, |
|
"loss": 1.4347, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.0008250046254097255, |
|
"loss": 1.4235, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.0008228416062451494, |
|
"loss": 1.5047, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.0008206681753304976, |
|
"loss": 1.445, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.0008184844027596461, |
|
"loss": 1.4077, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.0008162903589599924, |
|
"loss": 1.5057, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.0008140861146901849, |
|
"loss": 1.4445, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.0008118717410378407, |
|
"loss": 1.5333, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.0008096473094172527, |
|
"loss": 1.3786, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0008074128915670868, |
|
"loss": 1.3781, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_loss": 1.4600605964660645, |
|
"eval_runtime": 10.6704, |
|
"eval_samples_per_second": 14.058, |
|
"eval_steps_per_second": 1.781, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0008051685595480678, |
|
"loss": 1.5097, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.0008029143857406563, |
|
"loss": 1.5608, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.0008006504428427133, |
|
"loss": 1.4113, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.0007983768038671568, |
|
"loss": 1.3781, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.0007960935421396062, |
|
"loss": 1.4056, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.0007938007312960178, |
|
"loss": 1.4463, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.0007914984452803105, |
|
"loss": 1.3983, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.0007891867583419805, |
|
"loss": 1.3968, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.0007868657450337066, |
|
"loss": 1.4587, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.0007845354802089463, |
|
"loss": 1.4654, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.0007821960390195224, |
|
"loss": 1.4384, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.0007798474969131971, |
|
"loss": 1.44, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.0007774899296312414, |
|
"loss": 1.4221, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.0007751234132059906, |
|
"loss": 1.3795, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.0007727480239583933, |
|
"loss": 1.4748, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.0007703638384955494, |
|
"loss": 1.5171, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.0007679709337082394, |
|
"loss": 1.3996, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.0007655693867684454, |
|
"loss": 1.4386, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.0007631592751268618, |
|
"loss": 1.3789, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.0007607406765103972, |
|
"loss": 1.4553, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_loss": 1.4479364156723022, |
|
"eval_runtime": 10.6698, |
|
"eval_samples_per_second": 14.058, |
|
"eval_steps_per_second": 1.781, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.000758313668919668, |
|
"loss": 1.3962, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.000755878330626483, |
|
"loss": 1.3899, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.0007534347401713191, |
|
"loss": 1.3965, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0007509829763607879, |
|
"loss": 1.367, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0007485231182650945, |
|
"loss": 1.4027, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.0007460552452154877, |
|
"loss": 1.3563, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.0007435794368017007, |
|
"loss": 1.3192, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.0007410957728693856, |
|
"loss": 1.2772, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.0007386043335175367, |
|
"loss": 1.3291, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.000736105199095909, |
|
"loss": 1.304, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.0007335984502024256, |
|
"loss": 1.3832, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.0007310841676805791, |
|
"loss": 1.3351, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.000728562432616824, |
|
"loss": 1.3375, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.0007260333263379619, |
|
"loss": 1.3323, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.0007234969304085186, |
|
"loss": 1.3293, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.0007209533266281133, |
|
"loss": 1.3859, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.0007184025970288211, |
|
"loss": 1.3553, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.000715844823872527, |
|
"loss": 1.3607, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.0007132800896482731, |
|
"loss": 1.3457, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.0007107084770695986, |
|
"loss": 1.3788, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"eval_loss": 1.4371482133865356, |
|
"eval_runtime": 10.6719, |
|
"eval_samples_per_second": 14.056, |
|
"eval_steps_per_second": 1.78, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.0007081300690718709, |
|
"loss": 1.3039, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.0007055449488096132, |
|
"loss": 1.2719, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.0007029531996538212, |
|
"loss": 1.4107, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.0007003549051892738, |
|
"loss": 1.38, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.0006977501492118391, |
|
"loss": 1.3408, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.0006951390157257712, |
|
"loss": 1.3704, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.0006925215889410004, |
|
"loss": 1.345, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.0006898979532704186, |
|
"loss": 1.3414, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.0006872681933271559, |
|
"loss": 1.3131, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.0006846323939218526, |
|
"loss": 1.3363, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.0006819906400599234, |
|
"loss": 1.3659, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.0006793430169388163, |
|
"loss": 1.3145, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.0006766896099452652, |
|
"loss": 1.3727, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.0006740305046525351, |
|
"loss": 1.3478, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.0006713657868176639, |
|
"loss": 1.3848, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.0006686955423786951, |
|
"loss": 1.3501, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.0006660198574519078, |
|
"loss": 1.3782, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.000663338818329038, |
|
"loss": 1.3767, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.0006606525114744965, |
|
"loss": 1.3665, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.0006579610235225805, |
|
"loss": 1.2234, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"eval_loss": 1.4341663122177124, |
|
"eval_runtime": 10.6713, |
|
"eval_samples_per_second": 14.056, |
|
"eval_steps_per_second": 1.78, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.0006552644412746791, |
|
"loss": 1.4083, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.0006525628516964741, |
|
"loss": 1.4225, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.0006498563419151354, |
|
"loss": 1.3677, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.0006471449992165113, |
|
"loss": 1.2836, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.0006444289110423129, |
|
"loss": 1.3428, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.0006417081649872952, |
|
"loss": 1.3192, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.0006389828487964305, |
|
"loss": 1.3084, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.00063625305036208, |
|
"loss": 1.3702, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.000633518857721159, |
|
"loss": 1.3054, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.0006307803590522972, |
|
"loss": 1.3211, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.0006280376426729947, |
|
"loss": 1.3319, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.0006252907970367749, |
|
"loss": 1.4346, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.0006225399107303309, |
|
"loss": 1.3938, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.0006197850724706682, |
|
"loss": 1.4371, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.0006170263711022451, |
|
"loss": 1.2925, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.0006142638955941057, |
|
"loss": 1.3135, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.0006114977350370114, |
|
"loss": 1.3572, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.0006087279786405684, |
|
"loss": 1.3918, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.0006059547157303491, |
|
"loss": 1.3732, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.0006031780357450124, |
|
"loss": 1.3541, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_loss": 1.4208500385284424, |
|
"eval_runtime": 10.67, |
|
"eval_samples_per_second": 14.058, |
|
"eval_steps_per_second": 1.781, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.0006003980282334191, |
|
"loss": 1.2997, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.0005976147828517439, |
|
"loss": 1.2832, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.0005948283893605839, |
|
"loss": 1.3863, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.0005920389376220633, |
|
"loss": 1.3599, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.0005892465175969366, |
|
"loss": 1.3085, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.000586451219341686, |
|
"loss": 1.3355, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.0005836531330056176, |
|
"loss": 1.291, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.0005808523488279542, |
|
"loss": 1.3286, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.0005780489571349249, |
|
"loss": 1.3704, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.000575243048336852, |
|
"loss": 1.3263, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.0005724347129252354, |
|
"loss": 1.3357, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.0005696240414698337, |
|
"loss": 1.3665, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.0005668111246157441, |
|
"loss": 1.2568, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.0005639960530804787, |
|
"loss": 1.3212, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.0005611789176510384, |
|
"loss": 1.3358, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.0005583598091809859, |
|
"loss": 1.3618, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.0005555388185875146, |
|
"loss": 1.3273, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.0005527160368485172, |
|
"loss": 1.284, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.0005498915549996516, |
|
"loss": 1.3665, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.0005470654641314045, |
|
"loss": 1.2796, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"eval_loss": 1.4054052829742432, |
|
"eval_runtime": 10.6703, |
|
"eval_samples_per_second": 14.058, |
|
"eval_steps_per_second": 1.781, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.0005442378553861545, |
|
"loss": 1.3107, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.0005414088199552319, |
|
"loss": 1.3665, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.000538578449075978, |
|
"loss": 1.3326, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.0005357468340288031, |
|
"loss": 1.3383, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.000532914066134242, |
|
"loss": 1.336, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.0005300802367500093, |
|
"loss": 1.3949, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.0005272454372680532, |
|
"loss": 1.3214, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.0005244097591116077, |
|
"loss": 1.376, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.0005215732937322439, |
|
"loss": 1.2345, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.0005187361326069224, |
|
"loss": 1.4495, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.0005158983672350405, |
|
"loss": 1.3978, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.0005130600891354833, |
|
"loss": 1.2517, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.0005102213898436715, |
|
"loss": 1.3823, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.0005073823609086091, |
|
"loss": 1.3219, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.0005045430938899315, |
|
"loss": 1.3354, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.0005017036803549523, |
|
"loss": 1.3054, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.0004988642118757102, |
|
"loss": 1.2346, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.0004960247800260161, |
|
"loss": 1.274, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.0004931854763784994, |
|
"loss": 1.4231, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.000490346392501655, |
|
"loss": 1.2872, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_loss": 1.3990795612335205, |
|
"eval_runtime": 10.6706, |
|
"eval_samples_per_second": 14.057, |
|
"eval_steps_per_second": 1.781, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.00048750761995688984, |
|
"loss": 1.4041, |
|
"step": 1405 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.0004846692502955709, |
|
"loss": 1.3405, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.00048183137505607154, |
|
"loss": 1.3198, |
|
"step": 1415 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.00047899408576082016, |
|
"loss": 1.3528, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.0004761574739133478, |
|
"loss": 1.3095, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 0.00047332163099533787, |
|
"loss": 1.3278, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.00047048664846367587, |
|
"loss": 1.3305, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.0004676526177474991, |
|
"loss": 1.3997, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.00046481963024524846, |
|
"loss": 1.341, |
|
"step": 1445 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.00046198777732172133, |
|
"loss": 1.3008, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.00045915715030512405, |
|
"loss": 1.2643, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.0004563278404841273, |
|
"loss": 1.3169, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.00045349993910492154, |
|
"loss": 1.3062, |
|
"step": 1465 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.00045067353736827495, |
|
"loss": 1.2876, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.0004478487264265913, |
|
"loss": 1.3534, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.0004450255973809707, |
|
"loss": 1.318, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 0.000442204241278272, |
|
"loss": 1.3195, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 0.0004393847491081756, |
|
"loss": 1.3208, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.0004365672118002494, |
|
"loss": 1.3879, |
|
"step": 1495 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.0004337517202210168, |
|
"loss": 1.3356, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"eval_loss": 1.3873966932296753, |
|
"eval_runtime": 10.6704, |
|
"eval_samples_per_second": 14.058, |
|
"eval_steps_per_second": 1.781, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.0004309383651710254, |
|
"loss": 1.3163, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.00042812723738191896, |
|
"loss": 1.3119, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.0004253184275135116, |
|
"loss": 1.2777, |
|
"step": 1515 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.0004225120261508637, |
|
"loss": 1.3624, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.0004197081238013602, |
|
"loss": 1.3231, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.0004169068108917924, |
|
"loss": 1.3807, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.0004141081777654412, |
|
"loss": 1.3301, |
|
"step": 1535 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.0004113123146791633, |
|
"loss": 1.3032, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.000408519311800481, |
|
"loss": 1.2957, |
|
"step": 1545 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.00040572925920467375, |
|
"loss": 1.3138, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.0004029422468718737, |
|
"loss": 1.2496, |
|
"step": 1555 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.0004001583646841632, |
|
"loss": 1.3796, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.00039737770242267637, |
|
"loss": 1.3492, |
|
"step": 1565 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.00039460034976470396, |
|
"loss": 1.3138, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 0.0003918263962808004, |
|
"loss": 1.3172, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 0.0003890559314318959, |
|
"loss": 1.3446, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.00038628904456641116, |
|
"loss": 1.3062, |
|
"step": 1585 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.00038352582491737547, |
|
"loss": 1.2899, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.0003807663615995491, |
|
"loss": 1.2942, |
|
"step": 1595 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.0003780107436065498, |
|
"loss": 1.2902, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"eval_loss": 1.379552960395813, |
|
"eval_runtime": 10.6705, |
|
"eval_samples_per_second": 14.057, |
|
"eval_steps_per_second": 1.781, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.00037525905980798183, |
|
"loss": 1.3213, |
|
"step": 1605 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.0003725113989465705, |
|
"loss": 1.2286, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.00036976784963530017, |
|
"loss": 1.3394, |
|
"step": 1615 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.0003670285003545564, |
|
"loss": 1.2879, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.00036429343944927196, |
|
"loss": 1.3369, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.0003615627551260785, |
|
"loss": 1.3393, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.0003588365354504612, |
|
"loss": 1.3437, |
|
"step": 1635 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 0.00035611486834391894, |
|
"loss": 1.2843, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 0.00035339784158112893, |
|
"loss": 1.3463, |
|
"step": 1645 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.00035068554278711494, |
|
"loss": 1.2847, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.00034797805943442313, |
|
"loss": 1.2493, |
|
"step": 1655 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.0003452754788402996, |
|
"loss": 1.3471, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.00034257788816387475, |
|
"loss": 1.2983, |
|
"step": 1665 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.0003398853744033529, |
|
"loss": 1.3259, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.0003371980243932056, |
|
"loss": 1.333, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.00033451592480137195, |
|
"loss": 1.3071, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.00033183916212646346, |
|
"loss": 1.3238, |
|
"step": 1685 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.0003291678226949741, |
|
"loss": 1.3129, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.0003265019926584964, |
|
"loss": 1.3235, |
|
"step": 1695 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.00032384175799094297, |
|
"loss": 1.3016, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"eval_loss": 1.3693352937698364, |
|
"eval_runtime": 10.6712, |
|
"eval_samples_per_second": 14.057, |
|
"eval_steps_per_second": 1.78, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 0.0003211872044857743, |
|
"loss": 1.2658, |
|
"step": 1705 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 0.00031853841775323103, |
|
"loss": 1.274, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.00031589548321757366, |
|
"loss": 1.2629, |
|
"step": 1715 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.0003132584861143274, |
|
"loss": 1.3052, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.0003106275114875332, |
|
"loss": 1.3099, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.0003080026441870051, |
|
"loss": 1.2878, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.00030538396886559393, |
|
"loss": 1.2815, |
|
"step": 1735 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.00030277156997645706, |
|
"loss": 1.2896, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.00030016553177033466, |
|
"loss": 1.3545, |
|
"step": 1745 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 0.0002975659382928332, |
|
"loss": 1.29, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 0.00029497287338171385, |
|
"loss": 1.3543, |
|
"step": 1755 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 0.00029238642066418995, |
|
"loss": 1.3202, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 0.0002898066635542288, |
|
"loss": 1.3261, |
|
"step": 1765 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.0002872336852498627, |
|
"loss": 1.2256, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.0002846675687305045, |
|
"loss": 1.3423, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 0.0002821083967542727, |
|
"loss": 1.2896, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 0.00027955625185532217, |
|
"loss": 1.2312, |
|
"step": 1785 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 0.00027701121634118143, |
|
"loss": 1.2822, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.00027447337229009937, |
|
"loss": 1.319, |
|
"step": 1795 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.00027194280154839824, |
|
"loss": 1.3727, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"eval_loss": 1.3620151281356812, |
|
"eval_runtime": 10.671, |
|
"eval_samples_per_second": 14.057, |
|
"eval_steps_per_second": 1.781, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 0.0002694195857278326, |
|
"loss": 1.251, |
|
"step": 1805 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 0.0002669038062029592, |
|
"loss": 1.2324, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.0002643955441085115, |
|
"loss": 1.2644, |
|
"step": 1815 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.000261894880336783, |
|
"loss": 1.2582, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 0.0002594018955350191, |
|
"loss": 1.3433, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 0.00025691667010281616, |
|
"loss": 1.3069, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.00025443928418952724, |
|
"loss": 1.2895, |
|
"step": 1835 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.0002519698176916791, |
|
"loss": 1.2799, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.000249508350250395, |
|
"loss": 1.3044, |
|
"step": 1845 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.0002470549612488247, |
|
"loss": 1.2324, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 0.0002446097298095867, |
|
"loss": 1.2357, |
|
"step": 1855 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 0.00024217273479221514, |
|
"loss": 1.2329, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 0.00023974405479061623, |
|
"loss": 1.2486, |
|
"step": 1865 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 0.0002373237681305348, |
|
"loss": 1.1959, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 0.00023491195286702777, |
|
"loss": 1.2485, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 0.00023250868678194536, |
|
"loss": 1.2585, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 0.00023011404738142532, |
|
"loss": 1.2108, |
|
"step": 1885 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 0.0002277281118933916, |
|
"loss": 1.188, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 0.00022535095726506344, |
|
"loss": 1.2197, |
|
"step": 1895 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 0.00022298266016047513, |
|
"loss": 1.1352, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"eval_loss": 1.3637186288833618, |
|
"eval_runtime": 10.671, |
|
"eval_samples_per_second": 14.057, |
|
"eval_steps_per_second": 1.781, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 0.0002206232969580027, |
|
"loss": 1.2265, |
|
"step": 1905 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 0.00021827294374790034, |
|
"loss": 1.2631, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 0.00021593167632984756, |
|
"loss": 1.1309, |
|
"step": 1915 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 0.00021359957021050392, |
|
"loss": 1.2877, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 0.00021127670060107362, |
|
"loss": 1.2993, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 0.00020896314241488075, |
|
"loss": 1.2244, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 0.0002066589702649529, |
|
"loss": 1.1812, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 0.00020436425846161437, |
|
"loss": 1.2113, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 0.00020207908101009054, |
|
"loss": 1.1754, |
|
"step": 1945 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 0.00019980351160812083, |
|
"loss": 1.1897, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 0.0001975376236435813, |
|
"loss": 1.1978, |
|
"step": 1955 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 0.00019528149019211883, |
|
"loss": 1.1937, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 0.00019303518401479414, |
|
"loss": 1.2093, |
|
"step": 1965 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 0.00019079877755573442, |
|
"loss": 1.2119, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 0.0001885723429397983, |
|
"loss": 1.1933, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 0.00018635595197024886, |
|
"loss": 1.2046, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 0.00018414967612643814, |
|
"loss": 1.1605, |
|
"step": 1985 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 0.0001819535865615018, |
|
"loss": 1.1764, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 0.00017976775410006508, |
|
"loss": 1.2094, |
|
"step": 1995 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 0.000177592249235958, |
|
"loss": 1.146, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"eval_loss": 1.3614530563354492, |
|
"eval_runtime": 10.6711, |
|
"eval_samples_per_second": 14.057, |
|
"eval_steps_per_second": 1.781, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 0.00017542714212994188, |
|
"loss": 1.2674, |
|
"step": 2005 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 0.00017327250260744698, |
|
"loss": 1.2817, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 0.00017112840015632086, |
|
"loss": 1.2693, |
|
"step": 2015 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 0.00016899490392458628, |
|
"loss": 1.253, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 0.00016687208271821253, |
|
"loss": 1.208, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 0.00016476000499889514, |
|
"loss": 1.1818, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 0.0001626587388818491, |
|
"loss": 1.1945, |
|
"step": 2035 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 0.0001605683521336116, |
|
"loss": 1.2225, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 0.00015848891216985596, |
|
"loss": 1.1726, |
|
"step": 2045 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 0.00015642048605321856, |
|
"loss": 1.1651, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 0.0001543631404911356, |
|
"loss": 1.2148, |
|
"step": 2055 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 0.00015231694183369106, |
|
"loss": 1.191, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 0.0001502819560714781, |
|
"loss": 1.2421, |
|
"step": 2065 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 0.00014825824883347018, |
|
"loss": 1.1924, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 0.00014624588538490413, |
|
"loss": 1.1714, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 0.00014424493062517623, |
|
"loss": 1.2641, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 0.00014225544908574872, |
|
"loss": 1.2721, |
|
"step": 2085 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 0.00014027750492806817, |
|
"loss": 1.2431, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 0.00013831116194149712, |
|
"loss": 1.2983, |
|
"step": 2095 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 0.00013635648354125662, |
|
"loss": 1.2144, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"eval_loss": 1.3538448810577393, |
|
"eval_runtime": 10.6717, |
|
"eval_samples_per_second": 14.056, |
|
"eval_steps_per_second": 1.78, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 0.0001344135327663804, |
|
"loss": 1.1463, |
|
"step": 2105 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 0.00013248237227768246, |
|
"loss": 1.2751, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 0.00013056306435573633, |
|
"loss": 1.2196, |
|
"step": 2115 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 0.00012865567089886642, |
|
"loss": 1.1964, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 0.00012676025342115105, |
|
"loss": 1.1749, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 0.00012487687305043978, |
|
"loss": 1.2615, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 0.00012300559052638122, |
|
"loss": 1.2064, |
|
"step": 2135 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 0.00012114646619846425, |
|
"loss": 1.1642, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 0.00011929956002407194, |
|
"loss": 1.1704, |
|
"step": 2145 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 0.00011746493156654814, |
|
"loss": 1.1668, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 0.00011564263999327546, |
|
"loss": 1.1584, |
|
"step": 2155 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 0.00011383274407376848, |
|
"loss": 1.2412, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 0.0001120353021777778, |
|
"loss": 1.1688, |
|
"step": 2165 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 0.00011025037227340711, |
|
"loss": 1.2097, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 0.00010847801192524454, |
|
"loss": 1.2057, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 0.00010671827829250585, |
|
"loss": 1.2296, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 0.00010497122812719068, |
|
"loss": 1.2547, |
|
"step": 2185 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 0.00010323691777225286, |
|
"loss": 1.1746, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 0.00010151540315978314, |
|
"loss": 1.1466, |
|
"step": 2195 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 9.98067398092049e-05, |
|
"loss": 1.1551, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"eval_loss": 1.349250316619873, |
|
"eval_runtime": 10.6708, |
|
"eval_samples_per_second": 14.057, |
|
"eval_steps_per_second": 1.781, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 9.811098282548447e-05, |
|
"loss": 1.158, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 9.642818689735305e-05, |
|
"loss": 1.1444, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 9.475840629554394e-05, |
|
"loss": 1.2504, |
|
"step": 2215 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 9.310169487104131e-05, |
|
"loss": 1.1439, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 9.145810605334454e-05, |
|
"loss": 1.2758, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 8.982769284874386e-05, |
|
"loss": 1.1992, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 8.821050783861212e-05, |
|
"loss": 1.2177, |
|
"step": 2235 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 8.660660317770841e-05, |
|
"loss": 1.1942, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 8.501603059249563e-05, |
|
"loss": 1.163, |
|
"step": 2245 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 8.343884137947333e-05, |
|
"loss": 1.239, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 8.187508640352265e-05, |
|
"loss": 1.1455, |
|
"step": 2255 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 8.032481609626575e-05, |
|
"loss": 1.2165, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 7.878808045444014e-05, |
|
"loss": 1.1982, |
|
"step": 2265 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 7.726492903828575e-05, |
|
"loss": 1.212, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 7.575541096994637e-05, |
|
"loss": 1.2453, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 7.4259574931886e-05, |
|
"loss": 1.2607, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 7.27774691653188e-05, |
|
"loss": 1.1936, |
|
"step": 2285 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 7.130914146865247e-05, |
|
"loss": 1.2702, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 6.985463919594781e-05, |
|
"loss": 1.133, |
|
"step": 2295 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 6.841400925539104e-05, |
|
"loss": 1.2135, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"eval_loss": 1.3470078706741333, |
|
"eval_runtime": 10.6711, |
|
"eval_samples_per_second": 14.057, |
|
"eval_steps_per_second": 1.781, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 6.698729810778065e-05, |
|
"loss": 1.1986, |
|
"step": 2305 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 6.557455176502986e-05, |
|
"loss": 1.2254, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 6.417581578868198e-05, |
|
"loss": 1.212, |
|
"step": 2315 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 6.279113528844127e-05, |
|
"loss": 1.1517, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 6.14205549207184e-05, |
|
"loss": 1.1889, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 6.006411888718982e-05, |
|
"loss": 1.2348, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 5.872187093337239e-05, |
|
"loss": 1.1862, |
|
"step": 2335 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 5.739385434721295e-05, |
|
"loss": 1.2143, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 5.608011195769186e-05, |
|
"loss": 1.242, |
|
"step": 2345 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 5.478068613344151e-05, |
|
"loss": 1.1817, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 5.3495618781380764e-05, |
|
"loss": 1.1916, |
|
"step": 2355 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 5.2224951345362703e-05, |
|
"loss": 1.1231, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 5.096872480483816e-05, |
|
"loss": 1.2113, |
|
"step": 2365 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 4.972697967353445e-05, |
|
"loss": 1.164, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 4.8499755998148656e-05, |
|
"loss": 1.1947, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 4.728709335705561e-05, |
|
"loss": 1.2219, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 4.6089030859032376e-05, |
|
"loss": 1.2104, |
|
"step": 2385 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 4.490560714199637e-05, |
|
"loss": 1.2077, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 4.373686037175917e-05, |
|
"loss": 1.1758, |
|
"step": 2395 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 4.258282824079618e-05, |
|
"loss": 1.2094, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"eval_loss": 1.3436678647994995, |
|
"eval_runtime": 10.6699, |
|
"eval_samples_per_second": 14.058, |
|
"eval_steps_per_second": 1.781, |
|
"step": 2400 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 2766, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"total_flos": 7.911849987145728e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|