{
  "best_metric": 3.0452535152435303,
  "best_model_checkpoint": "output/the-notorious-big/checkpoint-2101",
  "epoch": 11.0,
  "global_step": 2101,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.03, "learning_rate": 2.318581390145182e-07, "loss": 3.6069, "step": 5},
    {"epoch": 0.05, "learning_rate": 9.258652616957599e-07, "loss": 3.6646, "step": 10},
    {"epoch": 0.08, "learning_rate": 2.0773300794160956e-06, "loss": 3.5544, "step": 15},
    {"epoch": 0.1, "learning_rate": 3.6784690210450094e-06, "loss": 3.8578, "step": 20},
    {"epoch": 0.13, "learning_rate": 5.718458847652147e-06, "loss": 3.5273, "step": 25},
    {"epoch": 0.16, "learning_rate": 8.183509814491012e-06, "loss": 3.6649, "step": 30},
    {"epoch": 0.18, "learning_rate": 1.1056958885708937e-05, "loss": 3.4757, "step": 35},
    {"epoch": 0.21, "learning_rate": 1.4319382371678895e-05, "loss": 3.4231, "step": 40},
    {"epoch": 0.24, "learning_rate": 1.7948727227557336e-05, "loss": 3.4855, "step": 45},
    {"epoch": 0.26, "learning_rate": 2.1920460125527185e-05, "loss": 3.7272, "step": 50},
    {"epoch": 0.29, "learning_rate": 2.6207733293040745e-05, "loss": 3.6966, "step": 55},
    {"epoch": 0.31, "learning_rate": 3.078156599604364e-05, "loss": 3.5331, "step": 60},
    {"epoch": 0.34, "learning_rate": 3.561104044040511e-05, "loss": 3.5794, "step": 65},
    {"epoch": 0.37, "learning_rate": 4.0663510767318035e-05, "loss": 3.5518, "step": 70},
    {"epoch": 0.39, "learning_rate": 4.590482372991862e-05, "loss": 3.5769, "step": 75},
    {"epoch": 0.42, "learning_rate": 5.12995495594168e-05, "loss": 3.7063, "step": 80},
    {"epoch": 0.45, "learning_rate": 5.681122146014723e-05, "loss": 3.5721, "step": 85},
    {"epoch": 0.47, "learning_rate": 6.240258211462417e-05, "loss": 3.45, "step": 90},
    {"epoch": 0.5, "learning_rate": 6.803583553229638e-05, "loss": 3.5444, "step": 95},
    {"epoch": 0.52, "learning_rate": 7.36729025395774e-05, "loss": 3.5872, "step": 100},
    {"epoch": 0.55, "learning_rate": 7.927567818411118e-05, "loss": 3.5725, "step": 105},
    {"epoch": 0.58, "learning_rate": 8.480628931329552e-05, "loss": 3.5443, "step": 110},
    {"epoch": 0.6, "learning_rate": 9.022735058590523e-05, "loss": 3.4685, "step": 115},
    {"epoch": 0.63, "learning_rate": 9.550221718625067e-05, "loss": 3.5604, "step": 120},
    {"epoch": 0.65, "learning_rate": 0.00010059523253259703, "loss": 3.5, "step": 125},
    {"epoch": 0.68, "learning_rate": 0.00010547196930540564, "loss": 3.4513, "step": 130},
    {"epoch": 0.71, "learning_rate": 0.00011009946216611652, "loss": 3.6782, "step": 135},
    {"epoch": 0.73, "learning_rate": 0.00011444643059335986, "loss": 3.7351, "step": 140},
    {"epoch": 0.76, "learning_rate": 0.00011848349033028726, "loss": 3.4672, "step": 145},
    {"epoch": 0.79, "learning_rate": 0.0001221833520137007, "loss": 3.4939, "step": 150},
    {"epoch": 0.81, "learning_rate": 0.00012552100564230202, "loss": 3.3836, "step": 155},
    {"epoch": 0.84, "learning_rate": 0.00012847388963711152, "loss": 3.5585, "step": 160},
    {"epoch": 0.86, "learning_rate": 0.00013102204335125564, "loss": 3.6885, "step": 165},
    {"epoch": 0.89, "learning_rate": 0.00013314824199820138, "loss": 3.4539, "step": 170},
    {"epoch": 0.92, "learning_rate": 0.00013483811308636105, "loss": 3.44, "step": 175},
    {"epoch": 0.94, "learning_rate": 0.00013608023357300362, "loss": 3.2787, "step": 180},
    {"epoch": 0.97, "learning_rate": 0.00013686620708073944, "loss": 3.4844, "step": 185},
    {"epoch": 0.99, "learning_rate": 0.0001371907206546175, "loss": 3.4819, "step": 190},
    {"epoch": 1.0, "eval_loss": 3.4188125133514404, "eval_runtime": 3.4458, "eval_samples_per_second": 76.904, "eval_steps_per_second": 9.867, "step": 191},
    {"epoch": 1.02, "learning_rate": 0.0001370515806761736, "loss": 3.3397, "step": 195},
    {"epoch": 1.05, "learning_rate": 0.00013644972769166031, "loss": 3.3572, "step": 200},
    {"epoch": 1.07, "learning_rate": 0.00013538923005422486, "loss": 3.262, "step": 205},
    {"epoch": 1.1, "learning_rate": 0.00013387725642301142, "loss": 3.1682, "step": 210},
    {"epoch": 1.13, "learning_rate": 0.00013192402730508626, "loss": 3.4747, "step": 215},
    {"epoch": 1.15, "learning_rate": 0.00012954274596774817, "loss": 3.4567, "step": 220},
    {"epoch": 1.18, "learning_rate": 0.0001267495091882376, "loss": 3.2241, "step": 225},
    {"epoch": 1.2, "learning_rate": 0.00012356319844415004, "loss": 3.104, "step": 230},
    {"epoch": 1.23, "learning_rate": 0.00012000535228007566, "loss": 3.0082, "step": 235},
    {"epoch": 1.26, "learning_rate": 0.0001161000207132303, "loss": 3.2858, "step": 240},
    {"epoch": 1.28, "learning_rate": 0.00011187360266225318, "loss": 3.2368, "step": 245},
    {"epoch": 1.31, "learning_rate": 0.00010735466749810626, "loss": 3.2569, "step": 250},
    {"epoch": 1.34, "learning_rate": 0.0001025737619233402, "loss": 3.1777, "step": 255},
    {"epoch": 1.36, "learning_rate": 9.756320348516688e-05, "loss": 3.3335, "step": 260},
    {"epoch": 1.39, "learning_rate": 9.23568621181315e-05, "loss": 3.0882, "step": 265},
    {"epoch": 1.41, "learning_rate": 8.69899311930922e-05, "loss": 3.2659, "step": 270},
    {"epoch": 1.44, "learning_rate": 8.149868962015093e-05, "loss": 3.2573, "step": 275},
    {"epoch": 1.47, "learning_rate": 7.592025661365123e-05, "loss": 3.1257, "step": 280},
    {"epoch": 1.49, "learning_rate": 7.029234077696063e-05, "loss": 3.4355, "step": 285},
    {"epoch": 1.52, "learning_rate": 6.465298520315264e-05, "loss": 3.089, "step": 290},
    {"epoch": 1.54, "learning_rate": 5.9040310314635076e-05, "loss": 3.0155, "step": 295},
    {"epoch": 1.57, "learning_rate": 5.349225618005358e-05, "loss": 3.098, "step": 300},
    {"epoch": 1.6, "learning_rate": 4.804632605033502e-05, "loss": 3.0511, "step": 305},
    {"epoch": 1.62, "learning_rate": 4.273933284749334e-05, "loss": 3.254, "step": 310},
    {"epoch": 1.65, "learning_rate": 3.760715031986035e-05, "loss": 3.4357, "step": 315},
    {"epoch": 1.68, "learning_rate": 3.268447054585981e-05, "loss": 3.1706, "step": 320},
    {"epoch": 1.7, "learning_rate": 2.8004569425531536e-05, "loss": 3.2949, "step": 325},
    {"epoch": 1.73, "learning_rate": 2.359908174501344e-05, "loss": 3.0581, "step": 330},
    {"epoch": 1.75, "learning_rate": 1.949778733448209e-05, "loss": 2.9815, "step": 335},
    {"epoch": 1.78, "learning_rate": 1.572840976506218e-05, "loss": 3.1285, "step": 340},
    {"epoch": 1.81, "learning_rate": 1.2316428945455985e-05, "loss": 3.2872, "step": 345},
    {"epoch": 1.83, "learning_rate": 9.284908885085886e-06, "loss": 3.3075, "step": 350},
    {"epoch": 1.86, "learning_rate": 6.654341788021345e-06, "loss": 3.1916, "step": 355},
    {"epoch": 1.88, "learning_rate": 4.442509531570712e-06, "loss": 3.2904, "step": 360},
    {"epoch": 1.91, "learning_rate": 2.6643634659029754e-06, "loss": 3.1763, "step": 365},
    {"epoch": 1.94, "learning_rate": 1.3319233472187158e-06, "loss": 3.2474, "step": 370},
    {"epoch": 1.96, "learning_rate": 4.541960876539865e-07, "loss": 3.295, "step": 375},
    {"epoch": 1.99, "learning_rate": 3.711487114362266e-08, "loss": 2.9591, "step": 380},
    {"epoch": 2.0, "eval_loss": 3.276902914047241, "eval_runtime": 3.4584, "eval_samples_per_second": 76.624, "eval_steps_per_second": 9.831, "step": 382},
    {"epoch": 2.02, "learning_rate": 8.349904680360514e-08, "loss": 3.0232, "step": 385},
    {"epoch": 2.04, "learning_rate": 5.930350709415567e-07, "loss": 3.027, "step": 390},
    {"epoch": 2.07, "learning_rate": 1.5622786265220133e-06, "loss": 3.0191, "step": 395},
    {"epoch": 2.09, "learning_rate": 2.9846779057592496e-06, "loss": 2.7702, "step": 400},
    {"epoch": 2.12, "learning_rate": 4.850617898454477e-06, "loss": 2.9301, "step": 405},
    {"epoch": 2.15, "learning_rate": 7.147485386700417e-06, "loss": 2.9283, "step": 410},
    {"epoch": 2.17, "learning_rate": 9.859754206608232e-06, "loss": 3.039, "step": 415},
    {"epoch": 2.2, "learning_rate": 1.2969090200711697e-05, "loss": 3.1938, "step": 420},
    {"epoch": 2.23, "learning_rate": 1.645447515160081e-05, "loss": 2.8778, "step": 425},
    {"epoch": 2.25, "learning_rate": 2.0292348859028838e-05, "loss": 3.1301, "step": 430},
    {"epoch": 2.28, "learning_rate": 2.4456768400092677e-05, "loss": 3.2, "step": 435},
    {"epoch": 2.3, "learning_rate": 2.891958349593333e-05, "loss": 3.2341, "step": 440},
    {"epoch": 2.33, "learning_rate": 3.3650626799527644e-05, "loss": 3.0918, "step": 445},
    {"epoch": 2.36, "learning_rate": 3.861791781828188e-05, "loss": 3.0801, "step": 450},
    {"epoch": 2.38, "learning_rate": 4.378787909296712e-05, "loss": 3.1323, "step": 455},
    {"epoch": 2.41, "learning_rate": 4.91255631716905e-05, "loss": 2.9001, "step": 460},
    {"epoch": 2.43, "learning_rate": 5.4594888844622835e-05, "loss": 2.9404, "step": 465},
    {"epoch": 2.46, "learning_rate": 6.0158885042604066e-05, "loss": 2.8547, "step": 470},
    {"epoch": 2.49, "learning_rate": 6.577994075094185e-05, "loss": 3.023, "step": 475},
    {"epoch": 2.51, "learning_rate": 7.14200592490581e-05, "loss": 2.8448, "step": 480},
    {"epoch": 2.54, "learning_rate": 7.704111495739588e-05, "loss": 2.933, "step": 485},
    {"epoch": 2.57, "learning_rate": 8.260511115537712e-05, "loss": 2.9129, "step": 490},
    {"epoch": 2.59, "learning_rate": 8.807443682830946e-05, "loss": 2.9149, "step": 495},
    {"epoch": 2.62, "learning_rate": 9.341212090703282e-05, "loss": 3.051, "step": 500},
    {"epoch": 2.64, "learning_rate": 9.858208218171807e-05, "loss": 2.9097, "step": 505},
    {"epoch": 2.67, "learning_rate": 0.00010354937320047231, "loss": 3.0169, "step": 510},
    {"epoch": 2.7, "learning_rate": 0.00010828041650406663, "loss": 2.9528, "step": 515},
    {"epoch": 2.72, "learning_rate": 0.00011274323159990728, "loss": 3.1967, "step": 520},
    {"epoch": 2.75, "learning_rate": 0.00011690765114097114, "loss": 3.1819, "step": 525},
    {"epoch": 2.77, "learning_rate": 0.00012074552484839915, "loss": 2.9504, "step": 530},
    {"epoch": 2.8, "learning_rate": 0.00012423090979928828, "loss": 2.9404, "step": 535},
    {"epoch": 2.83, "learning_rate": 0.00012734024579339175, "loss": 3.0818, "step": 540},
    {"epoch": 2.85, "learning_rate": 0.00013005251461329956, "loss": 3.14, "step": 545},
    {"epoch": 2.88, "learning_rate": 0.0001323493821015455, "loss": 2.9756, "step": 550},
    {"epoch": 2.91, "learning_rate": 0.00013421532209424074, "loss": 3.1237, "step": 555},
    {"epoch": 2.93, "learning_rate": 0.000135637721373478, "loss": 2.7926, "step": 560},
    {"epoch": 2.96, "learning_rate": 0.00013660696492905842, "loss": 3.0619, "step": 565},
    {"epoch": 2.98, "learning_rate": 0.00013711650095319638, "loss": 3.0946, "step": 570},
    {"epoch": 3.0, "eval_loss": 3.266069173812866, "eval_runtime": 3.4464, "eval_samples_per_second": 76.891, "eval_steps_per_second": 9.865, "step": 573},
    {"epoch": 3.01, "learning_rate": 0.00013716288512885638, "loss": 3.1661, "step": 575},
    {"epoch": 3.04, "learning_rate": 0.000136745803912346, "loss": 2.6779, "step": 580},
    {"epoch": 3.06, "learning_rate": 0.00013586807665278127, "loss": 2.6821, "step": 585},
    {"epoch": 3.09, "learning_rate": 0.00013453563653409702, "loss": 2.7079, "step": 590},
    {"epoch": 3.12, "learning_rate": 0.00013275749046842927, "loss": 2.7825, "step": 595},
    {"epoch": 3.14, "learning_rate": 0.0001305456582119787, "loss": 2.8057, "step": 600},
    {"epoch": 3.17, "learning_rate": 0.00012791509111491415, "loss": 2.9291, "step": 605},
    {"epoch": 3.19, "learning_rate": 0.00012488357105454404, "loss": 2.9353, "step": 610},
    {"epoch": 3.22, "learning_rate": 0.00012147159023493787, "loss": 2.8316, "step": 615},
    {"epoch": 3.25, "learning_rate": 0.00011770221266551796, "loss": 2.9357, "step": 620},
    {"epoch": 3.27, "learning_rate": 0.00011360091825498661, "loss": 2.9269, "step": 625},
    {"epoch": 3.3, "learning_rate": 0.00010919543057446852, "loss": 2.8717, "step": 630},
    {"epoch": 3.32, "learning_rate": 0.00010451552945414024, "loss": 2.8045, "step": 635},
    {"epoch": 3.35, "learning_rate": 9.959284968013971e-05, "loss": 2.9829, "step": 640},
    {"epoch": 3.38, "learning_rate": 9.446066715250671e-05, "loss": 2.7457, "step": 645},
    {"epoch": 3.4, "learning_rate": 8.915367394966503e-05, "loss": 2.7768, "step": 650},
    {"epoch": 3.43, "learning_rate": 8.370774381994648e-05, "loss": 2.5404, "step": 655},
    {"epoch": 3.46, "learning_rate": 7.815968968536493e-05, "loss": 2.6558, "step": 660},
    {"epoch": 3.48, "learning_rate": 7.254701479684736e-05, "loss": 2.8658, "step": 665},
    {"epoch": 3.51, "learning_rate": 6.690765922303943e-05, "loss": 3.0811, "step": 670},
    {"epoch": 3.53, "learning_rate": 6.127974338634884e-05, "loss": 2.7066, "step": 675},
    {"epoch": 3.56, "learning_rate": 5.570131037984919e-05, "loss": 2.8037, "step": 680},
    {"epoch": 3.59, "learning_rate": 5.0210068806907975e-05, "loss": 2.5833, "step": 685},
    {"epoch": 3.61, "learning_rate": 4.484313788186866e-05, "loss": 2.8664, "step": 690},
    {"epoch": 3.64, "learning_rate": 3.963679651483327e-05, "loss": 2.7661, "step": 695},
    {"epoch": 3.66, "learning_rate": 3.462623807665996e-05, "loss": 2.6497, "step": 700},
    {"epoch": 3.69, "learning_rate": 2.9845332501893884e-05, "loss": 2.6264, "step": 705},
    {"epoch": 3.72, "learning_rate": 2.5326397337746965e-05, "loss": 2.7237, "step": 710},
    {"epoch": 3.74, "learning_rate": 2.109997928676983e-05, "loss": 2.8732, "step": 715},
    {"epoch": 3.77, "learning_rate": 1.719464771992433e-05, "loss": 2.9159, "step": 720},
    {"epoch": 3.8, "learning_rate": 1.3636801555849966e-05, "loss": 2.9253, "step": 725},
    {"epoch": 3.82, "learning_rate": 1.0450490811762374e-05, "loss": 2.6746, "step": 730},
    {"epoch": 3.85, "learning_rate": 7.657254032251816e-06, "loss": 2.7104, "step": 735},
    {"epoch": 3.87, "learning_rate": 5.275972694913755e-06, "loss": 2.8526, "step": 740},
    {"epoch": 3.9, "learning_rate": 3.3227435769885726e-06, "loss": 2.8344, "step": 745},
    {"epoch": 3.93, "learning_rate": 1.8107699457751103e-06, "loss": 2.7425, "step": 750},
    {"epoch": 3.95, "learning_rate": 7.502723083396683e-07, "loss": 2.8643, "step": 755},
    {"epoch": 3.98, "learning_rate": 1.4841932382638991e-07, "loss": 2.8953, "step": 760},
    {"epoch": 4.0, "eval_loss": 3.1601226329803467, "eval_runtime": 3.449, "eval_samples_per_second": 76.834, "eval_steps_per_second": 9.858, "step": 764},
    {"epoch": 4.01, "learning_rate": 9.279345382488647e-09, "loss": 2.6508, "step": 765},
    {"epoch": 4.03, "learning_rate": 3.3379291926057076e-07, "loss": 2.4879, "step": 770},
    {"epoch": 4.06, "learning_rate": 1.119766426996385e-06, "loss": 2.5634, "step": 775},
    {"epoch": 4.08, "learning_rate": 2.361886913638944e-06, "loss": 2.6256, "step": 780},
    {"epoch": 4.11, "learning_rate": 4.051758001798611e-06, "loss": 2.7326, "step": 785},
    {"epoch": 4.14, "learning_rate": 6.1779566487443804e-06, "loss": 2.6579, "step": 790},
    {"epoch": 4.16, "learning_rate": 8.726110362888466e-06, "loss": 2.5027, "step": 795},
    {"epoch": 4.19, "learning_rate": 1.167899435769802e-05, "loss": 2.9039, "step": 800},
    {"epoch": 4.21, "learning_rate": 1.5016647986299231e-05, "loss": 2.4959, "step": 805},
    {"epoch": 4.24, "learning_rate": 1.8716509669712747e-05, "loss": 2.5862, "step": 810},
    {"epoch": 4.27, "learning_rate": 2.2753569406640054e-05, "loss": 2.5506, "step": 815},
    {"epoch": 4.29, "learning_rate": 2.7100537833883422e-05, "loss": 2.3108, "step": 820},
    {"epoch": 4.32, "learning_rate": 3.172803069459421e-05, "loss": 2.536, "step": 825},
    {"epoch": 4.35, "learning_rate": 3.660476746740293e-05, "loss": 2.5062, "step": 830},
    {"epoch": 4.37, "learning_rate": 4.169778281374917e-05, "loss": 2.4466, "step": 835},
    {"epoch": 4.4, "learning_rate": 4.697264941409472e-05, "loss": 2.4376, "step": 840},
    {"epoch": 4.42, "learning_rate": 5.2393710686704254e-05, "loss": 2.8146, "step": 845},
    {"epoch": 4.45, "learning_rate": 5.79243218158887e-05, "loss": 2.6586, "step": 850},
    {"epoch": 4.48, "learning_rate": 6.35270974604226e-05, "loss": 2.5297, "step": 855},
    {"epoch": 4.5, "learning_rate": 6.916416446770344e-05, "loss": 2.33, "step": 860},
    {"epoch": 4.53, "learning_rate": 7.47974178853758e-05, "loss": 2.5292, "step": 865},
    {"epoch": 4.55, "learning_rate": 8.038877853985258e-05, "loss": 2.5357, "step": 870},
    {"epoch": 4.58, "learning_rate": 8.590045044058314e-05, "loss": 2.6811, "step": 875},
    {"epoch": 4.61, "learning_rate": 9.129517627008144e-05, "loss": 2.5603, "step": 880},
    {"epoch": 4.63, "learning_rate": 9.653648923268214e-05, "loss": 2.716, "step": 885},
    {"epoch": 4.66, "learning_rate": 0.00010158895955959489, "loss": 2.7213, "step": 890},
    {"epoch": 4.69, "learning_rate": 0.00010641843400395645, "loss": 2.4893, "step": 895},
    {"epoch": 4.71, "learning_rate": 0.0001109922667069592, "loss": 2.6099, "step": 900},
    {"epoch": 4.74, "learning_rate": 0.00011527953987447286, "loss": 2.7473, "step": 905},
    {"epoch": 4.76, "learning_rate": 0.00011925127277244263, "loss": 2.7703, "step": 910},
    {"epoch": 4.79, "learning_rate": 0.00012288061762832114, "loss": 2.75, "step": 915},
    {"epoch": 4.82, "learning_rate": 0.00012614304111429104, "loss": 2.7698, "step": 920},
    {"epoch": 4.84, "learning_rate": 0.000129016490185509, "loss": 2.7411, "step": 925},
    {"epoch": 4.87, "learning_rate": 0.0001314815411523478, "loss": 2.6514, "step": 930},
    {"epoch": 4.9, "learning_rate": 0.000133521530978955, "loss": 2.7788, "step": 935},
    {"epoch": 4.92, "learning_rate": 0.00013512266992058388, "loss": 2.5737, "step": 940},
    {"epoch": 4.95, "learning_rate": 0.00013627413473830423, "loss": 2.3355, "step": 945},
    {"epoch": 4.97, "learning_rate": 0.00013696814186098548, "loss": 2.7536, "step": 950},
    {"epoch": 5.0, "learning_rate": 0.0001372, "loss": 2.8008, "step": 955},
    {"epoch": 5.0, "eval_loss": 3.157210350036621, "eval_runtime": 3.4457, "eval_samples_per_second": 76.906, "eval_steps_per_second": 9.867, "step": 955},
    {"epoch": 5.03, "learning_rate": 0.00013696814186098548, "loss": 2.3276, "step": 960},
    {"epoch": 5.05, "learning_rate": 0.00013627413473830426, "loss": 2.5804, "step": 965},
    {"epoch": 5.08, "learning_rate": 0.0001351226699205839, "loss": 2.554, "step": 970},
    {"epoch": 5.1, "learning_rate": 0.00013352153097895503, "loss": 2.5025, "step": 975},
    {"epoch": 5.13, "learning_rate": 0.00013148154115234786, "loss": 2.4069, "step": 980},
    {"epoch": 5.16, "learning_rate": 0.00012901649018550904, "loss": 2.6704, "step": 985},
    {"epoch": 5.18, "learning_rate": 0.0001261430411142911, "loss": 2.2362, "step": 990},
    {"epoch": 5.21, "learning_rate": 0.00012288061762832122, "loss": 2.3459, "step": 995},
    {"epoch": 5.24, "learning_rate": 0.0001192512727724427, "loss": 2.2415, "step": 1000},
    {"epoch": 5.26, "learning_rate": 0.00011527953987447295, "loss": 2.5082, "step": 1005},
    {"epoch": 5.29, "learning_rate": 0.00011099226670695929, "loss": 2.3534, "step": 1010},
    {"epoch": 5.31, "learning_rate": 0.00010641843400395655, "loss": 2.5017, "step": 1015},
    {"epoch": 5.34, "learning_rate": 0.00010158895955959498, "loss": 2.4172, "step": 1020},
    {"epoch": 5.37, "learning_rate": 9.653648923268222e-05, "loss": 2.5386, "step": 1025},
    {"epoch": 5.39, "learning_rate": 9.129517627008154e-05, "loss": 2.4429, "step": 1030},
    {"epoch": 5.42, "learning_rate": 8.590045044058324e-05, "loss": 2.3742, "step": 1035},
    {"epoch": 5.45, "learning_rate": 8.038877853985269e-05, "loss": 2.4793, "step": 1040},
    {"epoch": 5.47, "learning_rate": 7.479741788537588e-05, "loss": 2.6054, "step": 1045},
    {"epoch": 5.5, "learning_rate": 6.916416446770354e-05, "loss": 2.4472, "step": 1050},
    {"epoch": 5.52, "learning_rate": 6.35270974604227e-05, "loss": 2.5238, "step": 1055},
    {"epoch": 5.55, "learning_rate": 5.792432181588881e-05, "loss": 2.2441, "step": 1060},
    {"epoch": 5.58, "learning_rate": 5.239371068670435e-05, "loss": 2.5048, "step": 1065},
    {"epoch": 5.6, "learning_rate": 4.6972649414094817e-05, "loss": 2.4761, "step": 1070},
    {"epoch": 5.63, "learning_rate": 4.1697782813749265e-05, "loss": 2.5068, "step": 1075},
    {"epoch": 5.65, "learning_rate": 3.6604767467403014e-05, "loss": 2.4981, "step": 1080},
    {"epoch": 5.68, "learning_rate": 3.17280306945943e-05, "loss": 2.3925, "step": 1085},
    {"epoch": 5.71, "learning_rate": 2.7100537833883503e-05, "loss": 2.271, "step": 1090},
    {"epoch": 5.73, "learning_rate": 2.275356940664013e-05, "loss": 2.5484, "step": 1095},
    {"epoch": 5.76, "learning_rate": 1.8716509669712814e-05, "loss": 2.3203, "step": 1100},
    {"epoch": 5.79, "learning_rate": 1.50166479862993e-05, "loss": 2.4399, "step": 1105},
    {"epoch": 5.81, "learning_rate": 1.1678994357698073e-05, "loss": 2.5271, "step": 1110},
    {"epoch": 5.84, "learning_rate": 8.726110362888518e-06, "loss": 2.535, "step": 1115},
    {"epoch": 5.86, "learning_rate": 6.177956648744426e-06, "loss": 2.3464, "step": 1120},
    {"epoch": 5.89, "learning_rate": 4.0517580017986414e-06, "loss": 2.2768, "step": 1125},
    {"epoch": 5.92, "learning_rate": 2.3618869136389365e-06, "loss": 2.5173, "step": 1130},
    {"epoch": 5.94, "learning_rate": 1.1197664269964003e-06, "loss": 2.4667, "step": 1135},
    {"epoch": 5.97, "learning_rate": 3.3379291926057076e-07, "loss": 2.4227, "step": 1140},
    {"epoch": 5.99, "learning_rate": 9.279345382488647e-09, "loss": 2.5067, "step": 1145},
    {"epoch": 6.0, "eval_loss": 3.07564377784729, "eval_runtime": 3.4511, "eval_samples_per_second": 76.787, "eval_steps_per_second": 9.852, "step": 1146},
    {"epoch": 6.02, "learning_rate": 1.4841932382638991e-07, "loss": 2.2549, "step": 1150},
    {"epoch": 6.05, "learning_rate": 7.502723083396531e-07, "loss": 2.2228, "step": 1155},
    {"epoch": 6.07, "learning_rate": 1.8107699457751103e-06, "loss": 2.1061, "step": 1160},
    {"epoch": 6.1, "learning_rate": 3.322743576988504e-06, "loss": 2.1815, "step": 1165},
    {"epoch": 6.13, "learning_rate": 5.275972694913717e-06, "loss": 2.4782, "step": 1170},
    {"epoch": 6.15, "learning_rate": 7.657254032251716e-06, "loss": 2.1363, "step": 1175},
    {"epoch": 6.18, "learning_rate": 1.045049081176232e-05, "loss": 2.2368, "step": 1180},
    {"epoch": 6.2, "learning_rate": 1.3636801555849831e-05, "loss": 2.2992, "step": 1185},
    {"epoch": 6.23, "learning_rate": 1.719464771992426e-05, "loss": 2.3285, "step": 1190},
    {"epoch": 6.26, "learning_rate": 2.1099979286769666e-05, "loss": 2.2711, "step": 1195},
    {"epoch": 6.28, "learning_rate": 2.5326397337746883e-05, "loss": 2.3187, "step": 1200},
    {"epoch": 6.31, "learning_rate": 2.9845332501893694e-05, "loss": 2.0419, "step": 1205},
    {"epoch": 6.34, "learning_rate": 3.462623807665987e-05, "loss": 2.0977, "step": 1210},
    {"epoch": 6.36, "learning_rate": 3.9636796514833065e-05, "loss": 2.2842, "step": 1215},
    {"epoch": 6.39, "learning_rate": 4.484313788186857e-05, "loss": 2.3898, "step": 1220},
    {"epoch": 6.41, "learning_rate": 5.021006880690775e-05, "loss": 2.3513, "step": 1225},
    {"epoch": 6.44, "learning_rate": 5.570131037984909e-05, "loss": 1.947, "step": 1230},
    {"epoch": 6.47, "learning_rate": 6.127974338634885e-05, "loss": 2.2545, "step": 1235},
    {"epoch": 6.49, "learning_rate": 6.690765922303933e-05, "loss": 2.07, "step": 1240},
    {"epoch": 6.52, "learning_rate": 7.254701479684739e-05, "loss": 2.1936, "step": 1245},
    {"epoch": 6.54, "learning_rate": 7.815968968536482e-05, "loss": 2.4149, "step": 1250},
    {"epoch": 6.57, "learning_rate": 8.37077438199465e-05, "loss": 2.3692, "step": 1255},
    {"epoch": 6.6, "learning_rate": 8.915367394966493e-05, "loss": 2.3514, "step": 1260},
    {"epoch": 6.62, "learning_rate": 9.446066715250662e-05, "loss": 1.9397, "step": 1265},
    {"epoch": 6.65, "learning_rate": 9.959284968013953e-05, "loss": 2.3535, "step": 1270},
    {"epoch": 6.68, "learning_rate": 0.00010451552945414016, "loss": 2.111, "step": 1275},
    {"epoch": 6.7, "learning_rate": 0.00010919543057446833, "loss": 2.2016, "step": 1280},
    {"epoch": 6.73, "learning_rate": 0.00011360091825498652, "loss": 2.4254, "step": 1285},
    {"epoch": 6.75, "learning_rate": 0.00011770221266551781, "loss": 2.4963, "step": 1290},
    {"epoch": 6.78, "learning_rate": 0.0001214715902349378, "loss": 2.3937, "step": 1295},
    {"epoch": 6.81, "learning_rate": 0.00012488357105454407, "loss": 2.2234, "step": 1300},
    {"epoch": 6.83, "learning_rate": 0.0001279150911149141, "loss": 2.3214, "step": 1305},
    {"epoch": 6.86, "learning_rate": 0.0001305456582119787, "loss": 2.3266, "step": 1310},
    {"epoch": 6.88, "learning_rate": 0.00013275749046842922, "loss": 2.3065, "step": 1315},
    {"epoch": 6.91, "learning_rate": 0.00013453563653409702, "loss": 2.4139, "step": 1320},
    {"epoch": 6.94, "learning_rate": 0.00013586807665278124, "loss": 2.38, "step": 1325},
    {"epoch": 6.96, "learning_rate": 0.000136745803912346, "loss": 2.3551, "step": 1330},
    {"epoch": 6.99, "learning_rate": 0.00013716288512885636, "loss": 2.3378, "step": 1335},
    {"epoch": 7.0, "eval_loss": 3.0805811882019043, "eval_runtime": 3.449, "eval_samples_per_second": 76.834, "eval_steps_per_second": 9.858, "step": 1337},
    {"epoch": 7.02, "learning_rate": 0.0001371165009531964, "loss": 2.2552, "step": 1340},
    {"epoch": 7.04, "learning_rate": 0.00013660696492905845, "loss": 2.1842, "step": 1345},
    {"epoch": 7.07, "learning_rate": 0.00013563772137347802, "loss": 2.097, "step": 1350},
    {"epoch": 7.09, "learning_rate": 0.00013421532209424082, "loss": 1.7536, "step": 1355},
    {"epoch": 7.12, "learning_rate": 0.00013234938210154558, "loss": 2.2293, "step": 1360},
    {"epoch": 7.15, "learning_rate": 0.0001300525146132997, "loss": 2.0082, "step": 1365},
    {"epoch": 7.17, "learning_rate": 0.00012734024579339186, "loss": 1.951, "step": 1370},
    {"epoch": 7.2, "learning_rate": 0.00012423090979928834, "loss": 2.3563, "step": 1375},
    {"epoch": 7.23, "learning_rate": 0.00012074552484839929, "loss": 2.0015, "step": 1380},
    {"epoch": 7.25, "learning_rate": 0.00011690765114097119, "loss": 2.3068, "step": 1385},
    {"epoch": 7.28, "learning_rate": 0.00011274323159990744, "loss": 2.2242, "step": 1390},
    {"epoch": 7.3, "learning_rate": 0.0001082804165040668, "loss": 2.1933, "step": 1395},
    {"epoch": 7.33, "learning_rate": 0.00010354937320047261, "loss": 2.0466, "step": 1400},
    {"epoch": 7.36, "learning_rate": 9.858208218171827e-05, "loss": 2.1267, "step": 1405},
    {"epoch": 7.38, "learning_rate": 9.341212090703316e-05, "loss": 2.08, "step": 1410},
    {"epoch": 7.41, "learning_rate": 8.807443682830967e-05, "loss": 2.1487, "step": 1415},
    {"epoch": 7.43, "learning_rate": 8.260511115537746e-05, "loss": 2.0919, "step": 1420},
    {"epoch": 7.46, "learning_rate": 7.70411149573961e-05, "loss": 2.2303, "step": 1425},
    {"epoch": 7.49, "learning_rate": 7.142005924905844e-05, "loss": 2.2703, "step": 1430},
    {"epoch": 7.51, "learning_rate": 6.57799407509417e-05, "loss": 2.1199, "step": 1435},
    {"epoch": 7.54, "learning_rate": 6.015888504260404e-05, "loss": 2.1918, "step": 1440},
    {"epoch": 7.57, "learning_rate": 5.4594888844622686e-05, "loss": 2.1859, "step": 1445},
    {"epoch": 7.59, "learning_rate": 4.912556317169047e-05, "loss": 2.2244, "step": 1450},
    {"epoch": 7.62, "learning_rate": 4.378787909296698e-05, "loss": 2.0414, "step": 1455},
    {"epoch": 7.64, "learning_rate": 3.861791781828185e-05, "loss": 2.2903, "step": 1460},
    {"epoch": 7.67, "learning_rate": 3.365062679952752e-05, "loss": 2.1773, "step": 1465},
    {"epoch": 7.7, "learning_rate": 2.8919583495933312e-05, "loss": 2.0957, "step": 1470},
    {"epoch": 7.72, "learning_rate": 2.4456768400092664e-05, "loss": 2.1373, "step": 1475},
    {"epoch": 7.75, "learning_rate": 2.029234885902891e-05, "loss": 2.2073, "step": 1480},
    {"epoch": 7.77, "learning_rate": 1.6454475151600793e-05, "loss": 2.0692, "step": 1485},
    {"epoch": 7.8, "learning_rate": 1.2969090200711758e-05, "loss": 2.1887, "step": 1490},
    {"epoch": 7.83, "learning_rate": 9.859754206608216e-06, "loss": 2.0833, "step": 1495},
    {"epoch": 7.85, "learning_rate": 7.147485386700356e-06, "loss": 2.3646, "step": 1500},
    {"epoch": 7.88, "learning_rate": 4.850617898454462e-06, "loss": 2.2123, "step": 1505},
    {"epoch": 7.91, "learning_rate": 2.9846779057592115e-06, "loss": 2.0273, "step": 1510},
    {"epoch": 7.93, "learning_rate": 1.5622786265220057e-06, "loss": 1.9322, "step": 1515},
    {"epoch": 7.96, "learning_rate": 5.93035070941549e-07, "loss": 2.0401, "step": 1520},
    {"epoch": 7.98, "learning_rate": 8.349904680360514e-08, "loss": 2.0603, "step": 1525},
    {"epoch": 8.0, "eval_loss": 3.059950828552246, "eval_runtime": 3.4514, "eval_samples_per_second": 76.78, "eval_steps_per_second": 9.851, "step": 1528},
    {"epoch": 8.01, "learning_rate": 3.711487114362266e-08, "loss": 2.0764, "step": 1530},
    {"epoch": 8.04, "learning_rate": 4.5419608765397886e-07, "loss": 1.9304, "step": 1535},
    {"epoch": 8.06, "learning_rate": 1.3319233472187234e-06, "loss": 1.8933, "step": 1540},
    {"epoch": 8.09, "learning_rate": 2.664363465902945e-06, "loss": 2.2206, "step": 1545},
    {"epoch": 8.12, "learning_rate": 4.44250953157072e-06, "loss": 2.0486, "step": 1550},
    {"epoch": 8.14, "learning_rate": 6.654341788021246e-06, "loss": 1.8876, "step": 1555},
    {"epoch": 8.17, "learning_rate": 9.28490888508584e-06, "loss": 1.7926, "step": 1560},
    {"epoch": 8.19, "learning_rate": 1.2316428945455863e-05, "loss": 1.7425, "step": 1565},
    {"epoch": 8.22, "learning_rate": 1.572840976506211e-05, "loss": 1.9577, "step": 1570},
    {"epoch": 8.25, "learning_rate": 1.9497787334482106e-05, "loss": 2.0153, "step": 1575},
    {"epoch": 8.27, "learning_rate": 2.3599081745013365e-05, "loss": 1.9377, "step": 1580},
    {"epoch": 8.3, "learning_rate": 2.800456942553155e-05, "loss": 2.2322, "step": 1585},
    {"epoch": 8.32, "learning_rate": 3.268447054585973e-05, "loss": 1.7475, "step": 1590},
    {"epoch": 8.35, "learning_rate": 3.760715031986036e-05, "loss": 1.8289, "step": 1595},
    {"epoch": 8.38, "learning_rate": 4.273933284749324e-05, "loss": 1.8663, "step": 1600},
    {"epoch": 8.4, "learning_rate": 4.804632605033493e-05, "loss": 2.0319, "step": 1605},
    {"epoch": 8.43, "learning_rate": 5.349225618005337e-05, "loss": 2.1028, "step": 1610},
    {"epoch": 8.46, "learning_rate": 5.9040310314635035e-05, "loss": 1.9332, "step": 1615},
    {"epoch": 8.48, "learning_rate": 6.465298520315248e-05, "loss": 1.8289, "step": 1620},
    {"epoch": 8.51, "learning_rate": 7.029234077696052e-05, "loss": 1.8439, "step": 1625},
    {"epoch": 8.53, "learning_rate": 7.5920256613651e-05, "loss": 1.7968, "step": 1630},
    {"epoch": 8.56, "learning_rate": 8.149868962015078e-05, "loss": 2.0554, "step": 1635},
    {"epoch": 8.59, "learning_rate": 8.698993119309211e-05, "loss": 2.0439, "step": 1640},
    {"epoch": 8.61, "learning_rate": 9.235686211813131e-05, "loss": 2.1254, "step": 1645},
    {"epoch": 8.64, "learning_rate": 9.75632034851668e-05, "loss": 1.8997, "step": 1650},
    {"epoch": 8.66, "learning_rate": 0.00010257376192334, "loss": 1.8282, "step": 1655},
    {"epoch": 8.69, "learning_rate": 0.00010735466749810619, "loss": 1.977, "step": 1660},
    {"epoch": 8.72, "learning_rate": 0.000111873602662253, "loss": 1.9521, "step": 1665},
    {"epoch": 8.74, "learning_rate": 0.00011610002071323023, "loss": 2.227, "step": 1670},
    {"epoch": 8.77, "learning_rate": 0.00012000535228007549, "loss": 1.9954, "step": 1675},
    {"epoch": 8.8, "learning_rate": 0.00012356319844414993, "loss": 2.1384, "step": 1680},
    {"epoch": 8.82, "learning_rate": 0.00012674950918823747, "loss": 1.9971, "step": 1685},
    {"epoch": 8.85, "learning_rate": 0.0001295427459677481, "loss": 1.9247, "step": 1690},
    {"epoch": 8.87, "learning_rate": 0.00013192402730508612, "loss": 2.0361, "step": 1695},
    {"epoch": 8.9, "learning_rate": 0.00013387725642301137, "loss": 1.9686, "step": 1700},
    {"epoch": 8.93, "learning_rate": 0.0001353892300542248, "loss": 2.1605, "step": 1705},
    {"epoch": 8.95, "learning_rate": 0.0001364497276916603, "loss": 2.12, "step": 1710},
    {"epoch": 8.98, "learning_rate": 0.0001370515806761736, "loss": 2.0884, "step": 1715},
    {"epoch": 9.0, "eval_loss": 3.1394710540771484, "eval_runtime": 3.4497, "eval_samples_per_second": 76.819, "eval_steps_per_second": 9.856, "step": 1719},
    {"epoch": 9.01, "learning_rate": 0.0001371907206546175, "loss": 2.0551, "step": 1720},
    {"epoch": 9.03, "learning_rate": 0.00013686620708073944, "loss": 1.8553, "step": 1725},
    {"epoch": 9.06, "learning_rate": 0.00013608023357300367, "loss": 1.9159, "step": 1730},
    {"epoch": 9.08, "learning_rate": 0.0001348381130863611, "loss": 1.7198, "step": 1735},
    {"epoch": 9.11, "learning_rate": 0.0001331482419982015, "loss": 1.8409, "step": 1740},
    {"epoch": 9.14, "learning_rate": 0.00013102204335125575, "loss": 1.8302, "step": 1745},
    {"epoch": 9.16, "learning_rate": 0.00012847388963711166, "loss": 1.8527, "step": 1750},
    {"epoch": 9.19, "learning_rate": 0.00012552100564230202, "loss": 2.1229, "step": 1755},
    {"epoch": 9.21, "learning_rate": 0.0001221833520137008, "loss": 1.8018, "step": 1760},
    {"epoch": 9.24, "learning_rate": 0.00011848349033028711, "loss": 2.0999, "step": 1765},
    {"epoch": 9.27, "learning_rate": 0.00011444643059335979, "loss": 1.8556, "step": 1770},
    {"epoch": 9.29, "learning_rate": 0.00011009946216611662, "loss": 1.918, "step": 1775},
    {"epoch": 9.32, "learning_rate": 0.00010547196930540541, "loss": 1.8345, "step": 1780},
    {"epoch": 9.35, "learning_rate": 0.0001005952325325969, "loss": 1.9419, "step": 1785},
    {"epoch": 9.37, "learning_rate": 9.550221718625064e-05, "loss": 1.7719, "step": 1790},
    {"epoch": 9.4, "learning_rate": 9.022735058590532e-05, "loss": 1.6571, "step": 1795},
    {"epoch": 9.42, "learning_rate": 8.480628931329531e-05, "loss": 1.7121, "step": 1800},
    {"epoch": 9.45, "learning_rate": 7.92756781841111e-05, "loss": 1.8914, "step": 1805},
    {"epoch": 9.48, "learning_rate": 7.367290253957744e-05, "loss": 2.0196, "step": 1810},
    {"epoch": 9.5, "learning_rate": 6.80358355322966e-05, "loss": 1.7844, "step": 1815},
    {"epoch": 9.53, "learning_rate": 6.240258211462402e-05, "loss": 1.9618, "step": 1820},
    {"epoch": 9.55, "learning_rate": 5.681122146014721e-05, "loss": 1.9428, "step": 1825},
    {"epoch": 9.58, "learning_rate": 5.129954955941689e-05, "loss": 1.7754, "step": 1830},
    {"epoch": 9.61, "learning_rate": 4.590482372991883e-05, "loss": 1.9829, "step": 1835},
    {"epoch": 9.63, "learning_rate": 4.066351076731789e-05, "loss": 1.8845, "step": 1840},
    {"epoch": 9.66, "learning_rate": 3.561104044040515e-05, "loss": 1.9994, "step": 1845},
    {"epoch": 9.69, "learning_rate": 3.078156599604378e-05, "loss": 1.826, "step": 1850},
    {"epoch": 9.71, "learning_rate": 2.6207733293040633e-05, "loss": 1.8695, "step": 1855},
    {"epoch": 9.74, "learning_rate": 2.1920460125527168e-05, "loss": 1.9426, "step": 1860},
    {"epoch": 9.76, "learning_rate": 1.7948727227557404e-05, "loss": 1.7794, "step": 1865},
    {"epoch": 9.79, "learning_rate": 1.4319382371679026e-05, "loss": 1.5958, "step": 1870},
    {"epoch": 9.82, "learning_rate": 1.105695888570886e-05, "loss": 1.9165, "step": 1875},
    {"epoch": 9.84, "learning_rate": 8.18350981449102e-06, "loss": 1.8373, "step": 1880},
    {"epoch": 9.87, "learning_rate": 5.7184588476522e-06, "loss": 1.9335, "step": 1885},
    {"epoch": 9.9, "learning_rate": 3.6784690210450856e-06, "loss": 1.7418, "step": 1890},
    {"epoch": 9.92, "learning_rate": 2.0773300794160803e-06, "loss": 2.0059, "step": 1895},
    {"epoch": 9.95, "learning_rate": 9.258652616957751e-07, "loss": 1.7969, "step": 1900},
    {"epoch": 9.97, "learning_rate": 2.3185813901453346e-07, "loss": 1.9667, "step": 1905},
    {"epoch": 10.0, "learning_rate": 0.0, "loss": 2.0133, "step": 1910},
    {"epoch": 10.0, "eval_loss": 3.0807313919067383, "eval_runtime": 3.4521, "eval_samples_per_second": 76.764, "eval_steps_per_second": 9.849, "step": 1910},
    {"epoch": 10.03, "learning_rate": 2.318581390145182e-07, "loss": 1.8296, "step": 1915},
    {"epoch": 10.05, "learning_rate": 9.258652616957447e-07, "loss": 1.6133, "step": 1920},
    {"epoch": 10.08, "learning_rate": 2.0773300794160346e-06, "loss": 1.7176, "step": 1925},
    {"epoch": 10.1, "learning_rate": 3.6784690210450323e-06, "loss": 1.7257, "step": 1930},
    {"epoch": 10.13, "learning_rate": 5.718458847652131e-06, "loss": 1.7978, "step": 1935},
    {"epoch": 10.16, "learning_rate": 8.183509814490936e-06, "loss": 1.798, "step": 1940},
    {"epoch": 10.18, "learning_rate": 1.1056958885708754e-05, "loss": 1.6063, "step": 1945},
    {"epoch": 10.21, "learning_rate": 1.4319382371678919e-05, "loss": 1.5478, "step": 1950},
    {"epoch": 10.24, "learning_rate": 1.7948727227557282e-05, "loss": 1.6348, "step": 1955},
    {"epoch": 10.26, "learning_rate": 2.1920460125527032e-05, "loss": 1.7918, "step": 1960},
    {"epoch": 10.29, "learning_rate": 2.6207733293040488e-05, "loss": 1.7877, "step": 1965},
    {"epoch": 10.31, "learning_rate": 3.078156599604362e-05, "loss": 1.7034, "step": 1970},
    {"epoch": 10.34, "learning_rate": 3.561104044040498e-05, "loss": 1.6054, "step": 1975},
    {"epoch": 10.37, "learning_rate": 4.066351076731773e-05, "loss": 1.7727, "step": 1980},
    {"epoch": 10.39, "learning_rate": 4.5904823729918646e-05, "loss": 1.7093, "step": 1985},
    {"epoch": 10.42, "learning_rate": 5.1299549559416716e-05, "loss": 1.5078, "step": 1990},
    {"epoch": 10.45, "learning_rate": 5.681122146014703e-05, "loss": 1.7173, "step": 1995},
    {"epoch": 10.47, "learning_rate": 6.240258211462383e-05, "loss": 2.0244, "step": 2000},
    {"epoch": 10.5, "learning_rate": 6.803583553229642e-05, "loss": 1.5563, "step": 2005},
    {"epoch": 10.52, "learning_rate": 7.367290253957726e-05, "loss": 1.587, "step": 2010},
    {"epoch": 10.55, "learning_rate": 7.927567818411092e-05, "loss": 1.6739, "step": 2015},
    {"epoch": 10.58, "learning_rate": 8.480628931329514e-05, "loss": 1.7724, "step": 2020},
    {"epoch": 10.6, "learning_rate": 9.022735058590513e-05, "loss": 1.7843, "step": 2025},
    {"epoch": 10.63, "learning_rate": 9.550221718625046e-05, "loss": 1.7604, "step": 2030},
    {"epoch": 10.65, "learning_rate": 0.00010059523253259673, "loss": 1.8025, "step": 2035},
    {"epoch": 10.68, "learning_rate": 0.00010547196930540525, "loss": 1.6344, "step": 2040},
    {"epoch": 10.71, "learning_rate": 0.00011009946216611647, "loss": 1.8952, "step": 2045},
    {"epoch": 10.73, "learning_rate": 0.00011444643059335966, "loss": 1.5879, "step": 2050},
    {"epoch": 10.76, "learning_rate": 0.00011848349033028699, "loss": 1.7946, "step": 2055},
    {"epoch": 10.79, "learning_rate": 0.00012218335201370068, "loss": 1.6615, "step": 2060},
    {"epoch": 10.81, "learning_rate": 0.0001255210056423019, "loss": 1.6975, "step": 2065},
    {"epoch": 10.84, "learning_rate": 0.00012847388963711158, "loss": 1.465, "step": 2070},
    {"epoch": 10.86, "learning_rate": 0.00013102204335125564, "loss": 2.0658, "step": 2075},
    {"epoch": 10.89, "learning_rate": 0.00013314824199820135, "loss": 1.9283, "step": 2080},
    {"epoch": 10.92, "learning_rate": 0.00013483811308636113, "loss": 1.8069, "step": 2085},
    {"epoch": 10.94, "learning_rate": 0.00013608023357300364, "loss": 1.7805, "step": 2090},
    {"epoch": 10.97, "learning_rate": 0.0001368662070807394, "loss": 1.6908, "step": 2095},
    {"epoch": 10.99, "learning_rate": 0.0001371907206546175, "loss": 1.8003, "step": 2100},
    {"epoch": 11.0, "eval_loss": 3.0452535152435303, "eval_runtime": 3.4493, "eval_samples_per_second": 76.827, "eval_steps_per_second": 9.857, "step": 2101}
  ],
  "max_steps": 11651,
  "num_train_epochs": 61,
  "total_flos": 2193024024576000.0,
  "trial_name": null,
  "trial_params": null
}