|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 4.0, |
|
"global_step": 4944, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.0161290322580645e-07, |
|
"loss": 1.7344, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.032258064516129e-07, |
|
"loss": 1.6586, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6.048387096774194e-07, |
|
"loss": 1.6465, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.064516129032258e-07, |
|
"loss": 1.5832, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0080645161290323e-06, |
|
"loss": 1.5574, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.2096774193548388e-06, |
|
"loss": 1.4381, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.4112903225806455e-06, |
|
"loss": 1.5215, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.6129032258064516e-06, |
|
"loss": 1.5566, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.8145161290322583e-06, |
|
"loss": 1.5641, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.0161290322580646e-06, |
|
"loss": 1.4707, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.217741935483871e-06, |
|
"loss": 1.4484, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.4193548387096776e-06, |
|
"loss": 1.4656, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.620967741935484e-06, |
|
"loss": 1.3938, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.822580645161291e-06, |
|
"loss": 1.4152, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.024193548387097e-06, |
|
"loss": 1.4182, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.225806451612903e-06, |
|
"loss": 1.5051, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.4274193548387097e-06, |
|
"loss": 1.4918, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.6290322580645166e-06, |
|
"loss": 1.4738, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.830645161290323e-06, |
|
"loss": 1.4035, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.032258064516129e-06, |
|
"loss": 1.4367, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.233870967741936e-06, |
|
"loss": 1.4076, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.435483870967742e-06, |
|
"loss": 1.4902, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.637096774193548e-06, |
|
"loss": 1.3578, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.838709677419355e-06, |
|
"loss": 1.4467, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 5.040322580645161e-06, |
|
"loss": 1.4766, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 5.241935483870968e-06, |
|
"loss": 1.4828, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 5.443548387096774e-06, |
|
"loss": 1.4258, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 5.645161290322582e-06, |
|
"loss": 1.4602, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 5.846774193548388e-06, |
|
"loss": 1.4902, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 6.048387096774194e-06, |
|
"loss": 1.3729, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 6.25e-06, |
|
"loss": 1.4902, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 6.451612903225806e-06, |
|
"loss": 1.435, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 6.653225806451613e-06, |
|
"loss": 1.4096, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 6.854838709677419e-06, |
|
"loss": 1.4508, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 7.056451612903227e-06, |
|
"loss": 1.4266, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 7.258064516129033e-06, |
|
"loss": 1.4648, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 7.459677419354839e-06, |
|
"loss": 1.3996, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 7.661290322580646e-06, |
|
"loss": 1.4152, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 7.862903225806451e-06, |
|
"loss": 1.4648, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 8.064516129032258e-06, |
|
"loss": 1.3128, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 8.266129032258065e-06, |
|
"loss": 1.4242, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 8.467741935483872e-06, |
|
"loss": 1.4648, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 8.669354838709677e-06, |
|
"loss": 1.4527, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 8.870967741935484e-06, |
|
"loss": 1.4, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.072580645161291e-06, |
|
"loss": 1.4984, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.274193548387097e-06, |
|
"loss": 1.4297, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.475806451612905e-06, |
|
"loss": 1.4645, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.67741935483871e-06, |
|
"loss": 1.4107, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.879032258064517e-06, |
|
"loss": 1.4328, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.999995524479982e-06, |
|
"loss": 1.4279, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.999945174971776e-06, |
|
"loss": 1.4738, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.999838882120566e-06, |
|
"loss": 1.3898, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.999676647115646e-06, |
|
"loss": 1.3346, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.999458471772225e-06, |
|
"loss": 1.5164, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.999184358531422e-06, |
|
"loss": 1.4148, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.998854310460233e-06, |
|
"loss": 1.4273, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.998468331251499e-06, |
|
"loss": 1.4592, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.998026425223858e-06, |
|
"loss": 1.4504, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.997528597321704e-06, |
|
"loss": 1.448, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.996974853115132e-06, |
|
"loss": 1.4523, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 9.996365198799868e-06, |
|
"loss": 1.5063, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 9.99569964119721e-06, |
|
"loss": 1.4051, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 9.99497818775394e-06, |
|
"loss": 1.4312, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.994200846542251e-06, |
|
"loss": 1.4758, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.993367626259652e-06, |
|
"loss": 1.468, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 9.99247853622887e-06, |
|
"loss": 1.527, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 9.991533586397751e-06, |
|
"loss": 1.4969, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 9.990532787339137e-06, |
|
"loss": 1.4996, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 9.98947615025076e-06, |
|
"loss": 1.5262, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 9.988363686955116e-06, |
|
"loss": 1.5992, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 9.987195409899322e-06, |
|
"loss": 1.4711, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 9.985971332154985e-06, |
|
"loss": 1.45, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 9.984691467418057e-06, |
|
"loss": 1.4863, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 9.983355830008678e-06, |
|
"loss": 1.5219, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 9.981964434871015e-06, |
|
"loss": 1.5977, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 9.980517297573097e-06, |
|
"loss": 1.4539, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 9.979014434306642e-06, |
|
"loss": 1.3713, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 9.977455861886874e-06, |
|
"loss": 1.4434, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 9.975841597752334e-06, |
|
"loss": 1.5469, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 9.974171659964688e-06, |
|
"loss": 1.4531, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 9.972446067208519e-06, |
|
"loss": 1.4828, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 9.970664838791126e-06, |
|
"loss": 1.4512, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 9.9688279946423e-06, |
|
"loss": 1.4076, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 9.966935555314107e-06, |
|
"loss": 1.4969, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 9.96498754198066e-06, |
|
"loss": 1.498, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 9.962983976437868e-06, |
|
"loss": 1.4393, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 9.96092488110321e-06, |
|
"loss": 1.5219, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 9.958810279015474e-06, |
|
"loss": 1.4484, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 9.956640193834501e-06, |
|
"loss": 1.4805, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 9.954414649840922e-06, |
|
"loss": 1.4832, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 9.952133671935885e-06, |
|
"loss": 1.3789, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 9.949797285640771e-06, |
|
"loss": 1.4773, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 9.947405517096927e-06, |
|
"loss": 1.5246, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 9.944958393065343e-06, |
|
"loss": 1.5094, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 9.942455940926384e-06, |
|
"loss": 1.4253, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 9.939898188679465e-06, |
|
"loss": 1.4396, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 9.93728516494274e-06, |
|
"loss": 1.3863, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 9.934616898952787e-06, |
|
"loss": 1.4965, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 9.931893420564277e-06, |
|
"loss": 1.5633, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 9.929114760249642e-06, |
|
"loss": 1.3689, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 9.926280949098732e-06, |
|
"loss": 1.4434, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 9.923392018818467e-06, |
|
"loss": 1.4758, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 9.92044800173249e-06, |
|
"loss": 1.4133, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 9.917448930780786e-06, |
|
"loss": 1.451, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 9.91439483951934e-06, |
|
"loss": 1.5117, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 9.91128576211974e-06, |
|
"loss": 1.4949, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 9.908121733368803e-06, |
|
"loss": 1.3984, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 9.904902788668187e-06, |
|
"loss": 1.6094, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 9.901628964033993e-06, |
|
"loss": 1.452, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 9.89830029609636e-06, |
|
"loss": 1.4293, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 9.894916822099062e-06, |
|
"loss": 1.4957, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 9.89147857989908e-06, |
|
"loss": 1.507, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 9.887985607966188e-06, |
|
"loss": 1.4379, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 9.884437945382523e-06, |
|
"loss": 1.4812, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 9.880835631842141e-06, |
|
"loss": 1.4477, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 9.877178707650573e-06, |
|
"loss": 1.4484, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 9.873467213724384e-06, |
|
"loss": 1.4897, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 9.869701191590703e-06, |
|
"loss": 1.4641, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 9.865880683386766e-06, |
|
"loss": 1.4809, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 9.862005731859443e-06, |
|
"loss": 1.4742, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 9.858076380364757e-06, |
|
"loss": 1.4902, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 9.854092672867399e-06, |
|
"loss": 1.5898, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 9.850054653940247e-06, |
|
"loss": 1.4984, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 9.845962368763847e-06, |
|
"loss": 1.4898, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 9.841815863125923e-06, |
|
"loss": 1.5082, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 9.837615183420866e-06, |
|
"loss": 1.4992, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 9.8333603766492e-06, |
|
"loss": 1.3744, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.829051490417074e-06, |
|
"loss": 1.527, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.824688572935713e-06, |
|
"loss": 1.5008, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.820271673020891e-06, |
|
"loss": 1.4721, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.81580084009238e-06, |
|
"loss": 1.5555, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.811276124173395e-06, |
|
"loss": 1.5285, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.806697575890043e-06, |
|
"loss": 1.4777, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.802065246470738e-06, |
|
"loss": 1.4322, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.797379187745652e-06, |
|
"loss": 1.5197, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.792639452146116e-06, |
|
"loss": 1.4703, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.787846092704043e-06, |
|
"loss": 1.4539, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 9.782999163051332e-06, |
|
"loss": 1.4326, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 9.778098717419266e-06, |
|
"loss": 1.3992, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 9.773144810637908e-06, |
|
"loss": 1.457, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 9.768137498135489e-06, |
|
"loss": 1.477, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 9.763076835937782e-06, |
|
"loss": 1.4455, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 9.75796288066748e-06, |
|
"loss": 1.442, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 9.752795689543563e-06, |
|
"loss": 1.5156, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 9.747575320380652e-06, |
|
"loss": 1.5129, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 9.742301831588368e-06, |
|
"loss": 1.3873, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 9.736975282170678e-06, |
|
"loss": 1.448, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 9.731595731725232e-06, |
|
"loss": 1.4695, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 9.726163240442695e-06, |
|
"loss": 1.4898, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 9.720677869106077e-06, |
|
"loss": 1.4619, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 9.715139679090057e-06, |
|
"loss": 1.5293, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 9.709548732360286e-06, |
|
"loss": 1.4535, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 9.703905091472698e-06, |
|
"loss": 1.3992, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 9.698208819572815e-06, |
|
"loss": 1.5254, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 9.692459980395034e-06, |
|
"loss": 1.351, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 9.686658638261916e-06, |
|
"loss": 1.3639, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 9.680804858083468e-06, |
|
"loss": 1.3813, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 9.674898705356413e-06, |
|
"loss": 1.4016, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 9.668940246163464e-06, |
|
"loss": 1.4611, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 9.662929547172575e-06, |
|
"loss": 1.4902, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 9.656866675636204e-06, |
|
"loss": 1.5445, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 9.650751699390554e-06, |
|
"loss": 1.5363, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 9.64458468685482e-06, |
|
"loss": 1.4508, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 9.638365707030415e-06, |
|
"loss": 1.4367, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.632094829500206e-06, |
|
"loss": 1.4594, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.62577212442774e-06, |
|
"loss": 1.498, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.619397662556434e-06, |
|
"loss": 1.4816, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.61297151520882e-06, |
|
"loss": 1.4754, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.606493754285712e-06, |
|
"loss": 1.4805, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.599964452265434e-06, |
|
"loss": 1.5539, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.593383682202974e-06, |
|
"loss": 1.4996, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 9.586751517729203e-06, |
|
"loss": 1.484, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 9.580068033050019e-06, |
|
"loss": 1.4781, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 9.57333330294554e-06, |
|
"loss": 1.5156, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 9.566547402769255e-06, |
|
"loss": 1.4734, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 9.559710408447185e-06, |
|
"loss": 1.5398, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 9.55282239647703e-06, |
|
"loss": 1.5621, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 9.545883443927325e-06, |
|
"loss": 1.4793, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 9.538893628436554e-06, |
|
"loss": 1.3898, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 9.531853028212308e-06, |
|
"loss": 1.4719, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 9.52476172203039e-06, |
|
"loss": 1.5215, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.51761978923395e-06, |
|
"loss": 1.4264, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.51042730973258e-06, |
|
"loss": 1.502, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.503184364001432e-06, |
|
"loss": 1.4287, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.495891033080315e-06, |
|
"loss": 1.4693, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.488547398572787e-06, |
|
"loss": 1.5344, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 9.48115354264524e-06, |
|
"loss": 1.3703, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 9.473709548025987e-06, |
|
"loss": 1.6094, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 9.466215498004328e-06, |
|
"loss": 1.4773, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 9.458671476429624e-06, |
|
"loss": 1.4602, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 9.45107756771036e-06, |
|
"loss": 1.4798, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 9.443433856813197e-06, |
|
"loss": 1.516, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 9.435740429262016e-06, |
|
"loss": 1.4941, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 9.427997371136976e-06, |
|
"loss": 1.402, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 9.420204769073538e-06, |
|
"loss": 1.5305, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 9.4123627102615e-06, |
|
"loss": 1.4604, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 9.404471282444019e-06, |
|
"loss": 1.5129, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 9.396530573916636e-06, |
|
"loss": 1.5453, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.38854067352628e-06, |
|
"loss": 1.4834, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.38050167067028e-06, |
|
"loss": 1.4387, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.372413655295362e-06, |
|
"loss": 1.4383, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.364276717896639e-06, |
|
"loss": 1.602, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.356090949516608e-06, |
|
"loss": 1.4193, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.347856441744122e-06, |
|
"loss": 1.5031, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.339573286713369e-06, |
|
"loss": 1.4828, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.331241577102841e-06, |
|
"loss": 1.5191, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 9.322861406134302e-06, |
|
"loss": 1.4305, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 9.314432867571732e-06, |
|
"loss": 1.4625, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 9.30595605572029e-06, |
|
"loss": 1.5246, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 9.297431065425257e-06, |
|
"loss": 1.5227, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 9.28885799207097e-06, |
|
"loss": 1.5367, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 9.280236931579754e-06, |
|
"loss": 1.4879, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 9.271567980410859e-06, |
|
"loss": 1.5137, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 9.26285123555937e-06, |
|
"loss": 1.4449, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 9.254086794555121e-06, |
|
"loss": 1.4602, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 9.245274755461621e-06, |
|
"loss": 1.4187, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 9.23641521687493e-06, |
|
"loss": 1.5391, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 9.227508277922579e-06, |
|
"loss": 1.3988, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 9.218554038262448e-06, |
|
"loss": 1.5984, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 9.209552598081657e-06, |
|
"loss": 1.5109, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 9.200504058095439e-06, |
|
"loss": 1.5418, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 9.191408519546022e-06, |
|
"loss": 1.4275, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 9.182266084201486e-06, |
|
"loss": 1.4074, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 9.173076854354634e-06, |
|
"loss": 1.5016, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 9.16384093282184e-06, |
|
"loss": 1.5188, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 9.154558422941901e-06, |
|
"loss": 1.4738, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.145229428574886e-06, |
|
"loss": 1.4049, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.135854054100961e-06, |
|
"loss": 1.4363, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 9.126432404419239e-06, |
|
"loss": 1.5211, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 9.11696458494659e-06, |
|
"loss": 1.457, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 9.107450701616469e-06, |
|
"loss": 1.4543, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 9.097890860877732e-06, |
|
"loss": 1.534, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 9.088285169693442e-06, |
|
"loss": 1.5254, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 9.078633735539673e-06, |
|
"loss": 1.5371, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 9.068936666404307e-06, |
|
"loss": 1.475, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 9.059194070785823e-06, |
|
"loss": 1.457, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 9.049406057692097e-06, |
|
"loss": 1.4891, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 9.03957273663916e-06, |
|
"loss": 1.5648, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 9.02969421764999e-06, |
|
"loss": 1.5855, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 9.019770611253272e-06, |
|
"loss": 1.4734, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 9.009802028482169e-06, |
|
"loss": 1.4867, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 8.999788580873074e-06, |
|
"loss": 1.5094, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 8.989730380464362e-06, |
|
"loss": 1.4965, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 8.979627539795136e-06, |
|
"loss": 1.4887, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 8.969480171903973e-06, |
|
"loss": 1.4398, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 8.959288390327656e-06, |
|
"loss": 1.4301, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 8.949052309099897e-06, |
|
"loss": 1.5309, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 8.938772042750078e-06, |
|
"loss": 1.3054, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 8.928447706301951e-06, |
|
"loss": 1.2152, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 8.91807941527236e-06, |
|
"loss": 1.2238, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 8.907667285669955e-06, |
|
"loss": 1.1881, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 8.897211433993873e-06, |
|
"loss": 1.234, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 8.886711977232463e-06, |
|
"loss": 1.1724, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 8.87616903286195e-06, |
|
"loss": 1.3313, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 8.865582718845142e-06, |
|
"loss": 1.2277, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 8.854953153630097e-06, |
|
"loss": 1.2145, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 8.844280456148799e-06, |
|
"loss": 1.243, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 8.833564745815835e-06, |
|
"loss": 1.1816, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 8.82280614252705e-06, |
|
"loss": 1.1965, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 8.81200476665821e-06, |
|
"loss": 1.2035, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 8.801160739063657e-06, |
|
"loss": 1.2477, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 8.790274181074951e-06, |
|
"loss": 1.1686, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 8.779345214499517e-06, |
|
"loss": 1.1877, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 8.768373961619283e-06, |
|
"loss": 1.2209, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 8.757360545189308e-06, |
|
"loss": 1.2066, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 8.746305088436406e-06, |
|
"loss": 1.2484, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 8.735207715057779e-06, |
|
"loss": 1.2068, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 8.724068549219618e-06, |
|
"loss": 1.1803, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 8.712887715555728e-06, |
|
"loss": 1.234, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 8.701665339166122e-06, |
|
"loss": 1.2441, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 8.690401545615626e-06, |
|
"loss": 1.2082, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 8.679096460932477e-06, |
|
"loss": 1.2176, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 8.667750211606906e-06, |
|
"loss": 1.2516, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 8.65636292458973e-06, |
|
"loss": 1.1766, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 8.644934727290927e-06, |
|
"loss": 1.2277, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 8.63346574757821e-06, |
|
"loss": 1.2773, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 8.621956113775601e-06, |
|
"loss": 1.2162, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 8.610405954661988e-06, |
|
"loss": 1.2551, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 8.598815399469694e-06, |
|
"loss": 1.2625, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 8.587184577883018e-06, |
|
"loss": 1.2465, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 8.5755136200368e-06, |
|
"loss": 1.2008, |
|
"step": 1405 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 8.563802656514946e-06, |
|
"loss": 1.1623, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 8.552051818348986e-06, |
|
"loss": 1.1625, |
|
"step": 1415 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 8.540261237016597e-06, |
|
"loss": 1.1723, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 8.528431044440127e-06, |
|
"loss": 1.268, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 8.516561372985137e-06, |
|
"loss": 1.2488, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 8.504652355458901e-06, |
|
"loss": 1.298, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 8.492704125108933e-06, |
|
"loss": 1.2168, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 8.480716815621486e-06, |
|
"loss": 1.2166, |
|
"step": 1445 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 8.468690561120064e-06, |
|
"loss": 1.201, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 8.456625496163921e-06, |
|
"loss": 1.2266, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 8.444521755746547e-06, |
|
"loss": 1.1812, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 8.43237947529417e-06, |
|
"loss": 1.1762, |
|
"step": 1465 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 8.420198790664232e-06, |
|
"loss": 1.2473, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 8.407979838143869e-06, |
|
"loss": 1.1887, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 8.395722754448392e-06, |
|
"loss": 1.2277, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 8.38342767671975e-06, |
|
"loss": 1.2418, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 8.371094742525006e-06, |
|
"loss": 1.2081, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 8.358724089854784e-06, |
|
"loss": 1.3461, |
|
"step": 1495 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 8.346315857121732e-06, |
|
"loss": 1.1977, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 8.33387018315898e-06, |
|
"loss": 1.2336, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 8.321387207218578e-06, |
|
"loss": 1.249, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 8.308867068969933e-06, |
|
"loss": 1.2188, |
|
"step": 1515 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 8.296309908498264e-06, |
|
"loss": 1.1823, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 8.283715866303016e-06, |
|
"loss": 1.2462, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 8.271085083296295e-06, |
|
"loss": 1.218, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 8.258417700801301e-06, |
|
"loss": 1.249, |
|
"step": 1535 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 8.245713860550734e-06, |
|
"loss": 1.2629, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 8.232973704685208e-06, |
|
"loss": 1.2605, |
|
"step": 1545 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 8.220197375751667e-06, |
|
"loss": 1.2232, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 8.207385016701792e-06, |
|
"loss": 1.2242, |
|
"step": 1555 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 8.194536770890392e-06, |
|
"loss": 1.1824, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 8.181652782073808e-06, |
|
"loss": 1.275, |
|
"step": 1565 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 8.168733194408302e-06, |
|
"loss": 1.2164, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 8.155778152448443e-06, |
|
"loss": 1.2207, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 8.142787801145495e-06, |
|
"loss": 1.2266, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 8.129762285845784e-06, |
|
"loss": 1.1971, |
|
"step": 1585 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 8.116701752289084e-06, |
|
"loss": 1.2107, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 8.103606346606978e-06, |
|
"loss": 1.1904, |
|
"step": 1595 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 8.090476215321226e-06, |
|
"loss": 1.2895, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 8.07731150534213e-06, |
|
"loss": 1.235, |
|
"step": 1605 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 8.064112363966877e-06, |
|
"loss": 1.2238, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 8.050878938877908e-06, |
|
"loss": 1.2535, |
|
"step": 1615 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 8.037611378141257e-06, |
|
"loss": 1.2336, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 8.024309830204888e-06, |
|
"loss": 1.2285, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 8.010974443897046e-06, |
|
"loss": 1.25, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 7.997605368424585e-06, |
|
"loss": 1.2492, |
|
"step": 1635 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 7.9842027533713e-06, |
|
"loss": 1.1783, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 7.970766748696254e-06, |
|
"loss": 1.171, |
|
"step": 1645 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 7.9572975047321e-06, |
|
"loss": 1.2336, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 7.943795172183394e-06, |
|
"loss": 1.204, |
|
"step": 1655 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 7.93025990212492e-06, |
|
"loss": 1.2342, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 7.916691845999986e-06, |
|
"loss": 1.1936, |
|
"step": 1665 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 7.903091155618747e-06, |
|
"loss": 1.1992, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 7.889457983156484e-06, |
|
"loss": 1.1707, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 7.875792481151916e-06, |
|
"loss": 1.1835, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 7.862094802505498e-06, |
|
"loss": 1.2359, |
|
"step": 1685 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 7.848365100477695e-06, |
|
"loss": 1.2754, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 7.834603528687277e-06, |
|
"loss": 1.2664, |
|
"step": 1695 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 7.8208102411096e-06, |
|
"loss": 1.2535, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 7.806985392074877e-06, |
|
"loss": 1.2158, |
|
"step": 1705 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 7.793129136266464e-06, |
|
"loss": 1.2504, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 7.779241628719108e-06, |
|
"loss": 1.1505, |
|
"step": 1715 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 7.765323024817237e-06, |
|
"loss": 1.2695, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 7.751373480293205e-06, |
|
"loss": 1.2059, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 7.737393151225555e-06, |
|
"loss": 1.2547, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 7.723382194037266e-06, |
|
"loss": 1.2127, |
|
"step": 1735 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 7.709340765494017e-06, |
|
"loss": 1.1734, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 7.695269022702425e-06, |
|
"loss": 1.2037, |
|
"step": 1745 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 7.681167123108277e-06, |
|
"loss": 1.2891, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 7.667035224494787e-06, |
|
"loss": 1.2285, |
|
"step": 1755 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 7.65287348498082e-06, |
|
"loss": 1.217, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 7.63868206301912e-06, |
|
"loss": 1.1856, |
|
"step": 1765 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 7.62446111739455e-06, |
|
"loss": 1.2613, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 7.6102108072223e-06, |
|
"loss": 1.1617, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 7.595931291946116e-06, |
|
"loss": 1.2006, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 7.581622731336515e-06, |
|
"loss": 1.2543, |
|
"step": 1785 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 7.567285285488994e-06, |
|
"loss": 1.2498, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 7.552919114822246e-06, |
|
"loss": 1.2484, |
|
"step": 1795 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 7.5385243800763505e-06, |
|
"loss": 1.2543, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 7.524101242310993e-06, |
|
"loss": 1.2621, |
|
"step": 1805 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 7.509649862903652e-06, |
|
"loss": 1.2176, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 7.495170403547797e-06, |
|
"loss": 1.2189, |
|
"step": 1815 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 7.480663026251073e-06, |
|
"loss": 1.2503, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 7.466127893333498e-06, |
|
"loss": 1.2186, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 7.451565167425642e-06, |
|
"loss": 1.2805, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 7.436975011466805e-06, |
|
"loss": 1.2347, |
|
"step": 1835 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 7.422357588703195e-06, |
|
"loss": 1.266, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 7.407713062686107e-06, |
|
"loss": 1.2496, |
|
"step": 1845 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 7.393041597270085e-06, |
|
"loss": 1.2902, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 7.378343356611093e-06, |
|
"loss": 1.2367, |
|
"step": 1855 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 7.363618505164678e-06, |
|
"loss": 1.274, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 7.348867207684132e-06, |
|
"loss": 1.2242, |
|
"step": 1865 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 7.334089629218639e-06, |
|
"loss": 1.2844, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 7.319285935111444e-06, |
|
"loss": 1.2672, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 7.304456290997991e-06, |
|
"loss": 1.1542, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 7.289600862804069e-06, |
|
"loss": 1.15, |
|
"step": 1885 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 7.274719816743967e-06, |
|
"loss": 1.2385, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 7.259813319318601e-06, |
|
"loss": 1.2348, |
|
"step": 1895 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 7.244881537313664e-06, |
|
"loss": 1.2578, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 7.229924637797742e-06, |
|
"loss": 1.2191, |
|
"step": 1905 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 7.214942788120466e-06, |
|
"loss": 1.251, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 7.1999361559106225e-06, |
|
"loss": 1.2031, |
|
"step": 1915 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 7.184904909074293e-06, |
|
"loss": 1.2766, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 7.169849215792955e-06, |
|
"loss": 1.2299, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 7.15476924452162e-06, |
|
"loss": 1.2355, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 7.139665163986938e-06, |
|
"loss": 1.2336, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 7.124537143185317e-06, |
|
"loss": 1.3566, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 7.109385351381022e-06, |
|
"loss": 1.1423, |
|
"step": 1945 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 7.09420995810429e-06, |
|
"loss": 1.2576, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 7.079011133149427e-06, |
|
"loss": 1.2563, |
|
"step": 1955 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 7.0637890465729165e-06, |
|
"loss": 1.2695, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 7.048543868691506e-06, |
|
"loss": 1.1986, |
|
"step": 1965 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 7.033275770080309e-06, |
|
"loss": 1.25, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 7.017984921570895e-06, |
|
"loss": 1.2025, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 7.002671494249376e-06, |
|
"loss": 1.2465, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 6.987335659454493e-06, |
|
"loss": 1.2336, |
|
"step": 1985 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 6.971977588775703e-06, |
|
"loss": 1.2436, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 6.956597454051253e-06, |
|
"loss": 1.2429, |
|
"step": 1995 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 6.941195427366259e-06, |
|
"loss": 1.2574, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 6.925771681050784e-06, |
|
"loss": 1.2465, |
|
"step": 2005 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 6.910326387677906e-06, |
|
"loss": 1.2805, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 6.89485972006179e-06, |
|
"loss": 1.2664, |
|
"step": 2015 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 6.879371851255747e-06, |
|
"loss": 1.1826, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 6.863862954550315e-06, |
|
"loss": 1.2441, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 6.8483332034713006e-06, |
|
"loss": 1.191, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 6.832782771777846e-06, |
|
"loss": 1.2574, |
|
"step": 2035 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 6.817211833460484e-06, |
|
"loss": 1.2865, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 6.801620562739197e-06, |
|
"loss": 1.2504, |
|
"step": 2045 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 6.7860091340614575e-06, |
|
"loss": 1.2084, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 6.770377722100284e-06, |
|
"loss": 1.2609, |
|
"step": 2055 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 6.75472650175228e-06, |
|
"loss": 1.2723, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 6.739055648135685e-06, |
|
"loss": 1.1243, |
|
"step": 2065 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 6.723365336588409e-06, |
|
"loss": 1.2529, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 6.707655742666074e-06, |
|
"loss": 1.3047, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 6.691927042140044e-06, |
|
"loss": 1.257, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 6.6761794109954714e-06, |
|
"loss": 1.2086, |
|
"step": 2085 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 6.660413025429312e-06, |
|
"loss": 1.2711, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 6.644628061848363e-06, |
|
"loss": 1.1157, |
|
"step": 2095 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 6.628824696867286e-06, |
|
"loss": 1.2309, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 6.613003107306637e-06, |
|
"loss": 1.2363, |
|
"step": 2105 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 6.597163470190877e-06, |
|
"loss": 1.207, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 6.5813059627464e-06, |
|
"loss": 1.2641, |
|
"step": 2115 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 6.565430762399546e-06, |
|
"loss": 1.252, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 6.549538046774621e-06, |
|
"loss": 1.2586, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 6.533627993691901e-06, |
|
"loss": 1.3012, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 6.517700781165649e-06, |
|
"loss": 1.1842, |
|
"step": 2135 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 6.501756587402124e-06, |
|
"loss": 1.2016, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 6.485795590797579e-06, |
|
"loss": 1.2988, |
|
"step": 2145 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 6.469817969936277e-06, |
|
"loss": 1.2547, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 6.453823903588481e-06, |
|
"loss": 1.2309, |
|
"step": 2155 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 6.437813570708463e-06, |
|
"loss": 1.2855, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 6.421787150432493e-06, |
|
"loss": 1.1488, |
|
"step": 2165 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 6.405744822076845e-06, |
|
"loss": 1.2115, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 6.389686765135782e-06, |
|
"loss": 1.2336, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 6.3736131592795525e-06, |
|
"loss": 1.2746, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 6.357524184352375e-06, |
|
"loss": 1.201, |
|
"step": 2185 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 6.341420020370435e-06, |
|
"loss": 1.2703, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 6.325300847519859e-06, |
|
"loss": 1.2441, |
|
"step": 2195 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 6.309166846154713e-06, |
|
"loss": 1.2684, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 6.293018196794964e-06, |
|
"loss": 1.2449, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 6.276855080124483e-06, |
|
"loss": 1.268, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 6.260677676989008e-06, |
|
"loss": 1.2906, |
|
"step": 2215 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 6.24448616839413e-06, |
|
"loss": 1.307, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 6.228280735503254e-06, |
|
"loss": 1.2699, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 6.212061559635588e-06, |
|
"loss": 1.24, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 6.195828822264107e-06, |
|
"loss": 1.2605, |
|
"step": 2235 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 6.179582705013519e-06, |
|
"loss": 1.3457, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 6.163323389658242e-06, |
|
"loss": 1.2301, |
|
"step": 2245 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 6.147051058120359e-06, |
|
"loss": 1.257, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 6.130765892467595e-06, |
|
"loss": 1.1584, |
|
"step": 2255 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 6.114468074911265e-06, |
|
"loss": 1.2537, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 6.098157787804252e-06, |
|
"loss": 1.2559, |
|
"step": 2265 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 6.081835213638951e-06, |
|
"loss": 1.2307, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 6.0655005350452414e-06, |
|
"loss": 1.1664, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 6.049153934788429e-06, |
|
"loss": 1.2146, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 6.032795595767214e-06, |
|
"loss": 1.2498, |
|
"step": 2285 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 6.016425701011637e-06, |
|
"loss": 1.2379, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 6.000044433681034e-06, |
|
"loss": 1.2584, |
|
"step": 2295 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 5.9836519770619865e-06, |
|
"loss": 1.2805, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 5.967248514566271e-06, |
|
"loss": 1.2348, |
|
"step": 2305 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 5.9508342297288035e-06, |
|
"loss": 1.2572, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 5.934409306205593e-06, |
|
"loss": 1.2018, |
|
"step": 2315 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 5.917973927771678e-06, |
|
"loss": 1.2641, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 5.901528278319083e-06, |
|
"loss": 1.2293, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 5.885072541854742e-06, |
|
"loss": 1.3113, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 5.868606902498457e-06, |
|
"loss": 1.26, |
|
"step": 2335 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 5.852131544480831e-06, |
|
"loss": 1.2092, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 5.835646652141208e-06, |
|
"loss": 1.2535, |
|
"step": 2345 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 5.8191524099256035e-06, |
|
"loss": 1.2535, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 5.802649002384655e-06, |
|
"loss": 1.2629, |
|
"step": 2355 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 5.786136614171542e-06, |
|
"loss": 1.233, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 5.769615430039931e-06, |
|
"loss": 1.2375, |
|
"step": 2365 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 5.753085634841903e-06, |
|
"loss": 1.2312, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 5.736547413525888e-06, |
|
"loss": 1.1715, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 5.72000095113459e-06, |
|
"loss": 1.2695, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 5.703446432802924e-06, |
|
"loss": 1.1672, |
|
"step": 2385 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 5.686884043755942e-06, |
|
"loss": 1.2637, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 5.6703139693067554e-06, |
|
"loss": 1.1591, |
|
"step": 2395 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 5.653736394854471e-06, |
|
"loss": 1.2343, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 5.637151505882109e-06, |
|
"loss": 1.2172, |
|
"step": 2405 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 5.620559487954531e-06, |
|
"loss": 1.2121, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 5.603960526716361e-06, |
|
"loss": 1.178, |
|
"step": 2415 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 5.587354807889913e-06, |
|
"loss": 1.2256, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 5.570742517273109e-06, |
|
"loss": 1.241, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 5.554123840737402e-06, |
|
"loss": 1.2773, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 5.537498964225694e-06, |
|
"loss": 1.2383, |
|
"step": 2435 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 5.520868073750261e-06, |
|
"loss": 1.177, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 5.50423135539066e-06, |
|
"loss": 1.1607, |
|
"step": 2445 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 5.487588995291666e-06, |
|
"loss": 1.2957, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 5.47094117966117e-06, |
|
"loss": 1.164, |
|
"step": 2455 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 5.454288094768108e-06, |
|
"loss": 1.225, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 5.437629926940367e-06, |
|
"loss": 1.2602, |
|
"step": 2465 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 5.420966862562718e-06, |
|
"loss": 1.2434, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 5.404299088074702e-06, |
|
"loss": 1.0836, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 5.387626789968574e-06, |
|
"loss": 1.0635, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 5.370950154787195e-06, |
|
"loss": 1.032, |
|
"step": 2485 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 5.354269369121958e-06, |
|
"loss": 1.0236, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 5.337584619610691e-06, |
|
"loss": 1.0402, |
|
"step": 2495 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 5.320896092935575e-06, |
|
"loss": 1.0713, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 5.304203975821048e-06, |
|
"loss": 1.0443, |
|
"step": 2505 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 5.287508455031729e-06, |
|
"loss": 1.0523, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 5.270809717370314e-06, |
|
"loss": 1.0072, |
|
"step": 2515 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 5.254107949675493e-06, |
|
"loss": 1.0473, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 5.237403338819859e-06, |
|
"loss": 1.0189, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 5.220696071707816e-06, |
|
"loss": 1.027, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 5.20398633527349e-06, |
|
"loss": 0.9773, |
|
"step": 2535 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 5.187274316478632e-06, |
|
"loss": 0.9916, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 5.170560202310536e-06, |
|
"loss": 1.0252, |
|
"step": 2545 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 5.153844179779932e-06, |
|
"loss": 1.0508, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 5.137126435918912e-06, |
|
"loss": 1.0217, |
|
"step": 2555 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 5.12040715777882e-06, |
|
"loss": 1.0367, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 5.1036865324281716e-06, |
|
"loss": 1.0121, |
|
"step": 2565 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 5.08696474695055e-06, |
|
"loss": 0.9992, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 5.070241988442528e-06, |
|
"loss": 1.0778, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 5.053518444011557e-06, |
|
"loss": 1.0703, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 5.036794300773887e-06, |
|
"loss": 1.017, |
|
"step": 2585 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 5.020069745852463e-06, |
|
"loss": 0.9813, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 5.003344966374843e-06, |
|
"loss": 1.0287, |
|
"step": 2595 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 4.9866201494710934e-06, |
|
"loss": 1.0617, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 4.969895482271695e-06, |
|
"loss": 1.1227, |
|
"step": 2605 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 4.953171151905466e-06, |
|
"loss": 1.0496, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 4.936447345497443e-06, |
|
"loss": 1.0287, |
|
"step": 2615 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 4.919724250166808e-06, |
|
"loss": 1.0656, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 4.903002053024782e-06, |
|
"loss": 1.0287, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 4.886280941172539e-06, |
|
"loss": 1.0293, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 4.869561101699113e-06, |
|
"loss": 1.0805, |
|
"step": 2635 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 4.852842721679293e-06, |
|
"loss": 1.0068, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 4.836125988171547e-06, |
|
"loss": 1.0056, |
|
"step": 2645 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 4.8194110882159175e-06, |
|
"loss": 1.0256, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 4.802698208831929e-06, |
|
"loss": 1.0551, |
|
"step": 2655 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 4.785987537016504e-06, |
|
"loss": 1.002, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 4.769279259741858e-06, |
|
"loss": 1.0378, |
|
"step": 2665 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 4.752573563953422e-06, |
|
"loss": 1.0088, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 4.735870636567736e-06, |
|
"loss": 0.9963, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 4.719170664470371e-06, |
|
"loss": 0.9977, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 4.702473834513826e-06, |
|
"loss": 1.0533, |
|
"step": 2685 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 4.685780333515449e-06, |
|
"loss": 1.0148, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 4.669090348255338e-06, |
|
"loss": 1.0023, |
|
"step": 2695 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 4.652404065474257e-06, |
|
"loss": 1.0227, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 4.6357216718715375e-06, |
|
"loss": 1.0236, |
|
"step": 2705 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 4.619043354103002e-06, |
|
"loss": 1.01, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 4.602369298778866e-06, |
|
"loss": 1.0625, |
|
"step": 2715 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 4.585699692461655e-06, |
|
"loss": 1.0154, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 4.569034721664114e-06, |
|
"loss": 1.0547, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 4.552374572847122e-06, |
|
"loss": 0.981, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 4.535719432417612e-06, |
|
"loss": 1.0691, |
|
"step": 2735 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 4.519069486726468e-06, |
|
"loss": 1.0451, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 4.502424922066462e-06, |
|
"loss": 0.9773, |
|
"step": 2745 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 4.485785924670151e-06, |
|
"loss": 0.9898, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 4.469152680707804e-06, |
|
"loss": 1.0496, |
|
"step": 2755 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 4.452525376285319e-06, |
|
"loss": 1.0211, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 4.435904197442131e-06, |
|
"loss": 1.0961, |
|
"step": 2765 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 4.419289330149145e-06, |
|
"loss": 1.0279, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 4.4026809603066375e-06, |
|
"loss": 1.0081, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 4.386079273742199e-06, |
|
"loss": 1.0764, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 4.3694844562086325e-06, |
|
"loss": 1.0342, |
|
"step": 2785 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 4.3528966933818865e-06, |
|
"loss": 1.0707, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 4.33631617085898e-06, |
|
"loss": 1.0127, |
|
"step": 2795 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 4.319743074155916e-06, |
|
"loss": 1.0658, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 4.3031775887056176e-06, |
|
"loss": 1.0881, |
|
"step": 2805 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 4.2866198998558404e-06, |
|
"loss": 1.043, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 4.2700701928671105e-06, |
|
"loss": 1.008, |
|
"step": 2815 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 4.253528652910647e-06, |
|
"loss": 1.0571, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 4.236995465066287e-06, |
|
"loss": 1.0859, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 4.220470814320417e-06, |
|
"loss": 1.0085, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 4.203954885563909e-06, |
|
"loss": 1.0146, |
|
"step": 2835 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 4.187447863590039e-06, |
|
"loss": 1.0562, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 4.170949933092432e-06, |
|
"loss": 1.1096, |
|
"step": 2845 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 4.154461278662989e-06, |
|
"loss": 1.0555, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 4.137982084789823e-06, |
|
"loss": 0.9902, |
|
"step": 2855 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 4.121512535855193e-06, |
|
"loss": 1.06, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 4.105052816133448e-06, |
|
"loss": 1.0412, |
|
"step": 2865 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 4.0886031097889556e-06, |
|
"loss": 1.0354, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 4.072163600874045e-06, |
|
"loss": 1.0928, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 4.0557344733269505e-06, |
|
"loss": 1.0645, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 4.039315910969754e-06, |
|
"loss": 0.9994, |
|
"step": 2885 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 4.02290809750632e-06, |
|
"loss": 1.003, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 4.006511216520251e-06, |
|
"loss": 1.0512, |
|
"step": 2895 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 3.9901254514728225e-06, |
|
"loss": 1.06, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 3.973750985700943e-06, |
|
"loss": 1.0541, |
|
"step": 2905 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 3.957388002415093e-06, |
|
"loss": 1.0078, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 3.941036684697274e-06, |
|
"loss": 1.0104, |
|
"step": 2915 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 3.924697215498971e-06, |
|
"loss": 1.0465, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 3.908369777639091e-06, |
|
"loss": 0.9527, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 3.892054553801931e-06, |
|
"loss": 1.0559, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 3.875751726535124e-06, |
|
"loss": 1.041, |
|
"step": 2935 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 3.8594614782476024e-06, |
|
"loss": 1.0352, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 3.843183991207551e-06, |
|
"loss": 1.0175, |
|
"step": 2945 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 3.82691944754038e-06, |
|
"loss": 0.9959, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 3.8106680292266717e-06, |
|
"loss": 1.0094, |
|
"step": 2955 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 3.7944299181001544e-06, |
|
"loss": 1.0367, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 3.778205295845663e-06, |
|
"loss": 1.0443, |
|
"step": 2965 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 3.7619943439971107e-06, |
|
"loss": 1.0074, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 3.7457972439354526e-06, |
|
"loss": 1.0396, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 3.7296141768866635e-06, |
|
"loss": 1.0506, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 3.7134453239196987e-06, |
|
"loss": 1.0268, |
|
"step": 2985 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 3.6972908659444828e-06, |
|
"loss": 1.0101, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 3.6811509837098756e-06, |
|
"loss": 1.0076, |
|
"step": 2995 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 3.6650258578016474e-06, |
|
"loss": 1.0602, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 3.6489156686404683e-06, |
|
"loss": 1.0418, |
|
"step": 3005 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 3.6328205964798822e-06, |
|
"loss": 1.0498, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 3.616740821404292e-06, |
|
"loss": 1.0277, |
|
"step": 3015 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 3.600676523326946e-06, |
|
"loss": 1.0979, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 3.5846278819879197e-06, |
|
"loss": 1.0467, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 3.568595076952113e-06, |
|
"loss": 1.0344, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 3.552578287607237e-06, |
|
"loss": 0.9874, |
|
"step": 3035 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 3.536577693161801e-06, |
|
"loss": 1.0688, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 3.520593472643122e-06, |
|
"loss": 1.0023, |
|
"step": 3045 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 3.504625804895302e-06, |
|
"loss": 1.0315, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 3.488674868577246e-06, |
|
"loss": 1.0318, |
|
"step": 3055 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 3.472740842160649e-06, |
|
"loss": 1.057, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 3.4568239039280094e-06, |
|
"loss": 1.041, |
|
"step": 3065 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 3.4409242319706225e-06, |
|
"loss": 1.126, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 3.4250420041866057e-06, |
|
"loss": 1.0151, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 3.4091773982788867e-06, |
|
"loss": 1.0395, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 3.393330591753231e-06, |
|
"loss": 1.0207, |
|
"step": 3085 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 3.377501761916249e-06, |
|
"loss": 0.9663, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 3.3616910858734143e-06, |
|
"loss": 1.055, |
|
"step": 3095 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 3.3458987405270803e-06, |
|
"loss": 1.017, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 3.330124902574505e-06, |
|
"loss": 1.0034, |
|
"step": 3105 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 3.3143697485058666e-06, |
|
"loss": 1.0262, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 3.2986334546022964e-06, |
|
"loss": 1.0723, |
|
"step": 3115 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 3.282916196933904e-06, |
|
"loss": 1.0314, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 3.2672181513578038e-06, |
|
"loss": 1.0613, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 3.251539493516152e-06, |
|
"loss": 1.0641, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 3.2358803988341776e-06, |
|
"loss": 1.0283, |
|
"step": 3135 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 3.220241042518223e-06, |
|
"loss": 1.0502, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 3.2046215995537837e-06, |
|
"loss": 1.0416, |
|
"step": 3145 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 3.1890222447035444e-06, |
|
"loss": 1.0549, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 3.173443152505431e-06, |
|
"loss": 1.034, |
|
"step": 3155 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 3.157884497270658e-06, |
|
"loss": 1.0594, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 3.1423464530817673e-06, |
|
"loss": 1.0637, |
|
"step": 3165 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 3.1268291937906957e-06, |
|
"loss": 1.0402, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 3.1113328930168153e-06, |
|
"loss": 1.0236, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 3.095857724145004e-06, |
|
"loss": 1.0414, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 3.0804038603236943e-06, |
|
"loss": 1.0465, |
|
"step": 3185 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 3.0649714744629454e-06, |
|
"loss": 1.0561, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 3.0495607392324987e-06, |
|
"loss": 1.0414, |
|
"step": 3195 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 3.0341718270598557e-06, |
|
"loss": 1.0492, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 3.0188049101283433e-06, |
|
"loss": 1.0053, |
|
"step": 3205 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 3.003460160375189e-06, |
|
"loss": 1.0193, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 2.9881377494895925e-06, |
|
"loss": 1.093, |
|
"step": 3215 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 2.9728378489108135e-06, |
|
"loss": 1.0285, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 2.957560629826244e-06, |
|
"loss": 1.0982, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 2.942306263169502e-06, |
|
"loss": 1.0438, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 2.9270749196185095e-06, |
|
"loss": 1.0695, |
|
"step": 3235 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 2.911866769593592e-06, |
|
"loss": 1.0139, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 2.896681983255565e-06, |
|
"loss": 1.1477, |
|
"step": 3245 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 2.881520730503837e-06, |
|
"loss": 1.0437, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 2.866383180974498e-06, |
|
"loss": 1.0455, |
|
"step": 3255 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 2.8512695040384287e-06, |
|
"loss": 1.0014, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 2.8361798687994097e-06, |
|
"loss": 1.0016, |
|
"step": 3265 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 2.8211144440922176e-06, |
|
"loss": 0.9983, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 2.8060733984807466e-06, |
|
"loss": 1.0927, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 2.7910569002561137e-06, |
|
"loss": 1.0424, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 2.7760651174347854e-06, |
|
"loss": 1.0555, |
|
"step": 3285 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 2.7610982177566926e-06, |
|
"loss": 0.983, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 2.7461563686833504e-06, |
|
"loss": 0.9712, |
|
"step": 3295 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 2.7312397373959894e-06, |
|
"loss": 1.04, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 2.716348490793681e-06, |
|
"loss": 1.092, |
|
"step": 3305 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 2.7014827954914814e-06, |
|
"loss": 0.9855, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 2.686642817818548e-06, |
|
"loss": 1.0319, |
|
"step": 3315 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 2.6718287238162963e-06, |
|
"loss": 0.9938, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 2.6570406792365268e-06, |
|
"loss": 1.0662, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 2.6422788495395912e-06, |
|
"loss": 1.0263, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 2.6275433998925176e-06, |
|
"loss": 1.0584, |
|
"step": 3335 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 2.612834495167177e-06, |
|
"loss": 1.0334, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 2.5981522999384323e-06, |
|
"loss": 1.0426, |
|
"step": 3345 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 2.583496978482305e-06, |
|
"loss": 1.0199, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 2.568868694774127e-06, |
|
"loss": 1.0363, |
|
"step": 3355 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 2.5542676124867103e-06, |
|
"loss": 0.9959, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 2.5396938949885163e-06, |
|
"loss": 1.0357, |
|
"step": 3365 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 2.52514770534183e-06, |
|
"loss": 1.0444, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 2.510629206300933e-06, |
|
"loss": 1.0627, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 2.4961385603102794e-06, |
|
"loss": 1.0535, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 2.481675929502682e-06, |
|
"loss": 1.0276, |
|
"step": 3385 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 2.467241475697498e-06, |
|
"loss": 1.0057, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 2.45283536039882e-06, |
|
"loss": 1.0055, |
|
"step": 3395 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 2.438457744793665e-06, |
|
"loss": 1.0001, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 2.4241087897501703e-06, |
|
"loss": 1.1129, |
|
"step": 3405 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 2.409788655815802e-06, |
|
"loss": 0.9816, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 2.395497503215551e-06, |
|
"loss": 1.008, |
|
"step": 3415 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 2.3812354918501397e-06, |
|
"loss": 1.0068, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 2.3670027812942353e-06, |
|
"loss": 1.0779, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 2.3527995307946655e-06, |
|
"loss": 1.0264, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 2.338625899268638e-06, |
|
"loss": 1.0395, |
|
"step": 3435 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 2.3244820453019566e-06, |
|
"loss": 1.0604, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 2.3103681271472516e-06, |
|
"loss": 1.0236, |
|
"step": 3445 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 2.296284302722205e-06, |
|
"loss": 1.0918, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 2.28223072960779e-06, |
|
"loss": 1.0504, |
|
"step": 3455 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 2.2682075650465063e-06, |
|
"loss": 1.0361, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 2.2542149659406126e-06, |
|
"loss": 1.0268, |
|
"step": 3465 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 2.2402530888503783e-06, |
|
"loss": 1.0434, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 2.226322089992336e-06, |
|
"loss": 1.0348, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 2.2124221252375215e-06, |
|
"loss": 1.0135, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 2.1985533501097407e-06, |
|
"loss": 1.0488, |
|
"step": 3485 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 2.1847159197838213e-06, |
|
"loss": 0.9809, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 2.1709099890838846e-06, |
|
"loss": 1.0627, |
|
"step": 3495 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 2.1571357124816107e-06, |
|
"loss": 1.0373, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.1433932440945028e-06, |
|
"loss": 1.0068, |
|
"step": 3505 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.129682737684171e-06, |
|
"loss": 1.0604, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.11600434665461e-06, |
|
"loss": 1.0337, |
|
"step": 3515 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.1023582240504836e-06, |
|
"loss": 1.0668, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.088744522555409e-06, |
|
"loss": 1.0088, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.0751633944902487e-06, |
|
"loss": 1.0436, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.061614991811414e-06, |
|
"loss": 1.0138, |
|
"step": 3535 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.0480994661091507e-06, |
|
"loss": 1.1406, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.0346169686058586e-06, |
|
"loss": 1.0391, |
|
"step": 3545 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.0211676501543866e-06, |
|
"loss": 1.0592, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.00775166123635e-06, |
|
"loss": 0.9783, |
|
"step": 3555 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.9943691519604523e-06, |
|
"loss": 1.0473, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.9810202720607945e-06, |
|
"loss": 1.0555, |
|
"step": 3565 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.967705170895208e-06, |
|
"loss": 1.0691, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.9544239974435797e-06, |
|
"loss": 1.026, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.9411769003061874e-06, |
|
"loss": 1.0588, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.9279640277020396e-06, |
|
"loss": 1.0635, |
|
"step": 3585 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.9147855274672073e-06, |
|
"loss": 0.9919, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.9016415470531773e-06, |
|
"loss": 1.0053, |
|
"step": 3595 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.8885322335252076e-06, |
|
"loss": 1.0461, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.8754577335606689e-06, |
|
"loss": 1.0051, |
|
"step": 3605 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.8624181934474117e-06, |
|
"loss": 1.0521, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.8494137590821282e-06, |
|
"loss": 0.9926, |
|
"step": 3615 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.8364445759687233e-06, |
|
"loss": 1.0264, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.823510789216676e-06, |
|
"loss": 1.0475, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.8106125435394312e-06, |
|
"loss": 1.012, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.7977499832527655e-06, |
|
"loss": 1.0269, |
|
"step": 3635 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.7849232522731797e-06, |
|
"loss": 1.0463, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 1.7721324941162933e-06, |
|
"loss": 1.025, |
|
"step": 3645 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 1.7593778518952275e-06, |
|
"loss": 1.0326, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 1.7466594683190107e-06, |
|
"loss": 1.0389, |
|
"step": 3655 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 1.7339774856909851e-06, |
|
"loss": 1.0609, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 1.7213320459072047e-06, |
|
"loss": 0.9949, |
|
"step": 3665 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 1.7087232904548595e-06, |
|
"loss": 1.0083, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 1.69615136041068e-06, |
|
"loss": 1.0377, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 1.6836163964393664e-06, |
|
"loss": 1.0514, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 1.6711185387920176e-06, |
|
"loss": 0.99, |
|
"step": 3685 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 1.6586579273045529e-06, |
|
"loss": 1.0146, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 1.6462347013961526e-06, |
|
"loss": 1.0445, |
|
"step": 3695 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 1.6338490000676987e-06, |
|
"loss": 1.0674, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 1.6215009619002197e-06, |
|
"loss": 1.0215, |
|
"step": 3705 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 1.609190725053335e-06, |
|
"loss": 0.9832, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 1.5969184272637184e-06, |
|
"loss": 0.9313, |
|
"step": 3715 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 1.5846842058435457e-06, |
|
"loss": 1.0244, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 1.5724881976789696e-06, |
|
"loss": 0.9002, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 1.5603305392285785e-06, |
|
"loss": 0.957, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 1.548211366521875e-06, |
|
"loss": 0.9404, |
|
"step": 3735 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 1.5361308151577526e-06, |
|
"loss": 0.9199, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 1.5240890203029813e-06, |
|
"loss": 0.9224, |
|
"step": 3745 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 1.5120861166906869e-06, |
|
"loss": 0.9822, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 1.5001222386188573e-06, |
|
"loss": 0.9063, |
|
"step": 3755 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 1.4881975199488247e-06, |
|
"loss": 0.9455, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 1.4763120941037757e-06, |
|
"loss": 0.8986, |
|
"step": 3765 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 1.4644660940672628e-06, |
|
"loss": 0.9297, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 1.4526596523817066e-06, |
|
"loss": 0.9889, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 1.4408929011469175e-06, |
|
"loss": 0.9387, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 1.4291659720186218e-06, |
|
"loss": 0.8889, |
|
"step": 3785 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 1.4174789962069808e-06, |
|
"loss": 0.9965, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 1.4058321044751255e-06, |
|
"loss": 0.9279, |
|
"step": 3795 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 1.3942254271377004e-06, |
|
"loss": 0.9621, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 1.3826590940593926e-06, |
|
"loss": 0.9081, |
|
"step": 3805 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 1.3711332346534916e-06, |
|
"loss": 0.9201, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 1.3596479778804312e-06, |
|
"loss": 0.9013, |
|
"step": 3815 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 1.3482034522463522e-06, |
|
"loss": 0.9255, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 1.3367997858016619e-06, |
|
"loss": 0.9678, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 1.325437106139607e-06, |
|
"loss": 0.9334, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 1.3141155403948358e-06, |
|
"loss": 0.9455, |
|
"step": 3835 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 1.3028352152419876e-06, |
|
"loss": 0.9025, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 1.291596256894263e-06, |
|
"loss": 0.8933, |
|
"step": 3845 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 1.2803987911020239e-06, |
|
"loss": 0.999, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 1.269242943151377e-06, |
|
"loss": 0.8996, |
|
"step": 3855 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 1.2581288378627759e-06, |
|
"loss": 0.9594, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 1.2470565995896244e-06, |
|
"loss": 0.9385, |
|
"step": 3865 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 1.236026352216888e-06, |
|
"loss": 0.9508, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 1.2250382191597015e-06, |
|
"loss": 0.9479, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 1.21409232336199e-06, |
|
"loss": 0.8861, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 1.2031887872951004e-06, |
|
"loss": 0.9539, |
|
"step": 3885 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 1.1923277329564192e-06, |
|
"loss": 0.8969, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 1.181509281868019e-06, |
|
"loss": 0.9248, |
|
"step": 3895 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 1.1707335550752901e-06, |
|
"loss": 0.8923, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 1.1600006731455888e-06, |
|
"loss": 0.8534, |
|
"step": 3905 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 1.1493107561668943e-06, |
|
"loss": 0.9193, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 1.1386639237464542e-06, |
|
"loss": 0.9688, |
|
"step": 3915 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 1.1280602950094532e-06, |
|
"loss": 0.8982, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 1.1174999885976834e-06, |
|
"loss": 0.9001, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 1.106983122668206e-06, |
|
"loss": 0.9189, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 1.0965098148920422e-06, |
|
"loss": 0.9842, |
|
"step": 3935 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 1.0860801824528443e-06, |
|
"loss": 0.9438, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 1.0756943420455934e-06, |
|
"loss": 0.9412, |
|
"step": 3945 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 1.0653524098752894e-06, |
|
"loss": 0.9695, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 1.055054501655654e-06, |
|
"loss": 0.9145, |
|
"step": 3955 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 1.0448007326078336e-06, |
|
"loss": 0.9602, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 1.0345912174591071e-06, |
|
"loss": 0.9009, |
|
"step": 3965 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 1.0244260704416104e-06, |
|
"loss": 0.9375, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 1.0143054052910534e-06, |
|
"loss": 0.9402, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 1.0042293352454446e-06, |
|
"loss": 0.9182, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 9.94197973043829e-07, |
|
"loss": 0.909, |
|
"step": 3985 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 9.842114309250222e-07, |
|
"loss": 0.9285, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 9.74269820626364e-07, |
|
"loss": 0.9264, |
|
"step": 3995 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 9.643732533824545e-07, |
|
"loss": 0.9205, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 9.545218399239186e-07, |
|
"loss": 0.96, |
|
"step": 4005 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 9.447156904761668e-07, |
|
"loss": 0.9473, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 9.349549147581571e-07, |
|
"loss": 0.9281, |
|
"step": 4015 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 9.252396219811737e-07, |
|
"loss": 0.9311, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 9.155699208475988e-07, |
|
"loss": 0.9789, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 9.059459195496989e-07, |
|
"loss": 0.8984, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 8.963677257684184e-07, |
|
"loss": 0.9564, |
|
"step": 4035 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 8.868354466721668e-07, |
|
"loss": 0.9293, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 8.773491889156254e-07, |
|
"loss": 0.9678, |
|
"step": 4045 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 8.679090586385519e-07, |
|
"loss": 0.9275, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 8.585151614645942e-07, |
|
"loss": 0.966, |
|
"step": 4055 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 8.491676025001083e-07, |
|
"loss": 0.9049, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 8.398664863329792e-07, |
|
"loss": 0.9385, |
|
"step": 4065 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 8.306119170314553e-07, |
|
"loss": 0.9529, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 8.214039981429789e-07, |
|
"loss": 0.9412, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 8.122428326930348e-07, |
|
"loss": 0.9852, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 8.031285231839908e-07, |
|
"loss": 0.9223, |
|
"step": 4085 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 7.940611715939522e-07, |
|
"loss": 0.9592, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 7.850408793756242e-07, |
|
"loss": 0.9758, |
|
"step": 4095 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 7.760677474551759e-07, |
|
"loss": 0.842, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 7.67141876231105e-07, |
|
"loss": 0.9406, |
|
"step": 4105 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 7.582633655731231e-07, |
|
"loss": 0.9397, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 7.494323148210303e-07, |
|
"loss": 0.9193, |
|
"step": 4115 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 7.406488227836139e-07, |
|
"loss": 0.9529, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 7.319129877375314e-07, |
|
"loss": 0.973, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 7.232249074262176e-07, |
|
"loss": 0.9596, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 7.145846790587891e-07, |
|
"loss": 0.9477, |
|
"step": 4135 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 7.059923993089585e-07, |
|
"loss": 0.9809, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 6.974481643139514e-07, |
|
"loss": 0.9863, |
|
"step": 4145 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 6.889520696734297e-07, |
|
"loss": 0.9666, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 6.805042104484216e-07, |
|
"loss": 0.9328, |
|
"step": 4155 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 6.721046811602622e-07, |
|
"loss": 0.8867, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 6.63753575789532e-07, |
|
"loss": 0.9635, |
|
"step": 4165 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 6.554509877750042e-07, |
|
"loss": 0.9605, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 6.471970100126035e-07, |
|
"loss": 0.989, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 6.389917348543651e-07, |
|
"loss": 0.9393, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 6.308352541074014e-07, |
|
"loss": 0.9385, |
|
"step": 4185 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 6.227276590328713e-07, |
|
"loss": 0.9325, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 6.146690403449646e-07, |
|
"loss": 0.9801, |
|
"step": 4195 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 6.066594882098831e-07, |
|
"loss": 0.976, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 5.98699092244835e-07, |
|
"loss": 0.9523, |
|
"step": 4205 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 5.907879415170287e-07, |
|
"loss": 0.8773, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 5.829261245426793e-07, |
|
"loss": 0.8939, |
|
"step": 4215 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 5.751137292860126e-07, |
|
"loss": 0.9383, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 5.673508431582936e-07, |
|
"loss": 0.9797, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 5.596375530168329e-07, |
|
"loss": 0.932, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 5.519739451640238e-07, |
|
"loss": 0.9015, |
|
"step": 4235 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 5.443601053463743e-07, |
|
"loss": 0.966, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 5.367961187535504e-07, |
|
"loss": 0.9252, |
|
"step": 4245 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 5.292820700174189e-07, |
|
"loss": 0.925, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 5.218180432111026e-07, |
|
"loss": 0.9445, |
|
"step": 4255 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 5.144041218480389e-07, |
|
"loss": 0.9461, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 5.070403888810471e-07, |
|
"loss": 0.926, |
|
"step": 4265 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 4.997269267013993e-07, |
|
"loss": 0.9242, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 4.924638171378976e-07, |
|
"loss": 0.9514, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 4.852511414559575e-07, |
|
"loss": 0.9877, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 4.780889803567018e-07, |
|
"loss": 0.9541, |
|
"step": 4285 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 4.7097741397605754e-07, |
|
"loss": 0.9449, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 4.639165218838559e-07, |
|
"loss": 0.9361, |
|
"step": 4295 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 4.569063830829445e-07, |
|
"loss": 0.9908, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 4.49947076008303e-07, |
|
"loss": 0.9355, |
|
"step": 4305 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 4.4303867852616755e-07, |
|
"loss": 0.9096, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 4.361812679331551e-07, |
|
"loss": 0.9555, |
|
"step": 4315 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 4.2937492095540043e-07, |
|
"loss": 0.9221, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 4.2261971374769893e-07, |
|
"loss": 0.9594, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 4.159157218926557e-07, |
|
"loss": 0.914, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 4.09263020399836e-07, |
|
"loss": 0.9935, |
|
"step": 4335 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 4.02661683704928e-07, |
|
"loss": 0.9467, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 3.9611178566890894e-07, |
|
"loss": 0.943, |
|
"step": 4345 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 3.896133995772233e-07, |
|
"loss": 0.9232, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 3.8316659813895597e-07, |
|
"loss": 0.9545, |
|
"step": 4355 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 3.767714534860223e-07, |
|
"loss": 0.9242, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 3.704280371723601e-07, |
|
"loss": 0.9379, |
|
"step": 4365 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 3.6413642017313233e-07, |
|
"loss": 0.9506, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 3.5789667288392784e-07, |
|
"loss": 0.9465, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 3.517088651199768e-07, |
|
"loss": 0.9365, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 3.455730661153672e-07, |
|
"loss": 0.9195, |
|
"step": 4385 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 3.394893445222752e-07, |
|
"loss": 0.9746, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 3.334577684101925e-07, |
|
"loss": 0.9289, |
|
"step": 4395 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 3.2747840526516414e-07, |
|
"loss": 0.9038, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 3.215513219890365e-07, |
|
"loss": 0.9098, |
|
"step": 4405 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 3.15676584898707e-07, |
|
"loss": 0.9435, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 3.0985425972538343e-07, |
|
"loss": 0.9098, |
|
"step": 4415 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 3.040844116138475e-07, |
|
"loss": 0.9318, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 2.9836710512172353e-07, |
|
"loss": 0.9592, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 2.9270240421876204e-07, |
|
"loss": 0.9756, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 2.8709037228611903e-07, |
|
"loss": 0.9189, |
|
"step": 4435 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 2.815310721156489e-07, |
|
"loss": 0.9139, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 2.7602456590920034e-07, |
|
"loss": 0.9127, |
|
"step": 4445 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 2.7057091527792125e-07, |
|
"loss": 0.9602, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 2.6517018124157137e-07, |
|
"loss": 0.9787, |
|
"step": 4455 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 2.598224242278369e-07, |
|
"loss": 0.916, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 2.545277040716537e-07, |
|
"loss": 0.9846, |
|
"step": 4465 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 2.492860800145408e-07, |
|
"loss": 0.9484, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 2.4409761070393614e-07, |
|
"loss": 0.9191, |
|
"step": 4475 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 2.389623541925407e-07, |
|
"loss": 0.9266, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 2.3388036793766723e-07, |
|
"loss": 0.9034, |
|
"step": 4485 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 2.2885170880059758e-07, |
|
"loss": 0.896, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 2.2387643304595196e-07, |
|
"loss": 0.9574, |
|
"step": 4495 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 2.189545963410511e-07, |
|
"loss": 0.9387, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 2.1408625375529845e-07, |
|
"loss": 0.9322, |
|
"step": 4505 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 2.0927145975956297e-07, |
|
"loss": 0.9088, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 2.0451026822556952e-07, |
|
"loss": 0.9168, |
|
"step": 4515 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 1.9980273242529825e-07, |
|
"loss": 0.951, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 1.951489050303834e-07, |
|
"loss": 0.916, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 1.9054883811152837e-07, |
|
"loss": 0.8936, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 1.8600258313792142e-07, |
|
"loss": 0.9279, |
|
"step": 4535 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 1.8151019097666146e-07, |
|
"loss": 0.9666, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 1.7707171189218663e-07, |
|
"loss": 0.9555, |
|
"step": 4545 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 1.7268719554571157e-07, |
|
"loss": 0.945, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 1.683566909946771e-07, |
|
"loss": 0.9357, |
|
"step": 4555 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 1.640802466921926e-07, |
|
"loss": 0.9528, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 1.5985791048650223e-07, |
|
"loss": 0.8418, |
|
"step": 4565 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 1.5568972962044405e-07, |
|
"loss": 0.9797, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 1.515757507309229e-07, |
|
"loss": 0.9197, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 1.4751601984839159e-07, |
|
"loss": 1.0133, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 1.4351058239633065e-07, |
|
"loss": 0.9518, |
|
"step": 4585 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 1.3955948319074374e-07, |
|
"loss": 0.881, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 1.3566276643965538e-07, |
|
"loss": 0.9238, |
|
"step": 4595 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 1.3182047574261557e-07, |
|
"loss": 0.9002, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 1.2803265409021436e-07, |
|
"loss": 0.948, |
|
"step": 4605 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 1.2429934386359643e-07, |
|
"loss": 0.9025, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 1.2062058683399048e-07, |
|
"loss": 0.9354, |
|
"step": 4615 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 1.1699642416224233e-07, |
|
"loss": 0.9582, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 1.1342689639835036e-07, |
|
"loss": 0.9734, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 1.0991204348101692e-07, |
|
"loss": 0.9267, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 1.0645190473719647e-07, |
|
"loss": 0.9705, |
|
"step": 4635 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 1.0304651888166039e-07, |
|
"loss": 0.9285, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 9.969592401655903e-08, |
|
"loss": 0.9494, |
|
"step": 4645 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 9.640015763100031e-08, |
|
"loss": 0.8965, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 9.315925660062619e-08, |
|
"loss": 0.9922, |
|
"step": 4655 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 8.997325718720085e-08, |
|
"loss": 0.9295, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 8.684219503820756e-08, |
|
"loss": 0.9564, |
|
"step": 4665 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 8.376610518644746e-08, |
|
"loss": 0.9201, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 8.074502204964696e-08, |
|
"loss": 0.9303, |
|
"step": 4675 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 7.777897943007595e-08, |
|
"loss": 0.9636, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 7.486801051416525e-08, |
|
"loss": 0.9542, |
|
"step": 4685 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 7.201214787213862e-08, |
|
"loss": 0.9684, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 6.921142345764798e-08, |
|
"loss": 0.924, |
|
"step": 4695 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 6.646586860741322e-08, |
|
"loss": 0.9271, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 6.377551404087467e-08, |
|
"loss": 0.9333, |
|
"step": 4705 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 6.114038985984894e-08, |
|
"loss": 0.9413, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 5.856052554818969e-08, |
|
"loss": 0.9223, |
|
"step": 4715 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 5.603594997145967e-08, |
|
"loss": 0.9301, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 5.3566691376609744e-08, |
|
"loss": 0.9072, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 5.115277739165703e-08, |
|
"loss": 0.9152, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 4.8794235025383386e-08, |
|
"loss": 0.9234, |
|
"step": 4735 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 4.6491090667025176e-08, |
|
"loss": 0.943, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 4.4243370085985114e-08, |
|
"loss": 0.8847, |
|
"step": 4745 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 4.2051098431539764e-08, |
|
"loss": 1.0156, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 3.991430023255804e-08, |
|
"loss": 0.866, |
|
"step": 4755 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 3.783299939722984e-08, |
|
"loss": 0.9083, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 3.580721921279562e-08, |
|
"loss": 0.9077, |
|
"step": 4765 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 3.383698234528665e-08, |
|
"loss": 0.9351, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 3.1922310839272444e-08, |
|
"loss": 0.9322, |
|
"step": 4775 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 3.006322611761314e-08, |
|
"loss": 0.9379, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 2.8259748981219194e-08, |
|
"loss": 0.9136, |
|
"step": 4785 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 2.651189960882039e-08, |
|
"loss": 0.9764, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 2.4819697556737742e-08, |
|
"loss": 0.9348, |
|
"step": 4795 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 2.318316175866697e-08, |
|
"loss": 0.9345, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 2.1602310525466464e-08, |
|
"loss": 0.8879, |
|
"step": 4805 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 2.007716154494965e-08, |
|
"loss": 0.9619, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 1.8607731881690737e-08, |
|
"loss": 0.9516, |
|
"step": 4815 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 1.7194037976831502e-08, |
|
"loss": 0.9471, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 1.583609564789812e-08, |
|
"loss": 0.9197, |
|
"step": 4825 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 1.4533920088623533e-08, |
|
"loss": 0.8611, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 1.3287525868778128e-08, |
|
"loss": 0.9449, |
|
"step": 4835 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 1.2096926934007103e-08, |
|
"loss": 0.9418, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 1.0962136605673357e-08, |
|
"loss": 0.9337, |
|
"step": 4845 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 9.883167580709285e-09, |
|
"loss": 0.9118, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 8.860031931473555e-09, |
|
"loss": 0.9563, |
|
"step": 4855 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 7.892741105617329e-09, |
|
"loss": 0.9342, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 6.981305925956583e-09, |
|
"loss": 0.9553, |
|
"step": 4865 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 6.1257365903488745e-09, |
|
"loss": 0.9455, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 5.326042671580655e-09, |
|
"loss": 0.8813, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 4.582233117260693e-09, |
|
"loss": 0.8929, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 3.894316249717922e-09, |
|
"loss": 0.9463, |
|
"step": 4885 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 3.2622997659120802e-09, |
|
"loss": 0.9428, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 2.6861907373432193e-09, |
|
"loss": 0.866, |
|
"step": 4895 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 2.165995609973992e-09, |
|
"loss": 0.94, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 1.7017202041602621e-09, |
|
"loss": 0.9525, |
|
"step": 4905 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 1.293369714582271e-09, |
|
"loss": 0.9548, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 9.409487101880167e-10, |
|
"loss": 0.9668, |
|
"step": 4915 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 6.444611341432927e-10, |
|
"loss": 0.9349, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 4.0391030378561513e-10, |
|
"loss": 0.974, |
|
"step": 4925 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 2.1929891058758424e-10, |
|
"loss": 0.9563, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 9.0629020127464e-11, |
|
"loss": 0.9373, |
|
"step": 4935 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 1.790207206586736e-11, |
|
"loss": 0.9326, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 4944, |
|
"total_flos": 2.446826463366742e+18, |
|
"train_loss": 1.1695684537918436, |
|
"train_runtime": 57751.42, |
|
"train_samples_per_second": 5.478, |
|
"train_steps_per_second": 0.086 |
|
} |
|
], |
|
"max_steps": 4944, |
|
"num_train_epochs": 4, |
|
"total_flos": 2.446826463366742e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|