{ | |
"best_metric": null, | |
"best_model_checkpoint": null, | |
"epoch": 3.2722513089005236, | |
"eval_steps": 500, | |
"global_step": 5000, | |
"is_hyper_param_search": false, | |
"is_local_process_zero": true, | |
"is_world_process_zero": true, | |
"log_history": [ | |
{ | |
"epoch": 0.0, | |
"learning_rate": 0.001, | |
"loss": 1.3845, | |
"step": 5 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 0.001, | |
"loss": 1.6284, | |
"step": 10 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 0.001, | |
"loss": 1.3152, | |
"step": 15 | |
}, | |
{ | |
"epoch": 0.01, | |
"learning_rate": 0.001, | |
"loss": 1.6192, | |
"step": 20 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 0.001, | |
"loss": 1.7394, | |
"step": 25 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 0.001, | |
"loss": 1.9925, | |
"step": 30 | |
}, | |
{ | |
"epoch": 0.02, | |
"learning_rate": 0.001, | |
"loss": 1.902, | |
"step": 35 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 0.001, | |
"loss": 1.822, | |
"step": 40 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 0.001, | |
"loss": 2.137, | |
"step": 45 | |
}, | |
{ | |
"epoch": 0.03, | |
"learning_rate": 0.001, | |
"loss": 2.2556, | |
"step": 50 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 0.001, | |
"loss": 2.2468, | |
"step": 55 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 0.001, | |
"loss": 2.0746, | |
"step": 60 | |
}, | |
{ | |
"epoch": 0.04, | |
"learning_rate": 0.001, | |
"loss": 2.1092, | |
"step": 65 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 0.001, | |
"loss": 1.9553, | |
"step": 70 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 0.001, | |
"loss": 2.121, | |
"step": 75 | |
}, | |
{ | |
"epoch": 0.05, | |
"learning_rate": 0.001, | |
"loss": 2.1722, | |
"step": 80 | |
}, | |
{ | |
"epoch": 0.06, | |
"learning_rate": 0.001, | |
"loss": 2.1552, | |
"step": 85 | |
}, | |
{ | |
"epoch": 0.06, | |
"learning_rate": 0.001, | |
"loss": 2.5971, | |
"step": 90 | |
}, | |
{ | |
"epoch": 0.06, | |
"learning_rate": 0.001, | |
"loss": 2.4684, | |
"step": 95 | |
}, | |
{ | |
"epoch": 0.07, | |
"learning_rate": 0.001, | |
"loss": 2.0248, | |
"step": 100 | |
}, | |
{ | |
"epoch": 0.07, | |
"learning_rate": 0.001, | |
"loss": 2.5156, | |
"step": 105 | |
}, | |
{ | |
"epoch": 0.07, | |
"learning_rate": 0.001, | |
"loss": 2.3322, | |
"step": 110 | |
}, | |
{ | |
"epoch": 0.08, | |
"learning_rate": 0.001, | |
"loss": 1.7698, | |
"step": 115 | |
}, | |
{ | |
"epoch": 0.08, | |
"learning_rate": 0.001, | |
"loss": 2.3363, | |
"step": 120 | |
}, | |
{ | |
"epoch": 0.08, | |
"learning_rate": 0.001, | |
"loss": 2.1079, | |
"step": 125 | |
}, | |
{ | |
"epoch": 0.09, | |
"learning_rate": 0.001, | |
"loss": 2.0998, | |
"step": 130 | |
}, | |
{ | |
"epoch": 0.09, | |
"learning_rate": 0.001, | |
"loss": 2.8265, | |
"step": 135 | |
}, | |
{ | |
"epoch": 0.09, | |
"learning_rate": 0.001, | |
"loss": 2.6611, | |
"step": 140 | |
}, | |
{ | |
"epoch": 0.09, | |
"learning_rate": 0.001, | |
"loss": 2.2187, | |
"step": 145 | |
}, | |
{ | |
"epoch": 0.1, | |
"learning_rate": 0.001, | |
"loss": 2.33, | |
"step": 150 | |
}, | |
{ | |
"epoch": 0.1, | |
"learning_rate": 0.001, | |
"loss": 2.6119, | |
"step": 155 | |
}, | |
{ | |
"epoch": 0.1, | |
"learning_rate": 0.001, | |
"loss": 2.2203, | |
"step": 160 | |
}, | |
{ | |
"epoch": 0.11, | |
"learning_rate": 0.001, | |
"loss": 2.5474, | |
"step": 165 | |
}, | |
{ | |
"epoch": 0.11, | |
"learning_rate": 0.001, | |
"loss": 2.4763, | |
"step": 170 | |
}, | |
{ | |
"epoch": 0.11, | |
"learning_rate": 0.001, | |
"loss": 2.6068, | |
"step": 175 | |
}, | |
{ | |
"epoch": 0.12, | |
"learning_rate": 0.001, | |
"loss": 2.1221, | |
"step": 180 | |
}, | |
{ | |
"epoch": 0.12, | |
"learning_rate": 0.001, | |
"loss": 2.1185, | |
"step": 185 | |
}, | |
{ | |
"epoch": 0.12, | |
"learning_rate": 0.001, | |
"loss": 1.9481, | |
"step": 190 | |
}, | |
{ | |
"epoch": 0.13, | |
"learning_rate": 0.001, | |
"loss": 2.823, | |
"step": 195 | |
}, | |
{ | |
"epoch": 0.13, | |
"learning_rate": 0.001, | |
"loss": 2.4702, | |
"step": 200 | |
}, | |
{ | |
"epoch": 0.13, | |
"learning_rate": 0.001, | |
"loss": 2.3721, | |
"step": 205 | |
}, | |
{ | |
"epoch": 0.14, | |
"learning_rate": 0.001, | |
"loss": 2.4932, | |
"step": 210 | |
}, | |
{ | |
"epoch": 0.14, | |
"learning_rate": 0.001, | |
"loss": 2.3977, | |
"step": 215 | |
}, | |
{ | |
"epoch": 0.14, | |
"learning_rate": 0.001, | |
"loss": 2.5299, | |
"step": 220 | |
}, | |
{ | |
"epoch": 0.15, | |
"learning_rate": 0.001, | |
"loss": 2.4818, | |
"step": 225 | |
}, | |
{ | |
"epoch": 0.15, | |
"learning_rate": 0.001, | |
"loss": 2.3404, | |
"step": 230 | |
}, | |
{ | |
"epoch": 0.15, | |
"learning_rate": 0.001, | |
"loss": 2.3614, | |
"step": 235 | |
}, | |
{ | |
"epoch": 0.16, | |
"learning_rate": 0.001, | |
"loss": 2.1509, | |
"step": 240 | |
}, | |
{ | |
"epoch": 0.16, | |
"learning_rate": 0.001, | |
"loss": 2.5301, | |
"step": 245 | |
}, | |
{ | |
"epoch": 0.16, | |
"learning_rate": 0.001, | |
"loss": 2.3328, | |
"step": 250 | |
}, | |
{ | |
"epoch": 0.17, | |
"learning_rate": 0.001, | |
"loss": 2.412, | |
"step": 255 | |
}, | |
{ | |
"epoch": 0.17, | |
"learning_rate": 0.001, | |
"loss": 2.5923, | |
"step": 260 | |
}, | |
{ | |
"epoch": 0.17, | |
"learning_rate": 0.001, | |
"loss": 2.2655, | |
"step": 265 | |
}, | |
{ | |
"epoch": 0.18, | |
"learning_rate": 0.001, | |
"loss": 2.7524, | |
"step": 270 | |
}, | |
{ | |
"epoch": 0.18, | |
"learning_rate": 0.001, | |
"loss": 2.6456, | |
"step": 275 | |
}, | |
{ | |
"epoch": 0.18, | |
"learning_rate": 0.001, | |
"loss": 2.3814, | |
"step": 280 | |
}, | |
{ | |
"epoch": 0.19, | |
"learning_rate": 0.001, | |
"loss": 2.3805, | |
"step": 285 | |
}, | |
{ | |
"epoch": 0.19, | |
"learning_rate": 0.001, | |
"loss": 2.6727, | |
"step": 290 | |
}, | |
{ | |
"epoch": 0.19, | |
"learning_rate": 0.001, | |
"loss": 2.3931, | |
"step": 295 | |
}, | |
{ | |
"epoch": 0.2, | |
"learning_rate": 0.001, | |
"loss": 1.9526, | |
"step": 300 | |
}, | |
{ | |
"epoch": 0.2, | |
"learning_rate": 0.001, | |
"loss": 2.7219, | |
"step": 305 | |
}, | |
{ | |
"epoch": 0.2, | |
"learning_rate": 0.001, | |
"loss": 2.4869, | |
"step": 310 | |
}, | |
{ | |
"epoch": 0.21, | |
"learning_rate": 0.001, | |
"loss": 2.4303, | |
"step": 315 | |
}, | |
{ | |
"epoch": 0.21, | |
"learning_rate": 0.001, | |
"loss": 2.5916, | |
"step": 320 | |
}, | |
{ | |
"epoch": 0.21, | |
"learning_rate": 0.001, | |
"loss": 2.3917, | |
"step": 325 | |
}, | |
{ | |
"epoch": 0.22, | |
"learning_rate": 0.001, | |
"loss": 2.4881, | |
"step": 330 | |
}, | |
{ | |
"epoch": 0.22, | |
"learning_rate": 0.001, | |
"loss": 2.3369, | |
"step": 335 | |
}, | |
{ | |
"epoch": 0.22, | |
"learning_rate": 0.001, | |
"loss": 2.45, | |
"step": 340 | |
}, | |
{ | |
"epoch": 0.23, | |
"learning_rate": 0.001, | |
"loss": 2.1916, | |
"step": 345 | |
}, | |
{ | |
"epoch": 0.23, | |
"learning_rate": 0.001, | |
"loss": 2.2606, | |
"step": 350 | |
}, | |
{ | |
"epoch": 0.23, | |
"learning_rate": 0.001, | |
"loss": 2.5533, | |
"step": 355 | |
}, | |
{ | |
"epoch": 0.24, | |
"learning_rate": 0.001, | |
"loss": 2.1873, | |
"step": 360 | |
}, | |
{ | |
"epoch": 0.24, | |
"learning_rate": 0.001, | |
"loss": 2.6057, | |
"step": 365 | |
}, | |
{ | |
"epoch": 0.24, | |
"learning_rate": 0.001, | |
"loss": 2.4611, | |
"step": 370 | |
}, | |
{ | |
"epoch": 0.25, | |
"learning_rate": 0.001, | |
"loss": 2.606, | |
"step": 375 | |
}, | |
{ | |
"epoch": 0.25, | |
"learning_rate": 0.001, | |
"loss": 2.5645, | |
"step": 380 | |
}, | |
{ | |
"epoch": 0.25, | |
"learning_rate": 0.001, | |
"loss": 2.7591, | |
"step": 385 | |
}, | |
{ | |
"epoch": 0.26, | |
"learning_rate": 0.001, | |
"loss": 2.3399, | |
"step": 390 | |
}, | |
{ | |
"epoch": 0.26, | |
"learning_rate": 0.001, | |
"loss": 2.3675, | |
"step": 395 | |
}, | |
{ | |
"epoch": 0.26, | |
"learning_rate": 0.001, | |
"loss": 2.5141, | |
"step": 400 | |
}, | |
{ | |
"epoch": 0.27, | |
"learning_rate": 0.001, | |
"loss": 2.2407, | |
"step": 405 | |
}, | |
{ | |
"epoch": 0.27, | |
"learning_rate": 0.001, | |
"loss": 2.6321, | |
"step": 410 | |
}, | |
{ | |
"epoch": 0.27, | |
"learning_rate": 0.001, | |
"loss": 2.6431, | |
"step": 415 | |
}, | |
{ | |
"epoch": 0.27, | |
"learning_rate": 0.001, | |
"loss": 2.4707, | |
"step": 420 | |
}, | |
{ | |
"epoch": 0.28, | |
"learning_rate": 0.001, | |
"loss": 2.3744, | |
"step": 425 | |
}, | |
{ | |
"epoch": 0.28, | |
"learning_rate": 0.001, | |
"loss": 2.5658, | |
"step": 430 | |
}, | |
{ | |
"epoch": 0.28, | |
"learning_rate": 0.001, | |
"loss": 1.9922, | |
"step": 435 | |
}, | |
{ | |
"epoch": 0.29, | |
"learning_rate": 0.001, | |
"loss": 2.5948, | |
"step": 440 | |
}, | |
{ | |
"epoch": 0.29, | |
"learning_rate": 0.001, | |
"loss": 2.2519, | |
"step": 445 | |
}, | |
{ | |
"epoch": 0.29, | |
"learning_rate": 0.001, | |
"loss": 2.6248, | |
"step": 450 | |
}, | |
{ | |
"epoch": 0.3, | |
"learning_rate": 0.001, | |
"loss": 2.5877, | |
"step": 455 | |
}, | |
{ | |
"epoch": 0.3, | |
"learning_rate": 0.001, | |
"loss": 2.1311, | |
"step": 460 | |
}, | |
{ | |
"epoch": 0.3, | |
"learning_rate": 0.001, | |
"loss": 2.6091, | |
"step": 465 | |
}, | |
{ | |
"epoch": 0.31, | |
"learning_rate": 0.001, | |
"loss": 2.5488, | |
"step": 470 | |
}, | |
{ | |
"epoch": 0.31, | |
"learning_rate": 0.001, | |
"loss": 2.499, | |
"step": 475 | |
}, | |
{ | |
"epoch": 0.31, | |
"learning_rate": 0.001, | |
"loss": 2.5031, | |
"step": 480 | |
}, | |
{ | |
"epoch": 0.32, | |
"learning_rate": 0.001, | |
"loss": 2.5783, | |
"step": 485 | |
}, | |
{ | |
"epoch": 0.32, | |
"learning_rate": 0.001, | |
"loss": 2.6303, | |
"step": 490 | |
}, | |
{ | |
"epoch": 0.32, | |
"learning_rate": 0.001, | |
"loss": 2.6344, | |
"step": 495 | |
}, | |
{ | |
"epoch": 0.33, | |
"learning_rate": 0.001, | |
"loss": 2.7031, | |
"step": 500 | |
}, | |
{ | |
"epoch": 0.33, | |
"learning_rate": 0.001, | |
"loss": 2.6677, | |
"step": 505 | |
}, | |
{ | |
"epoch": 0.33, | |
"learning_rate": 0.001, | |
"loss": 2.7617, | |
"step": 510 | |
}, | |
{ | |
"epoch": 0.34, | |
"learning_rate": 0.001, | |
"loss": 2.3122, | |
"step": 515 | |
}, | |
{ | |
"epoch": 0.34, | |
"learning_rate": 0.001, | |
"loss": 2.3887, | |
"step": 520 | |
}, | |
{ | |
"epoch": 0.34, | |
"learning_rate": 0.001, | |
"loss": 2.5569, | |
"step": 525 | |
}, | |
{ | |
"epoch": 0.35, | |
"learning_rate": 0.001, | |
"loss": 2.4038, | |
"step": 530 | |
}, | |
{ | |
"epoch": 0.35, | |
"learning_rate": 0.001, | |
"loss": 2.7413, | |
"step": 535 | |
}, | |
{ | |
"epoch": 0.35, | |
"learning_rate": 0.001, | |
"loss": 2.5264, | |
"step": 540 | |
}, | |
{ | |
"epoch": 0.36, | |
"learning_rate": 0.001, | |
"loss": 2.335, | |
"step": 545 | |
}, | |
{ | |
"epoch": 0.36, | |
"learning_rate": 0.001, | |
"loss": 2.7574, | |
"step": 550 | |
}, | |
{ | |
"epoch": 0.36, | |
"learning_rate": 0.001, | |
"loss": 2.4542, | |
"step": 555 | |
}, | |
{ | |
"epoch": 0.37, | |
"learning_rate": 0.001, | |
"loss": 2.0885, | |
"step": 560 | |
}, | |
{ | |
"epoch": 0.37, | |
"learning_rate": 0.001, | |
"loss": 2.6057, | |
"step": 565 | |
}, | |
{ | |
"epoch": 0.37, | |
"learning_rate": 0.001, | |
"loss": 2.4971, | |
"step": 570 | |
}, | |
{ | |
"epoch": 0.38, | |
"learning_rate": 0.001, | |
"loss": 2.6251, | |
"step": 575 | |
}, | |
{ | |
"epoch": 0.38, | |
"learning_rate": 0.001, | |
"loss": 2.5784, | |
"step": 580 | |
}, | |
{ | |
"epoch": 0.38, | |
"learning_rate": 0.001, | |
"loss": 2.3878, | |
"step": 585 | |
}, | |
{ | |
"epoch": 0.39, | |
"learning_rate": 0.001, | |
"loss": 2.4271, | |
"step": 590 | |
}, | |
{ | |
"epoch": 0.39, | |
"learning_rate": 0.001, | |
"loss": 2.6751, | |
"step": 595 | |
}, | |
{ | |
"epoch": 0.39, | |
"learning_rate": 0.001, | |
"loss": 2.17, | |
"step": 600 | |
}, | |
{ | |
"epoch": 0.4, | |
"learning_rate": 0.001, | |
"loss": 2.0794, | |
"step": 605 | |
}, | |
{ | |
"epoch": 0.4, | |
"learning_rate": 0.001, | |
"loss": 2.6073, | |
"step": 610 | |
}, | |
{ | |
"epoch": 0.4, | |
"learning_rate": 0.001, | |
"loss": 2.6717, | |
"step": 615 | |
}, | |
{ | |
"epoch": 0.41, | |
"learning_rate": 0.001, | |
"loss": 2.8236, | |
"step": 620 | |
}, | |
{ | |
"epoch": 0.41, | |
"learning_rate": 0.001, | |
"loss": 2.4193, | |
"step": 625 | |
}, | |
{ | |
"epoch": 0.41, | |
"learning_rate": 0.001, | |
"loss": 2.7181, | |
"step": 630 | |
}, | |
{ | |
"epoch": 0.42, | |
"learning_rate": 0.001, | |
"loss": 2.6567, | |
"step": 635 | |
}, | |
{ | |
"epoch": 0.42, | |
"learning_rate": 0.001, | |
"loss": 2.1919, | |
"step": 640 | |
}, | |
{ | |
"epoch": 0.42, | |
"learning_rate": 0.001, | |
"loss": 2.5645, | |
"step": 645 | |
}, | |
{ | |
"epoch": 0.43, | |
"learning_rate": 0.001, | |
"loss": 2.3313, | |
"step": 650 | |
}, | |
{ | |
"epoch": 0.43, | |
"learning_rate": 0.001, | |
"loss": 2.533, | |
"step": 655 | |
}, | |
{ | |
"epoch": 0.43, | |
"learning_rate": 0.001, | |
"loss": 2.621, | |
"step": 660 | |
}, | |
{ | |
"epoch": 0.44, | |
"learning_rate": 0.001, | |
"loss": 2.5693, | |
"step": 665 | |
}, | |
{ | |
"epoch": 0.44, | |
"learning_rate": 0.001, | |
"loss": 2.6943, | |
"step": 670 | |
}, | |
{ | |
"epoch": 0.44, | |
"learning_rate": 0.001, | |
"loss": 2.2715, | |
"step": 675 | |
}, | |
{ | |
"epoch": 0.45, | |
"learning_rate": 0.001, | |
"loss": 2.5614, | |
"step": 680 | |
}, | |
{ | |
"epoch": 0.45, | |
"learning_rate": 0.001, | |
"loss": 2.5784, | |
"step": 685 | |
}, | |
{ | |
"epoch": 0.45, | |
"learning_rate": 0.001, | |
"loss": 2.0613, | |
"step": 690 | |
}, | |
{ | |
"epoch": 0.45, | |
"learning_rate": 0.001, | |
"loss": 2.582, | |
"step": 695 | |
}, | |
{ | |
"epoch": 0.46, | |
"learning_rate": 0.001, | |
"loss": 2.5178, | |
"step": 700 | |
}, | |
{ | |
"epoch": 0.46, | |
"learning_rate": 0.001, | |
"loss": 2.432, | |
"step": 705 | |
}, | |
{ | |
"epoch": 0.46, | |
"learning_rate": 0.001, | |
"loss": 2.1979, | |
"step": 710 | |
}, | |
{ | |
"epoch": 0.47, | |
"learning_rate": 0.001, | |
"loss": 2.4317, | |
"step": 715 | |
}, | |
{ | |
"epoch": 0.47, | |
"learning_rate": 0.001, | |
"loss": 2.285, | |
"step": 720 | |
}, | |
{ | |
"epoch": 0.47, | |
"learning_rate": 0.001, | |
"loss": 2.6127, | |
"step": 725 | |
}, | |
{ | |
"epoch": 0.48, | |
"learning_rate": 0.001, | |
"loss": 2.6221, | |
"step": 730 | |
}, | |
{ | |
"epoch": 0.48, | |
"learning_rate": 0.001, | |
"loss": 2.7519, | |
"step": 735 | |
}, | |
{ | |
"epoch": 0.48, | |
"learning_rate": 0.001, | |
"loss": 2.7351, | |
"step": 740 | |
}, | |
{ | |
"epoch": 0.49, | |
"learning_rate": 0.001, | |
"loss": 2.5224, | |
"step": 745 | |
}, | |
{ | |
"epoch": 0.49, | |
"learning_rate": 0.001, | |
"loss": 2.5145, | |
"step": 750 | |
}, | |
{ | |
"epoch": 0.49, | |
"learning_rate": 0.001, | |
"loss": 2.5122, | |
"step": 755 | |
}, | |
{ | |
"epoch": 0.5, | |
"learning_rate": 0.001, | |
"loss": 2.2971, | |
"step": 760 | |
}, | |
{ | |
"epoch": 0.5, | |
"learning_rate": 0.001, | |
"loss": 2.5628, | |
"step": 765 | |
}, | |
{ | |
"epoch": 0.5, | |
"learning_rate": 0.001, | |
"loss": 2.5803, | |
"step": 770 | |
}, | |
{ | |
"epoch": 0.51, | |
"learning_rate": 0.001, | |
"loss": 2.5209, | |
"step": 775 | |
}, | |
{ | |
"epoch": 0.51, | |
"learning_rate": 0.001, | |
"loss": 2.5051, | |
"step": 780 | |
}, | |
{ | |
"epoch": 0.51, | |
"learning_rate": 0.001, | |
"loss": 2.5275, | |
"step": 785 | |
}, | |
{ | |
"epoch": 0.52, | |
"learning_rate": 0.001, | |
"loss": 2.5019, | |
"step": 790 | |
}, | |
{ | |
"epoch": 0.52, | |
"learning_rate": 0.001, | |
"loss": 2.1714, | |
"step": 795 | |
}, | |
{ | |
"epoch": 0.52, | |
"learning_rate": 0.001, | |
"loss": 2.87, | |
"step": 800 | |
}, | |
{ | |
"epoch": 0.53, | |
"learning_rate": 0.001, | |
"loss": 2.6468, | |
"step": 805 | |
}, | |
{ | |
"epoch": 0.53, | |
"learning_rate": 0.001, | |
"loss": 2.4115, | |
"step": 810 | |
}, | |
{ | |
"epoch": 0.53, | |
"learning_rate": 0.001, | |
"loss": 2.5377, | |
"step": 815 | |
}, | |
{ | |
"epoch": 0.54, | |
"learning_rate": 0.001, | |
"loss": 2.4986, | |
"step": 820 | |
}, | |
{ | |
"epoch": 0.54, | |
"learning_rate": 0.001, | |
"loss": 2.6485, | |
"step": 825 | |
}, | |
{ | |
"epoch": 0.54, | |
"learning_rate": 0.001, | |
"loss": 2.6314, | |
"step": 830 | |
}, | |
{ | |
"epoch": 0.55, | |
"learning_rate": 0.001, | |
"loss": 2.3516, | |
"step": 835 | |
}, | |
{ | |
"epoch": 0.55, | |
"learning_rate": 0.001, | |
"loss": 2.39, | |
"step": 840 | |
}, | |
{ | |
"epoch": 0.55, | |
"learning_rate": 0.001, | |
"loss": 2.3759, | |
"step": 845 | |
}, | |
{ | |
"epoch": 0.56, | |
"learning_rate": 0.001, | |
"loss": 2.5348, | |
"step": 850 | |
}, | |
{ | |
"epoch": 0.56, | |
"learning_rate": 0.001, | |
"loss": 2.6843, | |
"step": 855 | |
}, | |
{ | |
"epoch": 0.56, | |
"learning_rate": 0.001, | |
"loss": 2.6289, | |
"step": 860 | |
}, | |
{ | |
"epoch": 0.57, | |
"learning_rate": 0.001, | |
"loss": 2.4023, | |
"step": 865 | |
}, | |
{ | |
"epoch": 0.57, | |
"learning_rate": 0.001, | |
"loss": 2.5979, | |
"step": 870 | |
}, | |
{ | |
"epoch": 0.57, | |
"learning_rate": 0.001, | |
"loss": 2.4683, | |
"step": 875 | |
}, | |
{ | |
"epoch": 0.58, | |
"learning_rate": 0.001, | |
"loss": 2.7099, | |
"step": 880 | |
}, | |
{ | |
"epoch": 0.58, | |
"learning_rate": 0.001, | |
"loss": 2.3129, | |
"step": 885 | |
}, | |
{ | |
"epoch": 0.58, | |
"learning_rate": 0.001, | |
"loss": 2.4508, | |
"step": 890 | |
}, | |
{ | |
"epoch": 0.59, | |
"learning_rate": 0.001, | |
"loss": 2.6484, | |
"step": 895 | |
}, | |
{ | |
"epoch": 0.59, | |
"learning_rate": 0.001, | |
"loss": 2.4541, | |
"step": 900 | |
}, | |
{ | |
"epoch": 0.59, | |
"learning_rate": 0.001, | |
"loss": 2.5101, | |
"step": 905 | |
}, | |
{ | |
"epoch": 0.6, | |
"learning_rate": 0.001, | |
"loss": 2.6002, | |
"step": 910 | |
}, | |
{ | |
"epoch": 0.6, | |
"learning_rate": 0.001, | |
"loss": 2.742, | |
"step": 915 | |
}, | |
{ | |
"epoch": 0.6, | |
"learning_rate": 0.001, | |
"loss": 2.4772, | |
"step": 920 | |
}, | |
{ | |
"epoch": 0.61, | |
"learning_rate": 0.001, | |
"loss": 2.4328, | |
"step": 925 | |
}, | |
{ | |
"epoch": 0.61, | |
"learning_rate": 0.001, | |
"loss": 2.7404, | |
"step": 930 | |
}, | |
{ | |
"epoch": 0.61, | |
"learning_rate": 0.001, | |
"loss": 2.5969, | |
"step": 935 | |
}, | |
{ | |
"epoch": 0.62, | |
"learning_rate": 0.001, | |
"loss": 2.1367, | |
"step": 940 | |
}, | |
{ | |
"epoch": 0.62, | |
"learning_rate": 0.001, | |
"loss": 2.703, | |
"step": 945 | |
}, | |
{ | |
"epoch": 0.62, | |
"learning_rate": 0.001, | |
"loss": 2.5638, | |
"step": 950 | |
}, | |
{ | |
"epoch": 0.62, | |
"learning_rate": 0.001, | |
"loss": 1.8322, | |
"step": 955 | |
}, | |
{ | |
"epoch": 0.63, | |
"learning_rate": 0.001, | |
"loss": 2.5764, | |
"step": 960 | |
}, | |
{ | |
"epoch": 0.63, | |
"learning_rate": 0.001, | |
"loss": 2.4679, | |
"step": 965 | |
}, | |
{ | |
"epoch": 0.63, | |
"learning_rate": 0.001, | |
"loss": 2.1691, | |
"step": 970 | |
}, | |
{ | |
"epoch": 0.64, | |
"learning_rate": 0.001, | |
"loss": 2.3252, | |
"step": 975 | |
}, | |
{ | |
"epoch": 0.64, | |
"learning_rate": 0.001, | |
"loss": 2.3988, | |
"step": 980 | |
}, | |
{ | |
"epoch": 0.64, | |
"learning_rate": 0.001, | |
"loss": 2.221, | |
"step": 985 | |
}, | |
{ | |
"epoch": 0.65, | |
"learning_rate": 0.001, | |
"loss": 2.4715, | |
"step": 990 | |
}, | |
{ | |
"epoch": 0.65, | |
"learning_rate": 0.001, | |
"loss": 2.2479, | |
"step": 995 | |
}, | |
{ | |
"epoch": 0.65, | |
"learning_rate": 0.001, | |
"loss": 2.3859, | |
"step": 1000 | |
}, | |
{ | |
"epoch": 0.66, | |
"learning_rate": 0.001, | |
"loss": 2.7889, | |
"step": 1005 | |
}, | |
{ | |
"epoch": 0.66, | |
"learning_rate": 0.001, | |
"loss": 2.625, | |
"step": 1010 | |
}, | |
{ | |
"epoch": 0.66, | |
"learning_rate": 0.001, | |
"loss": 2.324, | |
"step": 1015 | |
}, | |
{ | |
"epoch": 0.67, | |
"learning_rate": 0.001, | |
"loss": 2.5879, | |
"step": 1020 | |
}, | |
{ | |
"epoch": 0.67, | |
"learning_rate": 0.001, | |
"loss": 2.4721, | |
"step": 1025 | |
}, | |
{ | |
"epoch": 0.67, | |
"learning_rate": 0.001, | |
"loss": 2.6365, | |
"step": 1030 | |
}, | |
{ | |
"epoch": 0.68, | |
"learning_rate": 0.001, | |
"loss": 2.6356, | |
"step": 1035 | |
}, | |
{ | |
"epoch": 0.68, | |
"learning_rate": 0.001, | |
"loss": 2.4022, | |
"step": 1040 | |
}, | |
{ | |
"epoch": 0.68, | |
"learning_rate": 0.001, | |
"loss": 2.7109, | |
"step": 1045 | |
}, | |
{ | |
"epoch": 0.69, | |
"learning_rate": 0.001, | |
"loss": 2.373, | |
"step": 1050 | |
}, | |
{ | |
"epoch": 0.69, | |
"learning_rate": 0.001, | |
"loss": 2.523, | |
"step": 1055 | |
}, | |
{ | |
"epoch": 0.69, | |
"learning_rate": 0.001, | |
"loss": 2.536, | |
"step": 1060 | |
}, | |
{ | |
"epoch": 0.7, | |
"learning_rate": 0.001, | |
"loss": 2.6047, | |
"step": 1065 | |
}, | |
{ | |
"epoch": 0.7, | |
"learning_rate": 0.001, | |
"loss": 2.5583, | |
"step": 1070 | |
}, | |
{ | |
"epoch": 0.7, | |
"learning_rate": 0.001, | |
"loss": 2.7925, | |
"step": 1075 | |
}, | |
{ | |
"epoch": 0.71, | |
"learning_rate": 0.001, | |
"loss": 2.0572, | |
"step": 1080 | |
}, | |
{ | |
"epoch": 0.71, | |
"learning_rate": 0.001, | |
"loss": 2.3024, | |
"step": 1085 | |
}, | |
{ | |
"epoch": 0.71, | |
"learning_rate": 0.001, | |
"loss": 2.0677, | |
"step": 1090 | |
}, | |
{ | |
"epoch": 0.72, | |
"learning_rate": 0.001, | |
"loss": 2.5241, | |
"step": 1095 | |
}, | |
{ | |
"epoch": 0.72, | |
"learning_rate": 0.001, | |
"loss": 2.7447, | |
"step": 1100 | |
}, | |
{ | |
"epoch": 0.72, | |
"learning_rate": 0.001, | |
"loss": 2.6435, | |
"step": 1105 | |
}, | |
{ | |
"epoch": 0.73, | |
"learning_rate": 0.001, | |
"loss": 2.4226, | |
"step": 1110 | |
}, | |
{ | |
"epoch": 0.73, | |
"learning_rate": 0.001, | |
"loss": 2.6891, | |
"step": 1115 | |
}, | |
{ | |
"epoch": 0.73, | |
"learning_rate": 0.001, | |
"loss": 2.458, | |
"step": 1120 | |
}, | |
{ | |
"epoch": 0.74, | |
"learning_rate": 0.001, | |
"loss": 2.6884, | |
"step": 1125 | |
}, | |
{ | |
"epoch": 0.74, | |
"learning_rate": 0.001, | |
"loss": 2.7058, | |
"step": 1130 | |
}, | |
{ | |
"epoch": 0.74, | |
"learning_rate": 0.001, | |
"loss": 2.5828, | |
"step": 1135 | |
}, | |
{ | |
"epoch": 0.75, | |
"learning_rate": 0.001, | |
"loss": 2.7107, | |
"step": 1140 | |
}, | |
{ | |
"epoch": 0.75, | |
"learning_rate": 0.001, | |
"loss": 2.3635, | |
"step": 1145 | |
}, | |
{ | |
"epoch": 0.75, | |
"learning_rate": 0.001, | |
"loss": 2.5258, | |
"step": 1150 | |
}, | |
{ | |
"epoch": 0.76, | |
"learning_rate": 0.001, | |
"loss": 2.6046, | |
"step": 1155 | |
}, | |
{ | |
"epoch": 0.76, | |
"learning_rate": 0.001, | |
"loss": 2.5215, | |
"step": 1160 | |
}, | |
{ | |
"epoch": 0.76, | |
"learning_rate": 0.001, | |
"loss": 2.1417, | |
"step": 1165 | |
}, | |
{ | |
"epoch": 0.77, | |
"learning_rate": 0.001, | |
"loss": 2.5903, | |
"step": 1170 | |
}, | |
{ | |
"epoch": 0.77, | |
"learning_rate": 0.001, | |
"loss": 2.5376, | |
"step": 1175 | |
}, | |
{ | |
"epoch": 0.77, | |
"learning_rate": 0.001, | |
"loss": 2.5716, | |
"step": 1180 | |
}, | |
{ | |
"epoch": 0.78, | |
"learning_rate": 0.001, | |
"loss": 2.5904, | |
"step": 1185 | |
}, | |
{ | |
"epoch": 0.78, | |
"learning_rate": 0.001, | |
"loss": 2.6511, | |
"step": 1190 | |
}, | |
{ | |
"epoch": 0.78, | |
"learning_rate": 0.001, | |
"loss": 2.1679, | |
"step": 1195 | |
}, | |
{ | |
"epoch": 0.79, | |
"learning_rate": 0.001, | |
"loss": 2.4169, | |
"step": 1200 | |
}, | |
{ | |
"epoch": 0.79, | |
"learning_rate": 0.001, | |
"loss": 2.7519, | |
"step": 1205 | |
}, | |
{ | |
"epoch": 0.79, | |
"learning_rate": 0.001, | |
"loss": 2.4385, | |
"step": 1210 | |
}, | |
{ | |
"epoch": 0.8, | |
"learning_rate": 0.001, | |
"loss": 1.9145, | |
"step": 1215 | |
}, | |
{ | |
"epoch": 0.8, | |
"learning_rate": 0.001, | |
"loss": 2.5898, | |
"step": 1220 | |
}, | |
{ | |
"epoch": 0.8, | |
"learning_rate": 0.001, | |
"loss": 2.4768, | |
"step": 1225 | |
}, | |
{ | |
"epoch": 0.8, | |
"learning_rate": 0.001, | |
"loss": 2.5546, | |
"step": 1230 | |
}, | |
{ | |
"epoch": 0.81, | |
"learning_rate": 0.001, | |
"loss": 2.5279, | |
"step": 1235 | |
}, | |
{ | |
"epoch": 0.81, | |
"learning_rate": 0.001, | |
"loss": 2.498, | |
"step": 1240 | |
}, | |
{ | |
"epoch": 0.81, | |
"learning_rate": 0.001, | |
"loss": 2.4277, | |
"step": 1245 | |
}, | |
{ | |
"epoch": 0.82, | |
"learning_rate": 0.001, | |
"loss": 2.9934, | |
"step": 1250 | |
}, | |
{ | |
"epoch": 0.82, | |
"learning_rate": 0.001, | |
"loss": 2.3367, | |
"step": 1255 | |
}, | |
{ | |
"epoch": 0.82, | |
"learning_rate": 0.001, | |
"loss": 2.012, | |
"step": 1260 | |
}, | |
{ | |
"epoch": 0.83, | |
"learning_rate": 0.001, | |
"loss": 2.4148, | |
"step": 1265 | |
}, | |
{ | |
"epoch": 0.83, | |
"learning_rate": 0.001, | |
"loss": 2.3207, | |
"step": 1270 | |
}, | |
{ | |
"epoch": 0.83, | |
"learning_rate": 0.001, | |
"loss": 2.5629, | |
"step": 1275 | |
}, | |
{ | |
"epoch": 0.84, | |
"learning_rate": 0.001, | |
"loss": 2.4338, | |
"step": 1280 | |
}, | |
{ | |
"epoch": 0.84, | |
"learning_rate": 0.001, | |
"loss": 2.4612, | |
"step": 1285 | |
}, | |
{ | |
"epoch": 0.84, | |
"learning_rate": 0.001, | |
"loss": 2.5406, | |
"step": 1290 | |
}, | |
{ | |
"epoch": 0.85, | |
"learning_rate": 0.001, | |
"loss": 2.5438, | |
"step": 1295 | |
}, | |
{ | |
"epoch": 0.85, | |
"learning_rate": 0.001, | |
"loss": 2.6348, | |
"step": 1300 | |
}, | |
{ | |
"epoch": 0.85, | |
"learning_rate": 0.001, | |
"loss": 2.1436, | |
"step": 1305 | |
}, | |
{ | |
"epoch": 0.86, | |
"learning_rate": 0.001, | |
"loss": 2.4825, | |
"step": 1310 | |
}, | |
{ | |
"epoch": 0.86, | |
"learning_rate": 0.001, | |
"loss": 2.2579, | |
"step": 1315 | |
}, | |
{ | |
"epoch": 0.86, | |
"learning_rate": 0.001, | |
"loss": 2.5234, | |
"step": 1320 | |
}, | |
{ | |
"epoch": 0.87, | |
"learning_rate": 0.001, | |
"loss": 2.5127, | |
"step": 1325 | |
}, | |
{ | |
"epoch": 0.87, | |
"learning_rate": 0.001, | |
"loss": 2.3415, | |
"step": 1330 | |
}, | |
{ | |
"epoch": 0.87, | |
"learning_rate": 0.001, | |
"loss": 2.4005, | |
"step": 1335 | |
}, | |
{ | |
"epoch": 0.88, | |
"learning_rate": 0.001, | |
"loss": 2.4731, | |
"step": 1340 | |
}, | |
{ | |
"epoch": 0.88, | |
"learning_rate": 0.001, | |
"loss": 2.8025, | |
"step": 1345 | |
}, | |
{ | |
"epoch": 0.88, | |
"learning_rate": 0.001, | |
"loss": 2.1733, | |
"step": 1350 | |
}, | |
{ | |
"epoch": 0.89, | |
"learning_rate": 0.001, | |
"loss": 2.5599, | |
"step": 1355 | |
}, | |
{ | |
"epoch": 0.89, | |
"learning_rate": 0.001, | |
"loss": 2.6287, | |
"step": 1360 | |
}, | |
{ | |
"epoch": 0.89, | |
"learning_rate": 0.001, | |
"loss": 2.5367, | |
"step": 1365 | |
}, | |
{ | |
"epoch": 0.9, | |
"learning_rate": 0.001, | |
"loss": 2.3331, | |
"step": 1370 | |
}, | |
{ | |
"epoch": 0.9, | |
"learning_rate": 0.001, | |
"loss": 2.2061, | |
"step": 1375 | |
}, | |
{ | |
"epoch": 0.9, | |
"learning_rate": 0.001, | |
"loss": 2.3905, | |
"step": 1380 | |
}, | |
{ | |
"epoch": 0.91, | |
"learning_rate": 0.001, | |
"loss": 2.6162, | |
"step": 1385 | |
}, | |
{ | |
"epoch": 0.91, | |
"learning_rate": 0.001, | |
"loss": 2.2712, | |
"step": 1390 | |
}, | |
{ | |
"epoch": 0.91, | |
"learning_rate": 0.001, | |
"loss": 2.3674, | |
"step": 1395 | |
}, | |
{ | |
"epoch": 0.92, | |
"learning_rate": 0.001, | |
"loss": 2.4791, | |
"step": 1400 | |
}, | |
{ | |
"epoch": 0.92, | |
"learning_rate": 0.001, | |
"loss": 2.6425, | |
"step": 1405 | |
}, | |
{ | |
"epoch": 0.92, | |
"learning_rate": 0.001, | |
"loss": 2.4496, | |
"step": 1410 | |
}, | |
{ | |
"epoch": 0.93, | |
"learning_rate": 0.001, | |
"loss": 2.5499, | |
"step": 1415 | |
}, | |
{ | |
"epoch": 0.93, | |
"learning_rate": 0.001, | |
"loss": 2.5993, | |
"step": 1420 | |
}, | |
{ | |
"epoch": 0.93, | |
"learning_rate": 0.001, | |
"loss": 2.5543, | |
"step": 1425 | |
}, | |
{ | |
"epoch": 0.94, | |
"learning_rate": 0.001, | |
"loss": 2.2981, | |
"step": 1430 | |
}, | |
{ | |
"epoch": 0.94, | |
"learning_rate": 0.001, | |
"loss": 2.7974, | |
"step": 1435 | |
}, | |
{ | |
"epoch": 0.94, | |
"learning_rate": 0.001, | |
"loss": 2.0898, | |
"step": 1440 | |
}, | |
{ | |
"epoch": 0.95, | |
"learning_rate": 0.001, | |
"loss": 2.561, | |
"step": 1445 | |
}, | |
{ | |
"epoch": 0.95, | |
"learning_rate": 0.001, | |
"loss": 2.2922, | |
"step": 1450 | |
}, | |
{ | |
"epoch": 0.95, | |
"learning_rate": 0.001, | |
"loss": 2.3195, | |
"step": 1455 | |
}, | |
{ | |
"epoch": 0.96, | |
"learning_rate": 0.001, | |
"loss": 2.4302, | |
"step": 1460 | |
}, | |
{ | |
"epoch": 0.96, | |
"learning_rate": 0.001, | |
"loss": 2.6357, | |
"step": 1465 | |
}, | |
{ | |
"epoch": 0.96, | |
"learning_rate": 0.001, | |
"loss": 2.1727, | |
"step": 1470 | |
}, | |
{ | |
"epoch": 0.97, | |
"learning_rate": 0.001, | |
"loss": 2.5076, | |
"step": 1475 | |
}, | |
{ | |
"epoch": 0.97, | |
"learning_rate": 0.001, | |
"loss": 2.7291, | |
"step": 1480 | |
}, | |
{ | |
"epoch": 0.97, | |
"learning_rate": 0.001, | |
"loss": 2.443, | |
"step": 1485 | |
}, | |
{ | |
"epoch": 0.98, | |
"learning_rate": 0.001, | |
"loss": 2.4889, | |
"step": 1490 | |
}, | |
{ | |
"epoch": 0.98, | |
"learning_rate": 0.001, | |
"loss": 2.4709, | |
"step": 1495 | |
}, | |
{ | |
"epoch": 0.98, | |
"learning_rate": 0.001, | |
"loss": 2.3124, | |
"step": 1500 | |
}, | |
{ | |
"epoch": 0.98, | |
"learning_rate": 0.001, | |
"loss": 2.5515, | |
"step": 1505 | |
}, | |
{ | |
"epoch": 0.99, | |
"learning_rate": 0.001, | |
"loss": 2.3861, | |
"step": 1510 | |
}, | |
{ | |
"epoch": 0.99, | |
"learning_rate": 0.001, | |
"loss": 2.4269, | |
"step": 1515 | |
}, | |
{ | |
"epoch": 0.99, | |
"learning_rate": 0.001, | |
"loss": 2.4074, | |
"step": 1520 | |
}, | |
{ | |
"epoch": 1.0, | |
"learning_rate": 0.001, | |
"loss": 2.2783, | |
"step": 1525 | |
}, | |
{ | |
"epoch": 1.0, | |
"learning_rate": 0.001, | |
"loss": 2.4139, | |
"step": 1530 | |
}, | |
{ | |
"epoch": 1.0, | |
"learning_rate": 0.001, | |
"loss": 1.9251, | |
"step": 1535 | |
}, | |
{ | |
"epoch": 1.01, | |
"learning_rate": 0.001, | |
"loss": 2.1038, | |
"step": 1540 | |
}, | |
{ | |
"epoch": 1.01, | |
"learning_rate": 0.001, | |
"loss": 1.9436, | |
"step": 1545 | |
}, | |
{ | |
"epoch": 1.01, | |
"learning_rate": 0.001, | |
"loss": 1.9581, | |
"step": 1550 | |
}, | |
{ | |
"epoch": 1.02, | |
"learning_rate": 0.001, | |
"loss": 2.1705, | |
"step": 1555 | |
}, | |
{ | |
"epoch": 1.02, | |
"learning_rate": 0.001, | |
"loss": 2.1393, | |
"step": 1560 | |
}, | |
{ | |
"epoch": 1.02, | |
"learning_rate": 0.001, | |
"loss": 1.741, | |
"step": 1565 | |
}, | |
{ | |
"epoch": 1.03, | |
"learning_rate": 0.001, | |
"loss": 1.9232, | |
"step": 1570 | |
}, | |
{ | |
"epoch": 1.03, | |
"learning_rate": 0.001, | |
"loss": 1.9046, | |
"step": 1575 | |
}, | |
{ | |
"epoch": 1.03, | |
"learning_rate": 0.001, | |
"loss": 1.8102, | |
"step": 1580 | |
}, | |
{ | |
"epoch": 1.04, | |
"learning_rate": 0.001, | |
"loss": 1.7362, | |
"step": 1585 | |
}, | |
{ | |
"epoch": 1.04, | |
"learning_rate": 0.001, | |
"loss": 1.8057, | |
"step": 1590 | |
}, | |
{ | |
"epoch": 1.04, | |
"learning_rate": 0.001, | |
"loss": 2.0777, | |
"step": 1595 | |
}, | |
{ | |
"epoch": 1.05, | |
"learning_rate": 0.001, | |
"loss": 2.1163, | |
"step": 1600 | |
}, | |
{ | |
"epoch": 1.05, | |
"learning_rate": 0.001, | |
"loss": 1.8468, | |
"step": 1605 | |
}, | |
{ | |
"epoch": 1.05, | |
"learning_rate": 0.001, | |
"loss": 2.3334, | |
"step": 1610 | |
}, | |
{ | |
"epoch": 1.06, | |
"learning_rate": 0.001, | |
"loss": 2.1232, | |
"step": 1615 | |
}, | |
{ | |
"epoch": 1.06, | |
"learning_rate": 0.001, | |
"loss": 1.7049, | |
"step": 1620 | |
}, | |
{ | |
"epoch": 1.06, | |
"learning_rate": 0.001, | |
"loss": 2.1545, | |
"step": 1625 | |
}, | |
{ | |
"epoch": 1.07, | |
"learning_rate": 0.001, | |
"loss": 2.3166, | |
"step": 1630 | |
}, | |
{ | |
"epoch": 1.07, | |
"learning_rate": 0.001, | |
"loss": 2.103, | |
"step": 1635 | |
}, | |
{ | |
"epoch": 1.07, | |
"learning_rate": 0.001, | |
"loss": 1.9295, | |
"step": 1640 | |
}, | |
{ | |
"epoch": 1.08, | |
"learning_rate": 0.001, | |
"loss": 2.0598, | |
"step": 1645 | |
}, | |
{ | |
"epoch": 1.08, | |
"learning_rate": 0.001, | |
"loss": 2.3646, | |
"step": 1650 | |
}, | |
{ | |
"epoch": 1.08, | |
"learning_rate": 0.001, | |
"loss": 2.1853, | |
"step": 1655 | |
}, | |
{ | |
"epoch": 1.09, | |
"learning_rate": 0.001, | |
"loss": 2.0635, | |
"step": 1660 | |
}, | |
{ | |
"epoch": 1.09, | |
"learning_rate": 0.001, | |
"loss": 1.9439, | |
"step": 1665 | |
}, | |
{ | |
"epoch": 1.09, | |
"learning_rate": 0.001, | |
"loss": 2.469, | |
"step": 1670 | |
}, | |
{ | |
"epoch": 1.1, | |
"learning_rate": 0.001, | |
"loss": 2.1583, | |
"step": 1675 | |
}, | |
{ | |
"epoch": 1.1, | |
"learning_rate": 0.001, | |
"loss": 2.079, | |
"step": 1680 | |
}, | |
{ | |
"epoch": 1.1, | |
"learning_rate": 0.001, | |
"loss": 2.0118, | |
"step": 1685 | |
}, | |
{ | |
"epoch": 1.11, | |
"learning_rate": 0.001, | |
"loss": 2.011, | |
"step": 1690 | |
}, | |
{ | |
"epoch": 1.11, | |
"learning_rate": 0.001, | |
"loss": 2.3133, | |
"step": 1695 | |
}, | |
{ | |
"epoch": 1.11, | |
"learning_rate": 0.001, | |
"loss": 2.1873, | |
"step": 1700 | |
}, | |
{ | |
"epoch": 1.12, | |
"learning_rate": 0.001, | |
"loss": 1.7311, | |
"step": 1705 | |
}, | |
{ | |
"epoch": 1.12, | |
"learning_rate": 0.001, | |
"loss": 1.7269, | |
"step": 1710 | |
}, | |
{ | |
"epoch": 1.12, | |
"learning_rate": 0.001, | |
"loss": 2.3935, | |
"step": 1715 | |
}, | |
{ | |
"epoch": 1.13, | |
"learning_rate": 0.001, | |
"loss": 2.1104, | |
"step": 1720 | |
}, | |
{ | |
"epoch": 1.13, | |
"learning_rate": 0.001, | |
"loss": 2.2349, | |
"step": 1725 | |
}, | |
{ | |
"epoch": 1.13, | |
"learning_rate": 0.001, | |
"loss": 1.6555, | |
"step": 1730 | |
}, | |
{ | |
"epoch": 1.14, | |
"learning_rate": 0.001, | |
"loss": 1.9011, | |
"step": 1735 | |
}, | |
{ | |
"epoch": 1.14, | |
"learning_rate": 0.001, | |
"loss": 2.2142, | |
"step": 1740 | |
}, | |
{ | |
"epoch": 1.14, | |
"learning_rate": 0.001, | |
"loss": 2.1273, | |
"step": 1745 | |
}, | |
{ | |
"epoch": 1.15, | |
"learning_rate": 0.001, | |
"loss": 2.4855, | |
"step": 1750 | |
}, | |
{ | |
"epoch": 1.15, | |
"learning_rate": 0.001, | |
"loss": 2.3025, | |
"step": 1755 | |
}, | |
{ | |
"epoch": 1.15, | |
"learning_rate": 0.001, | |
"loss": 1.9496, | |
"step": 1760 | |
}, | |
{ | |
"epoch": 1.16, | |
"learning_rate": 0.001, | |
"loss": 1.9856, | |
"step": 1765 | |
}, | |
{ | |
"epoch": 1.16, | |
"learning_rate": 0.001, | |
"loss": 2.1494, | |
"step": 1770 | |
}, | |
{ | |
"epoch": 1.16, | |
"learning_rate": 0.001, | |
"loss": 1.9905, | |
"step": 1775 | |
}, | |
{ | |
"epoch": 1.16, | |
"learning_rate": 0.001, | |
"loss": 2.1177, | |
"step": 1780 | |
}, | |
{ | |
"epoch": 1.17, | |
"learning_rate": 0.001, | |
"loss": 1.9396, | |
"step": 1785 | |
}, | |
{ | |
"epoch": 1.17, | |
"learning_rate": 0.001, | |
"loss": 1.8821, | |
"step": 1790 | |
}, | |
{ | |
"epoch": 1.17, | |
"learning_rate": 0.001, | |
"loss": 2.1909, | |
"step": 1795 | |
}, | |
{ | |
"epoch": 1.18, | |
"learning_rate": 0.001, | |
"loss": 1.9459, | |
"step": 1800 | |
}, | |
{ | |
"epoch": 1.18, | |
"learning_rate": 0.001, | |
"loss": 2.1047, | |
"step": 1805 | |
}, | |
{ | |
"epoch": 1.18, | |
"learning_rate": 0.001, | |
"loss": 1.833, | |
"step": 1810 | |
}, | |
{ | |
"epoch": 1.19, | |
"learning_rate": 0.001, | |
"loss": 2.0839, | |
"step": 1815 | |
}, | |
{ | |
"epoch": 1.19, | |
"learning_rate": 0.001, | |
"loss": 2.0736, | |
"step": 1820 | |
}, | |
{ | |
"epoch": 1.19, | |
"learning_rate": 0.001, | |
"loss": 2.0034, | |
"step": 1825 | |
}, | |
{ | |
"epoch": 1.2, | |
"learning_rate": 0.001, | |
"loss": 2.0788, | |
"step": 1830 | |
}, | |
{ | |
"epoch": 1.2, | |
"learning_rate": 0.001, | |
"loss": 2.1903, | |
"step": 1835 | |
}, | |
{ | |
"epoch": 1.2, | |
"learning_rate": 0.001, | |
"loss": 2.0829, | |
"step": 1840 | |
}, | |
{ | |
"epoch": 1.21, | |
"learning_rate": 0.001, | |
"loss": 2.2449, | |
"step": 1845 | |
}, | |
{ | |
"epoch": 1.21, | |
"learning_rate": 0.001, | |
"loss": 2.1561, | |
"step": 1850 | |
}, | |
{ | |
"epoch": 1.21, | |
"learning_rate": 0.001, | |
"loss": 2.1827, | |
"step": 1855 | |
}, | |
{ | |
"epoch": 1.22, | |
"learning_rate": 0.001, | |
"loss": 2.1885, | |
"step": 1860 | |
}, | |
{ | |
"epoch": 1.22, | |
"learning_rate": 0.001, | |
"loss": 2.049, | |
"step": 1865 | |
}, | |
{ | |
"epoch": 1.22, | |
"learning_rate": 0.001, | |
"loss": 2.2107, | |
"step": 1870 | |
}, | |
{ | |
"epoch": 1.23, | |
"learning_rate": 0.001, | |
"loss": 2.0679, | |
"step": 1875 | |
}, | |
{ | |
"epoch": 1.23, | |
"learning_rate": 0.001, | |
"loss": 2.2887, | |
"step": 1880 | |
}, | |
{ | |
"epoch": 1.23, | |
"learning_rate": 0.001, | |
"loss": 2.2472, | |
"step": 1885 | |
}, | |
{ | |
"epoch": 1.24, | |
"learning_rate": 0.001, | |
"loss": 2.1993, | |
"step": 1890 | |
}, | |
{ | |
"epoch": 1.24, | |
"learning_rate": 0.001, | |
"loss": 2.1636, | |
"step": 1895 | |
}, | |
{ | |
"epoch": 1.24, | |
"learning_rate": 0.001, | |
"loss": 1.9547, | |
"step": 1900 | |
}, | |
{ | |
"epoch": 1.25, | |
"learning_rate": 0.001, | |
"loss": 2.3164, | |
"step": 1905 | |
}, | |
{ | |
"epoch": 1.25, | |
"learning_rate": 0.001, | |
"loss": 2.2847, | |
"step": 1910 | |
}, | |
{ | |
"epoch": 1.25, | |
"learning_rate": 0.001, | |
"loss": 2.2196, | |
"step": 1915 | |
}, | |
{ | |
"epoch": 1.26, | |
"learning_rate": 0.001, | |
"loss": 2.1823, | |
"step": 1920 | |
}, | |
{ | |
"epoch": 1.26, | |
"learning_rate": 0.001, | |
"loss": 2.2532, | |
"step": 1925 | |
}, | |
{ | |
"epoch": 1.26, | |
"learning_rate": 0.001, | |
"loss": 2.2451, | |
"step": 1930 | |
}, | |
{ | |
"epoch": 1.27, | |
"learning_rate": 0.001, | |
"loss": 1.984, | |
"step": 1935 | |
}, | |
{ | |
"epoch": 1.27, | |
"learning_rate": 0.001, | |
"loss": 2.0976, | |
"step": 1940 | |
}, | |
{ | |
"epoch": 1.27, | |
"learning_rate": 0.001, | |
"loss": 2.0658, | |
"step": 1945 | |
}, | |
{ | |
"epoch": 1.28, | |
"learning_rate": 0.001, | |
"loss": 2.316, | |
"step": 1950 | |
}, | |
{ | |
"epoch": 1.28, | |
"learning_rate": 0.001, | |
"loss": 2.2036, | |
"step": 1955 | |
}, | |
{ | |
"epoch": 1.28, | |
"learning_rate": 0.001, | |
"loss": 2.0602, | |
"step": 1960 | |
}, | |
{ | |
"epoch": 1.29, | |
"learning_rate": 0.001, | |
"loss": 1.971, | |
"step": 1965 | |
}, | |
{ | |
"epoch": 1.29, | |
"learning_rate": 0.001, | |
"loss": 2.1463, | |
"step": 1970 | |
}, | |
{ | |
"epoch": 1.29, | |
"learning_rate": 0.001, | |
"loss": 1.7689, | |
"step": 1975 | |
}, | |
{ | |
"epoch": 1.3, | |
"learning_rate": 0.001, | |
"loss": 2.014, | |
"step": 1980 | |
}, | |
{ | |
"epoch": 1.3, | |
"learning_rate": 0.001, | |
"loss": 2.3631, | |
"step": 1985 | |
}, | |
{ | |
"epoch": 1.3, | |
"learning_rate": 0.001, | |
"loss": 2.2294, | |
"step": 1990 | |
}, | |
{ | |
"epoch": 1.31, | |
"learning_rate": 0.001, | |
"loss": 1.8407, | |
"step": 1995 | |
}, | |
{ | |
"epoch": 1.31, | |
"learning_rate": 0.001, | |
"loss": 1.9365, | |
"step": 2000 | |
}, | |
{ | |
"epoch": 1.31, | |
"learning_rate": 0.001, | |
"loss": 2.0895, | |
"step": 2005 | |
}, | |
{ | |
"epoch": 1.32, | |
"learning_rate": 0.001, | |
"loss": 2.1625, | |
"step": 2010 | |
}, | |
{ | |
"epoch": 1.32, | |
"learning_rate": 0.001, | |
"loss": 2.053, | |
"step": 2015 | |
}, | |
{ | |
"epoch": 1.32, | |
"learning_rate": 0.001, | |
"loss": 2.0925, | |
"step": 2020 | |
}, | |
{ | |
"epoch": 1.33, | |
"learning_rate": 0.001, | |
"loss": 1.9764, | |
"step": 2025 | |
}, | |
{ | |
"epoch": 1.33, | |
"learning_rate": 0.001, | |
"loss": 2.1212, | |
"step": 2030 | |
}, | |
{ | |
"epoch": 1.33, | |
"learning_rate": 0.001, | |
"loss": 1.923, | |
"step": 2035 | |
}, | |
{ | |
"epoch": 1.34, | |
"learning_rate": 0.001, | |
"loss": 2.2899, | |
"step": 2040 | |
}, | |
{ | |
"epoch": 1.34, | |
"learning_rate": 0.001, | |
"loss": 2.3056, | |
"step": 2045 | |
}, | |
{ | |
"epoch": 1.34, | |
"learning_rate": 0.001, | |
"loss": 2.0217, | |
"step": 2050 | |
}, | |
{ | |
"epoch": 1.34, | |
"learning_rate": 0.001, | |
"loss": 2.1757, | |
"step": 2055 | |
}, | |
{ | |
"epoch": 1.35, | |
"learning_rate": 0.001, | |
"loss": 1.7524, | |
"step": 2060 | |
}, | |
{ | |
"epoch": 1.35, | |
"learning_rate": 0.001, | |
"loss": 2.3042, | |
"step": 2065 | |
}, | |
{ | |
"epoch": 1.35, | |
"learning_rate": 0.001, | |
"loss": 2.164, | |
"step": 2070 | |
}, | |
{ | |
"epoch": 1.36, | |
"learning_rate": 0.001, | |
"loss": 2.3265, | |
"step": 2075 | |
}, | |
{ | |
"epoch": 1.36, | |
"learning_rate": 0.001, | |
"loss": 2.4762, | |
"step": 2080 | |
}, | |
{ | |
"epoch": 1.36, | |
"learning_rate": 0.001, | |
"loss": 2.2577, | |
"step": 2085 | |
}, | |
{ | |
"epoch": 1.37, | |
"learning_rate": 0.001, | |
"loss": 2.4857, | |
"step": 2090 | |
}, | |
{ | |
"epoch": 1.37, | |
"learning_rate": 0.001, | |
"loss": 2.1887, | |
"step": 2095 | |
}, | |
{ | |
"epoch": 1.37, | |
"learning_rate": 0.001, | |
"loss": 2.3144, | |
"step": 2100 | |
}, | |
{ | |
"epoch": 1.38, | |
"learning_rate": 0.001, | |
"loss": 2.2387, | |
"step": 2105 | |
}, | |
{ | |
"epoch": 1.38, | |
"learning_rate": 0.001, | |
"loss": 2.5008, | |
"step": 2110 | |
}, | |
{ | |
"epoch": 1.38, | |
"learning_rate": 0.001, | |
"loss": 2.0604, | |
"step": 2115 | |
}, | |
{ | |
"epoch": 1.39, | |
"learning_rate": 0.001, | |
"loss": 2.414, | |
"step": 2120 | |
}, | |
{ | |
"epoch": 1.39, | |
"learning_rate": 0.001, | |
"loss": 2.2392, | |
"step": 2125 | |
}, | |
{ | |
"epoch": 1.39, | |
"learning_rate": 0.001, | |
"loss": 2.1842, | |
"step": 2130 | |
}, | |
{ | |
"epoch": 1.4, | |
"learning_rate": 0.001, | |
"loss": 2.0674, | |
"step": 2135 | |
}, | |
{ | |
"epoch": 1.4, | |
"learning_rate": 0.001, | |
"loss": 2.2088, | |
"step": 2140 | |
}, | |
{ | |
"epoch": 1.4, | |
"learning_rate": 0.001, | |
"loss": 2.363, | |
"step": 2145 | |
}, | |
{ | |
"epoch": 1.41, | |
"learning_rate": 0.001, | |
"loss": 2.1834, | |
"step": 2150 | |
}, | |
{ | |
"epoch": 1.41, | |
"learning_rate": 0.001, | |
"loss": 2.1047, | |
"step": 2155 | |
}, | |
{ | |
"epoch": 1.41, | |
"learning_rate": 0.001, | |
"loss": 1.8624, | |
"step": 2160 | |
}, | |
{ | |
"epoch": 1.42, | |
"learning_rate": 0.001, | |
"loss": 2.0746, | |
"step": 2165 | |
}, | |
{ | |
"epoch": 1.42, | |
"learning_rate": 0.001, | |
"loss": 2.4054, | |
"step": 2170 | |
}, | |
{ | |
"epoch": 1.42, | |
"learning_rate": 0.001, | |
"loss": 2.326, | |
"step": 2175 | |
}, | |
{ | |
"epoch": 1.43, | |
"learning_rate": 0.001, | |
"loss": 2.1054, | |
"step": 2180 | |
}, | |
{ | |
"epoch": 1.43, | |
"learning_rate": 0.001, | |
"loss": 1.7848, | |
"step": 2185 | |
}, | |
{ | |
"epoch": 1.43, | |
"learning_rate": 0.001, | |
"loss": 2.2795, | |
"step": 2190 | |
}, | |
{ | |
"epoch": 1.44, | |
"learning_rate": 0.001, | |
"loss": 1.9866, | |
"step": 2195 | |
}, | |
{ | |
"epoch": 1.44, | |
"learning_rate": 0.001, | |
"loss": 2.5626, | |
"step": 2200 | |
}, | |
{ | |
"epoch": 1.44, | |
"learning_rate": 0.001, | |
"loss": 2.2101, | |
"step": 2205 | |
}, | |
{ | |
"epoch": 1.45, | |
"learning_rate": 0.001, | |
"loss": 2.286, | |
"step": 2210 | |
}, | |
{ | |
"epoch": 1.45, | |
"learning_rate": 0.001, | |
"loss": 1.8204, | |
"step": 2215 | |
}, | |
{ | |
"epoch": 1.45, | |
"learning_rate": 0.001, | |
"loss": 2.1886, | |
"step": 2220 | |
}, | |
{ | |
"epoch": 1.46, | |
"learning_rate": 0.001, | |
"loss": 2.0936, | |
"step": 2225 | |
}, | |
{ | |
"epoch": 1.46, | |
"learning_rate": 0.001, | |
"loss": 2.5121, | |
"step": 2230 | |
}, | |
{ | |
"epoch": 1.46, | |
"learning_rate": 0.001, | |
"loss": 2.2165, | |
"step": 2235 | |
}, | |
{ | |
"epoch": 1.47, | |
"learning_rate": 0.001, | |
"loss": 1.8987, | |
"step": 2240 | |
}, | |
{ | |
"epoch": 1.47, | |
"learning_rate": 0.001, | |
"loss": 1.8333, | |
"step": 2245 | |
}, | |
{ | |
"epoch": 1.47, | |
"learning_rate": 0.001, | |
"loss": 2.2611, | |
"step": 2250 | |
}, | |
{ | |
"epoch": 1.48, | |
"learning_rate": 0.001, | |
"loss": 2.2649, | |
"step": 2255 | |
}, | |
{ | |
"epoch": 1.48, | |
"learning_rate": 0.001, | |
"loss": 2.0907, | |
"step": 2260 | |
}, | |
{ | |
"epoch": 1.48, | |
"learning_rate": 0.001, | |
"loss": 2.2594, | |
"step": 2265 | |
}, | |
{ | |
"epoch": 1.49, | |
"learning_rate": 0.001, | |
"loss": 2.1826, | |
"step": 2270 | |
}, | |
{ | |
"epoch": 1.49, | |
"learning_rate": 0.001, | |
"loss": 2.0087, | |
"step": 2275 | |
}, | |
{ | |
"epoch": 1.49, | |
"learning_rate": 0.001, | |
"loss": 2.0936, | |
"step": 2280 | |
}, | |
{ | |
"epoch": 1.5, | |
"learning_rate": 0.001, | |
"loss": 2.1456, | |
"step": 2285 | |
}, | |
{ | |
"epoch": 1.5, | |
"learning_rate": 0.001, | |
"loss": 2.3667, | |
"step": 2290 | |
}, | |
{ | |
"epoch": 1.5, | |
"learning_rate": 0.001, | |
"loss": 1.9434, | |
"step": 2295 | |
}, | |
{ | |
"epoch": 1.51, | |
"learning_rate": 0.001, | |
"loss": 2.2934, | |
"step": 2300 | |
}, | |
{ | |
"epoch": 1.51, | |
"learning_rate": 0.001, | |
"loss": 2.2615, | |
"step": 2305 | |
}, | |
{ | |
"epoch": 1.51, | |
"learning_rate": 0.001, | |
"loss": 2.0636, | |
"step": 2310 | |
}, | |
{ | |
"epoch": 1.52, | |
"learning_rate": 0.001, | |
"loss": 2.0307, | |
"step": 2315 | |
}, | |
{ | |
"epoch": 1.52, | |
"learning_rate": 0.001, | |
"loss": 2.0033, | |
"step": 2320 | |
}, | |
{ | |
"epoch": 1.52, | |
"learning_rate": 0.001, | |
"loss": 2.0915, | |
"step": 2325 | |
}, | |
{ | |
"epoch": 1.52, | |
"learning_rate": 0.001, | |
"loss": 2.0934, | |
"step": 2330 | |
}, | |
{ | |
"epoch": 1.53, | |
"learning_rate": 0.001, | |
"loss": 2.2811, | |
"step": 2335 | |
}, | |
{ | |
"epoch": 1.53, | |
"learning_rate": 0.001, | |
"loss": 1.962, | |
"step": 2340 | |
}, | |
{ | |
"epoch": 1.53, | |
"learning_rate": 0.001, | |
"loss": 1.8715, | |
"step": 2345 | |
}, | |
{ | |
"epoch": 1.54, | |
"learning_rate": 0.001, | |
"loss": 2.2623, | |
"step": 2350 | |
}, | |
{ | |
"epoch": 1.54, | |
"learning_rate": 0.001, | |
"loss": 2.1543, | |
"step": 2355 | |
}, | |
{ | |
"epoch": 1.54, | |
"learning_rate": 0.001, | |
"loss": 2.1026, | |
"step": 2360 | |
}, | |
{ | |
"epoch": 1.55, | |
"learning_rate": 0.001, | |
"loss": 2.4501, | |
"step": 2365 | |
}, | |
{ | |
"epoch": 1.55, | |
"learning_rate": 0.001, | |
"loss": 2.1931, | |
"step": 2370 | |
}, | |
{ | |
"epoch": 1.55, | |
"learning_rate": 0.001, | |
"loss": 2.0309, | |
"step": 2375 | |
}, | |
{ | |
"epoch": 1.56, | |
"learning_rate": 0.001, | |
"loss": 2.0723, | |
"step": 2380 | |
}, | |
{ | |
"epoch": 1.56, | |
"learning_rate": 0.001, | |
"loss": 2.3666, | |
"step": 2385 | |
}, | |
{ | |
"epoch": 1.56, | |
"learning_rate": 0.001, | |
"loss": 2.1858, | |
"step": 2390 | |
}, | |
{ | |
"epoch": 1.57, | |
"learning_rate": 0.001, | |
"loss": 2.031, | |
"step": 2395 | |
}, | |
{ | |
"epoch": 1.57, | |
"learning_rate": 0.001, | |
"loss": 1.6752, | |
"step": 2400 | |
}, | |
{ | |
"epoch": 1.57, | |
"learning_rate": 0.001, | |
"loss": 2.1003, | |
"step": 2405 | |
}, | |
{ | |
"epoch": 1.58, | |
"learning_rate": 0.001, | |
"loss": 2.4007, | |
"step": 2410 | |
}, | |
{ | |
"epoch": 1.58, | |
"learning_rate": 0.001, | |
"loss": 2.2498, | |
"step": 2415 | |
}, | |
{ | |
"epoch": 1.58, | |
"learning_rate": 0.001, | |
"loss": 2.2472, | |
"step": 2420 | |
}, | |
{ | |
"epoch": 1.59, | |
"learning_rate": 0.001, | |
"loss": 2.202, | |
"step": 2425 | |
}, | |
{ | |
"epoch": 1.59, | |
"learning_rate": 0.001, | |
"loss": 2.4824, | |
"step": 2430 | |
}, | |
{ | |
"epoch": 1.59, | |
"learning_rate": 0.001, | |
"loss": 2.1637, | |
"step": 2435 | |
}, | |
{ | |
"epoch": 1.6, | |
"learning_rate": 0.001, | |
"loss": 2.4039, | |
"step": 2440 | |
}, | |
{ | |
"epoch": 1.6, | |
"learning_rate": 0.001, | |
"loss": 2.1053, | |
"step": 2445 | |
}, | |
{ | |
"epoch": 1.6, | |
"learning_rate": 0.001, | |
"loss": 1.9858, | |
"step": 2450 | |
}, | |
{ | |
"epoch": 1.61, | |
"learning_rate": 0.001, | |
"loss": 2.2371, | |
"step": 2455 | |
}, | |
{ | |
"epoch": 1.61, | |
"learning_rate": 0.001, | |
"loss": 1.8063, | |
"step": 2460 | |
}, | |
{ | |
"epoch": 1.61, | |
"learning_rate": 0.001, | |
"loss": 2.2199, | |
"step": 2465 | |
}, | |
{ | |
"epoch": 1.62, | |
"learning_rate": 0.001, | |
"loss": 2.2837, | |
"step": 2470 | |
}, | |
{ | |
"epoch": 1.62, | |
"learning_rate": 0.001, | |
"loss": 2.2889, | |
"step": 2475 | |
}, | |
{ | |
"epoch": 1.62, | |
"learning_rate": 0.001, | |
"loss": 2.1687, | |
"step": 2480 | |
}, | |
{ | |
"epoch": 1.63, | |
"learning_rate": 0.001, | |
"loss": 2.2346, | |
"step": 2485 | |
}, | |
{ | |
"epoch": 1.63, | |
"learning_rate": 0.001, | |
"loss": 2.3059, | |
"step": 2490 | |
}, | |
{ | |
"epoch": 1.63, | |
"learning_rate": 0.001, | |
"loss": 2.3096, | |
"step": 2495 | |
}, | |
{ | |
"epoch": 1.64, | |
"learning_rate": 0.001, | |
"loss": 2.4252, | |
"step": 2500 | |
}, | |
{ | |
"epoch": 1.64, | |
"learning_rate": 0.001, | |
"loss": 1.9604, | |
"step": 2505 | |
}, | |
{ | |
"epoch": 1.64, | |
"learning_rate": 0.001, | |
"loss": 2.154, | |
"step": 2510 | |
}, | |
{ | |
"epoch": 1.65, | |
"learning_rate": 0.001, | |
"loss": 2.3648, | |
"step": 2515 | |
}, | |
{ | |
"epoch": 1.65, | |
"learning_rate": 0.001, | |
"loss": 2.3436, | |
"step": 2520 | |
}, | |
{ | |
"epoch": 1.65, | |
"learning_rate": 0.001, | |
"loss": 2.3471, | |
"step": 2525 | |
}, | |
{ | |
"epoch": 1.66, | |
"learning_rate": 0.001, | |
"loss": 2.1766, | |
"step": 2530 | |
}, | |
{ | |
"epoch": 1.66, | |
"learning_rate": 0.001, | |
"loss": 2.1967, | |
"step": 2535 | |
}, | |
{ | |
"epoch": 1.66, | |
"learning_rate": 0.001, | |
"loss": 2.1945, | |
"step": 2540 | |
}, | |
{ | |
"epoch": 1.67, | |
"learning_rate": 0.001, | |
"loss": 2.1473, | |
"step": 2545 | |
}, | |
{ | |
"epoch": 1.67, | |
"learning_rate": 0.001, | |
"loss": 2.4883, | |
"step": 2550 | |
}, | |
{ | |
"epoch": 1.67, | |
"learning_rate": 0.001, | |
"loss": 1.9596, | |
"step": 2555 | |
}, | |
{ | |
"epoch": 1.68, | |
"learning_rate": 0.001, | |
"loss": 2.3908, | |
"step": 2560 | |
}, | |
{ | |
"epoch": 1.68, | |
"learning_rate": 0.001, | |
"loss": 2.2533, | |
"step": 2565 | |
}, | |
{ | |
"epoch": 1.68, | |
"learning_rate": 0.001, | |
"loss": 2.0808, | |
"step": 2570 | |
}, | |
{ | |
"epoch": 1.69, | |
"learning_rate": 0.001, | |
"loss": 2.028, | |
"step": 2575 | |
}, | |
{ | |
"epoch": 1.69, | |
"learning_rate": 0.001, | |
"loss": 2.2471, | |
"step": 2580 | |
}, | |
{ | |
"epoch": 1.69, | |
"learning_rate": 0.001, | |
"loss": 2.1975, | |
"step": 2585 | |
}, | |
{ | |
"epoch": 1.7, | |
"learning_rate": 0.001, | |
"loss": 2.404, | |
"step": 2590 | |
}, | |
{ | |
"epoch": 1.7, | |
"learning_rate": 0.001, | |
"loss": 2.4238, | |
"step": 2595 | |
}, | |
{ | |
"epoch": 1.7, | |
"learning_rate": 0.001, | |
"loss": 2.2742, | |
"step": 2600 | |
}, | |
{ | |
"epoch": 1.7, | |
"learning_rate": 0.001, | |
"loss": 2.3284, | |
"step": 2605 | |
}, | |
{ | |
"epoch": 1.71, | |
"learning_rate": 0.001, | |
"loss": 2.0517, | |
"step": 2610 | |
}, | |
{ | |
"epoch": 1.71, | |
"learning_rate": 0.001, | |
"loss": 2.488, | |
"step": 2615 | |
}, | |
{ | |
"epoch": 1.71, | |
"learning_rate": 0.001, | |
"loss": 2.1215, | |
"step": 2620 | |
}, | |
{ | |
"epoch": 1.72, | |
"learning_rate": 0.001, | |
"loss": 2.1813, | |
"step": 2625 | |
}, | |
{ | |
"epoch": 1.72, | |
"learning_rate": 0.001, | |
"loss": 1.9404, | |
"step": 2630 | |
}, | |
{ | |
"epoch": 1.72, | |
"learning_rate": 0.001, | |
"loss": 2.1978, | |
"step": 2635 | |
}, | |
{ | |
"epoch": 1.73, | |
"learning_rate": 0.001, | |
"loss": 2.1462, | |
"step": 2640 | |
}, | |
{ | |
"epoch": 1.73, | |
"learning_rate": 0.001, | |
"loss": 1.9796, | |
"step": 2645 | |
}, | |
{ | |
"epoch": 1.73, | |
"learning_rate": 0.001, | |
"loss": 2.267, | |
"step": 2650 | |
}, | |
{ | |
"epoch": 1.74, | |
"learning_rate": 0.001, | |
"loss": 2.0169, | |
"step": 2655 | |
}, | |
{ | |
"epoch": 1.74, | |
"learning_rate": 0.001, | |
"loss": 2.1779, | |
"step": 2660 | |
}, | |
{ | |
"epoch": 1.74, | |
"learning_rate": 0.001, | |
"loss": 2.2851, | |
"step": 2665 | |
}, | |
{ | |
"epoch": 1.75, | |
"learning_rate": 0.001, | |
"loss": 2.5845, | |
"step": 2670 | |
}, | |
{ | |
"epoch": 1.75, | |
"learning_rate": 0.001, | |
"loss": 2.006, | |
"step": 2675 | |
}, | |
{ | |
"epoch": 1.75, | |
"learning_rate": 0.001, | |
"loss": 2.0386, | |
"step": 2680 | |
}, | |
{ | |
"epoch": 1.76, | |
"learning_rate": 0.001, | |
"loss": 2.2271, | |
"step": 2685 | |
}, | |
{ | |
"epoch": 1.76, | |
"learning_rate": 0.001, | |
"loss": 2.1557, | |
"step": 2690 | |
}, | |
{ | |
"epoch": 1.76, | |
"learning_rate": 0.001, | |
"loss": 2.0532, | |
"step": 2695 | |
}, | |
{ | |
"epoch": 1.77, | |
"learning_rate": 0.001, | |
"loss": 2.14, | |
"step": 2700 | |
}, | |
{ | |
"epoch": 1.77, | |
"learning_rate": 0.001, | |
"loss": 1.995, | |
"step": 2705 | |
}, | |
{ | |
"epoch": 1.77, | |
"learning_rate": 0.001, | |
"loss": 1.9508, | |
"step": 2710 | |
}, | |
{ | |
"epoch": 1.78, | |
"learning_rate": 0.001, | |
"loss": 2.4736, | |
"step": 2715 | |
}, | |
{ | |
"epoch": 1.78, | |
"learning_rate": 0.001, | |
"loss": 2.0145, | |
"step": 2720 | |
}, | |
{ | |
"epoch": 1.78, | |
"learning_rate": 0.001, | |
"loss": 2.1276, | |
"step": 2725 | |
}, | |
{ | |
"epoch": 1.79, | |
"learning_rate": 0.001, | |
"loss": 2.1232, | |
"step": 2730 | |
}, | |
{ | |
"epoch": 1.79, | |
"learning_rate": 0.001, | |
"loss": 2.3248, | |
"step": 2735 | |
}, | |
{ | |
"epoch": 1.79, | |
"learning_rate": 0.001, | |
"loss": 2.0148, | |
"step": 2740 | |
}, | |
{ | |
"epoch": 1.8, | |
"learning_rate": 0.001, | |
"loss": 1.974, | |
"step": 2745 | |
}, | |
{ | |
"epoch": 1.8, | |
"learning_rate": 0.001, | |
"loss": 2.2313, | |
"step": 2750 | |
}, | |
{ | |
"epoch": 1.8, | |
"learning_rate": 0.001, | |
"loss": 2.0893, | |
"step": 2755 | |
}, | |
{ | |
"epoch": 1.81, | |
"learning_rate": 0.001, | |
"loss": 2.1423, | |
"step": 2760 | |
}, | |
{ | |
"epoch": 1.81, | |
"learning_rate": 0.001, | |
"loss": 2.2085, | |
"step": 2765 | |
}, | |
{ | |
"epoch": 1.81, | |
"learning_rate": 0.001, | |
"loss": 2.2314, | |
"step": 2770 | |
}, | |
{ | |
"epoch": 1.82, | |
"learning_rate": 0.001, | |
"loss": 2.1122, | |
"step": 2775 | |
}, | |
{ | |
"epoch": 1.82, | |
"learning_rate": 0.001, | |
"loss": 2.0739, | |
"step": 2780 | |
}, | |
{ | |
"epoch": 1.82, | |
"learning_rate": 0.001, | |
"loss": 2.2233, | |
"step": 2785 | |
}, | |
{ | |
"epoch": 1.83, | |
"learning_rate": 0.001, | |
"loss": 2.2696, | |
"step": 2790 | |
}, | |
{ | |
"epoch": 1.83, | |
"learning_rate": 0.001, | |
"loss": 1.9375, | |
"step": 2795 | |
}, | |
{ | |
"epoch": 1.83, | |
"learning_rate": 0.001, | |
"loss": 2.2126, | |
"step": 2800 | |
}, | |
{ | |
"epoch": 1.84, | |
"learning_rate": 0.001, | |
"loss": 2.2643, | |
"step": 2805 | |
}, | |
{ | |
"epoch": 1.84, | |
"learning_rate": 0.001, | |
"loss": 1.881, | |
"step": 2810 | |
}, | |
{ | |
"epoch": 1.84, | |
"learning_rate": 0.001, | |
"loss": 2.427, | |
"step": 2815 | |
}, | |
{ | |
"epoch": 1.85, | |
"learning_rate": 0.001, | |
"loss": 2.419, | |
"step": 2820 | |
}, | |
{ | |
"epoch": 1.85, | |
"learning_rate": 0.001, | |
"loss": 2.2486, | |
"step": 2825 | |
}, | |
{ | |
"epoch": 1.85, | |
"learning_rate": 0.001, | |
"loss": 2.6872, | |
"step": 2830 | |
}, | |
{ | |
"epoch": 1.86, | |
"learning_rate": 0.001, | |
"loss": 2.0539, | |
"step": 2835 | |
}, | |
{ | |
"epoch": 1.86, | |
"learning_rate": 0.001, | |
"loss": 2.4746, | |
"step": 2840 | |
}, | |
{ | |
"epoch": 1.86, | |
"learning_rate": 0.001, | |
"loss": 2.2573, | |
"step": 2845 | |
}, | |
{ | |
"epoch": 1.87, | |
"learning_rate": 0.001, | |
"loss": 2.0748, | |
"step": 2850 | |
}, | |
{ | |
"epoch": 1.87, | |
"learning_rate": 0.001, | |
"loss": 2.2958, | |
"step": 2855 | |
}, | |
{ | |
"epoch": 1.87, | |
"learning_rate": 0.001, | |
"loss": 1.8544, | |
"step": 2860 | |
}, | |
{ | |
"epoch": 1.88, | |
"learning_rate": 0.001, | |
"loss": 2.4232, | |
"step": 2865 | |
}, | |
{ | |
"epoch": 1.88, | |
"learning_rate": 0.001, | |
"loss": 2.3381, | |
"step": 2870 | |
}, | |
{ | |
"epoch": 1.88, | |
"learning_rate": 0.001, | |
"loss": 2.276, | |
"step": 2875 | |
}, | |
{ | |
"epoch": 1.88, | |
"learning_rate": 0.001, | |
"loss": 2.3278, | |
"step": 2880 | |
}, | |
{ | |
"epoch": 1.89, | |
"learning_rate": 0.001, | |
"loss": 2.3119, | |
"step": 2885 | |
}, | |
{ | |
"epoch": 1.89, | |
"learning_rate": 0.001, | |
"loss": 2.2123, | |
"step": 2890 | |
}, | |
{ | |
"epoch": 1.89, | |
"learning_rate": 0.001, | |
"loss": 2.1596, | |
"step": 2895 | |
}, | |
{ | |
"epoch": 1.9, | |
"learning_rate": 0.001, | |
"loss": 2.04, | |
"step": 2900 | |
}, | |
{ | |
"epoch": 1.9, | |
"learning_rate": 0.001, | |
"loss": 2.3909, | |
"step": 2905 | |
}, | |
{ | |
"epoch": 1.9, | |
"learning_rate": 0.001, | |
"loss": 2.1027, | |
"step": 2910 | |
}, | |
{ | |
"epoch": 1.91, | |
"learning_rate": 0.001, | |
"loss": 2.1365, | |
"step": 2915 | |
}, | |
{ | |
"epoch": 1.91, | |
"learning_rate": 0.001, | |
"loss": 2.2151, | |
"step": 2920 | |
}, | |
{ | |
"epoch": 1.91, | |
"learning_rate": 0.001, | |
"loss": 2.2469, | |
"step": 2925 | |
}, | |
{ | |
"epoch": 1.92, | |
"learning_rate": 0.001, | |
"loss": 2.1939, | |
"step": 2930 | |
}, | |
{ | |
"epoch": 1.92, | |
"learning_rate": 0.001, | |
"loss": 2.0386, | |
"step": 2935 | |
}, | |
{ | |
"epoch": 1.92, | |
"learning_rate": 0.001, | |
"loss": 2.094, | |
"step": 2940 | |
}, | |
{ | |
"epoch": 1.93, | |
"learning_rate": 0.001, | |
"loss": 2.3225, | |
"step": 2945 | |
}, | |
{ | |
"epoch": 1.93, | |
"learning_rate": 0.001, | |
"loss": 2.1019, | |
"step": 2950 | |
}, | |
{ | |
"epoch": 1.93, | |
"learning_rate": 0.001, | |
"loss": 2.3213, | |
"step": 2955 | |
}, | |
{ | |
"epoch": 1.94, | |
"learning_rate": 0.001, | |
"loss": 2.4173, | |
"step": 2960 | |
}, | |
{ | |
"epoch": 1.94, | |
"learning_rate": 0.001, | |
"loss": 2.1919, | |
"step": 2965 | |
}, | |
{ | |
"epoch": 1.94, | |
"learning_rate": 0.001, | |
"loss": 2.1733, | |
"step": 2970 | |
}, | |
{ | |
"epoch": 1.95, | |
"learning_rate": 0.001, | |
"loss": 2.3576, | |
"step": 2975 | |
}, | |
{ | |
"epoch": 1.95, | |
"learning_rate": 0.001, | |
"loss": 2.0042, | |
"step": 2980 | |
}, | |
{ | |
"epoch": 1.95, | |
"learning_rate": 0.001, | |
"loss": 2.0977, | |
"step": 2985 | |
}, | |
{ | |
"epoch": 1.96, | |
"learning_rate": 0.001, | |
"loss": 2.2674, | |
"step": 2990 | |
}, | |
{ | |
"epoch": 1.96, | |
"learning_rate": 0.001, | |
"loss": 1.9316, | |
"step": 2995 | |
}, | |
{ | |
"epoch": 1.96, | |
"learning_rate": 0.001, | |
"loss": 2.2472, | |
"step": 3000 | |
}, | |
{ | |
"epoch": 1.97, | |
"learning_rate": 0.001, | |
"loss": 2.3264, | |
"step": 3005 | |
}, | |
{ | |
"epoch": 1.97, | |
"learning_rate": 0.001, | |
"loss": 2.1732, | |
"step": 3010 | |
}, | |
{ | |
"epoch": 1.97, | |
"learning_rate": 0.001, | |
"loss": 2.3932, | |
"step": 3015 | |
}, | |
{ | |
"epoch": 1.98, | |
"learning_rate": 0.001, | |
"loss": 2.168, | |
"step": 3020 | |
}, | |
{ | |
"epoch": 1.98, | |
"learning_rate": 0.001, | |
"loss": 2.2907, | |
"step": 3025 | |
}, | |
{ | |
"epoch": 1.98, | |
"learning_rate": 0.001, | |
"loss": 2.1086, | |
"step": 3030 | |
}, | |
{ | |
"epoch": 1.99, | |
"learning_rate": 0.001, | |
"loss": 2.2868, | |
"step": 3035 | |
}, | |
{ | |
"epoch": 1.99, | |
"learning_rate": 0.001, | |
"loss": 1.9612, | |
"step": 3040 | |
}, | |
{ | |
"epoch": 1.99, | |
"learning_rate": 0.001, | |
"loss": 2.1025, | |
"step": 3045 | |
}, | |
{ | |
"epoch": 2.0, | |
"learning_rate": 0.001, | |
"loss": 2.1001, | |
"step": 3050 | |
}, | |
{ | |
"epoch": 2.0, | |
"learning_rate": 0.001, | |
"loss": 2.0419, | |
"step": 3055 | |
}, | |
{ | |
"epoch": 2.0, | |
"learning_rate": 0.001, | |
"loss": 2.1358, | |
"step": 3060 | |
}, | |
{ | |
"epoch": 2.01, | |
"learning_rate": 0.001, | |
"loss": 1.762, | |
"step": 3065 | |
}, | |
{ | |
"epoch": 2.01, | |
"learning_rate": 0.001, | |
"loss": 1.8736, | |
"step": 3070 | |
}, | |
{ | |
"epoch": 2.01, | |
"learning_rate": 0.001, | |
"loss": 1.7017, | |
"step": 3075 | |
}, | |
{ | |
"epoch": 2.02, | |
"learning_rate": 0.001, | |
"loss": 2.0051, | |
"step": 3080 | |
}, | |
{ | |
"epoch": 2.02, | |
"learning_rate": 0.001, | |
"loss": 1.9062, | |
"step": 3085 | |
}, | |
{ | |
"epoch": 2.02, | |
"learning_rate": 0.001, | |
"loss": 1.8619, | |
"step": 3090 | |
}, | |
{ | |
"epoch": 2.03, | |
"learning_rate": 0.001, | |
"loss": 1.9188, | |
"step": 3095 | |
}, | |
{ | |
"epoch": 2.03, | |
"learning_rate": 0.001, | |
"loss": 1.7303, | |
"step": 3100 | |
}, | |
{ | |
"epoch": 2.03, | |
"learning_rate": 0.001, | |
"loss": 1.846, | |
"step": 3105 | |
}, | |
{ | |
"epoch": 2.04, | |
"learning_rate": 0.001, | |
"loss": 1.8145, | |
"step": 3110 | |
}, | |
{ | |
"epoch": 2.04, | |
"learning_rate": 0.001, | |
"loss": 1.8482, | |
"step": 3115 | |
}, | |
{ | |
"epoch": 2.04, | |
"learning_rate": 0.001, | |
"loss": 1.5815, | |
"step": 3120 | |
}, | |
{ | |
"epoch": 2.05, | |
"learning_rate": 0.001, | |
"loss": 1.5672, | |
"step": 3125 | |
}, | |
{ | |
"epoch": 2.05, | |
"learning_rate": 0.001, | |
"loss": 1.7365, | |
"step": 3130 | |
}, | |
{ | |
"epoch": 2.05, | |
"learning_rate": 0.001, | |
"loss": 1.971, | |
"step": 3135 | |
}, | |
{ | |
"epoch": 2.05, | |
"learning_rate": 0.001, | |
"loss": 1.9756, | |
"step": 3140 | |
}, | |
{ | |
"epoch": 2.06, | |
"learning_rate": 0.001, | |
"loss": 1.8266, | |
"step": 3145 | |
}, | |
{ | |
"epoch": 2.06, | |
"learning_rate": 0.001, | |
"loss": 1.9481, | |
"step": 3150 | |
}, | |
{ | |
"epoch": 2.06, | |
"learning_rate": 0.001, | |
"loss": 1.7729, | |
"step": 3155 | |
}, | |
{ | |
"epoch": 2.07, | |
"learning_rate": 0.001, | |
"loss": 1.7883, | |
"step": 3160 | |
}, | |
{ | |
"epoch": 2.07, | |
"learning_rate": 0.001, | |
"loss": 1.6513, | |
"step": 3165 | |
}, | |
{ | |
"epoch": 2.07, | |
"learning_rate": 0.001, | |
"loss": 1.9425, | |
"step": 3170 | |
}, | |
{ | |
"epoch": 2.08, | |
"learning_rate": 0.001, | |
"loss": 1.7869, | |
"step": 3175 | |
}, | |
{ | |
"epoch": 2.08, | |
"learning_rate": 0.001, | |
"loss": 1.9321, | |
"step": 3180 | |
}, | |
{ | |
"epoch": 2.08, | |
"learning_rate": 0.001, | |
"loss": 2.0544, | |
"step": 3185 | |
}, | |
{ | |
"epoch": 2.09, | |
"learning_rate": 0.001, | |
"loss": 1.9293, | |
"step": 3190 | |
}, | |
{ | |
"epoch": 2.09, | |
"learning_rate": 0.001, | |
"loss": 1.8088, | |
"step": 3195 | |
}, | |
{ | |
"epoch": 2.09, | |
"learning_rate": 0.001, | |
"loss": 1.9035, | |
"step": 3200 | |
}, | |
{ | |
"epoch": 2.1, | |
"learning_rate": 0.001, | |
"loss": 1.8003, | |
"step": 3205 | |
}, | |
{ | |
"epoch": 2.1, | |
"learning_rate": 0.001, | |
"loss": 1.924, | |
"step": 3210 | |
}, | |
{ | |
"epoch": 2.1, | |
"learning_rate": 0.001, | |
"loss": 1.5344, | |
"step": 3215 | |
}, | |
{ | |
"epoch": 2.11, | |
"learning_rate": 0.001, | |
"loss": 1.7529, | |
"step": 3220 | |
}, | |
{ | |
"epoch": 2.11, | |
"learning_rate": 0.001, | |
"loss": 1.9707, | |
"step": 3225 | |
}, | |
{ | |
"epoch": 2.11, | |
"learning_rate": 0.001, | |
"loss": 2.1602, | |
"step": 3230 | |
}, | |
{ | |
"epoch": 2.12, | |
"learning_rate": 0.001, | |
"loss": 2.0995, | |
"step": 3235 | |
}, | |
{ | |
"epoch": 2.12, | |
"learning_rate": 0.001, | |
"loss": 1.7761, | |
"step": 3240 | |
}, | |
{ | |
"epoch": 2.12, | |
"learning_rate": 0.001, | |
"loss": 1.7302, | |
"step": 3245 | |
}, | |
{ | |
"epoch": 2.13, | |
"learning_rate": 0.001, | |
"loss": 2.0175, | |
"step": 3250 | |
}, | |
{ | |
"epoch": 2.13, | |
"learning_rate": 0.001, | |
"loss": 1.9675, | |
"step": 3255 | |
}, | |
{ | |
"epoch": 2.13, | |
"learning_rate": 0.001, | |
"loss": 1.7262, | |
"step": 3260 | |
}, | |
{ | |
"epoch": 2.14, | |
"learning_rate": 0.001, | |
"loss": 1.8225, | |
"step": 3265 | |
}, | |
{ | |
"epoch": 2.14, | |
"learning_rate": 0.001, | |
"loss": 1.9512, | |
"step": 3270 | |
}, | |
{ | |
"epoch": 2.14, | |
"learning_rate": 0.001, | |
"loss": 1.8595, | |
"step": 3275 | |
}, | |
{ | |
"epoch": 2.15, | |
"learning_rate": 0.001, | |
"loss": 1.8585, | |
"step": 3280 | |
}, | |
{ | |
"epoch": 2.15, | |
"learning_rate": 0.001, | |
"loss": 1.8572, | |
"step": 3285 | |
}, | |
{ | |
"epoch": 2.15, | |
"learning_rate": 0.001, | |
"loss": 1.9084, | |
"step": 3290 | |
}, | |
{ | |
"epoch": 2.16, | |
"learning_rate": 0.001, | |
"loss": 1.9047, | |
"step": 3295 | |
}, | |
{ | |
"epoch": 2.16, | |
"learning_rate": 0.001, | |
"loss": 2.0312, | |
"step": 3300 | |
}, | |
{ | |
"epoch": 2.16, | |
"learning_rate": 0.001, | |
"loss": 1.7408, | |
"step": 3305 | |
}, | |
{ | |
"epoch": 2.17, | |
"learning_rate": 0.001, | |
"loss": 1.9193, | |
"step": 3310 | |
}, | |
{ | |
"epoch": 2.17, | |
"learning_rate": 0.001, | |
"loss": 1.8676, | |
"step": 3315 | |
}, | |
{ | |
"epoch": 2.17, | |
"learning_rate": 0.001, | |
"loss": 1.9841, | |
"step": 3320 | |
}, | |
{ | |
"epoch": 2.18, | |
"learning_rate": 0.001, | |
"loss": 2.0652, | |
"step": 3325 | |
}, | |
{ | |
"epoch": 2.18, | |
"learning_rate": 0.001, | |
"loss": 1.7531, | |
"step": 3330 | |
}, | |
{ | |
"epoch": 2.18, | |
"learning_rate": 0.001, | |
"loss": 1.8534, | |
"step": 3335 | |
}, | |
{ | |
"epoch": 2.19, | |
"learning_rate": 0.001, | |
"loss": 1.9164, | |
"step": 3340 | |
}, | |
{ | |
"epoch": 2.19, | |
"learning_rate": 0.001, | |
"loss": 2.1042, | |
"step": 3345 | |
}, | |
{ | |
"epoch": 2.19, | |
"learning_rate": 0.001, | |
"loss": 1.7698, | |
"step": 3350 | |
}, | |
{ | |
"epoch": 2.2, | |
"learning_rate": 0.001, | |
"loss": 1.9511, | |
"step": 3355 | |
}, | |
{ | |
"epoch": 2.2, | |
"learning_rate": 0.001, | |
"loss": 1.7882, | |
"step": 3360 | |
}, | |
{ | |
"epoch": 2.2, | |
"learning_rate": 0.001, | |
"loss": 2.0131, | |
"step": 3365 | |
}, | |
{ | |
"epoch": 2.21, | |
"learning_rate": 0.001, | |
"loss": 1.8166, | |
"step": 3370 | |
}, | |
{ | |
"epoch": 2.21, | |
"learning_rate": 0.001, | |
"loss": 1.7719, | |
"step": 3375 | |
}, | |
{ | |
"epoch": 2.21, | |
"learning_rate": 0.001, | |
"loss": 1.7812, | |
"step": 3380 | |
}, | |
{ | |
"epoch": 2.22, | |
"learning_rate": 0.001, | |
"loss": 1.8818, | |
"step": 3385 | |
}, | |
{ | |
"epoch": 2.22, | |
"learning_rate": 0.001, | |
"loss": 1.8874, | |
"step": 3390 | |
}, | |
{ | |
"epoch": 2.22, | |
"learning_rate": 0.001, | |
"loss": 1.8422, | |
"step": 3395 | |
}, | |
{ | |
"epoch": 2.23, | |
"learning_rate": 0.001, | |
"loss": 2.1017, | |
"step": 3400 | |
}, | |
{ | |
"epoch": 2.23, | |
"learning_rate": 0.001, | |
"loss": 1.8183, | |
"step": 3405 | |
}, | |
{ | |
"epoch": 2.23, | |
"learning_rate": 0.001, | |
"loss": 1.9675, | |
"step": 3410 | |
}, | |
{ | |
"epoch": 2.23, | |
"learning_rate": 0.001, | |
"loss": 1.7103, | |
"step": 3415 | |
}, | |
{ | |
"epoch": 2.24, | |
"learning_rate": 0.001, | |
"loss": 1.9424, | |
"step": 3420 | |
}, | |
{ | |
"epoch": 2.24, | |
"learning_rate": 0.001, | |
"loss": 1.6799, | |
"step": 3425 | |
}, | |
{ | |
"epoch": 2.24, | |
"learning_rate": 0.001, | |
"loss": 1.6281, | |
"step": 3430 | |
}, | |
{ | |
"epoch": 2.25, | |
"learning_rate": 0.001, | |
"loss": 1.9875, | |
"step": 3435 | |
}, | |
{ | |
"epoch": 2.25, | |
"learning_rate": 0.001, | |
"loss": 1.6356, | |
"step": 3440 | |
}, | |
{ | |
"epoch": 2.25, | |
"learning_rate": 0.001, | |
"loss": 1.8295, | |
"step": 3445 | |
}, | |
{ | |
"epoch": 2.26, | |
"learning_rate": 0.001, | |
"loss": 2.1965, | |
"step": 3450 | |
}, | |
{ | |
"epoch": 2.26, | |
"learning_rate": 0.001, | |
"loss": 1.8972, | |
"step": 3455 | |
}, | |
{ | |
"epoch": 2.26, | |
"learning_rate": 0.001, | |
"loss": 2.1296, | |
"step": 3460 | |
}, | |
{ | |
"epoch": 2.27, | |
"learning_rate": 0.001, | |
"loss": 1.987, | |
"step": 3465 | |
}, | |
{ | |
"epoch": 2.27, | |
"learning_rate": 0.001, | |
"loss": 1.7851, | |
"step": 3470 | |
}, | |
{ | |
"epoch": 2.27, | |
"learning_rate": 0.001, | |
"loss": 1.758, | |
"step": 3475 | |
}, | |
{ | |
"epoch": 2.28, | |
"learning_rate": 0.001, | |
"loss": 1.752, | |
"step": 3480 | |
}, | |
{ | |
"epoch": 2.28, | |
"learning_rate": 0.001, | |
"loss": 1.9093, | |
"step": 3485 | |
}, | |
{ | |
"epoch": 2.28, | |
"learning_rate": 0.001, | |
"loss": 1.9931, | |
"step": 3490 | |
}, | |
{ | |
"epoch": 2.29, | |
"learning_rate": 0.001, | |
"loss": 1.8762, | |
"step": 3495 | |
}, | |
{ | |
"epoch": 2.29, | |
"learning_rate": 0.001, | |
"loss": 1.9769, | |
"step": 3500 | |
}, | |
{ | |
"epoch": 2.29, | |
"learning_rate": 0.001, | |
"loss": 2.0007, | |
"step": 3505 | |
}, | |
{ | |
"epoch": 2.3, | |
"learning_rate": 0.001, | |
"loss": 2.2291, | |
"step": 3510 | |
}, | |
{ | |
"epoch": 2.3, | |
"learning_rate": 0.001, | |
"loss": 2.0776, | |
"step": 3515 | |
}, | |
{ | |
"epoch": 2.3, | |
"learning_rate": 0.001, | |
"loss": 2.2868, | |
"step": 3520 | |
}, | |
{ | |
"epoch": 2.31, | |
"learning_rate": 0.001, | |
"loss": 2.0916, | |
"step": 3525 | |
}, | |
{ | |
"epoch": 2.31, | |
"learning_rate": 0.001, | |
"loss": 2.0035, | |
"step": 3530 | |
}, | |
{ | |
"epoch": 2.31, | |
"learning_rate": 0.001, | |
"loss": 1.9706, | |
"step": 3535 | |
}, | |
{ | |
"epoch": 2.32, | |
"learning_rate": 0.001, | |
"loss": 1.6881, | |
"step": 3540 | |
}, | |
{ | |
"epoch": 2.32, | |
"learning_rate": 0.001, | |
"loss": 2.0781, | |
"step": 3545 | |
}, | |
{ | |
"epoch": 2.32, | |
"learning_rate": 0.001, | |
"loss": 1.9761, | |
"step": 3550 | |
}, | |
{ | |
"epoch": 2.33, | |
"learning_rate": 0.001, | |
"loss": 1.9865, | |
"step": 3555 | |
}, | |
{ | |
"epoch": 2.33, | |
"learning_rate": 0.001, | |
"loss": 1.7636, | |
"step": 3560 | |
}, | |
{ | |
"epoch": 2.33, | |
"learning_rate": 0.001, | |
"loss": 2.1409, | |
"step": 3565 | |
}, | |
{ | |
"epoch": 2.34, | |
"learning_rate": 0.001, | |
"loss": 2.11, | |
"step": 3570 | |
}, | |
{ | |
"epoch": 2.34, | |
"learning_rate": 0.001, | |
"loss": 1.7359, | |
"step": 3575 | |
}, | |
{ | |
"epoch": 2.34, | |
"learning_rate": 0.001, | |
"loss": 1.9619, | |
"step": 3580 | |
}, | |
{ | |
"epoch": 2.35, | |
"learning_rate": 0.001, | |
"loss": 2.0434, | |
"step": 3585 | |
}, | |
{ | |
"epoch": 2.35, | |
"learning_rate": 0.001, | |
"loss": 1.8588, | |
"step": 3590 | |
}, | |
{ | |
"epoch": 2.35, | |
"learning_rate": 0.001, | |
"loss": 2.0681, | |
"step": 3595 | |
}, | |
{ | |
"epoch": 2.36, | |
"learning_rate": 0.001, | |
"loss": 1.8467, | |
"step": 3600 | |
}, | |
{ | |
"epoch": 2.36, | |
"learning_rate": 0.001, | |
"loss": 1.5267, | |
"step": 3605 | |
}, | |
{ | |
"epoch": 2.36, | |
"learning_rate": 0.001, | |
"loss": 1.9642, | |
"step": 3610 | |
}, | |
{ | |
"epoch": 2.37, | |
"learning_rate": 0.001, | |
"loss": 1.9597, | |
"step": 3615 | |
}, | |
{ | |
"epoch": 2.37, | |
"learning_rate": 0.001, | |
"loss": 1.8687, | |
"step": 3620 | |
}, | |
{ | |
"epoch": 2.37, | |
"learning_rate": 0.001, | |
"loss": 2.0584, | |
"step": 3625 | |
}, | |
{ | |
"epoch": 2.38, | |
"learning_rate": 0.001, | |
"loss": 2.0966, | |
"step": 3630 | |
}, | |
{ | |
"epoch": 2.38, | |
"learning_rate": 0.001, | |
"loss": 2.0021, | |
"step": 3635 | |
}, | |
{ | |
"epoch": 2.38, | |
"learning_rate": 0.001, | |
"loss": 1.8767, | |
"step": 3640 | |
}, | |
{ | |
"epoch": 2.39, | |
"learning_rate": 0.001, | |
"loss": 1.713, | |
"step": 3645 | |
}, | |
{ | |
"epoch": 2.39, | |
"learning_rate": 0.001, | |
"loss": 1.5825, | |
"step": 3650 | |
}, | |
{ | |
"epoch": 2.39, | |
"learning_rate": 0.001, | |
"loss": 1.8617, | |
"step": 3655 | |
}, | |
{ | |
"epoch": 2.4, | |
"learning_rate": 0.001, | |
"loss": 2.0615, | |
"step": 3660 | |
}, | |
{ | |
"epoch": 2.4, | |
"learning_rate": 0.001, | |
"loss": 1.6553, | |
"step": 3665 | |
}, | |
{ | |
"epoch": 2.4, | |
"learning_rate": 0.001, | |
"loss": 1.6166, | |
"step": 3670 | |
}, | |
{ | |
"epoch": 2.41, | |
"learning_rate": 0.001, | |
"loss": 1.6974, | |
"step": 3675 | |
}, | |
{ | |
"epoch": 2.41, | |
"learning_rate": 0.001, | |
"loss": 2.0093, | |
"step": 3680 | |
}, | |
{ | |
"epoch": 2.41, | |
"learning_rate": 0.001, | |
"loss": 1.8497, | |
"step": 3685 | |
}, | |
{ | |
"epoch": 2.41, | |
"learning_rate": 0.001, | |
"loss": 1.7226, | |
"step": 3690 | |
}, | |
{ | |
"epoch": 2.42, | |
"learning_rate": 0.001, | |
"loss": 1.668, | |
"step": 3695 | |
}, | |
{ | |
"epoch": 2.42, | |
"learning_rate": 0.001, | |
"loss": 2.115, | |
"step": 3700 | |
}, | |
{ | |
"epoch": 2.42, | |
"learning_rate": 0.001, | |
"loss": 2.145, | |
"step": 3705 | |
}, | |
{ | |
"epoch": 2.43, | |
"learning_rate": 0.001, | |
"loss": 1.7443, | |
"step": 3710 | |
}, | |
{ | |
"epoch": 2.43, | |
"learning_rate": 0.001, | |
"loss": 1.7406, | |
"step": 3715 | |
}, | |
{ | |
"epoch": 2.43, | |
"learning_rate": 0.001, | |
"loss": 1.8569, | |
"step": 3720 | |
}, | |
{ | |
"epoch": 2.44, | |
"learning_rate": 0.001, | |
"loss": 2.0726, | |
"step": 3725 | |
}, | |
{ | |
"epoch": 2.44, | |
"learning_rate": 0.001, | |
"loss": 2.1467, | |
"step": 3730 | |
}, | |
{ | |
"epoch": 2.44, | |
"learning_rate": 0.001, | |
"loss": 2.019, | |
"step": 3735 | |
}, | |
{ | |
"epoch": 2.45, | |
"learning_rate": 0.001, | |
"loss": 2.1104, | |
"step": 3740 | |
}, | |
{ | |
"epoch": 2.45, | |
"learning_rate": 0.001, | |
"loss": 2.1444, | |
"step": 3745 | |
}, | |
{ | |
"epoch": 2.45, | |
"learning_rate": 0.001, | |
"loss": 2.0757, | |
"step": 3750 | |
}, | |
{ | |
"epoch": 2.46, | |
"learning_rate": 0.001, | |
"loss": 1.7169, | |
"step": 3755 | |
}, | |
{ | |
"epoch": 2.46, | |
"learning_rate": 0.001, | |
"loss": 1.7558, | |
"step": 3760 | |
}, | |
{ | |
"epoch": 2.46, | |
"learning_rate": 0.001, | |
"loss": 1.6213, | |
"step": 3765 | |
}, | |
{ | |
"epoch": 2.47, | |
"learning_rate": 0.001, | |
"loss": 1.8306, | |
"step": 3770 | |
}, | |
{ | |
"epoch": 2.47, | |
"learning_rate": 0.001, | |
"loss": 2.015, | |
"step": 3775 | |
}, | |
{ | |
"epoch": 2.47, | |
"learning_rate": 0.001, | |
"loss": 2.0952, | |
"step": 3780 | |
}, | |
{ | |
"epoch": 2.48, | |
"learning_rate": 0.001, | |
"loss": 1.6549, | |
"step": 3785 | |
}, | |
{ | |
"epoch": 2.48, | |
"learning_rate": 0.001, | |
"loss": 1.8965, | |
"step": 3790 | |
}, | |
{ | |
"epoch": 2.48, | |
"learning_rate": 0.001, | |
"loss": 2.0753, | |
"step": 3795 | |
}, | |
{ | |
"epoch": 2.49, | |
"learning_rate": 0.001, | |
"loss": 2.135, | |
"step": 3800 | |
}, | |
{ | |
"epoch": 2.49, | |
"learning_rate": 0.001, | |
"loss": 1.8428, | |
"step": 3805 | |
}, | |
{ | |
"epoch": 2.49, | |
"learning_rate": 0.001, | |
"loss": 2.1602, | |
"step": 3810 | |
}, | |
{ | |
"epoch": 2.5, | |
"learning_rate": 0.001, | |
"loss": 1.8219, | |
"step": 3815 | |
}, | |
{ | |
"epoch": 2.5, | |
"learning_rate": 0.001, | |
"loss": 1.8266, | |
"step": 3820 | |
}, | |
{ | |
"epoch": 2.5, | |
"learning_rate": 0.001, | |
"loss": 1.839, | |
"step": 3825 | |
}, | |
{ | |
"epoch": 2.51, | |
"learning_rate": 0.001, | |
"loss": 1.8746, | |
"step": 3830 | |
}, | |
{ | |
"epoch": 2.51, | |
"learning_rate": 0.001, | |
"loss": 2.1668, | |
"step": 3835 | |
}, | |
{ | |
"epoch": 2.51, | |
"learning_rate": 0.001, | |
"loss": 2.1224, | |
"step": 3840 | |
}, | |
{ | |
"epoch": 2.52, | |
"learning_rate": 0.001, | |
"loss": 2.0099, | |
"step": 3845 | |
}, | |
{ | |
"epoch": 2.52, | |
"learning_rate": 0.001, | |
"loss": 2.2438, | |
"step": 3850 | |
}, | |
{ | |
"epoch": 2.52, | |
"learning_rate": 0.001, | |
"loss": 1.8494, | |
"step": 3855 | |
}, | |
{ | |
"epoch": 2.53, | |
"learning_rate": 0.001, | |
"loss": 1.8063, | |
"step": 3860 | |
}, | |
{ | |
"epoch": 2.53, | |
"learning_rate": 0.001, | |
"loss": 1.7068, | |
"step": 3865 | |
}, | |
{ | |
"epoch": 2.53, | |
"learning_rate": 0.001, | |
"loss": 1.8158, | |
"step": 3870 | |
}, | |
{ | |
"epoch": 2.54, | |
"learning_rate": 0.001, | |
"loss": 1.6616, | |
"step": 3875 | |
}, | |
{ | |
"epoch": 2.54, | |
"learning_rate": 0.001, | |
"loss": 1.9542, | |
"step": 3880 | |
}, | |
{ | |
"epoch": 2.54, | |
"learning_rate": 0.001, | |
"loss": 1.9595, | |
"step": 3885 | |
}, | |
{ | |
"epoch": 2.55, | |
"learning_rate": 0.001, | |
"loss": 2.1507, | |
"step": 3890 | |
}, | |
{ | |
"epoch": 2.55, | |
"learning_rate": 0.001, | |
"loss": 1.883, | |
"step": 3895 | |
}, | |
{ | |
"epoch": 2.55, | |
"learning_rate": 0.001, | |
"loss": 1.6634, | |
"step": 3900 | |
}, | |
{ | |
"epoch": 2.56, | |
"learning_rate": 0.001, | |
"loss": 1.7621, | |
"step": 3905 | |
}, | |
{ | |
"epoch": 2.56, | |
"learning_rate": 0.001, | |
"loss": 1.9474, | |
"step": 3910 | |
}, | |
{ | |
"epoch": 2.56, | |
"learning_rate": 0.001, | |
"loss": 2.094, | |
"step": 3915 | |
}, | |
{ | |
"epoch": 2.57, | |
"learning_rate": 0.001, | |
"loss": 2.0111, | |
"step": 3920 | |
}, | |
{ | |
"epoch": 2.57, | |
"learning_rate": 0.001, | |
"loss": 1.872, | |
"step": 3925 | |
}, | |
{ | |
"epoch": 2.57, | |
"learning_rate": 0.001, | |
"loss": 1.604, | |
"step": 3930 | |
}, | |
{ | |
"epoch": 2.58, | |
"learning_rate": 0.001, | |
"loss": 1.8784, | |
"step": 3935 | |
}, | |
{ | |
"epoch": 2.58, | |
"learning_rate": 0.001, | |
"loss": 1.9175, | |
"step": 3940 | |
}, | |
{ | |
"epoch": 2.58, | |
"learning_rate": 0.001, | |
"loss": 1.8607, | |
"step": 3945 | |
}, | |
{ | |
"epoch": 2.59, | |
"learning_rate": 0.001, | |
"loss": 1.9768, | |
"step": 3950 | |
}, | |
{ | |
"epoch": 2.59, | |
"learning_rate": 0.001, | |
"loss": 1.9017, | |
"step": 3955 | |
}, | |
{ | |
"epoch": 2.59, | |
"learning_rate": 0.001, | |
"loss": 1.6314, | |
"step": 3960 | |
}, | |
{ | |
"epoch": 2.59, | |
"learning_rate": 0.001, | |
"loss": 1.8859, | |
"step": 3965 | |
}, | |
{ | |
"epoch": 2.6, | |
"learning_rate": 0.001, | |
"loss": 2.0082, | |
"step": 3970 | |
}, | |
{ | |
"epoch": 2.6, | |
"learning_rate": 0.001, | |
"loss": 1.7938, | |
"step": 3975 | |
}, | |
{ | |
"epoch": 2.6, | |
"learning_rate": 0.001, | |
"loss": 2.0765, | |
"step": 3980 | |
}, | |
{ | |
"epoch": 2.61, | |
"learning_rate": 0.001, | |
"loss": 2.0694, | |
"step": 3985 | |
}, | |
{ | |
"epoch": 2.61, | |
"learning_rate": 0.001, | |
"loss": 1.8571, | |
"step": 3990 | |
}, | |
{ | |
"epoch": 2.61, | |
"learning_rate": 0.001, | |
"loss": 1.6691, | |
"step": 3995 | |
}, | |
{ | |
"epoch": 2.62, | |
"learning_rate": 0.001, | |
"loss": 2.1105, | |
"step": 4000 | |
}, | |
{ | |
"epoch": 2.62, | |
"learning_rate": 0.001, | |
"loss": 1.9738, | |
"step": 4005 | |
}, | |
{ | |
"epoch": 2.62, | |
"learning_rate": 0.001, | |
"loss": 1.8698, | |
"step": 4010 | |
}, | |
{ | |
"epoch": 2.63, | |
"learning_rate": 0.001, | |
"loss": 2.0725, | |
"step": 4015 | |
}, | |
{ | |
"epoch": 2.63, | |
"learning_rate": 0.001, | |
"loss": 1.8761, | |
"step": 4020 | |
}, | |
{ | |
"epoch": 2.63, | |
"learning_rate": 0.001, | |
"loss": 2.0235, | |
"step": 4025 | |
}, | |
{ | |
"epoch": 2.64, | |
"learning_rate": 0.001, | |
"loss": 2.2568, | |
"step": 4030 | |
}, | |
{ | |
"epoch": 2.64, | |
"learning_rate": 0.001, | |
"loss": 1.901, | |
"step": 4035 | |
}, | |
{ | |
"epoch": 2.64, | |
"learning_rate": 0.001, | |
"loss": 1.7577, | |
"step": 4040 | |
}, | |
{ | |
"epoch": 2.65, | |
"learning_rate": 0.001, | |
"loss": 1.9142, | |
"step": 4045 | |
}, | |
{ | |
"epoch": 2.65, | |
"learning_rate": 0.001, | |
"loss": 2.134, | |
"step": 4050 | |
}, | |
{ | |
"epoch": 2.65, | |
"learning_rate": 0.001, | |
"loss": 2.0052, | |
"step": 4055 | |
}, | |
{ | |
"epoch": 2.66, | |
"learning_rate": 0.001, | |
"loss": 2.0817, | |
"step": 4060 | |
}, | |
{ | |
"epoch": 2.66, | |
"learning_rate": 0.001, | |
"loss": 2.1456, | |
"step": 4065 | |
}, | |
{ | |
"epoch": 2.66, | |
"learning_rate": 0.001, | |
"loss": 2.0291, | |
"step": 4070 | |
}, | |
{ | |
"epoch": 2.67, | |
"learning_rate": 0.001, | |
"loss": 1.9683, | |
"step": 4075 | |
}, | |
{ | |
"epoch": 2.67, | |
"learning_rate": 0.001, | |
"loss": 1.6709, | |
"step": 4080 | |
}, | |
{ | |
"epoch": 2.67, | |
"learning_rate": 0.001, | |
"loss": 2.0227, | |
"step": 4085 | |
}, | |
{ | |
"epoch": 2.68, | |
"learning_rate": 0.001, | |
"loss": 2.0289, | |
"step": 4090 | |
}, | |
{ | |
"epoch": 2.68, | |
"learning_rate": 0.001, | |
"loss": 1.8361, | |
"step": 4095 | |
}, | |
{ | |
"epoch": 2.68, | |
"learning_rate": 0.001, | |
"loss": 1.8289, | |
"step": 4100 | |
}, | |
{ | |
"epoch": 2.69, | |
"learning_rate": 0.001, | |
"loss": 2.0069, | |
"step": 4105 | |
}, | |
{ | |
"epoch": 2.69, | |
"learning_rate": 0.001, | |
"loss": 2.1599, | |
"step": 4110 | |
}, | |
{ | |
"epoch": 2.69, | |
"learning_rate": 0.001, | |
"loss": 2.1124, | |
"step": 4115 | |
}, | |
{ | |
"epoch": 2.7, | |
"learning_rate": 0.001, | |
"loss": 1.8739, | |
"step": 4120 | |
}, | |
{ | |
"epoch": 2.7, | |
"learning_rate": 0.001, | |
"loss": 1.7629, | |
"step": 4125 | |
}, | |
{ | |
"epoch": 2.7, | |
"learning_rate": 0.001, | |
"loss": 1.9235, | |
"step": 4130 | |
}, | |
{ | |
"epoch": 2.71, | |
"learning_rate": 0.001, | |
"loss": 2.0229, | |
"step": 4135 | |
}, | |
{ | |
"epoch": 2.71, | |
"learning_rate": 0.001, | |
"loss": 1.9561, | |
"step": 4140 | |
}, | |
{ | |
"epoch": 2.71, | |
"learning_rate": 0.001, | |
"loss": 1.6533, | |
"step": 4145 | |
}, | |
{ | |
"epoch": 2.72, | |
"learning_rate": 0.001, | |
"loss": 2.1504, | |
"step": 4150 | |
}, | |
{ | |
"epoch": 2.72, | |
"learning_rate": 0.001, | |
"loss": 2.061, | |
"step": 4155 | |
}, | |
{ | |
"epoch": 2.72, | |
"learning_rate": 0.001, | |
"loss": 1.9832, | |
"step": 4160 | |
}, | |
{ | |
"epoch": 2.73, | |
"learning_rate": 0.001, | |
"loss": 1.9235, | |
"step": 4165 | |
}, | |
{ | |
"epoch": 2.73, | |
"learning_rate": 0.001, | |
"loss": 2.0711, | |
"step": 4170 | |
}, | |
{ | |
"epoch": 2.73, | |
"learning_rate": 0.001, | |
"loss": 1.8579, | |
"step": 4175 | |
}, | |
{ | |
"epoch": 2.74, | |
"learning_rate": 0.001, | |
"loss": 2.2342, | |
"step": 4180 | |
}, | |
{ | |
"epoch": 2.74, | |
"learning_rate": 0.001, | |
"loss": 2.2196, | |
"step": 4185 | |
}, | |
{ | |
"epoch": 2.74, | |
"learning_rate": 0.001, | |
"loss": 1.7639, | |
"step": 4190 | |
}, | |
{ | |
"epoch": 2.75, | |
"learning_rate": 0.001, | |
"loss": 2.1289, | |
"step": 4195 | |
}, | |
{ | |
"epoch": 2.75, | |
"learning_rate": 0.001, | |
"loss": 2.0845, | |
"step": 4200 | |
}, | |
{ | |
"epoch": 2.75, | |
"learning_rate": 0.001, | |
"loss": 2.1311, | |
"step": 4205 | |
}, | |
{ | |
"epoch": 2.76, | |
"learning_rate": 0.001, | |
"loss": 1.5461, | |
"step": 4210 | |
}, | |
{ | |
"epoch": 2.76, | |
"learning_rate": 0.001, | |
"loss": 2.0942, | |
"step": 4215 | |
}, | |
{ | |
"epoch": 2.76, | |
"learning_rate": 0.001, | |
"loss": 2.0854, | |
"step": 4220 | |
}, | |
{ | |
"epoch": 2.77, | |
"learning_rate": 0.001, | |
"loss": 2.0704, | |
"step": 4225 | |
}, | |
{ | |
"epoch": 2.77, | |
"learning_rate": 0.001, | |
"loss": 1.9709, | |
"step": 4230 | |
}, | |
{ | |
"epoch": 2.77, | |
"learning_rate": 0.001, | |
"loss": 1.82, | |
"step": 4235 | |
}, | |
{ | |
"epoch": 2.77, | |
"learning_rate": 0.001, | |
"loss": 2.0901, | |
"step": 4240 | |
}, | |
{ | |
"epoch": 2.78, | |
"learning_rate": 0.001, | |
"loss": 2.0107, | |
"step": 4245 | |
}, | |
{ | |
"epoch": 2.78, | |
"learning_rate": 0.001, | |
"loss": 2.0632, | |
"step": 4250 | |
}, | |
{ | |
"epoch": 2.78, | |
"learning_rate": 0.001, | |
"loss": 1.9023, | |
"step": 4255 | |
}, | |
{ | |
"epoch": 2.79, | |
"learning_rate": 0.001, | |
"loss": 2.0827, | |
"step": 4260 | |
}, | |
{ | |
"epoch": 2.79, | |
"learning_rate": 0.001, | |
"loss": 2.0453, | |
"step": 4265 | |
}, | |
{ | |
"epoch": 2.79, | |
"learning_rate": 0.001, | |
"loss": 2.0852, | |
"step": 4270 | |
}, | |
{ | |
"epoch": 2.8, | |
"learning_rate": 0.001, | |
"loss": 2.2534, | |
"step": 4275 | |
}, | |
{ | |
"epoch": 2.8, | |
"learning_rate": 0.001, | |
"loss": 1.8449, | |
"step": 4280 | |
}, | |
{ | |
"epoch": 2.8, | |
"learning_rate": 0.001, | |
"loss": 1.8617, | |
"step": 4285 | |
}, | |
{ | |
"epoch": 2.81, | |
"learning_rate": 0.001, | |
"loss": 2.1616, | |
"step": 4290 | |
}, | |
{ | |
"epoch": 2.81, | |
"learning_rate": 0.001, | |
"loss": 1.99, | |
"step": 4295 | |
}, | |
{ | |
"epoch": 2.81, | |
"learning_rate": 0.001, | |
"loss": 1.9886, | |
"step": 4300 | |
}, | |
{ | |
"epoch": 2.82, | |
"learning_rate": 0.001, | |
"loss": 2.067, | |
"step": 4305 | |
}, | |
{ | |
"epoch": 2.82, | |
"learning_rate": 0.001, | |
"loss": 1.8923, | |
"step": 4310 | |
}, | |
{ | |
"epoch": 2.82, | |
"learning_rate": 0.001, | |
"loss": 2.2929, | |
"step": 4315 | |
}, | |
{ | |
"epoch": 2.83, | |
"learning_rate": 0.001, | |
"loss": 2.2061, | |
"step": 4320 | |
}, | |
{ | |
"epoch": 2.83, | |
"learning_rate": 0.001, | |
"loss": 2.3522, | |
"step": 4325 | |
}, | |
{ | |
"epoch": 2.83, | |
"learning_rate": 0.001, | |
"loss": 1.9623, | |
"step": 4330 | |
}, | |
{ | |
"epoch": 2.84, | |
"learning_rate": 0.001, | |
"loss": 1.7255, | |
"step": 4335 | |
}, | |
{ | |
"epoch": 2.84, | |
"learning_rate": 0.001, | |
"loss": 2.2199, | |
"step": 4340 | |
}, | |
{ | |
"epoch": 2.84, | |
"learning_rate": 0.001, | |
"loss": 1.9392, | |
"step": 4345 | |
}, | |
{ | |
"epoch": 2.85, | |
"learning_rate": 0.001, | |
"loss": 2.0237, | |
"step": 4350 | |
}, | |
{ | |
"epoch": 2.85, | |
"learning_rate": 0.001, | |
"loss": 1.9724, | |
"step": 4355 | |
}, | |
{ | |
"epoch": 2.85, | |
"learning_rate": 0.001, | |
"loss": 2.1273, | |
"step": 4360 | |
}, | |
{ | |
"epoch": 2.86, | |
"learning_rate": 0.001, | |
"loss": 2.0275, | |
"step": 4365 | |
}, | |
{ | |
"epoch": 2.86, | |
"learning_rate": 0.001, | |
"loss": 2.1322, | |
"step": 4370 | |
}, | |
{ | |
"epoch": 2.86, | |
"learning_rate": 0.001, | |
"loss": 2.1448, | |
"step": 4375 | |
}, | |
{ | |
"epoch": 2.87, | |
"learning_rate": 0.001, | |
"loss": 2.1961, | |
"step": 4380 | |
}, | |
{ | |
"epoch": 2.87, | |
"learning_rate": 0.001, | |
"loss": 1.9851, | |
"step": 4385 | |
}, | |
{ | |
"epoch": 2.87, | |
"learning_rate": 0.001, | |
"loss": 2.2935, | |
"step": 4390 | |
}, | |
{ | |
"epoch": 2.88, | |
"learning_rate": 0.001, | |
"loss": 1.9986, | |
"step": 4395 | |
}, | |
{ | |
"epoch": 2.88, | |
"learning_rate": 0.001, | |
"loss": 2.1898, | |
"step": 4400 | |
}, | |
{ | |
"epoch": 2.88, | |
"learning_rate": 0.001, | |
"loss": 1.9743, | |
"step": 4405 | |
}, | |
{ | |
"epoch": 2.89, | |
"learning_rate": 0.001, | |
"loss": 1.9389, | |
"step": 4410 | |
}, | |
{ | |
"epoch": 2.89, | |
"learning_rate": 0.001, | |
"loss": 1.9267, | |
"step": 4415 | |
}, | |
{ | |
"epoch": 2.89, | |
"learning_rate": 0.001, | |
"loss": 2.0713, | |
"step": 4420 | |
}, | |
{ | |
"epoch": 2.9, | |
"learning_rate": 0.001, | |
"loss": 2.2993, | |
"step": 4425 | |
}, | |
{ | |
"epoch": 2.9, | |
"learning_rate": 0.001, | |
"loss": 1.6093, | |
"step": 4430 | |
}, | |
{ | |
"epoch": 2.9, | |
"learning_rate": 0.001, | |
"loss": 2.2791, | |
"step": 4435 | |
}, | |
{ | |
"epoch": 2.91, | |
"learning_rate": 0.001, | |
"loss": 1.8745, | |
"step": 4440 | |
}, | |
{ | |
"epoch": 2.91, | |
"learning_rate": 0.001, | |
"loss": 2.1259, | |
"step": 4445 | |
}, | |
{ | |
"epoch": 2.91, | |
"learning_rate": 0.001, | |
"loss": 2.1719, | |
"step": 4450 | |
}, | |
{ | |
"epoch": 2.92, | |
"learning_rate": 0.001, | |
"loss": 1.9552, | |
"step": 4455 | |
}, | |
{ | |
"epoch": 2.92, | |
"learning_rate": 0.001, | |
"loss": 1.7201, | |
"step": 4460 | |
}, | |
{ | |
"epoch": 2.92, | |
"learning_rate": 0.001, | |
"loss": 1.6175, | |
"step": 4465 | |
}, | |
{ | |
"epoch": 2.93, | |
"learning_rate": 0.001, | |
"loss": 2.0599, | |
"step": 4470 | |
}, | |
{ | |
"epoch": 2.93, | |
"learning_rate": 0.001, | |
"loss": 2.1124, | |
"step": 4475 | |
}, | |
{ | |
"epoch": 2.93, | |
"learning_rate": 0.001, | |
"loss": 2.2331, | |
"step": 4480 | |
}, | |
{ | |
"epoch": 2.94, | |
"learning_rate": 0.001, | |
"loss": 2.1158, | |
"step": 4485 | |
}, | |
{ | |
"epoch": 2.94, | |
"learning_rate": 0.001, | |
"loss": 2.1609, | |
"step": 4490 | |
}, | |
{ | |
"epoch": 2.94, | |
"learning_rate": 0.001, | |
"loss": 2.0898, | |
"step": 4495 | |
}, | |
{ | |
"epoch": 2.95, | |
"learning_rate": 0.001, | |
"loss": 1.7871, | |
"step": 4500 | |
}, | |
{ | |
"epoch": 2.95, | |
"learning_rate": 0.001, | |
"loss": 1.9766, | |
"step": 4505 | |
}, | |
{ | |
"epoch": 2.95, | |
"learning_rate": 0.001, | |
"loss": 1.9859, | |
"step": 4510 | |
}, | |
{ | |
"epoch": 2.95, | |
"learning_rate": 0.001, | |
"loss": 1.8897, | |
"step": 4515 | |
}, | |
{ | |
"epoch": 2.96, | |
"learning_rate": 0.001, | |
"loss": 2.1179, | |
"step": 4520 | |
}, | |
{ | |
"epoch": 2.96, | |
"learning_rate": 0.001, | |
"loss": 1.8307, | |
"step": 4525 | |
}, | |
{ | |
"epoch": 2.96, | |
"learning_rate": 0.001, | |
"loss": 2.0669, | |
"step": 4530 | |
}, | |
{ | |
"epoch": 2.97, | |
"learning_rate": 0.001, | |
"loss": 2.2398, | |
"step": 4535 | |
}, | |
{ | |
"epoch": 2.97, | |
"learning_rate": 0.001, | |
"loss": 2.1574, | |
"step": 4540 | |
}, | |
{ | |
"epoch": 2.97, | |
"learning_rate": 0.001, | |
"loss": 2.0868, | |
"step": 4545 | |
}, | |
{ | |
"epoch": 2.98, | |
"learning_rate": 0.001, | |
"loss": 2.0039, | |
"step": 4550 | |
}, | |
{ | |
"epoch": 2.98, | |
"learning_rate": 0.001, | |
"loss": 2.0899, | |
"step": 4555 | |
}, | |
{ | |
"epoch": 2.98, | |
"learning_rate": 0.001, | |
"loss": 2.0335, | |
"step": 4560 | |
}, | |
{ | |
"epoch": 2.99, | |
"learning_rate": 0.001, | |
"loss": 1.8509, | |
"step": 4565 | |
}, | |
{ | |
"epoch": 2.99, | |
"learning_rate": 0.001, | |
"loss": 2.2191, | |
"step": 4570 | |
}, | |
{ | |
"epoch": 2.99, | |
"learning_rate": 0.001, | |
"loss": 2.158, | |
"step": 4575 | |
}, | |
{ | |
"epoch": 3.0, | |
"learning_rate": 0.001, | |
"loss": 1.8958, | |
"step": 4580 | |
}, | |
{ | |
"epoch": 3.0, | |
"learning_rate": 0.001, | |
"loss": 1.9311, | |
"step": 4585 | |
}, | |
{ | |
"epoch": 3.0, | |
"learning_rate": 0.001, | |
"loss": 1.6468, | |
"step": 4590 | |
}, | |
{ | |
"epoch": 3.01, | |
"learning_rate": 0.001, | |
"loss": 1.4211, | |
"step": 4595 | |
}, | |
{ | |
"epoch": 3.01, | |
"learning_rate": 0.001, | |
"loss": 1.5942, | |
"step": 4600 | |
}, | |
{ | |
"epoch": 3.01, | |
"learning_rate": 0.001, | |
"loss": 1.618, | |
"step": 4605 | |
}, | |
{ | |
"epoch": 3.02, | |
"learning_rate": 0.001, | |
"loss": 1.4268, | |
"step": 4610 | |
}, | |
{ | |
"epoch": 3.02, | |
"learning_rate": 0.001, | |
"loss": 1.8181, | |
"step": 4615 | |
}, | |
{ | |
"epoch": 3.02, | |
"learning_rate": 0.001, | |
"loss": 1.7277, | |
"step": 4620 | |
}, | |
{ | |
"epoch": 3.03, | |
"learning_rate": 0.001, | |
"loss": 1.6623, | |
"step": 4625 | |
}, | |
{ | |
"epoch": 3.03, | |
"learning_rate": 0.001, | |
"loss": 1.7616, | |
"step": 4630 | |
}, | |
{ | |
"epoch": 3.03, | |
"learning_rate": 0.001, | |
"loss": 1.4593, | |
"step": 4635 | |
}, | |
{ | |
"epoch": 3.04, | |
"learning_rate": 0.001, | |
"loss": 1.555, | |
"step": 4640 | |
}, | |
{ | |
"epoch": 3.04, | |
"learning_rate": 0.001, | |
"loss": 1.4423, | |
"step": 4645 | |
}, | |
{ | |
"epoch": 3.04, | |
"learning_rate": 0.001, | |
"loss": 1.7845, | |
"step": 4650 | |
}, | |
{ | |
"epoch": 3.05, | |
"learning_rate": 0.001, | |
"loss": 1.6926, | |
"step": 4655 | |
}, | |
{ | |
"epoch": 3.05, | |
"learning_rate": 0.001, | |
"loss": 1.5921, | |
"step": 4660 | |
}, | |
{ | |
"epoch": 3.05, | |
"learning_rate": 0.001, | |
"loss": 1.4306, | |
"step": 4665 | |
}, | |
{ | |
"epoch": 3.06, | |
"learning_rate": 0.001, | |
"loss": 1.6041, | |
"step": 4670 | |
}, | |
{ | |
"epoch": 3.06, | |
"learning_rate": 0.001, | |
"loss": 1.9242, | |
"step": 4675 | |
}, | |
{ | |
"epoch": 3.06, | |
"learning_rate": 0.001, | |
"loss": 1.7631, | |
"step": 4680 | |
}, | |
{ | |
"epoch": 3.07, | |
"learning_rate": 0.001, | |
"loss": 1.8995, | |
"step": 4685 | |
}, | |
{ | |
"epoch": 3.07, | |
"learning_rate": 0.001, | |
"loss": 1.5928, | |
"step": 4690 | |
}, | |
{ | |
"epoch": 3.07, | |
"learning_rate": 0.001, | |
"loss": 1.6116, | |
"step": 4695 | |
}, | |
{ | |
"epoch": 3.08, | |
"learning_rate": 0.001, | |
"loss": 2.1657, | |
"step": 4700 | |
}, | |
{ | |
"epoch": 3.08, | |
"learning_rate": 0.001, | |
"loss": 1.7712, | |
"step": 4705 | |
}, | |
{ | |
"epoch": 3.08, | |
"learning_rate": 0.001, | |
"loss": 1.8519, | |
"step": 4710 | |
}, | |
{ | |
"epoch": 3.09, | |
"learning_rate": 0.001, | |
"loss": 1.533, | |
"step": 4715 | |
}, | |
{ | |
"epoch": 3.09, | |
"learning_rate": 0.001, | |
"loss": 1.7871, | |
"step": 4720 | |
}, | |
{ | |
"epoch": 3.09, | |
"learning_rate": 0.001, | |
"loss": 1.7858, | |
"step": 4725 | |
}, | |
{ | |
"epoch": 3.1, | |
"learning_rate": 0.001, | |
"loss": 1.6107, | |
"step": 4730 | |
}, | |
{ | |
"epoch": 3.1, | |
"learning_rate": 0.001, | |
"loss": 1.782, | |
"step": 4735 | |
}, | |
{ | |
"epoch": 3.1, | |
"learning_rate": 0.001, | |
"loss": 1.5549, | |
"step": 4740 | |
}, | |
{ | |
"epoch": 3.11, | |
"learning_rate": 0.001, | |
"loss": 1.6906, | |
"step": 4745 | |
}, | |
{ | |
"epoch": 3.11, | |
"learning_rate": 0.001, | |
"loss": 1.8096, | |
"step": 4750 | |
}, | |
{ | |
"epoch": 3.11, | |
"learning_rate": 0.001, | |
"loss": 1.8374, | |
"step": 4755 | |
}, | |
{ | |
"epoch": 3.12, | |
"learning_rate": 0.001, | |
"loss": 1.8821, | |
"step": 4760 | |
}, | |
{ | |
"epoch": 3.12, | |
"learning_rate": 0.001, | |
"loss": 1.7064, | |
"step": 4765 | |
}, | |
{ | |
"epoch": 3.12, | |
"learning_rate": 0.001, | |
"loss": 1.9045, | |
"step": 4770 | |
}, | |
{ | |
"epoch": 3.12, | |
"learning_rate": 0.001, | |
"loss": 1.5462, | |
"step": 4775 | |
}, | |
{ | |
"epoch": 3.13, | |
"learning_rate": 0.001, | |
"loss": 1.857, | |
"step": 4780 | |
}, | |
{ | |
"epoch": 3.13, | |
"learning_rate": 0.001, | |
"loss": 1.7946, | |
"step": 4785 | |
}, | |
{ | |
"epoch": 3.13, | |
"learning_rate": 0.001, | |
"loss": 1.5898, | |
"step": 4790 | |
}, | |
{ | |
"epoch": 3.14, | |
"learning_rate": 0.001, | |
"loss": 1.7049, | |
"step": 4795 | |
}, | |
{ | |
"epoch": 3.14, | |
"learning_rate": 0.001, | |
"loss": 1.9023, | |
"step": 4800 | |
}, | |
{ | |
"epoch": 3.14, | |
"learning_rate": 0.001, | |
"loss": 1.6363, | |
"step": 4805 | |
}, | |
{ | |
"epoch": 3.15, | |
"learning_rate": 0.001, | |
"loss": 1.6567, | |
"step": 4810 | |
}, | |
{ | |
"epoch": 3.15, | |
"learning_rate": 0.001, | |
"loss": 1.978, | |
"step": 4815 | |
}, | |
{ | |
"epoch": 3.15, | |
"learning_rate": 0.001, | |
"loss": 1.6774, | |
"step": 4820 | |
}, | |
{ | |
"epoch": 3.16, | |
"learning_rate": 0.001, | |
"loss": 1.832, | |
"step": 4825 | |
}, | |
{ | |
"epoch": 3.16, | |
"learning_rate": 0.001, | |
"loss": 1.8501, | |
"step": 4830 | |
}, | |
{ | |
"epoch": 3.16, | |
"learning_rate": 0.001, | |
"loss": 1.6633, | |
"step": 4835 | |
}, | |
{ | |
"epoch": 3.17, | |
"learning_rate": 0.001, | |
"loss": 1.455, | |
"step": 4840 | |
}, | |
{ | |
"epoch": 3.17, | |
"learning_rate": 0.001, | |
"loss": 1.7763, | |
"step": 4845 | |
}, | |
{ | |
"epoch": 3.17, | |
"learning_rate": 0.001, | |
"loss": 1.7301, | |
"step": 4850 | |
}, | |
{ | |
"epoch": 3.18, | |
"learning_rate": 0.001, | |
"loss": 1.7956, | |
"step": 4855 | |
}, | |
{ | |
"epoch": 3.18, | |
"learning_rate": 0.001, | |
"loss": 1.7352, | |
"step": 4860 | |
}, | |
{ | |
"epoch": 3.18, | |
"learning_rate": 0.001, | |
"loss": 1.6547, | |
"step": 4865 | |
}, | |
{ | |
"epoch": 3.19, | |
"learning_rate": 0.001, | |
"loss": 1.9271, | |
"step": 4870 | |
}, | |
{ | |
"epoch": 3.19, | |
"learning_rate": 0.001, | |
"loss": 1.68, | |
"step": 4875 | |
}, | |
{ | |
"epoch": 3.19, | |
"learning_rate": 0.001, | |
"loss": 1.9004, | |
"step": 4880 | |
}, | |
{ | |
"epoch": 3.2, | |
"learning_rate": 0.001, | |
"loss": 1.942, | |
"step": 4885 | |
}, | |
{ | |
"epoch": 3.2, | |
"learning_rate": 0.001, | |
"loss": 1.6161, | |
"step": 4890 | |
}, | |
{ | |
"epoch": 3.2, | |
"learning_rate": 0.001, | |
"loss": 1.6055, | |
"step": 4895 | |
}, | |
{ | |
"epoch": 3.21, | |
"learning_rate": 0.001, | |
"loss": 1.7992, | |
"step": 4900 | |
}, | |
{ | |
"epoch": 3.21, | |
"learning_rate": 0.001, | |
"loss": 1.7103, | |
"step": 4905 | |
}, | |
{ | |
"epoch": 3.21, | |
"learning_rate": 0.001, | |
"loss": 1.7935, | |
"step": 4910 | |
}, | |
{ | |
"epoch": 3.22, | |
"learning_rate": 0.001, | |
"loss": 1.8277, | |
"step": 4915 | |
}, | |
{ | |
"epoch": 3.22, | |
"learning_rate": 0.001, | |
"loss": 1.6741, | |
"step": 4920 | |
}, | |
{ | |
"epoch": 3.22, | |
"learning_rate": 0.001, | |
"loss": 1.9507, | |
"step": 4925 | |
}, | |
{ | |
"epoch": 3.23, | |
"learning_rate": 0.001, | |
"loss": 1.7606, | |
"step": 4930 | |
}, | |
{ | |
"epoch": 3.23, | |
"learning_rate": 0.001, | |
"loss": 1.7644, | |
"step": 4935 | |
}, | |
{ | |
"epoch": 3.23, | |
"learning_rate": 0.001, | |
"loss": 1.8093, | |
"step": 4940 | |
}, | |
{ | |
"epoch": 3.24, | |
"learning_rate": 0.001, | |
"loss": 1.9758, | |
"step": 4945 | |
}, | |
{ | |
"epoch": 3.24, | |
"learning_rate": 0.001, | |
"loss": 1.8184, | |
"step": 4950 | |
}, | |
{ | |
"epoch": 3.24, | |
"learning_rate": 0.001, | |
"loss": 1.807, | |
"step": 4955 | |
}, | |
{ | |
"epoch": 3.25, | |
"learning_rate": 0.001, | |
"loss": 1.6004, | |
"step": 4960 | |
}, | |
{ | |
"epoch": 3.25, | |
"learning_rate": 0.001, | |
"loss": 1.8949, | |
"step": 4965 | |
}, | |
{ | |
"epoch": 3.25, | |
"learning_rate": 0.001, | |
"loss": 1.3879, | |
"step": 4970 | |
}, | |
{ | |
"epoch": 3.26, | |
"learning_rate": 0.001, | |
"loss": 1.8248, | |
"step": 4975 | |
}, | |
{ | |
"epoch": 3.26, | |
"learning_rate": 0.001, | |
"loss": 1.8331, | |
"step": 4980 | |
}, | |
{ | |
"epoch": 3.26, | |
"learning_rate": 0.001, | |
"loss": 1.8797, | |
"step": 4985 | |
}, | |
{ | |
"epoch": 3.27, | |
"learning_rate": 0.001, | |
"loss": 1.6555, | |
"step": 4990 | |
}, | |
{ | |
"epoch": 3.27, | |
"learning_rate": 0.001, | |
"loss": 1.45, | |
"step": 4995 | |
}, | |
{ | |
"epoch": 3.27, | |
"learning_rate": 0.001, | |
"loss": 1.8153, | |
"step": 5000 | |
} | |
], | |
"logging_steps": 5, | |
"max_steps": 7640, | |
"num_train_epochs": 5, | |
"save_steps": 1000, | |
"total_flos": 583925760000000.0, | |
"trial_name": null, | |
"trial_params": null | |
} | |