{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.29535224801497, "global_step": 3201, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.4595617233228861e-05, "loss": 5.7174, "step": 5 }, { "epoch": 0.0, "learning_rate": 2.0881607426192572e-05, "loss": 4.073, "step": 10 }, { "epoch": 0.01, "learning_rate": 2.4558675968977286e-05, "loss": 3.8184, "step": 15 }, { "epoch": 0.01, "learning_rate": 2.7167597619156276e-05, "loss": 3.485, "step": 20 }, { "epoch": 0.01, "learning_rate": 2.9191234466457723e-05, "loss": 3.4225, "step": 25 }, { "epoch": 0.01, "learning_rate": 3.0844666161941e-05, "loss": 3.4162, "step": 30 }, { "epoch": 0.01, "learning_rate": 3.2242622741451356e-05, "loss": 3.1498, "step": 35 }, { "epoch": 0.02, "learning_rate": 3.345358781211998e-05, "loss": 3.1313, "step": 40 }, { "epoch": 0.02, "learning_rate": 3.4521734704725705e-05, "loss": 3.1651, "step": 45 }, { "epoch": 0.02, "learning_rate": 3.547722465942143e-05, "loss": 3.0759, "step": 50 }, { "epoch": 0.02, "learning_rate": 3.6341570461211475e-05, "loss": 3.2154, "step": 55 }, { "epoch": 0.02, "learning_rate": 3.71306563549047e-05, "loss": 3.0505, "step": 60 }, { "epoch": 0.03, "learning_rate": 3.785654501111715e-05, "loss": 3.0904, "step": 65 }, { "epoch": 0.03, "learning_rate": 3.8528612934415064e-05, "loss": 3.1834, "step": 70 }, { "epoch": 0.03, "learning_rate": 3.9154293202206144e-05, "loss": 3.1904, "step": 75 }, { "epoch": 0.03, "learning_rate": 3.973957800508369e-05, "loss": 3.1185, "step": 80 }, { "epoch": 0.03, "learning_rate": 4.028936856743207e-05, "loss": 2.8454, "step": 85 }, { "epoch": 0.04, "learning_rate": 4.080772489768941e-05, "loss": 3.1069, "step": 90 }, { "epoch": 0.04, "learning_rate": 4.1298047923155946e-05, "loss": 3.1566, "step": 95 }, { "epoch": 0.04, "learning_rate": 4.1763214852385144e-05, "loss": 2.9029, "step": 100 }, { "epoch": 0.04, "learning_rate": 4.220568147719977e-05, "loss": 3.0556, "step": 105 }, { "epoch": 0.04, "learning_rate": 4.262756065417519e-05, "loss": 3.1721, "step": 110 }, { "epoch": 0.05, "learning_rate": 4.3030683326266976e-05, "loss": 2.9709, "step": 115 }, { "epoch": 0.05, "learning_rate": 4.3416646547868404e-05, "loss": 2.9186, "step": 120 }, { "epoch": 0.05, "learning_rate": 4.378685169968659e-05, "loss": 3.1484, "step": 125 }, { "epoch": 0.05, "learning_rate": 4.414253520408086e-05, "loss": 2.9578, "step": 130 }, { "epoch": 0.05, "learning_rate": 4.4484793440474126e-05, "loss": 3.0733, "step": 135 }, { "epoch": 0.06, "learning_rate": 4.481460312737877e-05, "loss": 3.0312, "step": 140 }, { "epoch": 0.06, "learning_rate": 4.513283812620485e-05, "loss": 2.9714, "step": 145 }, { "epoch": 0.06, "learning_rate": 4.544028339516985e-05, "loss": 2.9715, "step": 150 }, { "epoch": 0.06, "learning_rate": 4.573764665433774e-05, "loss": 3.1439, "step": 155 }, { "epoch": 0.06, "learning_rate": 4.60255681980474e-05, "loss": 2.9821, "step": 160 }, { "epoch": 0.07, "learning_rate": 4.630462919695989e-05, "loss": 2.9551, "step": 165 }, { "epoch": 0.07, "learning_rate": 4.6575358760395774e-05, "loss": 3.0681, "step": 170 }, { "epoch": 0.07, "learning_rate": 4.683823997468022e-05, "loss": 3.0571, "step": 175 }, { "epoch": 0.07, "learning_rate": 4.709371509065312e-05, "loss": 3.0496, "step": 180 }, { "epoch": 0.07, "learning_rate": 4.7342190000274217e-05, "loss": 2.9148, "step": 185 }, { "epoch": 0.08, "learning_rate": 4.758403811611965e-05, "loss": 2.9517, "step": 190 }, { "epoch": 0.08, "learning_rate": 4.781960374686557e-05, "loss": 2.8606, "step": 195 }, { "epoch": 0.08, "learning_rate": 4.8049205045348844e-05, "loss": 2.9665, "step": 200 }, { "epoch": 0.08, "learning_rate": 4.827313659255119e-05, "loss": 2.9399, "step": 205 }, { "epoch": 0.08, "learning_rate": 4.849167167016348e-05, "loss": 2.9148, "step": 210 }, { "epoch": 0.09, "learning_rate": 4.8705064265710865e-05, "loss": 2.8763, "step": 215 }, { "epoch": 0.09, "learning_rate": 4.8913550847138896e-05, "loss": 2.9652, "step": 220 }, { "epoch": 0.09, "learning_rate": 4.911735193795457e-05, "loss": 2.854, "step": 225 }, { "epoch": 0.09, "learning_rate": 4.9316673519230676e-05, "loss": 3.0612, "step": 230 }, { "epoch": 0.1, "learning_rate": 4.951170828082003e-05, "loss": 2.9249, "step": 235 }, { "epoch": 0.1, "learning_rate": 4.970263674083211e-05, "loss": 2.8752, "step": 240 }, { "epoch": 0.1, "learning_rate": 4.988962824967385e-05, "loss": 3.0823, "step": 245 }, { "epoch": 0.1, "learning_rate": 5e-05, "loss": 2.9281, "step": 250 }, { "epoch": 0.1, "learning_rate": 5e-05, "loss": 3.0877, "step": 255 }, { "epoch": 0.11, "learning_rate": 5e-05, "loss": 2.8499, "step": 260 }, { "epoch": 0.11, "learning_rate": 5e-05, "loss": 2.9582, "step": 265 }, { "epoch": 0.11, "learning_rate": 5e-05, "loss": 2.9118, "step": 270 }, { "epoch": 0.11, "learning_rate": 5e-05, "loss": 3.0349, "step": 275 }, { "epoch": 0.11, "learning_rate": 5e-05, "loss": 2.7885, "step": 280 }, { "epoch": 0.12, "learning_rate": 5e-05, "loss": 3.0644, "step": 285 }, { "epoch": 0.12, "learning_rate": 5e-05, "loss": 2.9174, "step": 290 }, { "epoch": 0.12, "learning_rate": 5e-05, "loss": 2.8956, "step": 295 }, { "epoch": 0.12, "learning_rate": 5e-05, "loss": 2.8977, "step": 300 }, { "epoch": 0.12, "learning_rate": 5e-05, "loss": 2.7825, "step": 305 }, { "epoch": 0.13, "learning_rate": 5e-05, "loss": 2.9082, "step": 310 }, { "epoch": 0.13, "learning_rate": 5e-05, "loss": 2.8638, "step": 315 }, { "epoch": 0.13, "learning_rate": 5e-05, "loss": 3.0091, "step": 320 }, { "epoch": 0.13, "learning_rate": 5e-05, "loss": 2.8423, "step": 325 }, { "epoch": 0.13, "learning_rate": 5e-05, "loss": 3.0593, "step": 330 }, { "epoch": 0.14, "learning_rate": 5e-05, "loss": 3.0978, "step": 335 }, { "epoch": 0.14, "learning_rate": 5e-05, "loss": 2.9759, "step": 340 }, { "epoch": 0.14, "learning_rate": 5e-05, "loss": 2.7671, "step": 345 }, { "epoch": 0.14, "learning_rate": 5e-05, "loss": 2.7337, "step": 350 }, { "epoch": 0.14, "learning_rate": 5e-05, "loss": 2.7853, "step": 355 }, { "epoch": 0.15, "learning_rate": 5e-05, "loss": 2.7746, "step": 360 }, { "epoch": 0.15, "learning_rate": 5e-05, "loss": 2.8793, "step": 365 }, { "epoch": 0.15, "learning_rate": 5e-05, "loss": 2.8478, "step": 370 }, { "epoch": 0.15, "learning_rate": 5e-05, "loss": 2.9319, "step": 375 }, { "epoch": 0.15, "learning_rate": 5e-05, "loss": 2.9938, "step": 380 }, { "epoch": 0.16, "learning_rate": 5e-05, "loss": 2.9035, "step": 385 }, { "epoch": 0.16, "learning_rate": 5e-05, "loss": 2.8904, "step": 390 }, { "epoch": 0.16, "learning_rate": 5e-05, "loss": 2.9606, "step": 395 }, { "epoch": 0.16, "learning_rate": 5e-05, "loss": 3.0521, "step": 400 }, { "epoch": 0.16, "learning_rate": 5e-05, "loss": 2.9584, "step": 405 }, { "epoch": 0.17, "learning_rate": 5e-05, "loss": 2.8218, "step": 410 }, { "epoch": 0.17, "learning_rate": 5e-05, "loss": 2.9647, "step": 415 }, { "epoch": 0.17, "learning_rate": 5e-05, "loss": 2.792, "step": 420 }, { "epoch": 0.17, "learning_rate": 5e-05, "loss": 2.656, "step": 425 }, { "epoch": 0.17, "learning_rate": 5e-05, "loss": 2.905, "step": 430 }, { "epoch": 0.18, "learning_rate": 5e-05, "loss": 2.7492, "step": 435 }, { "epoch": 0.18, "learning_rate": 5e-05, "loss": 2.924, "step": 440 }, { "epoch": 0.18, "learning_rate": 5e-05, "loss": 2.672, "step": 445 }, { "epoch": 0.18, "learning_rate": 5e-05, "loss": 2.9088, "step": 450 }, { "epoch": 0.18, "learning_rate": 5e-05, "loss": 2.8127, "step": 455 }, { "epoch": 0.19, "learning_rate": 5e-05, "loss": 2.7125, "step": 460 }, { "epoch": 0.19, "learning_rate": 5e-05, "loss": 2.9101, "step": 465 }, { "epoch": 0.19, "learning_rate": 5e-05, "loss": 2.878, "step": 470 }, { "epoch": 0.19, "learning_rate": 5e-05, "loss": 2.9539, "step": 475 }, { "epoch": 0.19, "learning_rate": 5e-05, "loss": 2.7836, "step": 480 }, { "epoch": 0.2, "learning_rate": 5e-05, "loss": 2.9817, "step": 485 }, { "epoch": 0.2, "learning_rate": 5e-05, "loss": 2.9691, "step": 490 }, { "epoch": 0.2, "learning_rate": 5e-05, "loss": 2.7756, "step": 495 }, { "epoch": 0.2, "learning_rate": 5e-05, "loss": 2.8108, "step": 500 }, { "epoch": 0.2, "learning_rate": 5e-05, "loss": 2.7651, "step": 505 }, { "epoch": 0.21, "learning_rate": 5e-05, "loss": 2.8161, "step": 510 }, { "epoch": 0.21, "learning_rate": 5e-05, "loss": 2.7832, "step": 515 }, { "epoch": 0.21, "learning_rate": 5e-05, "loss": 2.7455, "step": 520 }, { "epoch": 0.21, "learning_rate": 5e-05, "loss": 2.8838, "step": 525 }, { "epoch": 0.21, "learning_rate": 5e-05, "loss": 2.7434, "step": 530 }, { "epoch": 0.22, "learning_rate": 5e-05, "loss": 2.8708, "step": 535 }, { "epoch": 0.22, "learning_rate": 5e-05, "loss": 2.8642, "step": 540 }, { "epoch": 0.22, "learning_rate": 5e-05, "loss": 2.7339, "step": 545 }, { "epoch": 0.22, "learning_rate": 5e-05, "loss": 2.7499, "step": 550 }, { "epoch": 0.22, "learning_rate": 5e-05, "loss": 2.7786, "step": 555 }, { "epoch": 0.23, "learning_rate": 5e-05, "loss": 2.8487, "step": 560 }, { "epoch": 0.23, "learning_rate": 5e-05, "loss": 2.8406, "step": 565 }, { "epoch": 0.23, "learning_rate": 5e-05, "loss": 2.8463, "step": 570 }, { "epoch": 0.23, "learning_rate": 5e-05, "loss": 2.7209, "step": 575 }, { "epoch": 0.23, "learning_rate": 5e-05, "loss": 2.9581, "step": 580 }, { "epoch": 0.24, "learning_rate": 5e-05, "loss": 2.8408, "step": 585 }, { "epoch": 0.24, "learning_rate": 5e-05, "loss": 2.9146, "step": 590 }, { "epoch": 0.24, "learning_rate": 5e-05, "loss": 2.8323, "step": 595 }, { "epoch": 0.24, "learning_rate": 5e-05, "loss": 2.8473, "step": 600 }, { "epoch": 0.24, "learning_rate": 5e-05, "loss": 2.8023, "step": 605 }, { "epoch": 0.25, "learning_rate": 5e-05, "loss": 2.859, "step": 610 }, { "epoch": 0.25, "learning_rate": 5e-05, "loss": 2.8659, "step": 615 }, { "epoch": 0.25, "learning_rate": 5e-05, "loss": 2.6733, "step": 620 }, { "epoch": 0.25, "learning_rate": 5e-05, "loss": 2.7982, "step": 625 }, { "epoch": 0.25, "learning_rate": 5e-05, "loss": 2.8614, "step": 630 }, { "epoch": 0.26, "learning_rate": 5e-05, "loss": 2.8252, "step": 635 }, { "epoch": 0.26, "learning_rate": 5e-05, "loss": 2.7545, "step": 640 }, { "epoch": 0.26, "learning_rate": 5e-05, "loss": 2.9128, "step": 645 }, { "epoch": 0.26, "learning_rate": 5e-05, "loss": 2.6117, "step": 650 }, { "epoch": 0.27, "learning_rate": 5e-05, "loss": 2.8768, "step": 655 }, { "epoch": 0.27, "learning_rate": 5e-05, "loss": 2.8761, "step": 660 }, { "epoch": 0.27, "learning_rate": 5e-05, "loss": 2.7655, "step": 665 }, { "epoch": 0.27, "learning_rate": 5e-05, "loss": 2.7963, "step": 670 }, { "epoch": 0.27, "learning_rate": 5e-05, "loss": 2.8481, "step": 675 }, { "epoch": 0.28, "learning_rate": 5e-05, "loss": 2.7366, "step": 680 }, { "epoch": 0.28, "learning_rate": 5e-05, "loss": 2.9392, "step": 685 }, { "epoch": 0.28, "learning_rate": 5e-05, "loss": 2.8233, "step": 690 }, { "epoch": 0.28, "learning_rate": 5e-05, "loss": 2.7672, "step": 695 }, { "epoch": 0.28, "learning_rate": 5e-05, "loss": 2.789, "step": 700 }, { "epoch": 0.29, "learning_rate": 5e-05, "loss": 2.7065, "step": 705 }, { "epoch": 0.29, "learning_rate": 5e-05, "loss": 2.8101, "step": 710 }, { "epoch": 0.29, "learning_rate": 5e-05, "loss": 2.6257, "step": 715 }, { "epoch": 0.29, "learning_rate": 5e-05, "loss": 2.7759, "step": 720 }, { "epoch": 0.29, "learning_rate": 5e-05, "loss": 2.7739, "step": 725 }, { "epoch": 0.3, "learning_rate": 5e-05, "loss": 2.6239, "step": 730 }, { "epoch": 0.3, "learning_rate": 5e-05, "loss": 2.7561, "step": 735 }, { "epoch": 0.3, "learning_rate": 5e-05, "loss": 2.8663, "step": 740 }, { "epoch": 0.3, "learning_rate": 5e-05, "loss": 2.7199, "step": 745 }, { "epoch": 0.3, "learning_rate": 5e-05, "loss": 2.7612, "step": 750 }, { "epoch": 0.31, "learning_rate": 5e-05, "loss": 2.8215, "step": 755 }, { "epoch": 0.31, "learning_rate": 5e-05, "loss": 2.7875, "step": 760 }, { "epoch": 0.31, "learning_rate": 5e-05, "loss": 2.7699, "step": 765 }, { "epoch": 0.31, "learning_rate": 5e-05, "loss": 2.7907, "step": 770 }, { "epoch": 0.31, "learning_rate": 5e-05, "loss": 3.023, "step": 775 }, { "epoch": 0.32, "learning_rate": 5e-05, "loss": 2.7708, "step": 780 }, { "epoch": 0.32, "learning_rate": 5e-05, "loss": 2.8424, "step": 785 }, { "epoch": 0.32, "learning_rate": 5e-05, "loss": 2.8524, "step": 790 }, { "epoch": 0.32, "learning_rate": 5e-05, "loss": 2.7407, "step": 795 }, { "epoch": 0.32, "learning_rate": 5e-05, "loss": 2.8392, "step": 800 }, { "epoch": 0.33, "learning_rate": 5e-05, "loss": 2.7107, "step": 805 }, { "epoch": 0.33, "learning_rate": 5e-05, "loss": 2.7377, "step": 810 }, { "epoch": 0.33, "learning_rate": 5e-05, "loss": 2.8285, "step": 815 }, { "epoch": 0.33, "learning_rate": 5e-05, "loss": 2.7663, "step": 820 }, { "epoch": 0.33, "learning_rate": 5e-05, "loss": 2.819, "step": 825 }, { "epoch": 0.34, "learning_rate": 5e-05, "loss": 2.7749, "step": 830 }, { "epoch": 0.34, "learning_rate": 5e-05, "loss": 2.8127, "step": 835 }, { "epoch": 0.34, "learning_rate": 5e-05, "loss": 2.7182, "step": 840 }, { "epoch": 0.34, "learning_rate": 5e-05, "loss": 2.7712, "step": 845 }, { "epoch": 0.34, "learning_rate": 5e-05, "loss": 2.6442, "step": 850 }, { "epoch": 0.35, "learning_rate": 5e-05, "loss": 2.9364, "step": 855 }, { "epoch": 0.35, "learning_rate": 5e-05, "loss": 2.9922, "step": 860 }, { "epoch": 0.35, "learning_rate": 5e-05, "loss": 2.7308, "step": 865 }, { "epoch": 0.35, "learning_rate": 5e-05, "loss": 2.6962, "step": 870 }, { "epoch": 0.35, "learning_rate": 5e-05, "loss": 2.6137, "step": 875 }, { "epoch": 0.36, "learning_rate": 5e-05, "loss": 2.7204, "step": 880 }, { "epoch": 0.36, "learning_rate": 5e-05, "loss": 2.7692, "step": 885 }, { "epoch": 0.36, "learning_rate": 5e-05, "loss": 2.794, "step": 890 }, { "epoch": 0.36, "learning_rate": 5e-05, "loss": 2.7687, "step": 895 }, { "epoch": 0.36, "learning_rate": 5e-05, "loss": 2.7605, "step": 900 }, { "epoch": 0.37, "learning_rate": 5e-05, "loss": 2.7807, "step": 905 }, { "epoch": 0.37, "learning_rate": 5e-05, "loss": 2.7144, "step": 910 }, { "epoch": 0.37, "learning_rate": 5e-05, "loss": 2.6465, "step": 915 }, { "epoch": 0.37, "learning_rate": 5e-05, "loss": 2.8023, "step": 920 }, { "epoch": 0.37, "learning_rate": 5e-05, "loss": 2.6612, "step": 925 }, { "epoch": 0.38, "learning_rate": 5e-05, "loss": 2.8403, "step": 930 }, { "epoch": 0.38, "learning_rate": 5e-05, "loss": 2.7133, "step": 935 }, { "epoch": 0.38, "learning_rate": 5e-05, "loss": 2.9143, "step": 940 }, { "epoch": 0.38, "learning_rate": 5e-05, "loss": 2.7716, "step": 945 }, { "epoch": 0.38, "learning_rate": 5e-05, "loss": 2.7732, "step": 950 }, { "epoch": 0.39, "learning_rate": 5e-05, "loss": 2.788, "step": 955 }, { "epoch": 0.39, "learning_rate": 5e-05, "loss": 2.7164, "step": 960 }, { "epoch": 0.39, "learning_rate": 5e-05, "loss": 2.6721, "step": 965 }, { "epoch": 0.39, "learning_rate": 5e-05, "loss": 2.9455, "step": 970 }, { "epoch": 0.39, "learning_rate": 5e-05, "loss": 2.6689, "step": 975 }, { "epoch": 0.4, "learning_rate": 5e-05, "loss": 2.629, "step": 980 }, { "epoch": 0.4, "learning_rate": 5e-05, "loss": 2.8129, "step": 985 }, { "epoch": 0.4, "learning_rate": 5e-05, "loss": 2.6629, "step": 990 }, { "epoch": 0.4, "learning_rate": 5e-05, "loss": 2.8313, "step": 995 }, { "epoch": 0.4, "learning_rate": 5e-05, "loss": 2.9289, "step": 1000 }, { "epoch": 0.41, "learning_rate": 5e-05, "loss": 2.7845, "step": 1005 }, { "epoch": 0.41, "learning_rate": 5e-05, "loss": 2.7841, "step": 1010 }, { "epoch": 0.41, "learning_rate": 5e-05, "loss": 2.78, "step": 1015 }, { "epoch": 0.41, "learning_rate": 5e-05, "loss": 2.8525, "step": 1020 }, { "epoch": 0.41, "learning_rate": 5e-05, "loss": 2.7313, "step": 1025 }, { "epoch": 0.42, "learning_rate": 5e-05, "loss": 2.8678, "step": 1030 }, { "epoch": 0.42, "learning_rate": 5e-05, "loss": 2.8533, "step": 1035 }, { "epoch": 0.42, "learning_rate": 5e-05, "loss": 2.7356, "step": 1040 }, { "epoch": 0.42, "learning_rate": 5e-05, "loss": 2.7197, "step": 1045 }, { "epoch": 0.42, "learning_rate": 5e-05, "loss": 2.7888, "step": 1050 }, { "epoch": 0.43, "learning_rate": 5e-05, "loss": 2.8152, "step": 1055 }, { "epoch": 0.43, "learning_rate": 5e-05, "loss": 2.6529, "step": 1060 }, { "epoch": 0.43, "learning_rate": 5e-05, "loss": 2.715, "step": 1065 }, { "epoch": 0.43, "learning_rate": 5e-05, "loss": 2.7427, "step": 1070 }, { "epoch": 0.43, "learning_rate": 5e-05, "loss": 2.7064, "step": 1075 }, { "epoch": 0.44, "learning_rate": 5e-05, "loss": 2.8462, "step": 1080 }, { "epoch": 0.44, "learning_rate": 5e-05, "loss": 2.6574, "step": 1085 }, { "epoch": 0.44, "learning_rate": 5e-05, "loss": 2.8997, "step": 1090 }, { "epoch": 0.44, "learning_rate": 5e-05, "loss": 2.7303, "step": 1095 }, { "epoch": 0.45, "learning_rate": 5e-05, "loss": 2.7889, "step": 1100 }, { "epoch": 0.45, "learning_rate": 5e-05, "loss": 2.7992, "step": 1105 }, { "epoch": 0.45, "learning_rate": 5e-05, "loss": 2.62, "step": 1110 }, { "epoch": 0.45, "learning_rate": 5e-05, "loss": 2.6867, "step": 1115 }, { "epoch": 0.45, "learning_rate": 5e-05, "loss": 2.5923, "step": 1120 }, { "epoch": 0.46, "learning_rate": 5e-05, "loss": 2.6593, "step": 1125 }, { "epoch": 0.46, "learning_rate": 5e-05, "loss": 2.8382, "step": 1130 }, { "epoch": 0.46, "learning_rate": 5e-05, "loss": 2.5861, "step": 1135 }, { "epoch": 0.46, "learning_rate": 5e-05, "loss": 2.8416, "step": 1140 }, { "epoch": 0.46, "learning_rate": 5e-05, "loss": 2.9029, "step": 1145 }, { "epoch": 0.47, "learning_rate": 5e-05, "loss": 2.6753, "step": 1150 }, { "epoch": 0.47, "learning_rate": 5e-05, "loss": 2.6871, "step": 1155 }, { "epoch": 0.47, "learning_rate": 5e-05, "loss": 2.6733, "step": 1160 }, { "epoch": 0.47, "learning_rate": 5e-05, "loss": 2.6566, "step": 1165 }, { "epoch": 0.47, "learning_rate": 5e-05, "loss": 2.8209, "step": 1170 }, { "epoch": 0.48, "learning_rate": 5e-05, "loss": 2.8289, "step": 1175 }, { "epoch": 0.48, "learning_rate": 5e-05, "loss": 2.8023, "step": 1180 }, { "epoch": 0.48, "learning_rate": 5e-05, "loss": 2.6979, "step": 1185 }, { "epoch": 0.48, "learning_rate": 5e-05, "loss": 2.6954, "step": 1190 }, { "epoch": 0.48, "learning_rate": 5e-05, "loss": 2.7828, "step": 1195 }, { "epoch": 0.49, "learning_rate": 5e-05, "loss": 2.7909, "step": 1200 }, { "epoch": 0.49, "learning_rate": 5e-05, "loss": 2.6024, "step": 1205 }, { "epoch": 0.49, "learning_rate": 5e-05, "loss": 2.7298, "step": 1210 }, { "epoch": 0.49, "learning_rate": 5e-05, "loss": 2.5615, "step": 1215 }, { "epoch": 0.49, "learning_rate": 5e-05, "loss": 2.6645, "step": 1220 }, { "epoch": 0.5, "learning_rate": 5e-05, "loss": 2.6223, "step": 1225 }, { "epoch": 0.5, "learning_rate": 5e-05, "loss": 2.8121, "step": 1230 }, { "epoch": 0.5, "learning_rate": 5e-05, "loss": 2.7913, "step": 1235 }, { "epoch": 0.5, "learning_rate": 5e-05, "loss": 2.6475, "step": 1240 }, { "epoch": 0.5, "learning_rate": 5e-05, "loss": 2.7489, "step": 1245 }, { "epoch": 0.51, "learning_rate": 5e-05, "loss": 2.5228, "step": 1250 }, { "epoch": 0.51, "learning_rate": 5e-05, "loss": 2.7824, "step": 1255 }, { "epoch": 0.51, "learning_rate": 5e-05, "loss": 2.7072, "step": 1260 }, { "epoch": 0.51, "learning_rate": 5e-05, "loss": 2.8302, "step": 1265 }, { "epoch": 0.51, "learning_rate": 5e-05, "loss": 2.562, "step": 1270 }, { "epoch": 0.52, "learning_rate": 5e-05, "loss": 2.6751, "step": 1275 }, { "epoch": 0.52, "learning_rate": 5e-05, "loss": 2.6708, "step": 1280 }, { "epoch": 0.52, "learning_rate": 5e-05, "loss": 2.7931, "step": 1285 }, { "epoch": 0.52, "learning_rate": 5e-05, "loss": 2.8918, "step": 1290 }, { "epoch": 0.52, "learning_rate": 5e-05, "loss": 2.7633, "step": 1295 }, { "epoch": 0.53, "learning_rate": 5e-05, "loss": 2.6468, "step": 1300 }, { "epoch": 0.53, "learning_rate": 5e-05, "loss": 2.829, "step": 1305 }, { "epoch": 0.53, "learning_rate": 5e-05, "loss": 2.5203, "step": 1310 }, { "epoch": 0.53, "learning_rate": 5e-05, "loss": 2.8926, "step": 1315 }, { "epoch": 0.53, "learning_rate": 5e-05, "loss": 2.5314, "step": 1320 }, { "epoch": 0.54, "learning_rate": 5e-05, "loss": 2.6843, "step": 1325 }, { "epoch": 0.54, "learning_rate": 5e-05, "loss": 2.6962, "step": 1330 }, { "epoch": 0.54, "learning_rate": 5e-05, "loss": 2.9101, "step": 1335 }, { "epoch": 0.54, "learning_rate": 5e-05, "loss": 2.7593, "step": 1340 }, { "epoch": 0.54, "learning_rate": 5e-05, "loss": 2.7953, "step": 1345 }, { "epoch": 0.55, "learning_rate": 5e-05, "loss": 2.6882, "step": 1350 }, { "epoch": 0.55, "learning_rate": 5e-05, "loss": 2.6233, "step": 1355 }, { "epoch": 0.55, "learning_rate": 5e-05, "loss": 2.7277, "step": 1360 }, { "epoch": 0.55, "learning_rate": 5e-05, "loss": 2.5897, "step": 1365 }, { "epoch": 0.55, "learning_rate": 5e-05, "loss": 2.618, "step": 1370 }, { "epoch": 0.56, "learning_rate": 5e-05, "loss": 2.7698, "step": 1375 }, { "epoch": 0.56, "learning_rate": 5e-05, "loss": 2.6026, "step": 1380 }, { "epoch": 0.56, "learning_rate": 5e-05, "loss": 2.8144, "step": 1385 }, { "epoch": 0.56, "learning_rate": 5e-05, "loss": 2.7199, "step": 1390 }, { "epoch": 0.56, "learning_rate": 5e-05, "loss": 2.612, "step": 1395 }, { "epoch": 0.57, "learning_rate": 5e-05, "loss": 2.7256, "step": 1400 }, { "epoch": 0.57, "learning_rate": 5e-05, "loss": 2.7822, "step": 1405 }, { "epoch": 0.57, "learning_rate": 5e-05, "loss": 2.7898, "step": 1410 }, { "epoch": 0.57, "learning_rate": 5e-05, "loss": 2.6511, "step": 1415 }, { "epoch": 0.57, "learning_rate": 5e-05, "loss": 2.68, "step": 1420 }, { "epoch": 0.58, "learning_rate": 5e-05, "loss": 2.6535, "step": 1425 }, { "epoch": 0.58, "learning_rate": 5e-05, "loss": 2.7386, "step": 1430 }, { "epoch": 0.58, "learning_rate": 5e-05, "loss": 2.7598, "step": 1435 }, { "epoch": 0.58, "learning_rate": 5e-05, "loss": 2.7257, "step": 1440 }, { "epoch": 0.58, "learning_rate": 5e-05, "loss": 2.7083, "step": 1445 }, { "epoch": 0.59, "learning_rate": 5e-05, "loss": 2.6456, "step": 1450 }, { "epoch": 0.59, "learning_rate": 5e-05, "loss": 2.8651, "step": 1455 }, { "epoch": 0.59, "learning_rate": 5e-05, "loss": 2.6377, "step": 1460 }, { "epoch": 0.59, "learning_rate": 5e-05, "loss": 2.7162, "step": 1465 }, { "epoch": 0.59, "learning_rate": 5e-05, "loss": 2.7777, "step": 1470 }, { "epoch": 0.6, "learning_rate": 5e-05, "loss": 2.6719, "step": 1475 }, { "epoch": 0.6, "learning_rate": 5e-05, "loss": 2.5657, "step": 1480 }, { "epoch": 0.6, "learning_rate": 5e-05, "loss": 2.6202, "step": 1485 }, { "epoch": 0.6, "learning_rate": 5e-05, "loss": 2.7201, "step": 1490 }, { "epoch": 0.6, "learning_rate": 5e-05, "loss": 2.8134, "step": 1495 }, { "epoch": 0.61, "learning_rate": 5e-05, "loss": 2.9, "step": 1500 }, { "epoch": 0.61, "learning_rate": 5e-05, "loss": 2.8065, "step": 1505 }, { "epoch": 0.61, "learning_rate": 5e-05, "loss": 2.7656, "step": 1510 }, { "epoch": 0.61, "learning_rate": 5e-05, "loss": 2.7519, "step": 1515 }, { "epoch": 0.61, "learning_rate": 5e-05, "loss": 2.6922, "step": 1520 }, { "epoch": 0.62, "learning_rate": 5e-05, "loss": 2.5151, "step": 1525 }, { "epoch": 0.62, "learning_rate": 5e-05, "loss": 2.6186, "step": 1530 }, { "epoch": 0.62, "learning_rate": 5e-05, "loss": 2.6513, "step": 1535 }, { "epoch": 0.62, "learning_rate": 5e-05, "loss": 2.6852, "step": 1540 }, { "epoch": 0.63, "learning_rate": 5e-05, "loss": 2.7875, "step": 1545 }, { "epoch": 0.63, "learning_rate": 5e-05, "loss": 2.8141, "step": 1550 }, { "epoch": 0.63, "learning_rate": 5e-05, "loss": 2.6425, "step": 1555 }, { "epoch": 0.63, "learning_rate": 5e-05, "loss": 2.72, "step": 1560 }, { "epoch": 0.63, "learning_rate": 5e-05, "loss": 2.7421, "step": 1565 }, { "epoch": 0.64, "learning_rate": 5e-05, "loss": 2.7139, "step": 1570 }, { "epoch": 0.64, "learning_rate": 5e-05, "loss": 2.6792, "step": 1575 }, { "epoch": 0.64, "learning_rate": 5e-05, "loss": 2.5915, "step": 1580 }, { "epoch": 0.64, "learning_rate": 5e-05, "loss": 2.6162, "step": 1585 }, { "epoch": 0.64, "learning_rate": 5e-05, "loss": 2.617, "step": 1590 }, { "epoch": 0.65, "learning_rate": 5e-05, "loss": 2.7125, "step": 1595 }, { "epoch": 0.65, "learning_rate": 5e-05, "loss": 2.7146, "step": 1600 }, { "epoch": 0.65, "learning_rate": 5e-05, "loss": 2.5648, "step": 1605 }, { "epoch": 0.65, "learning_rate": 5e-05, "loss": 2.8611, "step": 1610 }, { "epoch": 0.65, "learning_rate": 5e-05, "loss": 2.5752, "step": 1615 }, { "epoch": 0.66, "learning_rate": 5e-05, "loss": 2.6658, "step": 1620 }, { "epoch": 0.66, "learning_rate": 5e-05, "loss": 2.7633, "step": 1625 }, { "epoch": 0.66, "learning_rate": 5e-05, "loss": 2.6336, "step": 1630 }, { "epoch": 0.66, "learning_rate": 5e-05, "loss": 2.771, "step": 1635 }, { "epoch": 0.66, "learning_rate": 5e-05, "loss": 2.8223, "step": 1640 }, { "epoch": 0.67, "learning_rate": 5e-05, "loss": 2.7626, "step": 1645 }, { "epoch": 0.67, "learning_rate": 5e-05, "loss": 2.7405, "step": 1650 }, { "epoch": 0.67, "learning_rate": 5e-05, "loss": 2.7567, "step": 1655 }, { "epoch": 0.67, "learning_rate": 5e-05, "loss": 2.8065, "step": 1660 }, { "epoch": 0.67, "learning_rate": 5e-05, "loss": 2.6281, "step": 1665 }, { "epoch": 0.68, "learning_rate": 5e-05, "loss": 2.5907, "step": 1670 }, { "epoch": 0.68, "learning_rate": 5e-05, "loss": 2.8343, "step": 1675 }, { "epoch": 0.68, "learning_rate": 5e-05, "loss": 2.5374, "step": 1680 }, { "epoch": 0.68, "learning_rate": 5e-05, "loss": 2.6573, "step": 1685 }, { "epoch": 0.68, "learning_rate": 5e-05, "loss": 2.7715, "step": 1690 }, { "epoch": 0.69, "learning_rate": 5e-05, "loss": 2.7022, "step": 1695 }, { "epoch": 0.69, "learning_rate": 5e-05, "loss": 2.6918, "step": 1700 }, { "epoch": 0.69, "learning_rate": 5e-05, "loss": 2.7902, "step": 1705 }, { "epoch": 0.69, "learning_rate": 5e-05, "loss": 2.7458, "step": 1710 }, { "epoch": 0.69, "learning_rate": 5e-05, "loss": 2.6979, "step": 1715 }, { "epoch": 0.7, "learning_rate": 5e-05, "loss": 2.7609, "step": 1720 }, { "epoch": 0.7, "learning_rate": 5e-05, "loss": 2.6818, "step": 1725 }, { "epoch": 0.7, "learning_rate": 5e-05, "loss": 2.6761, "step": 1730 }, { "epoch": 0.7, "learning_rate": 5e-05, "loss": 2.7184, "step": 1735 }, { "epoch": 0.7, "learning_rate": 5e-05, "loss": 2.585, "step": 1740 }, { "epoch": 0.71, "learning_rate": 5e-05, "loss": 2.7434, "step": 1745 }, { "epoch": 0.71, "learning_rate": 5e-05, "loss": 2.6304, "step": 1750 }, { "epoch": 0.71, "learning_rate": 5e-05, "loss": 2.841, "step": 1755 }, { "epoch": 0.71, "learning_rate": 5e-05, "loss": 2.7202, "step": 1760 }, { "epoch": 0.71, "learning_rate": 5e-05, "loss": 2.5044, "step": 1765 }, { "epoch": 0.72, "learning_rate": 5e-05, "loss": 2.655, "step": 1770 }, { "epoch": 0.72, "learning_rate": 5e-05, "loss": 2.5988, "step": 1775 }, { "epoch": 0.72, "learning_rate": 5e-05, "loss": 2.588, "step": 1780 }, { "epoch": 0.72, "learning_rate": 5e-05, "loss": 2.7623, "step": 1785 }, { "epoch": 0.72, "learning_rate": 5e-05, "loss": 2.7245, "step": 1790 }, { "epoch": 0.73, "learning_rate": 5e-05, "loss": 2.6365, "step": 1795 }, { "epoch": 0.73, "learning_rate": 5e-05, "loss": 2.6877, "step": 1800 }, { "epoch": 0.73, "learning_rate": 5e-05, "loss": 2.5983, "step": 1805 }, { "epoch": 0.73, "learning_rate": 5e-05, "loss": 2.7249, "step": 1810 }, { "epoch": 0.73, "learning_rate": 5e-05, "loss": 2.7584, "step": 1815 }, { "epoch": 0.74, "learning_rate": 5e-05, "loss": 2.7737, "step": 1820 }, { "epoch": 0.74, "learning_rate": 5e-05, "loss": 2.7943, "step": 1825 }, { "epoch": 0.74, "learning_rate": 5e-05, "loss": 2.6356, "step": 1830 }, { "epoch": 0.74, "learning_rate": 5e-05, "loss": 2.535, "step": 1835 }, { "epoch": 0.74, "learning_rate": 5e-05, "loss": 2.7207, "step": 1840 }, { "epoch": 0.75, "learning_rate": 5e-05, "loss": 2.7745, "step": 1845 }, { "epoch": 0.75, "learning_rate": 5e-05, "loss": 2.6661, "step": 1850 }, { "epoch": 0.75, "learning_rate": 5e-05, "loss": 2.6929, "step": 1855 }, { "epoch": 0.75, "learning_rate": 5e-05, "loss": 2.6875, "step": 1860 }, { "epoch": 0.75, "learning_rate": 5e-05, "loss": 2.7406, "step": 1865 }, { "epoch": 0.76, "learning_rate": 5e-05, "loss": 2.8286, "step": 1870 }, { "epoch": 0.76, "learning_rate": 5e-05, "loss": 2.7516, "step": 1875 }, { "epoch": 0.76, "learning_rate": 5e-05, "loss": 2.7069, "step": 1880 }, { "epoch": 0.76, "learning_rate": 5e-05, "loss": 2.6228, "step": 1885 }, { "epoch": 0.76, "learning_rate": 5e-05, "loss": 2.7762, "step": 1890 }, { "epoch": 0.77, "learning_rate": 5e-05, "loss": 2.694, "step": 1895 }, { "epoch": 0.77, "learning_rate": 5e-05, "loss": 2.6888, "step": 1900 }, { "epoch": 0.77, "learning_rate": 5e-05, "loss": 2.7838, "step": 1905 }, { "epoch": 0.77, "learning_rate": 5e-05, "loss": 2.6155, "step": 1910 }, { "epoch": 0.77, "learning_rate": 5e-05, "loss": 2.5779, "step": 1915 }, { "epoch": 0.78, "learning_rate": 5e-05, "loss": 2.6237, "step": 1920 }, { "epoch": 0.78, "learning_rate": 5e-05, "loss": 2.6635, "step": 1925 }, { "epoch": 0.78, "learning_rate": 5e-05, "loss": 2.6905, "step": 1930 }, { "epoch": 0.78, "learning_rate": 5e-05, "loss": 2.6694, "step": 1935 }, { "epoch": 0.78, "learning_rate": 5e-05, "loss": 2.4783, "step": 1940 }, { "epoch": 0.79, "learning_rate": 5e-05, "loss": 2.7565, "step": 1945 }, { "epoch": 0.79, "learning_rate": 5e-05, "loss": 2.7145, "step": 1950 }, { "epoch": 0.79, "learning_rate": 5e-05, "loss": 2.6378, "step": 1955 }, { "epoch": 0.79, "learning_rate": 5e-05, "loss": 2.7469, "step": 1960 }, { "epoch": 0.8, "learning_rate": 5e-05, "loss": 2.6073, "step": 1965 }, { "epoch": 0.8, "learning_rate": 5e-05, "loss": 2.7848, "step": 1970 }, { "epoch": 0.8, "learning_rate": 5e-05, "loss": 2.6595, "step": 1975 }, { "epoch": 0.8, "learning_rate": 5e-05, "loss": 2.8059, "step": 1980 }, { "epoch": 0.8, "learning_rate": 5e-05, "loss": 2.7439, "step": 1985 }, { "epoch": 0.81, "learning_rate": 5e-05, "loss": 2.583, "step": 1990 }, { "epoch": 0.81, "learning_rate": 5e-05, "loss": 2.7066, "step": 1995 }, { "epoch": 0.81, "learning_rate": 5e-05, "loss": 2.6711, "step": 2000 }, { "epoch": 0.81, "learning_rate": 5e-05, "loss": 2.6781, "step": 2005 }, { "epoch": 0.81, "learning_rate": 5e-05, "loss": 2.6504, "step": 2010 }, { "epoch": 0.82, "learning_rate": 5e-05, "loss": 2.601, "step": 2015 }, { "epoch": 0.82, "learning_rate": 5e-05, "loss": 2.7151, "step": 2020 }, { "epoch": 0.82, "learning_rate": 5e-05, "loss": 2.7105, "step": 2025 }, { "epoch": 0.82, "learning_rate": 5e-05, "loss": 2.4789, "step": 2030 }, { "epoch": 0.82, "learning_rate": 5e-05, "loss": 2.587, "step": 2035 }, { "epoch": 0.83, "learning_rate": 5e-05, "loss": 2.7408, "step": 2040 }, { "epoch": 0.83, "learning_rate": 5e-05, "loss": 2.777, "step": 2045 }, { "epoch": 0.83, "learning_rate": 5e-05, "loss": 2.6372, "step": 2050 }, { "epoch": 0.83, "learning_rate": 5e-05, "loss": 2.6225, "step": 2055 }, { "epoch": 0.83, "learning_rate": 5e-05, "loss": 2.4768, "step": 2060 }, { "epoch": 0.84, "learning_rate": 5e-05, "loss": 2.7418, "step": 2065 }, { "epoch": 0.84, "learning_rate": 5e-05, "loss": 2.758, "step": 2070 }, { "epoch": 0.84, "learning_rate": 5e-05, "loss": 2.6905, "step": 2075 }, { "epoch": 0.84, "learning_rate": 5e-05, "loss": 2.8158, "step": 2080 }, { "epoch": 0.84, "learning_rate": 5e-05, "loss": 2.809, "step": 2085 }, { "epoch": 0.85, "learning_rate": 5e-05, "loss": 2.7938, "step": 2090 }, { "epoch": 0.85, "learning_rate": 5e-05, "loss": 2.6229, "step": 2095 }, { "epoch": 0.85, "learning_rate": 5e-05, "loss": 2.6269, "step": 2100 }, { "epoch": 0.85, "learning_rate": 5e-05, "loss": 2.6908, "step": 2105 }, { "epoch": 0.85, "learning_rate": 5e-05, "loss": 2.7486, "step": 2110 }, { "epoch": 0.86, "learning_rate": 5e-05, "loss": 2.7088, "step": 2115 }, { "epoch": 0.86, "learning_rate": 5e-05, "loss": 2.5786, "step": 2120 }, { "epoch": 0.86, "learning_rate": 5e-05, "loss": 2.725, "step": 2125 }, { "epoch": 0.86, "learning_rate": 5e-05, "loss": 2.6511, "step": 2130 }, { "epoch": 0.86, "learning_rate": 5e-05, "loss": 2.4968, "step": 2135 }, { "epoch": 0.87, "learning_rate": 5e-05, "loss": 2.7425, "step": 2140 }, { "epoch": 0.87, "learning_rate": 5e-05, "loss": 2.5695, "step": 2145 }, { "epoch": 0.87, "learning_rate": 5e-05, "loss": 2.5162, "step": 2150 }, { "epoch": 0.87, "learning_rate": 5e-05, "loss": 2.504, "step": 2155 }, { "epoch": 0.87, "learning_rate": 5e-05, "loss": 2.7378, "step": 2160 }, { "epoch": 0.88, "learning_rate": 5e-05, "loss": 2.6437, "step": 2165 }, { "epoch": 0.88, "learning_rate": 5e-05, "loss": 2.629, "step": 2170 }, { "epoch": 0.88, "learning_rate": 5e-05, "loss": 2.5375, "step": 2175 }, { "epoch": 0.88, "learning_rate": 5e-05, "loss": 2.7463, "step": 2180 }, { "epoch": 0.88, "learning_rate": 5e-05, "loss": 2.5113, "step": 2185 }, { "epoch": 0.89, "learning_rate": 5e-05, "loss": 2.7163, "step": 2190 }, { "epoch": 0.89, "learning_rate": 5e-05, "loss": 2.8432, "step": 2195 }, { "epoch": 0.89, "learning_rate": 5e-05, "loss": 2.7086, "step": 2200 }, { "epoch": 0.89, "learning_rate": 5e-05, "loss": 2.784, "step": 2205 }, { "epoch": 0.89, "learning_rate": 5e-05, "loss": 2.7021, "step": 2210 }, { "epoch": 0.9, "learning_rate": 5e-05, "loss": 2.6244, "step": 2215 }, { "epoch": 0.9, "learning_rate": 5e-05, "loss": 2.633, "step": 2220 }, { "epoch": 0.9, "learning_rate": 5e-05, "loss": 2.7068, "step": 2225 }, { "epoch": 0.9, "learning_rate": 5e-05, "loss": 2.8057, "step": 2230 }, { "epoch": 0.9, "learning_rate": 5e-05, "loss": 2.677, "step": 2235 }, { "epoch": 0.91, "learning_rate": 5e-05, "loss": 2.7191, "step": 2240 }, { "epoch": 0.91, "learning_rate": 5e-05, "loss": 2.7879, "step": 2245 }, { "epoch": 0.91, "learning_rate": 5e-05, "loss": 2.7059, "step": 2250 }, { "epoch": 0.91, "learning_rate": 5e-05, "loss": 2.7086, "step": 2255 }, { "epoch": 0.91, "learning_rate": 5e-05, "loss": 2.6386, "step": 2260 }, { "epoch": 0.92, "learning_rate": 5e-05, "loss": 2.6557, "step": 2265 }, { "epoch": 0.92, "learning_rate": 5e-05, "loss": 2.7518, "step": 2270 }, { "epoch": 0.92, "learning_rate": 5e-05, "loss": 2.6355, "step": 2275 }, { "epoch": 0.92, "learning_rate": 5e-05, "loss": 2.6018, "step": 2280 }, { "epoch": 0.92, "learning_rate": 5e-05, "loss": 2.5666, "step": 2285 }, { "epoch": 0.93, "learning_rate": 5e-05, "loss": 2.7285, "step": 2290 }, { "epoch": 0.93, "learning_rate": 5e-05, "loss": 2.6315, "step": 2295 }, { "epoch": 0.93, "learning_rate": 5e-05, "loss": 2.7684, "step": 2300 }, { "epoch": 0.93, "learning_rate": 5e-05, "loss": 2.8144, "step": 2305 }, { "epoch": 0.93, "learning_rate": 5e-05, "loss": 2.6321, "step": 2310 }, { "epoch": 0.94, "learning_rate": 5e-05, "loss": 2.534, "step": 2315 }, { "epoch": 0.94, "learning_rate": 5e-05, "loss": 2.6426, "step": 2320 }, { "epoch": 0.94, "learning_rate": 5e-05, "loss": 2.7651, "step": 2325 }, { "epoch": 0.94, "learning_rate": 5e-05, "loss": 2.9408, "step": 2330 }, { "epoch": 0.94, "learning_rate": 5e-05, "loss": 2.5735, "step": 2335 }, { "epoch": 0.95, "learning_rate": 5e-05, "loss": 2.7254, "step": 2340 }, { "epoch": 0.95, "learning_rate": 5e-05, "loss": 2.5567, "step": 2345 }, { "epoch": 0.95, "learning_rate": 5e-05, "loss": 2.6888, "step": 2350 }, { "epoch": 0.95, "learning_rate": 5e-05, "loss": 2.615, "step": 2355 }, { "epoch": 0.95, "learning_rate": 5e-05, "loss": 2.7708, "step": 2360 }, { "epoch": 0.96, "learning_rate": 5e-05, "loss": 2.6665, "step": 2365 }, { "epoch": 0.96, "learning_rate": 5e-05, "loss": 2.5748, "step": 2370 }, { "epoch": 0.96, "learning_rate": 5e-05, "loss": 2.6752, "step": 2375 }, { "epoch": 0.96, "learning_rate": 5e-05, "loss": 2.5538, "step": 2380 }, { "epoch": 0.96, "learning_rate": 5e-05, "loss": 2.7701, "step": 2385 }, { "epoch": 0.97, "learning_rate": 5e-05, "loss": 2.7305, "step": 2390 }, { "epoch": 0.97, "learning_rate": 5e-05, "loss": 2.7094, "step": 2395 }, { "epoch": 0.97, "learning_rate": 5e-05, "loss": 2.9193, "step": 2400 }, { "epoch": 0.97, "learning_rate": 5e-05, "loss": 2.6647, "step": 2405 }, { "epoch": 0.98, "learning_rate": 5e-05, "loss": 2.6341, "step": 2410 }, { "epoch": 0.98, "learning_rate": 5e-05, "loss": 2.4908, "step": 2415 }, { "epoch": 0.98, "learning_rate": 5e-05, "loss": 2.5294, "step": 2420 }, { "epoch": 0.98, "learning_rate": 5e-05, "loss": 2.461, "step": 2425 }, { "epoch": 0.98, "learning_rate": 5e-05, "loss": 2.6605, "step": 2430 }, { "epoch": 0.99, "learning_rate": 5e-05, "loss": 2.5215, "step": 2435 }, { "epoch": 0.99, "learning_rate": 5e-05, "loss": 2.539, "step": 2440 }, { "epoch": 0.99, "learning_rate": 5e-05, "loss": 2.644, "step": 2445 }, { "epoch": 0.99, "learning_rate": 5e-05, "loss": 2.5958, "step": 2450 }, { "epoch": 0.99, "learning_rate": 5e-05, "loss": 2.528, "step": 2455 }, { "epoch": 1.0, "learning_rate": 5e-05, "loss": 2.6468, "step": 2460 }, { "epoch": 1.0, "learning_rate": 5e-05, "loss": 2.6027, "step": 2465 }, { "epoch": 1.0, "learning_rate": 5e-05, "loss": 2.4891, "step": 2470 }, { "epoch": 1.0, "learning_rate": 5e-05, "loss": 2.7302, "step": 2475 }, { "epoch": 1.0, "learning_rate": 5e-05, "loss": 2.2587, "step": 2480 }, { "epoch": 1.01, "learning_rate": 5e-05, "loss": 2.3727, "step": 2485 }, { "epoch": 1.01, "learning_rate": 5e-05, "loss": 2.2232, "step": 2490 }, { "epoch": 1.01, "learning_rate": 5e-05, "loss": 2.2597, "step": 2495 }, { "epoch": 1.01, "learning_rate": 5e-05, "loss": 2.0625, "step": 2500 }, { "epoch": 1.01, "learning_rate": 5e-05, "loss": 2.2327, "step": 2505 }, { "epoch": 1.02, "learning_rate": 5e-05, "loss": 2.0604, "step": 2510 }, { "epoch": 1.02, "learning_rate": 5e-05, "loss": 2.0927, "step": 2515 }, { "epoch": 1.02, "learning_rate": 5e-05, "loss": 2.2176, "step": 2520 }, { "epoch": 1.02, "learning_rate": 5e-05, "loss": 2.2972, "step": 2525 }, { "epoch": 1.02, "learning_rate": 5e-05, "loss": 2.1105, "step": 2530 }, { "epoch": 1.03, "learning_rate": 5e-05, "loss": 2.1953, "step": 2535 }, { "epoch": 1.03, "learning_rate": 5e-05, "loss": 2.4188, "step": 2540 }, { "epoch": 1.03, "learning_rate": 5e-05, "loss": 2.2246, "step": 2545 }, { "epoch": 1.03, "learning_rate": 5e-05, "loss": 2.3411, "step": 2550 }, { "epoch": 1.03, "learning_rate": 5e-05, "loss": 2.2848, "step": 2555 }, { "epoch": 1.04, "learning_rate": 5e-05, "loss": 2.0621, "step": 2560 }, { "epoch": 1.04, "learning_rate": 5e-05, "loss": 2.3601, "step": 2565 }, { "epoch": 1.04, "learning_rate": 5e-05, "loss": 2.0717, "step": 2570 }, { "epoch": 1.04, "learning_rate": 5e-05, "loss": 2.0666, "step": 2575 }, { "epoch": 1.04, "learning_rate": 5e-05, "loss": 2.157, "step": 2580 }, { "epoch": 1.05, "learning_rate": 5e-05, "loss": 2.2869, "step": 2585 }, { "epoch": 1.05, "learning_rate": 5e-05, "loss": 2.2177, "step": 2590 }, { "epoch": 1.05, "learning_rate": 5e-05, "loss": 2.2843, "step": 2595 }, { "epoch": 1.05, "learning_rate": 5e-05, "loss": 2.1964, "step": 2600 }, { "epoch": 1.05, "learning_rate": 5e-05, "loss": 2.2299, "step": 2605 }, { "epoch": 1.06, "learning_rate": 5e-05, "loss": 2.1429, "step": 2610 }, { "epoch": 1.06, "learning_rate": 5e-05, "loss": 2.1783, "step": 2615 }, { "epoch": 1.06, "learning_rate": 5e-05, "loss": 2.0898, "step": 2620 }, { "epoch": 1.06, "learning_rate": 5e-05, "loss": 2.2693, "step": 2625 }, { "epoch": 1.06, "learning_rate": 5e-05, "loss": 2.2216, "step": 2630 }, { "epoch": 1.07, "learning_rate": 5e-05, "loss": 2.1387, "step": 2635 }, { "epoch": 1.07, "learning_rate": 5e-05, "loss": 2.2065, "step": 2640 }, { "epoch": 1.07, "learning_rate": 5e-05, "loss": 2.0899, "step": 2645 }, { "epoch": 1.07, "learning_rate": 5e-05, "loss": 2.1705, "step": 2650 }, { "epoch": 1.07, "learning_rate": 5e-05, "loss": 2.1863, "step": 2655 }, { "epoch": 1.08, "learning_rate": 5e-05, "loss": 2.2563, "step": 2660 }, { "epoch": 1.08, "learning_rate": 5e-05, "loss": 2.2669, "step": 2665 }, { "epoch": 1.08, "learning_rate": 5e-05, "loss": 2.2752, "step": 2670 }, { "epoch": 1.08, "learning_rate": 5e-05, "loss": 2.3229, "step": 2675 }, { "epoch": 1.08, "learning_rate": 5e-05, "loss": 2.1955, "step": 2680 }, { "epoch": 1.09, "learning_rate": 5e-05, "loss": 2.1868, "step": 2685 }, { "epoch": 1.09, "learning_rate": 5e-05, "loss": 2.3266, "step": 2690 }, { "epoch": 1.09, "learning_rate": 5e-05, "loss": 2.1649, "step": 2695 }, { "epoch": 1.09, "learning_rate": 5e-05, "loss": 2.1767, "step": 2700 }, { "epoch": 1.09, "learning_rate": 5e-05, "loss": 2.2341, "step": 2705 }, { "epoch": 1.1, "learning_rate": 5e-05, "loss": 2.2082, "step": 2710 }, { "epoch": 1.1, "learning_rate": 5e-05, "loss": 2.1698, "step": 2715 }, { "epoch": 1.1, "learning_rate": 5e-05, "loss": 2.1964, "step": 2720 }, { "epoch": 1.1, "learning_rate": 5e-05, "loss": 2.2899, "step": 2725 }, { "epoch": 1.1, "learning_rate": 5e-05, "loss": 2.2172, "step": 2730 }, { "epoch": 1.11, "learning_rate": 5e-05, "loss": 2.1917, "step": 2735 }, { "epoch": 1.11, "learning_rate": 5e-05, "loss": 2.2461, "step": 2740 }, { "epoch": 1.11, "learning_rate": 5e-05, "loss": 2.143, "step": 2745 }, { "epoch": 1.11, "learning_rate": 5e-05, "loss": 2.1732, "step": 2750 }, { "epoch": 1.11, "learning_rate": 5e-05, "loss": 2.2312, "step": 2755 }, { "epoch": 1.12, "learning_rate": 5e-05, "loss": 2.1724, "step": 2760 }, { "epoch": 1.12, "learning_rate": 5e-05, "loss": 2.1024, "step": 2765 }, { "epoch": 1.12, "learning_rate": 5e-05, "loss": 2.234, "step": 2770 }, { "epoch": 1.12, "learning_rate": 5e-05, "loss": 2.2232, "step": 2775 }, { "epoch": 1.13, "learning_rate": 5e-05, "loss": 2.1624, "step": 2780 }, { "epoch": 1.13, "learning_rate": 5e-05, "loss": 2.1253, "step": 2785 }, { "epoch": 1.13, "learning_rate": 5e-05, "loss": 2.2269, "step": 2790 }, { "epoch": 1.13, "learning_rate": 5e-05, "loss": 2.3, "step": 2795 }, { "epoch": 1.13, "learning_rate": 5e-05, "loss": 2.2228, "step": 2800 }, { "epoch": 1.14, "learning_rate": 5e-05, "loss": 2.2687, "step": 2805 }, { "epoch": 1.14, "learning_rate": 5e-05, "loss": 2.0817, "step": 2810 }, { "epoch": 1.14, "learning_rate": 5e-05, "loss": 2.3083, "step": 2815 }, { "epoch": 1.14, "learning_rate": 5e-05, "loss": 2.1453, "step": 2820 }, { "epoch": 1.14, "learning_rate": 5e-05, "loss": 2.229, "step": 2825 }, { "epoch": 1.15, "learning_rate": 5e-05, "loss": 2.0037, "step": 2830 }, { "epoch": 1.15, "learning_rate": 5e-05, "loss": 2.2472, "step": 2835 }, { "epoch": 1.15, "learning_rate": 5e-05, "loss": 2.2924, "step": 2840 }, { "epoch": 1.15, "learning_rate": 5e-05, "loss": 2.3999, "step": 2845 }, { "epoch": 1.15, "learning_rate": 5e-05, "loss": 2.2672, "step": 2850 }, { "epoch": 1.16, "learning_rate": 5e-05, "loss": 2.2466, "step": 2855 }, { "epoch": 1.16, "learning_rate": 5e-05, "loss": 2.3562, "step": 2860 }, { "epoch": 1.16, "learning_rate": 5e-05, "loss": 2.3754, "step": 2865 }, { "epoch": 1.16, "learning_rate": 5e-05, "loss": 2.1568, "step": 2870 }, { "epoch": 1.16, "learning_rate": 5e-05, "loss": 2.2053, "step": 2875 }, { "epoch": 1.17, "learning_rate": 5e-05, "loss": 2.2557, "step": 2880 }, { "epoch": 1.17, "learning_rate": 5e-05, "loss": 2.2292, "step": 2885 }, { "epoch": 1.17, "learning_rate": 5e-05, "loss": 2.1612, "step": 2890 }, { "epoch": 1.17, "learning_rate": 5e-05, "loss": 2.1222, "step": 2895 }, { "epoch": 1.17, "learning_rate": 5e-05, "loss": 2.3077, "step": 2900 }, { "epoch": 1.18, "learning_rate": 5e-05, "loss": 2.2526, "step": 2905 }, { "epoch": 1.18, "learning_rate": 5e-05, "loss": 2.3626, "step": 2910 }, { "epoch": 1.18, "learning_rate": 5e-05, "loss": 2.2218, "step": 2915 }, { "epoch": 1.18, "learning_rate": 5e-05, "loss": 2.2083, "step": 2920 }, { "epoch": 1.18, "learning_rate": 5e-05, "loss": 2.156, "step": 2925 }, { "epoch": 1.19, "learning_rate": 5e-05, "loss": 2.2767, "step": 2930 }, { "epoch": 1.19, "learning_rate": 5e-05, "loss": 2.289, "step": 2935 }, { "epoch": 1.19, "learning_rate": 5e-05, "loss": 2.2848, "step": 2940 }, { "epoch": 1.19, "learning_rate": 5e-05, "loss": 2.276, "step": 2945 }, { "epoch": 1.19, "learning_rate": 5e-05, "loss": 2.2919, "step": 2950 }, { "epoch": 1.2, "learning_rate": 5e-05, "loss": 2.2985, "step": 2955 }, { "epoch": 1.2, "learning_rate": 5e-05, "loss": 2.2449, "step": 2960 }, { "epoch": 1.2, "learning_rate": 5e-05, "loss": 2.2573, "step": 2965 }, { "epoch": 1.2, "learning_rate": 5e-05, "loss": 2.2188, "step": 2970 }, { "epoch": 1.2, "learning_rate": 5e-05, "loss": 2.3185, "step": 2975 }, { "epoch": 1.21, "learning_rate": 5e-05, "loss": 2.1787, "step": 2980 }, { "epoch": 1.21, "learning_rate": 5e-05, "loss": 2.311, "step": 2985 }, { "epoch": 1.21, "learning_rate": 5e-05, "loss": 2.2696, "step": 2990 }, { "epoch": 1.21, "learning_rate": 5e-05, "loss": 2.1472, "step": 2995 }, { "epoch": 1.21, "learning_rate": 5e-05, "loss": 2.371, "step": 3000 }, { "epoch": 1.22, "learning_rate": 5e-05, "loss": 2.2385, "step": 3005 }, { "epoch": 1.22, "learning_rate": 5e-05, "loss": 2.213, "step": 3010 }, { "epoch": 1.22, "learning_rate": 5e-05, "loss": 2.2276, "step": 3015 }, { "epoch": 1.22, "learning_rate": 5e-05, "loss": 2.3147, "step": 3020 }, { "epoch": 1.22, "learning_rate": 5e-05, "loss": 2.2818, "step": 3025 }, { "epoch": 1.23, "learning_rate": 5e-05, "loss": 2.3763, "step": 3030 }, { "epoch": 1.23, "learning_rate": 5e-05, "loss": 2.2653, "step": 3035 }, { "epoch": 1.23, "learning_rate": 5e-05, "loss": 2.1061, "step": 3040 }, { "epoch": 1.23, "learning_rate": 5e-05, "loss": 2.1532, "step": 3045 }, { "epoch": 1.23, "learning_rate": 5e-05, "loss": 2.2831, "step": 3050 }, { "epoch": 1.24, "learning_rate": 5e-05, "loss": 2.284, "step": 3055 }, { "epoch": 1.24, "learning_rate": 5e-05, "loss": 2.316, "step": 3060 }, { "epoch": 1.24, "learning_rate": 5e-05, "loss": 2.2093, "step": 3065 }, { "epoch": 1.24, "learning_rate": 5e-05, "loss": 2.3092, "step": 3070 }, { "epoch": 1.24, "learning_rate": 5e-05, "loss": 2.121, "step": 3075 }, { "epoch": 1.25, "learning_rate": 5e-05, "loss": 2.1213, "step": 3080 }, { "epoch": 1.25, "learning_rate": 5e-05, "loss": 2.2853, "step": 3085 }, { "epoch": 1.25, "learning_rate": 5e-05, "loss": 2.2103, "step": 3090 }, { "epoch": 1.25, "learning_rate": 5e-05, "loss": 2.1558, "step": 3095 }, { "epoch": 1.25, "learning_rate": 5e-05, "loss": 2.3642, "step": 3100 }, { "epoch": 1.26, "learning_rate": 5e-05, "loss": 2.1975, "step": 3105 }, { "epoch": 1.26, "learning_rate": 5e-05, "loss": 2.3263, "step": 3110 }, { "epoch": 1.26, "learning_rate": 5e-05, "loss": 2.2045, "step": 3115 }, { "epoch": 1.26, "learning_rate": 5e-05, "loss": 2.2657, "step": 3120 }, { "epoch": 1.26, "learning_rate": 5e-05, "loss": 2.1824, "step": 3125 }, { "epoch": 1.27, "learning_rate": 5e-05, "loss": 2.2344, "step": 3130 }, { "epoch": 1.27, "learning_rate": 5e-05, "loss": 2.3884, "step": 3135 }, { "epoch": 1.27, "learning_rate": 5e-05, "loss": 2.1757, "step": 3140 }, { "epoch": 1.27, "learning_rate": 5e-05, "loss": 2.2789, "step": 3145 }, { "epoch": 1.27, "learning_rate": 5e-05, "loss": 2.3038, "step": 3150 }, { "epoch": 1.28, "learning_rate": 5e-05, "loss": 2.2446, "step": 3155 }, { "epoch": 1.28, "learning_rate": 5e-05, "loss": 2.1975, "step": 3160 }, { "epoch": 1.28, "learning_rate": 5e-05, "loss": 2.4234, "step": 3165 }, { "epoch": 1.28, "learning_rate": 5e-05, "loss": 2.2307, "step": 3170 }, { "epoch": 1.28, "learning_rate": 5e-05, "loss": 2.1857, "step": 3175 }, { "epoch": 1.29, "learning_rate": 5e-05, "loss": 2.2006, "step": 3180 }, { "epoch": 1.29, "learning_rate": 5e-05, "loss": 2.2475, "step": 3185 }, { "epoch": 1.29, "learning_rate": 5e-05, "loss": 2.3465, "step": 3190 }, { "epoch": 1.29, "learning_rate": 5e-05, "loss": 2.1186, "step": 3195 }, { "epoch": 1.29, "learning_rate": 5e-05, "loss": 2.2803, "step": 3200 } ], "max_steps": 4942, "num_train_epochs": 2, "total_flos": 3.8026776806424576e+17, "trial_name": null, "trial_params": null }