{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.5242463958060288, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 9.999e-06, "loss": 6.1641, "step": 5 }, { "epoch": 0.0, "learning_rate": 9.994000000000001e-06, "loss": 5.275, "step": 10 }, { "epoch": 0.0, "learning_rate": 9.989e-06, "loss": 4.8629, "step": 15 }, { "epoch": 0.01, "learning_rate": 9.984e-06, "loss": 4.8023, "step": 20 }, { "epoch": 0.01, "learning_rate": 9.979e-06, "loss": 4.7687, "step": 25 }, { "epoch": 0.01, "learning_rate": 9.974e-06, "loss": 4.7188, "step": 30 }, { "epoch": 0.01, "learning_rate": 9.969e-06, "loss": 4.6258, "step": 35 }, { "epoch": 0.01, "learning_rate": 9.964e-06, "loss": 4.6254, "step": 40 }, { "epoch": 0.01, "learning_rate": 9.959e-06, "loss": 4.5867, "step": 45 }, { "epoch": 0.01, "learning_rate": 9.954e-06, "loss": 4.6207, "step": 50 }, { "epoch": 0.01, "learning_rate": 9.949e-06, "loss": 4.6086, "step": 55 }, { "epoch": 0.02, "learning_rate": 9.944e-06, "loss": 4.5559, "step": 60 }, { "epoch": 0.02, "learning_rate": 9.939000000000001e-06, "loss": 4.5836, "step": 65 }, { "epoch": 0.02, "learning_rate": 9.934e-06, "loss": 4.5121, "step": 70 }, { "epoch": 0.02, "learning_rate": 9.929000000000001e-06, "loss": 4.5234, "step": 75 }, { "epoch": 0.02, "learning_rate": 9.924e-06, "loss": 4.4992, "step": 80 }, { "epoch": 0.02, "learning_rate": 9.919000000000001e-06, "loss": 4.4891, "step": 85 }, { "epoch": 0.02, "learning_rate": 9.914e-06, "loss": 4.4688, "step": 90 }, { "epoch": 0.02, "learning_rate": 9.909000000000001e-06, "loss": 4.4836, "step": 95 }, { "epoch": 0.03, "learning_rate": 9.904e-06, "loss": 4.4363, "step": 100 }, { "epoch": 0.03, "learning_rate": 9.899000000000001e-06, "loss": 4.4215, "step": 105 }, { "epoch": 0.03, "learning_rate": 9.894e-06, "loss": 4.4469, "step": 110 }, { "epoch": 0.03, "learning_rate": 9.889000000000001e-06, "loss": 4.3793, "step": 115 }, { "epoch": 0.03, "learning_rate": 9.884e-06, "loss": 4.3934, "step": 120 }, { "epoch": 0.03, "learning_rate": 9.879000000000001e-06, "loss": 4.3309, "step": 125 }, { "epoch": 0.03, "learning_rate": 9.874e-06, "loss": 4.3875, "step": 130 }, { "epoch": 0.04, "learning_rate": 9.869000000000002e-06, "loss": 4.4262, "step": 135 }, { "epoch": 0.04, "learning_rate": 9.864e-06, "loss": 4.4285, "step": 140 }, { "epoch": 0.04, "learning_rate": 9.859e-06, "loss": 4.3965, "step": 145 }, { "epoch": 0.04, "learning_rate": 9.854000000000001e-06, "loss": 4.3359, "step": 150 }, { "epoch": 0.04, "learning_rate": 9.849e-06, "loss": 4.4348, "step": 155 }, { "epoch": 0.04, "learning_rate": 9.844000000000001e-06, "loss": 4.3152, "step": 160 }, { "epoch": 0.04, "learning_rate": 9.839e-06, "loss": 4.3402, "step": 165 }, { "epoch": 0.04, "learning_rate": 9.834000000000001e-06, "loss": 4.316, "step": 170 }, { "epoch": 0.05, "learning_rate": 9.829e-06, "loss": 4.2969, "step": 175 }, { "epoch": 0.05, "learning_rate": 9.824000000000001e-06, "loss": 4.2867, "step": 180 }, { "epoch": 0.05, "learning_rate": 9.819e-06, "loss": 4.3902, "step": 185 }, { "epoch": 0.05, "learning_rate": 9.814000000000001e-06, "loss": 4.2656, "step": 190 }, { "epoch": 0.05, "learning_rate": 9.809e-06, "loss": 4.3797, "step": 195 }, { "epoch": 0.05, "learning_rate": 9.804000000000001e-06, "loss": 4.2863, "step": 200 }, { "epoch": 0.05, "learning_rate": 9.799e-06, "loss": 4.275, "step": 205 }, { "epoch": 0.06, "learning_rate": 9.794000000000001e-06, "loss": 4.2891, "step": 210 }, { "epoch": 0.06, "learning_rate": 9.789e-06, "loss": 4.3059, "step": 215 }, { "epoch": 0.06, "learning_rate": 9.784000000000002e-06, "loss": 4.3832, "step": 220 }, { "epoch": 0.06, "learning_rate": 9.779e-06, "loss": 4.3055, "step": 225 }, { "epoch": 0.06, "learning_rate": 9.774000000000002e-06, "loss": 4.3008, "step": 230 }, { "epoch": 0.06, "learning_rate": 9.769e-06, "loss": 4.25, "step": 235 }, { "epoch": 0.06, "learning_rate": 9.764000000000002e-06, "loss": 4.2676, "step": 240 }, { "epoch": 0.06, "learning_rate": 9.759000000000001e-06, "loss": 4.2422, "step": 245 }, { "epoch": 0.07, "learning_rate": 9.754000000000002e-06, "loss": 4.3137, "step": 250 }, { "epoch": 0.07, "learning_rate": 9.749000000000001e-06, "loss": 4.2555, "step": 255 }, { "epoch": 0.07, "learning_rate": 9.744000000000002e-06, "loss": 4.2637, "step": 260 }, { "epoch": 0.07, "learning_rate": 9.739000000000001e-06, "loss": 4.2996, "step": 265 }, { "epoch": 0.07, "learning_rate": 9.734000000000002e-06, "loss": 4.3176, "step": 270 }, { "epoch": 0.07, "learning_rate": 9.729000000000001e-06, "loss": 4.252, "step": 275 }, { "epoch": 0.07, "learning_rate": 9.724e-06, "loss": 4.266, "step": 280 }, { "epoch": 0.07, "learning_rate": 9.719000000000001e-06, "loss": 4.1748, "step": 285 }, { "epoch": 0.08, "learning_rate": 9.714e-06, "loss": 4.3008, "step": 290 }, { "epoch": 0.08, "learning_rate": 9.709000000000001e-06, "loss": 4.268, "step": 295 }, { "epoch": 0.08, "learning_rate": 9.704e-06, "loss": 4.2523, "step": 300 }, { "epoch": 0.08, "learning_rate": 9.699e-06, "loss": 4.3445, "step": 305 }, { "epoch": 0.08, "learning_rate": 9.694e-06, "loss": 4.298, "step": 310 }, { "epoch": 0.08, "learning_rate": 9.689e-06, "loss": 4.2809, "step": 315 }, { "epoch": 0.08, "learning_rate": 9.684e-06, "loss": 4.2234, "step": 320 }, { "epoch": 0.09, "learning_rate": 9.679e-06, "loss": 4.227, "step": 325 }, { "epoch": 0.09, "learning_rate": 9.674000000000001e-06, "loss": 4.2605, "step": 330 }, { "epoch": 0.09, "learning_rate": 9.669e-06, "loss": 4.2268, "step": 335 }, { "epoch": 0.09, "learning_rate": 9.664000000000001e-06, "loss": 4.1734, "step": 340 }, { "epoch": 0.09, "learning_rate": 9.659e-06, "loss": 4.2355, "step": 345 }, { "epoch": 0.09, "learning_rate": 9.654000000000001e-06, "loss": 4.2123, "step": 350 }, { "epoch": 0.09, "learning_rate": 9.649e-06, "loss": 4.1396, "step": 355 }, { "epoch": 0.09, "learning_rate": 9.644000000000001e-06, "loss": 4.1869, "step": 360 }, { "epoch": 0.1, "learning_rate": 9.639e-06, "loss": 4.2148, "step": 365 }, { "epoch": 0.1, "learning_rate": 9.634000000000001e-06, "loss": 4.1201, "step": 370 }, { "epoch": 0.1, "learning_rate": 9.629e-06, "loss": 4.1891, "step": 375 }, { "epoch": 0.1, "learning_rate": 9.624000000000001e-06, "loss": 4.118, "step": 380 }, { "epoch": 0.1, "learning_rate": 9.619e-06, "loss": 4.1359, "step": 385 }, { "epoch": 0.1, "learning_rate": 9.614000000000001e-06, "loss": 4.1469, "step": 390 }, { "epoch": 0.1, "learning_rate": 9.609e-06, "loss": 4.1941, "step": 395 }, { "epoch": 0.1, "learning_rate": 9.604000000000002e-06, "loss": 4.1219, "step": 400 }, { "epoch": 0.11, "learning_rate": 9.599e-06, "loss": 4.0951, "step": 405 }, { "epoch": 0.11, "learning_rate": 9.594000000000002e-06, "loss": 4.1387, "step": 410 }, { "epoch": 0.11, "learning_rate": 9.589000000000001e-06, "loss": 4.0973, "step": 415 }, { "epoch": 0.11, "learning_rate": 9.584000000000002e-06, "loss": 4.1551, "step": 420 }, { "epoch": 0.11, "learning_rate": 9.579000000000001e-06, "loss": 4.1883, "step": 425 }, { "epoch": 0.11, "learning_rate": 9.574000000000002e-06, "loss": 4.2137, "step": 430 }, { "epoch": 0.11, "learning_rate": 9.569000000000001e-06, "loss": 4.1748, "step": 435 }, { "epoch": 0.12, "learning_rate": 9.564e-06, "loss": 4.1664, "step": 440 }, { "epoch": 0.12, "learning_rate": 9.559000000000001e-06, "loss": 4.0812, "step": 445 }, { "epoch": 0.12, "learning_rate": 9.554e-06, "loss": 4.2215, "step": 450 }, { "epoch": 0.12, "learning_rate": 9.549000000000001e-06, "loss": 4.175, "step": 455 }, { "epoch": 0.12, "learning_rate": 9.544e-06, "loss": 4.0766, "step": 460 }, { "epoch": 0.12, "learning_rate": 9.539e-06, "loss": 4.0873, "step": 465 }, { "epoch": 0.12, "learning_rate": 9.534e-06, "loss": 4.1316, "step": 470 }, { "epoch": 0.12, "learning_rate": 9.529e-06, "loss": 4.108, "step": 475 }, { "epoch": 0.13, "learning_rate": 9.524e-06, "loss": 4.1691, "step": 480 }, { "epoch": 0.13, "learning_rate": 9.519e-06, "loss": 4.1154, "step": 485 }, { "epoch": 0.13, "learning_rate": 9.514e-06, "loss": 4.1035, "step": 490 }, { "epoch": 0.13, "learning_rate": 9.509e-06, "loss": 4.1293, "step": 495 }, { "epoch": 0.13, "learning_rate": 9.504e-06, "loss": 4.1734, "step": 500 }, { "epoch": 0.13, "learning_rate": 9.499e-06, "loss": 4.0504, "step": 505 }, { "epoch": 0.13, "learning_rate": 9.494000000000001e-06, "loss": 4.048, "step": 510 }, { "epoch": 0.13, "learning_rate": 9.489e-06, "loss": 4.1066, "step": 515 }, { "epoch": 0.14, "learning_rate": 9.484000000000001e-06, "loss": 4.1354, "step": 520 }, { "epoch": 0.14, "learning_rate": 9.479e-06, "loss": 4.1238, "step": 525 }, { "epoch": 0.14, "learning_rate": 9.474000000000001e-06, "loss": 4.1232, "step": 530 }, { "epoch": 0.14, "learning_rate": 9.469e-06, "loss": 4.1252, "step": 535 }, { "epoch": 0.14, "learning_rate": 9.464000000000001e-06, "loss": 4.0975, "step": 540 }, { "epoch": 0.14, "learning_rate": 9.459e-06, "loss": 4.1111, "step": 545 }, { "epoch": 0.14, "learning_rate": 9.454000000000001e-06, "loss": 4.0047, "step": 550 }, { "epoch": 0.15, "learning_rate": 9.449e-06, "loss": 4.0992, "step": 555 }, { "epoch": 0.15, "learning_rate": 9.444000000000001e-06, "loss": 4.0734, "step": 560 }, { "epoch": 0.15, "learning_rate": 9.439e-06, "loss": 4.0809, "step": 565 }, { "epoch": 0.15, "learning_rate": 9.434000000000001e-06, "loss": 4.101, "step": 570 }, { "epoch": 0.15, "learning_rate": 9.429e-06, "loss": 4.0662, "step": 575 }, { "epoch": 0.15, "learning_rate": 9.424000000000002e-06, "loss": 4.1041, "step": 580 }, { "epoch": 0.15, "learning_rate": 9.419e-06, "loss": 4.0564, "step": 585 }, { "epoch": 0.15, "learning_rate": 9.414000000000002e-06, "loss": 4.0986, "step": 590 }, { "epoch": 0.16, "learning_rate": 9.409000000000001e-06, "loss": 4.0309, "step": 595 }, { "epoch": 0.16, "learning_rate": 9.404e-06, "loss": 4.0605, "step": 600 }, { "epoch": 0.16, "learning_rate": 9.399000000000001e-06, "loss": 4.0857, "step": 605 }, { "epoch": 0.16, "learning_rate": 9.394e-06, "loss": 4.1307, "step": 610 }, { "epoch": 0.16, "learning_rate": 9.389000000000001e-06, "loss": 4.06, "step": 615 }, { "epoch": 0.16, "learning_rate": 9.384e-06, "loss": 4.0039, "step": 620 }, { "epoch": 0.16, "learning_rate": 9.379000000000001e-06, "loss": 4.0258, "step": 625 }, { "epoch": 0.17, "learning_rate": 9.374e-06, "loss": 4.0738, "step": 630 }, { "epoch": 0.17, "learning_rate": 9.369e-06, "loss": 4.0551, "step": 635 }, { "epoch": 0.17, "learning_rate": 9.364e-06, "loss": 4.0518, "step": 640 }, { "epoch": 0.17, "learning_rate": 9.359e-06, "loss": 4.0584, "step": 645 }, { "epoch": 0.17, "learning_rate": 9.354e-06, "loss": 4.1109, "step": 650 }, { "epoch": 0.17, "learning_rate": 9.349e-06, "loss": 3.9898, "step": 655 }, { "epoch": 0.17, "learning_rate": 9.344e-06, "loss": 4.1406, "step": 660 }, { "epoch": 0.17, "learning_rate": 9.339e-06, "loss": 4.0725, "step": 665 }, { "epoch": 0.18, "learning_rate": 9.334e-06, "loss": 4.0207, "step": 670 }, { "epoch": 0.18, "learning_rate": 9.329e-06, "loss": 4.0826, "step": 675 }, { "epoch": 0.18, "learning_rate": 9.324000000000001e-06, "loss": 4.1059, "step": 680 }, { "epoch": 0.18, "learning_rate": 9.319e-06, "loss": 3.9967, "step": 685 }, { "epoch": 0.18, "learning_rate": 9.314000000000001e-06, "loss": 4.0328, "step": 690 }, { "epoch": 0.18, "learning_rate": 9.309e-06, "loss": 3.9918, "step": 695 }, { "epoch": 0.18, "learning_rate": 9.304000000000001e-06, "loss": 4.0434, "step": 700 }, { "epoch": 0.18, "learning_rate": 9.299e-06, "loss": 3.9584, "step": 705 }, { "epoch": 0.19, "learning_rate": 9.294000000000001e-06, "loss": 4.0551, "step": 710 }, { "epoch": 0.19, "learning_rate": 9.289e-06, "loss": 3.9684, "step": 715 }, { "epoch": 0.19, "learning_rate": 9.284000000000001e-06, "loss": 4.0221, "step": 720 }, { "epoch": 0.19, "learning_rate": 9.279e-06, "loss": 3.985, "step": 725 }, { "epoch": 0.19, "learning_rate": 9.274000000000001e-06, "loss": 4.0648, "step": 730 }, { "epoch": 0.19, "learning_rate": 9.269e-06, "loss": 4.0109, "step": 735 }, { "epoch": 0.19, "learning_rate": 9.264000000000001e-06, "loss": 3.9553, "step": 740 }, { "epoch": 0.2, "learning_rate": 9.259e-06, "loss": 3.9904, "step": 745 }, { "epoch": 0.2, "learning_rate": 9.254000000000002e-06, "loss": 3.9719, "step": 750 }, { "epoch": 0.2, "learning_rate": 9.249e-06, "loss": 3.8973, "step": 755 }, { "epoch": 0.2, "learning_rate": 9.244e-06, "loss": 3.9936, "step": 760 }, { "epoch": 0.2, "learning_rate": 9.239e-06, "loss": 3.9498, "step": 765 }, { "epoch": 0.2, "learning_rate": 9.234e-06, "loss": 3.9557, "step": 770 }, { "epoch": 0.2, "learning_rate": 9.229000000000001e-06, "loss": 3.9266, "step": 775 }, { "epoch": 0.2, "learning_rate": 9.224e-06, "loss": 3.9543, "step": 780 }, { "epoch": 0.21, "learning_rate": 9.219000000000001e-06, "loss": 3.9732, "step": 785 }, { "epoch": 0.21, "learning_rate": 9.214e-06, "loss": 3.9762, "step": 790 }, { "epoch": 0.21, "learning_rate": 9.209000000000001e-06, "loss": 4.0695, "step": 795 }, { "epoch": 0.21, "learning_rate": 9.204e-06, "loss": 3.9869, "step": 800 }, { "epoch": 0.21, "learning_rate": 9.199000000000001e-06, "loss": 4.0061, "step": 805 }, { "epoch": 0.21, "learning_rate": 9.194e-06, "loss": 4.0121, "step": 810 }, { "epoch": 0.21, "learning_rate": 9.189000000000001e-06, "loss": 3.9105, "step": 815 }, { "epoch": 0.21, "learning_rate": 9.184e-06, "loss": 3.8631, "step": 820 }, { "epoch": 0.22, "learning_rate": 9.179000000000001e-06, "loss": 3.9498, "step": 825 }, { "epoch": 0.22, "learning_rate": 9.174e-06, "loss": 3.9451, "step": 830 }, { "epoch": 0.22, "learning_rate": 9.169000000000001e-06, "loss": 3.951, "step": 835 }, { "epoch": 0.22, "learning_rate": 9.164e-06, "loss": 3.9297, "step": 840 }, { "epoch": 0.22, "learning_rate": 9.159000000000002e-06, "loss": 3.9771, "step": 845 }, { "epoch": 0.22, "learning_rate": 9.154e-06, "loss": 4.0842, "step": 850 }, { "epoch": 0.22, "learning_rate": 9.149000000000002e-06, "loss": 3.8865, "step": 855 }, { "epoch": 0.23, "learning_rate": 9.144000000000001e-06, "loss": 3.9312, "step": 860 }, { "epoch": 0.23, "learning_rate": 9.139000000000002e-06, "loss": 3.8875, "step": 865 }, { "epoch": 0.23, "learning_rate": 9.134000000000001e-06, "loss": 4.0389, "step": 870 }, { "epoch": 0.23, "learning_rate": 9.129000000000002e-06, "loss": 3.9568, "step": 875 }, { "epoch": 0.23, "learning_rate": 9.124000000000001e-06, "loss": 3.9541, "step": 880 }, { "epoch": 0.23, "learning_rate": 9.119000000000002e-06, "loss": 3.9092, "step": 885 }, { "epoch": 0.23, "learning_rate": 9.114000000000001e-06, "loss": 3.9404, "step": 890 }, { "epoch": 0.23, "learning_rate": 9.109e-06, "loss": 3.9371, "step": 895 }, { "epoch": 0.24, "learning_rate": 9.104000000000001e-06, "loss": 3.9477, "step": 900 }, { "epoch": 0.24, "learning_rate": 9.099e-06, "loss": 3.9469, "step": 905 }, { "epoch": 0.24, "learning_rate": 9.094000000000001e-06, "loss": 3.9191, "step": 910 }, { "epoch": 0.24, "learning_rate": 9.089e-06, "loss": 3.9527, "step": 915 }, { "epoch": 0.24, "learning_rate": 9.084e-06, "loss": 3.8934, "step": 920 }, { "epoch": 0.24, "learning_rate": 9.079e-06, "loss": 3.9773, "step": 925 }, { "epoch": 0.24, "learning_rate": 9.074e-06, "loss": 3.823, "step": 930 }, { "epoch": 0.25, "learning_rate": 9.069e-06, "loss": 3.8857, "step": 935 }, { "epoch": 0.25, "learning_rate": 9.064e-06, "loss": 3.9092, "step": 940 }, { "epoch": 0.25, "learning_rate": 9.059000000000001e-06, "loss": 3.8338, "step": 945 }, { "epoch": 0.25, "learning_rate": 9.054e-06, "loss": 3.9457, "step": 950 }, { "epoch": 0.25, "learning_rate": 9.049000000000001e-06, "loss": 3.8869, "step": 955 }, { "epoch": 0.25, "learning_rate": 9.044e-06, "loss": 3.8594, "step": 960 }, { "epoch": 0.25, "learning_rate": 9.039000000000001e-06, "loss": 4.0318, "step": 965 }, { "epoch": 0.25, "learning_rate": 9.034e-06, "loss": 3.8469, "step": 970 }, { "epoch": 0.26, "learning_rate": 9.029000000000001e-06, "loss": 3.8367, "step": 975 }, { "epoch": 0.26, "learning_rate": 9.024e-06, "loss": 3.8814, "step": 980 }, { "epoch": 0.26, "learning_rate": 9.019000000000001e-06, "loss": 3.8818, "step": 985 }, { "epoch": 0.26, "learning_rate": 9.014e-06, "loss": 3.908, "step": 990 }, { "epoch": 0.26, "learning_rate": 9.009000000000001e-06, "loss": 3.9705, "step": 995 }, { "epoch": 0.26, "learning_rate": 9.004e-06, "loss": 3.9086, "step": 1000 }, { "epoch": 0.26, "learning_rate": 8.999000000000001e-06, "loss": 3.9795, "step": 1005 }, { "epoch": 0.26, "learning_rate": 8.994e-06, "loss": 3.8629, "step": 1010 }, { "epoch": 0.27, "learning_rate": 8.989000000000002e-06, "loss": 3.8287, "step": 1015 }, { "epoch": 0.27, "learning_rate": 8.984e-06, "loss": 3.8717, "step": 1020 }, { "epoch": 0.27, "learning_rate": 8.979000000000002e-06, "loss": 3.8865, "step": 1025 }, { "epoch": 0.27, "learning_rate": 8.974e-06, "loss": 3.8344, "step": 1030 }, { "epoch": 0.27, "learning_rate": 8.969000000000002e-06, "loss": 3.9541, "step": 1035 }, { "epoch": 0.27, "learning_rate": 8.964000000000001e-06, "loss": 3.8318, "step": 1040 }, { "epoch": 0.27, "learning_rate": 8.959000000000002e-06, "loss": 3.9328, "step": 1045 }, { "epoch": 0.28, "learning_rate": 8.954000000000001e-06, "loss": 3.8621, "step": 1050 }, { "epoch": 0.28, "learning_rate": 8.949e-06, "loss": 3.7871, "step": 1055 }, { "epoch": 0.28, "learning_rate": 8.944000000000001e-06, "loss": 3.8988, "step": 1060 }, { "epoch": 0.28, "learning_rate": 8.939e-06, "loss": 3.8232, "step": 1065 }, { "epoch": 0.28, "learning_rate": 8.934000000000001e-06, "loss": 3.8816, "step": 1070 }, { "epoch": 0.28, "learning_rate": 8.929e-06, "loss": 3.8775, "step": 1075 }, { "epoch": 0.28, "learning_rate": 8.924e-06, "loss": 3.8115, "step": 1080 }, { "epoch": 0.28, "learning_rate": 8.919e-06, "loss": 3.7941, "step": 1085 }, { "epoch": 0.29, "learning_rate": 8.914e-06, "loss": 3.8678, "step": 1090 }, { "epoch": 0.29, "learning_rate": 8.909e-06, "loss": 3.8215, "step": 1095 }, { "epoch": 0.29, "learning_rate": 8.904e-06, "loss": 3.79, "step": 1100 }, { "epoch": 0.29, "learning_rate": 8.899e-06, "loss": 3.8092, "step": 1105 }, { "epoch": 0.29, "learning_rate": 8.894e-06, "loss": 3.79, "step": 1110 }, { "epoch": 0.29, "learning_rate": 8.889e-06, "loss": 3.8162, "step": 1115 }, { "epoch": 0.29, "learning_rate": 8.884e-06, "loss": 3.8568, "step": 1120 }, { "epoch": 0.29, "learning_rate": 8.879000000000001e-06, "loss": 3.867, "step": 1125 }, { "epoch": 0.3, "learning_rate": 8.874e-06, "loss": 3.7988, "step": 1130 }, { "epoch": 0.3, "learning_rate": 8.869000000000001e-06, "loss": 3.8088, "step": 1135 }, { "epoch": 0.3, "learning_rate": 8.864e-06, "loss": 3.7711, "step": 1140 }, { "epoch": 0.3, "learning_rate": 8.859000000000001e-06, "loss": 3.7242, "step": 1145 }, { "epoch": 0.3, "learning_rate": 8.854e-06, "loss": 3.8512, "step": 1150 }, { "epoch": 0.3, "learning_rate": 8.849000000000001e-06, "loss": 3.8945, "step": 1155 }, { "epoch": 0.3, "learning_rate": 8.844e-06, "loss": 3.8687, "step": 1160 }, { "epoch": 0.31, "learning_rate": 8.839000000000001e-06, "loss": 3.7533, "step": 1165 }, { "epoch": 0.31, "learning_rate": 8.834e-06, "loss": 3.8707, "step": 1170 }, { "epoch": 0.31, "learning_rate": 8.829000000000001e-06, "loss": 3.8086, "step": 1175 }, { "epoch": 0.31, "learning_rate": 8.824e-06, "loss": 3.7467, "step": 1180 }, { "epoch": 0.31, "learning_rate": 8.819000000000001e-06, "loss": 3.8078, "step": 1185 }, { "epoch": 0.31, "learning_rate": 8.814e-06, "loss": 3.7465, "step": 1190 }, { "epoch": 0.31, "learning_rate": 8.809000000000002e-06, "loss": 3.7955, "step": 1195 }, { "epoch": 0.31, "learning_rate": 8.804e-06, "loss": 3.8281, "step": 1200 }, { "epoch": 0.32, "learning_rate": 8.799000000000002e-06, "loss": 3.8035, "step": 1205 }, { "epoch": 0.32, "learning_rate": 8.794e-06, "loss": 3.7963, "step": 1210 }, { "epoch": 0.32, "learning_rate": 8.789e-06, "loss": 3.8061, "step": 1215 }, { "epoch": 0.32, "learning_rate": 8.784000000000001e-06, "loss": 3.777, "step": 1220 }, { "epoch": 0.32, "learning_rate": 8.779e-06, "loss": 3.7582, "step": 1225 }, { "epoch": 0.32, "learning_rate": 8.774000000000001e-06, "loss": 3.7725, "step": 1230 }, { "epoch": 0.32, "learning_rate": 8.769e-06, "loss": 3.7516, "step": 1235 }, { "epoch": 0.33, "learning_rate": 8.764e-06, "loss": 3.8543, "step": 1240 }, { "epoch": 0.33, "learning_rate": 8.759e-06, "loss": 3.8566, "step": 1245 }, { "epoch": 0.33, "learning_rate": 8.754e-06, "loss": 3.7695, "step": 1250 }, { "epoch": 0.33, "learning_rate": 8.749e-06, "loss": 3.8271, "step": 1255 }, { "epoch": 0.33, "learning_rate": 8.744e-06, "loss": 3.773, "step": 1260 }, { "epoch": 0.33, "learning_rate": 8.739e-06, "loss": 3.7283, "step": 1265 }, { "epoch": 0.33, "learning_rate": 8.734e-06, "loss": 3.7822, "step": 1270 }, { "epoch": 0.33, "learning_rate": 8.729e-06, "loss": 3.7816, "step": 1275 }, { "epoch": 0.34, "learning_rate": 8.724e-06, "loss": 3.751, "step": 1280 }, { "epoch": 0.34, "learning_rate": 8.719e-06, "loss": 3.8271, "step": 1285 }, { "epoch": 0.34, "learning_rate": 8.714e-06, "loss": 3.7195, "step": 1290 }, { "epoch": 0.34, "learning_rate": 8.709e-06, "loss": 3.7584, "step": 1295 }, { "epoch": 0.34, "learning_rate": 8.704e-06, "loss": 3.7889, "step": 1300 }, { "epoch": 0.34, "learning_rate": 8.699000000000001e-06, "loss": 3.8529, "step": 1305 }, { "epoch": 0.34, "learning_rate": 8.694e-06, "loss": 3.8166, "step": 1310 }, { "epoch": 0.34, "learning_rate": 8.689000000000001e-06, "loss": 3.7484, "step": 1315 }, { "epoch": 0.35, "learning_rate": 8.684e-06, "loss": 3.8014, "step": 1320 }, { "epoch": 0.35, "learning_rate": 8.679000000000001e-06, "loss": 3.7658, "step": 1325 }, { "epoch": 0.35, "learning_rate": 8.674e-06, "loss": 3.7834, "step": 1330 }, { "epoch": 0.35, "learning_rate": 8.669000000000001e-06, "loss": 3.7973, "step": 1335 }, { "epoch": 0.35, "learning_rate": 8.664e-06, "loss": 3.7607, "step": 1340 }, { "epoch": 0.35, "learning_rate": 8.659000000000001e-06, "loss": 3.7381, "step": 1345 }, { "epoch": 0.35, "learning_rate": 8.654e-06, "loss": 3.751, "step": 1350 }, { "epoch": 0.36, "learning_rate": 8.649000000000001e-06, "loss": 3.7201, "step": 1355 }, { "epoch": 0.36, "learning_rate": 8.644e-06, "loss": 3.7969, "step": 1360 }, { "epoch": 0.36, "learning_rate": 8.639000000000001e-06, "loss": 3.7773, "step": 1365 }, { "epoch": 0.36, "learning_rate": 8.634e-06, "loss": 3.7752, "step": 1370 }, { "epoch": 0.36, "learning_rate": 8.629e-06, "loss": 3.6992, "step": 1375 }, { "epoch": 0.36, "learning_rate": 8.624e-06, "loss": 3.651, "step": 1380 }, { "epoch": 0.36, "learning_rate": 8.619e-06, "loss": 3.7598, "step": 1385 }, { "epoch": 0.36, "learning_rate": 8.614000000000001e-06, "loss": 3.7367, "step": 1390 }, { "epoch": 0.37, "learning_rate": 8.609e-06, "loss": 3.6896, "step": 1395 }, { "epoch": 0.37, "learning_rate": 8.604000000000001e-06, "loss": 3.7732, "step": 1400 }, { "epoch": 0.37, "learning_rate": 8.599e-06, "loss": 3.7836, "step": 1405 }, { "epoch": 0.37, "learning_rate": 8.594000000000001e-06, "loss": 3.7854, "step": 1410 }, { "epoch": 0.37, "learning_rate": 8.589e-06, "loss": 3.701, "step": 1415 }, { "epoch": 0.37, "learning_rate": 8.584000000000001e-06, "loss": 3.7652, "step": 1420 }, { "epoch": 0.37, "learning_rate": 8.579e-06, "loss": 3.775, "step": 1425 }, { "epoch": 0.37, "learning_rate": 8.574000000000001e-06, "loss": 3.7207, "step": 1430 }, { "epoch": 0.38, "learning_rate": 8.569e-06, "loss": 3.71, "step": 1435 }, { "epoch": 0.38, "learning_rate": 8.564000000000001e-06, "loss": 3.7359, "step": 1440 }, { "epoch": 0.38, "learning_rate": 8.559e-06, "loss": 3.6854, "step": 1445 }, { "epoch": 0.38, "learning_rate": 8.554000000000001e-06, "loss": 3.7342, "step": 1450 }, { "epoch": 0.38, "learning_rate": 8.549e-06, "loss": 3.6707, "step": 1455 }, { "epoch": 0.38, "learning_rate": 8.544000000000002e-06, "loss": 3.6596, "step": 1460 }, { "epoch": 0.38, "learning_rate": 8.539e-06, "loss": 3.6711, "step": 1465 }, { "epoch": 0.39, "learning_rate": 8.534000000000002e-06, "loss": 3.7279, "step": 1470 }, { "epoch": 0.39, "learning_rate": 8.529e-06, "loss": 3.7115, "step": 1475 }, { "epoch": 0.39, "learning_rate": 8.524000000000002e-06, "loss": 3.7139, "step": 1480 }, { "epoch": 0.39, "learning_rate": 8.519000000000001e-06, "loss": 3.674, "step": 1485 }, { "epoch": 0.39, "learning_rate": 8.514000000000002e-06, "loss": 3.6191, "step": 1490 }, { "epoch": 0.39, "learning_rate": 8.509000000000001e-06, "loss": 3.6361, "step": 1495 }, { "epoch": 0.39, "learning_rate": 8.504000000000002e-06, "loss": 3.7717, "step": 1500 }, { "epoch": 0.39, "learning_rate": 8.499000000000001e-06, "loss": 3.6355, "step": 1505 }, { "epoch": 0.4, "learning_rate": 8.494e-06, "loss": 3.8113, "step": 1510 }, { "epoch": 0.4, "learning_rate": 8.489000000000001e-06, "loss": 3.7465, "step": 1515 }, { "epoch": 0.4, "learning_rate": 8.484e-06, "loss": 3.8033, "step": 1520 }, { "epoch": 0.4, "learning_rate": 8.479000000000001e-06, "loss": 3.6867, "step": 1525 }, { "epoch": 0.4, "learning_rate": 8.474e-06, "loss": 3.7062, "step": 1530 }, { "epoch": 0.4, "learning_rate": 8.469e-06, "loss": 3.726, "step": 1535 }, { "epoch": 0.4, "learning_rate": 8.464e-06, "loss": 3.6432, "step": 1540 }, { "epoch": 0.4, "learning_rate": 8.459e-06, "loss": 3.6943, "step": 1545 }, { "epoch": 0.41, "learning_rate": 8.454e-06, "loss": 3.6127, "step": 1550 }, { "epoch": 0.41, "learning_rate": 8.449e-06, "loss": 3.6529, "step": 1555 }, { "epoch": 0.41, "learning_rate": 8.444e-06, "loss": 3.6063, "step": 1560 }, { "epoch": 0.41, "learning_rate": 8.439e-06, "loss": 3.7633, "step": 1565 }, { "epoch": 0.41, "learning_rate": 8.434000000000001e-06, "loss": 3.6211, "step": 1570 }, { "epoch": 0.41, "learning_rate": 8.429e-06, "loss": 3.6895, "step": 1575 }, { "epoch": 0.41, "learning_rate": 8.424000000000001e-06, "loss": 3.6152, "step": 1580 }, { "epoch": 0.42, "learning_rate": 8.419e-06, "loss": 3.6549, "step": 1585 }, { "epoch": 0.42, "learning_rate": 8.414000000000001e-06, "loss": 3.6502, "step": 1590 }, { "epoch": 0.42, "learning_rate": 8.409e-06, "loss": 3.5689, "step": 1595 }, { "epoch": 0.42, "learning_rate": 8.404000000000001e-06, "loss": 3.7002, "step": 1600 }, { "epoch": 0.42, "learning_rate": 8.399e-06, "loss": 3.5998, "step": 1605 }, { "epoch": 0.42, "learning_rate": 8.394000000000001e-06, "loss": 3.7164, "step": 1610 }, { "epoch": 0.42, "learning_rate": 8.389e-06, "loss": 3.6006, "step": 1615 }, { "epoch": 0.42, "learning_rate": 8.384000000000001e-06, "loss": 3.5586, "step": 1620 }, { "epoch": 0.43, "learning_rate": 8.379e-06, "loss": 3.6801, "step": 1625 }, { "epoch": 0.43, "learning_rate": 8.374000000000001e-06, "loss": 3.601, "step": 1630 }, { "epoch": 0.43, "learning_rate": 8.369e-06, "loss": 3.6344, "step": 1635 }, { "epoch": 0.43, "learning_rate": 8.364000000000002e-06, "loss": 3.6637, "step": 1640 }, { "epoch": 0.43, "learning_rate": 8.359e-06, "loss": 3.6357, "step": 1645 }, { "epoch": 0.43, "learning_rate": 8.354000000000002e-06, "loss": 3.652, "step": 1650 }, { "epoch": 0.43, "learning_rate": 8.349000000000001e-06, "loss": 3.6439, "step": 1655 }, { "epoch": 0.44, "learning_rate": 8.344000000000002e-06, "loss": 3.6051, "step": 1660 }, { "epoch": 0.44, "learning_rate": 8.339000000000001e-06, "loss": 3.6207, "step": 1665 }, { "epoch": 0.44, "learning_rate": 8.334e-06, "loss": 3.6059, "step": 1670 }, { "epoch": 0.44, "learning_rate": 8.329000000000001e-06, "loss": 3.7102, "step": 1675 }, { "epoch": 0.44, "learning_rate": 8.324e-06, "loss": 3.5629, "step": 1680 }, { "epoch": 0.44, "learning_rate": 8.319000000000001e-06, "loss": 3.6357, "step": 1685 }, { "epoch": 0.44, "learning_rate": 8.314e-06, "loss": 3.6416, "step": 1690 }, { "epoch": 0.44, "learning_rate": 8.309e-06, "loss": 3.6572, "step": 1695 }, { "epoch": 0.45, "learning_rate": 8.304e-06, "loss": 3.6244, "step": 1700 }, { "epoch": 0.45, "learning_rate": 8.299e-06, "loss": 3.677, "step": 1705 }, { "epoch": 0.45, "learning_rate": 8.294e-06, "loss": 3.6006, "step": 1710 }, { "epoch": 0.45, "learning_rate": 8.289e-06, "loss": 3.7182, "step": 1715 }, { "epoch": 0.45, "learning_rate": 8.284e-06, "loss": 3.6451, "step": 1720 }, { "epoch": 0.45, "learning_rate": 8.279e-06, "loss": 3.508, "step": 1725 }, { "epoch": 0.45, "learning_rate": 8.274e-06, "loss": 3.6182, "step": 1730 }, { "epoch": 0.45, "learning_rate": 8.269e-06, "loss": 3.5447, "step": 1735 }, { "epoch": 0.46, "learning_rate": 8.264e-06, "loss": 3.5941, "step": 1740 }, { "epoch": 0.46, "learning_rate": 8.259e-06, "loss": 3.5094, "step": 1745 }, { "epoch": 0.46, "learning_rate": 8.254000000000001e-06, "loss": 3.5988, "step": 1750 }, { "epoch": 0.46, "learning_rate": 8.249e-06, "loss": 3.6652, "step": 1755 }, { "epoch": 0.46, "learning_rate": 8.244000000000001e-06, "loss": 3.5957, "step": 1760 }, { "epoch": 0.46, "learning_rate": 8.239e-06, "loss": 3.5326, "step": 1765 }, { "epoch": 0.46, "learning_rate": 8.234000000000001e-06, "loss": 3.5537, "step": 1770 }, { "epoch": 0.47, "learning_rate": 8.229e-06, "loss": 3.5834, "step": 1775 }, { "epoch": 0.47, "learning_rate": 8.224000000000001e-06, "loss": 3.5666, "step": 1780 }, { "epoch": 0.47, "learning_rate": 8.219e-06, "loss": 3.6174, "step": 1785 }, { "epoch": 0.47, "learning_rate": 8.214000000000001e-06, "loss": 3.5148, "step": 1790 }, { "epoch": 0.47, "learning_rate": 8.209e-06, "loss": 3.5037, "step": 1795 }, { "epoch": 0.47, "learning_rate": 8.204000000000001e-06, "loss": 3.6, "step": 1800 }, { "epoch": 0.47, "learning_rate": 8.199e-06, "loss": 3.5457, "step": 1805 }, { "epoch": 0.47, "learning_rate": 8.194000000000002e-06, "loss": 3.5021, "step": 1810 }, { "epoch": 0.48, "learning_rate": 8.189e-06, "loss": 3.509, "step": 1815 }, { "epoch": 0.48, "learning_rate": 8.184000000000002e-06, "loss": 3.5457, "step": 1820 }, { "epoch": 0.48, "learning_rate": 8.179e-06, "loss": 3.5449, "step": 1825 }, { "epoch": 0.48, "learning_rate": 8.174e-06, "loss": 3.5832, "step": 1830 }, { "epoch": 0.48, "learning_rate": 8.169000000000001e-06, "loss": 3.4852, "step": 1835 }, { "epoch": 0.48, "learning_rate": 8.164e-06, "loss": 3.6166, "step": 1840 }, { "epoch": 0.48, "learning_rate": 8.159000000000001e-06, "loss": 3.5248, "step": 1845 }, { "epoch": 0.48, "learning_rate": 8.154e-06, "loss": 3.5617, "step": 1850 }, { "epoch": 0.49, "learning_rate": 8.149e-06, "loss": 3.5119, "step": 1855 }, { "epoch": 0.49, "learning_rate": 8.144e-06, "loss": 3.5475, "step": 1860 }, { "epoch": 0.49, "learning_rate": 8.139e-06, "loss": 3.5646, "step": 1865 }, { "epoch": 0.49, "learning_rate": 8.134e-06, "loss": 3.4521, "step": 1870 }, { "epoch": 0.49, "learning_rate": 8.129e-06, "loss": 3.492, "step": 1875 }, { "epoch": 0.49, "learning_rate": 8.124e-06, "loss": 3.6187, "step": 1880 }, { "epoch": 0.49, "learning_rate": 8.119e-06, "loss": 3.4984, "step": 1885 }, { "epoch": 0.5, "learning_rate": 8.114e-06, "loss": 3.5744, "step": 1890 }, { "epoch": 0.5, "learning_rate": 8.109e-06, "loss": 3.5514, "step": 1895 }, { "epoch": 0.5, "learning_rate": 8.104e-06, "loss": 3.4807, "step": 1900 }, { "epoch": 0.5, "learning_rate": 8.099e-06, "loss": 3.5049, "step": 1905 }, { "epoch": 0.5, "learning_rate": 8.094e-06, "loss": 3.5098, "step": 1910 }, { "epoch": 0.5, "learning_rate": 8.089e-06, "loss": 3.4152, "step": 1915 }, { "epoch": 0.5, "learning_rate": 8.084000000000001e-06, "loss": 3.4281, "step": 1920 }, { "epoch": 0.5, "learning_rate": 8.079e-06, "loss": 3.5766, "step": 1925 }, { "epoch": 0.51, "learning_rate": 8.074000000000001e-06, "loss": 3.4908, "step": 1930 }, { "epoch": 0.51, "learning_rate": 8.069e-06, "loss": 3.5432, "step": 1935 }, { "epoch": 0.51, "learning_rate": 8.064000000000001e-06, "loss": 3.5154, "step": 1940 }, { "epoch": 0.51, "learning_rate": 8.059e-06, "loss": 3.4568, "step": 1945 }, { "epoch": 0.51, "learning_rate": 8.054000000000001e-06, "loss": 3.5314, "step": 1950 }, { "epoch": 0.51, "learning_rate": 8.049e-06, "loss": 3.5516, "step": 1955 }, { "epoch": 0.51, "learning_rate": 8.044000000000001e-06, "loss": 3.4271, "step": 1960 }, { "epoch": 0.52, "learning_rate": 8.039e-06, "loss": 3.4174, "step": 1965 }, { "epoch": 0.52, "learning_rate": 8.034000000000001e-06, "loss": 3.5492, "step": 1970 }, { "epoch": 0.52, "learning_rate": 8.029e-06, "loss": 3.568, "step": 1975 }, { "epoch": 0.52, "learning_rate": 8.024000000000001e-06, "loss": 3.5455, "step": 1980 }, { "epoch": 0.52, "learning_rate": 8.019e-06, "loss": 3.5598, "step": 1985 }, { "epoch": 0.52, "learning_rate": 8.014e-06, "loss": 3.5848, "step": 1990 }, { "epoch": 0.52, "learning_rate": 8.009e-06, "loss": 3.4631, "step": 1995 }, { "epoch": 0.52, "learning_rate": 8.004e-06, "loss": 3.3873, "step": 2000 } ], "max_steps": 10000, "num_train_epochs": 3, "total_flos": 1.5936160471711744e+18, "trial_name": null, "trial_params": null }