{ "best_metric": 1.9482014179229736, "best_model_checkpoint": "saves/Custom/lora/llama2-Medical-Medtext-28-10/checkpoint-14000", "epoch": 1.9999324529703806, "eval_steps": 400, "global_step": 14804, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.99932450689003e-06, "loss": 2.8148, "step": 5 }, { "epoch": 0.0, "learning_rate": 1.9986490137800594e-06, "loss": 2.6944, "step": 10 }, { "epoch": 0.0, "learning_rate": 1.9979735206700893e-06, "loss": 2.688, "step": 15 }, { "epoch": 0.0, "learning_rate": 1.997298027560119e-06, "loss": 2.8038, "step": 20 }, { "epoch": 0.0, "learning_rate": 1.9966225344501488e-06, "loss": 2.8559, "step": 25 }, { "epoch": 0.0, "learning_rate": 1.9959470413401783e-06, "loss": 2.7153, "step": 30 }, { "epoch": 0.0, "learning_rate": 1.995271548230208e-06, "loss": 2.7136, "step": 35 }, { "epoch": 0.01, "learning_rate": 1.9945960551202377e-06, "loss": 2.7289, "step": 40 }, { "epoch": 0.01, "learning_rate": 1.9939205620102677e-06, "loss": 2.7078, "step": 45 }, { "epoch": 0.01, "learning_rate": 1.993245068900297e-06, "loss": 2.6738, "step": 50 }, { "epoch": 0.01, "learning_rate": 1.992569575790327e-06, "loss": 2.6968, "step": 55 }, { "epoch": 0.01, "learning_rate": 1.9918940826803566e-06, "loss": 2.7469, "step": 60 }, { "epoch": 0.01, "learning_rate": 1.9912185895703866e-06, "loss": 2.628, "step": 65 }, { "epoch": 0.01, "learning_rate": 1.990543096460416e-06, "loss": 2.7036, "step": 70 }, { "epoch": 0.01, "learning_rate": 1.9898676033504456e-06, "loss": 2.5699, "step": 75 }, { "epoch": 0.01, "learning_rate": 1.9891921102404755e-06, "loss": 2.6844, "step": 80 }, { "epoch": 0.01, "learning_rate": 1.988516617130505e-06, "loss": 2.6691, "step": 85 }, { "epoch": 0.01, "learning_rate": 1.987841124020535e-06, "loss": 2.708, "step": 90 }, { "epoch": 0.01, "learning_rate": 1.9871656309105645e-06, "loss": 2.7602, "step": 95 }, { "epoch": 0.01, "learning_rate": 1.9864901378005944e-06, "loss": 2.8753, "step": 100 }, { "epoch": 0.01, "learning_rate": 1.985814644690624e-06, "loss": 2.6093, "step": 105 }, { "epoch": 0.01, "learning_rate": 1.985139151580654e-06, "loss": 2.7309, "step": 110 }, { "epoch": 0.02, "learning_rate": 1.9844636584706834e-06, "loss": 2.6666, "step": 115 }, { "epoch": 0.02, "learning_rate": 1.9837881653607133e-06, "loss": 2.6702, "step": 120 }, { "epoch": 0.02, "learning_rate": 1.983112672250743e-06, "loss": 2.7468, "step": 125 }, { "epoch": 0.02, "learning_rate": 1.9824371791407724e-06, "loss": 2.6879, "step": 130 }, { "epoch": 0.02, "learning_rate": 1.9817616860308023e-06, "loss": 2.6141, "step": 135 }, { "epoch": 0.02, "learning_rate": 1.981086192920832e-06, "loss": 2.6581, "step": 140 }, { "epoch": 0.02, "learning_rate": 1.9804106998108618e-06, "loss": 2.5732, "step": 145 }, { "epoch": 0.02, "learning_rate": 1.9797352067008917e-06, "loss": 2.6452, "step": 150 }, { "epoch": 0.02, "learning_rate": 1.9790597135909212e-06, "loss": 2.6152, "step": 155 }, { "epoch": 0.02, "learning_rate": 1.978384220480951e-06, "loss": 2.5607, "step": 160 }, { "epoch": 0.02, "learning_rate": 1.9777087273709807e-06, "loss": 2.6272, "step": 165 }, { "epoch": 0.02, "learning_rate": 1.9770332342610106e-06, "loss": 2.7238, "step": 170 }, { "epoch": 0.02, "learning_rate": 1.97635774115104e-06, "loss": 2.6797, "step": 175 }, { "epoch": 0.02, "learning_rate": 1.97568224804107e-06, "loss": 2.6734, "step": 180 }, { "epoch": 0.02, "learning_rate": 1.9750067549310996e-06, "loss": 2.5306, "step": 185 }, { "epoch": 0.03, "learning_rate": 1.9743312618211295e-06, "loss": 2.5373, "step": 190 }, { "epoch": 0.03, "learning_rate": 1.973655768711159e-06, "loss": 2.5462, "step": 195 }, { "epoch": 0.03, "learning_rate": 1.972980275601189e-06, "loss": 2.6225, "step": 200 }, { "epoch": 0.03, "learning_rate": 1.9723047824912185e-06, "loss": 2.5914, "step": 205 }, { "epoch": 0.03, "learning_rate": 1.9716292893812484e-06, "loss": 2.5997, "step": 210 }, { "epoch": 0.03, "learning_rate": 1.970953796271278e-06, "loss": 2.5858, "step": 215 }, { "epoch": 0.03, "learning_rate": 1.970278303161308e-06, "loss": 2.6066, "step": 220 }, { "epoch": 0.03, "learning_rate": 1.9696028100513374e-06, "loss": 2.4575, "step": 225 }, { "epoch": 0.03, "learning_rate": 1.9689273169413673e-06, "loss": 2.5655, "step": 230 }, { "epoch": 0.03, "learning_rate": 1.968251823831397e-06, "loss": 2.6101, "step": 235 }, { "epoch": 0.03, "learning_rate": 1.9675763307214268e-06, "loss": 2.4752, "step": 240 }, { "epoch": 0.03, "learning_rate": 1.9669008376114563e-06, "loss": 2.5649, "step": 245 }, { "epoch": 0.03, "learning_rate": 1.9662253445014862e-06, "loss": 2.4658, "step": 250 }, { "epoch": 0.03, "learning_rate": 1.9655498513915157e-06, "loss": 2.6025, "step": 255 }, { "epoch": 0.04, "learning_rate": 1.9648743582815453e-06, "loss": 2.528, "step": 260 }, { "epoch": 0.04, "learning_rate": 1.964198865171575e-06, "loss": 2.4624, "step": 265 }, { "epoch": 0.04, "learning_rate": 1.9635233720616047e-06, "loss": 2.5285, "step": 270 }, { "epoch": 0.04, "learning_rate": 1.9628478789516346e-06, "loss": 2.5167, "step": 275 }, { "epoch": 0.04, "learning_rate": 1.962172385841664e-06, "loss": 2.4256, "step": 280 }, { "epoch": 0.04, "learning_rate": 1.961496892731694e-06, "loss": 2.4909, "step": 285 }, { "epoch": 0.04, "learning_rate": 1.9608213996217236e-06, "loss": 2.4507, "step": 290 }, { "epoch": 0.04, "learning_rate": 1.9601459065117535e-06, "loss": 2.5089, "step": 295 }, { "epoch": 0.04, "learning_rate": 1.959470413401783e-06, "loss": 2.4909, "step": 300 }, { "epoch": 0.04, "learning_rate": 1.958794920291813e-06, "loss": 2.3888, "step": 305 }, { "epoch": 0.04, "learning_rate": 1.9581194271818425e-06, "loss": 2.3279, "step": 310 }, { "epoch": 0.04, "learning_rate": 1.9574439340718725e-06, "loss": 2.3981, "step": 315 }, { "epoch": 0.04, "learning_rate": 1.956768440961902e-06, "loss": 2.4836, "step": 320 }, { "epoch": 0.04, "learning_rate": 1.956092947851932e-06, "loss": 2.4944, "step": 325 }, { "epoch": 0.04, "learning_rate": 1.9554174547419614e-06, "loss": 2.5004, "step": 330 }, { "epoch": 0.05, "learning_rate": 1.9547419616319914e-06, "loss": 2.3797, "step": 335 }, { "epoch": 0.05, "learning_rate": 1.954066468522021e-06, "loss": 2.4652, "step": 340 }, { "epoch": 0.05, "learning_rate": 1.953390975412051e-06, "loss": 2.4376, "step": 345 }, { "epoch": 0.05, "learning_rate": 1.9527154823020803e-06, "loss": 2.4507, "step": 350 }, { "epoch": 0.05, "learning_rate": 1.9520399891921103e-06, "loss": 2.4931, "step": 355 }, { "epoch": 0.05, "learning_rate": 1.9513644960821398e-06, "loss": 2.5258, "step": 360 }, { "epoch": 0.05, "learning_rate": 1.9506890029721697e-06, "loss": 2.3549, "step": 365 }, { "epoch": 0.05, "learning_rate": 1.9500135098621992e-06, "loss": 2.4138, "step": 370 }, { "epoch": 0.05, "learning_rate": 1.949338016752229e-06, "loss": 2.4262, "step": 375 }, { "epoch": 0.05, "learning_rate": 1.9486625236422587e-06, "loss": 2.3892, "step": 380 }, { "epoch": 0.05, "learning_rate": 1.9479870305322886e-06, "loss": 2.275, "step": 385 }, { "epoch": 0.05, "learning_rate": 1.947311537422318e-06, "loss": 2.4148, "step": 390 }, { "epoch": 0.05, "learning_rate": 1.946636044312348e-06, "loss": 2.3715, "step": 395 }, { "epoch": 0.05, "learning_rate": 1.9459605512023776e-06, "loss": 2.3765, "step": 400 }, { "epoch": 0.05, "eval_loss": 2.380300521850586, "eval_runtime": 165.4903, "eval_samples_per_second": 3.601, "eval_steps_per_second": 0.453, "step": 400 }, { "epoch": 0.05, "learning_rate": 1.9452850580924075e-06, "loss": 2.4244, "step": 405 }, { "epoch": 0.06, "learning_rate": 1.944609564982437e-06, "loss": 2.3635, "step": 410 }, { "epoch": 0.06, "learning_rate": 1.943934071872467e-06, "loss": 2.3548, "step": 415 }, { "epoch": 0.06, "learning_rate": 1.9432585787624965e-06, "loss": 2.3199, "step": 420 }, { "epoch": 0.06, "learning_rate": 1.9425830856525264e-06, "loss": 2.3976, "step": 425 }, { "epoch": 0.06, "learning_rate": 1.941907592542556e-06, "loss": 2.4299, "step": 430 }, { "epoch": 0.06, "learning_rate": 1.941232099432586e-06, "loss": 2.3214, "step": 435 }, { "epoch": 0.06, "learning_rate": 1.9405566063226154e-06, "loss": 2.3571, "step": 440 }, { "epoch": 0.06, "learning_rate": 1.9398811132126453e-06, "loss": 2.3554, "step": 445 }, { "epoch": 0.06, "learning_rate": 1.939205620102675e-06, "loss": 2.3911, "step": 450 }, { "epoch": 0.06, "learning_rate": 1.9385301269927044e-06, "loss": 2.3898, "step": 455 }, { "epoch": 0.06, "learning_rate": 1.9378546338827343e-06, "loss": 2.3447, "step": 460 }, { "epoch": 0.06, "learning_rate": 1.937179140772764e-06, "loss": 2.3268, "step": 465 }, { "epoch": 0.06, "learning_rate": 1.9365036476627938e-06, "loss": 2.3788, "step": 470 }, { "epoch": 0.06, "learning_rate": 1.9358281545528233e-06, "loss": 2.3091, "step": 475 }, { "epoch": 0.06, "learning_rate": 1.935152661442853e-06, "loss": 2.289, "step": 480 }, { "epoch": 0.07, "learning_rate": 1.9344771683328827e-06, "loss": 2.3099, "step": 485 }, { "epoch": 0.07, "learning_rate": 1.9338016752229127e-06, "loss": 2.2789, "step": 490 }, { "epoch": 0.07, "learning_rate": 1.933126182112942e-06, "loss": 2.2609, "step": 495 }, { "epoch": 0.07, "learning_rate": 1.932450689002972e-06, "loss": 2.3611, "step": 500 }, { "epoch": 0.07, "learning_rate": 1.9317751958930016e-06, "loss": 2.2964, "step": 505 }, { "epoch": 0.07, "learning_rate": 1.9310997027830316e-06, "loss": 2.3029, "step": 510 }, { "epoch": 0.07, "learning_rate": 1.930424209673061e-06, "loss": 2.3109, "step": 515 }, { "epoch": 0.07, "learning_rate": 1.929748716563091e-06, "loss": 2.4029, "step": 520 }, { "epoch": 0.07, "learning_rate": 1.9290732234531205e-06, "loss": 2.2692, "step": 525 }, { "epoch": 0.07, "learning_rate": 1.9283977303431505e-06, "loss": 2.4243, "step": 530 }, { "epoch": 0.07, "learning_rate": 1.92772223723318e-06, "loss": 2.3542, "step": 535 }, { "epoch": 0.07, "learning_rate": 1.92704674412321e-06, "loss": 2.3045, "step": 540 }, { "epoch": 0.07, "learning_rate": 1.9263712510132394e-06, "loss": 2.3004, "step": 545 }, { "epoch": 0.07, "learning_rate": 1.9256957579032694e-06, "loss": 2.359, "step": 550 }, { "epoch": 0.07, "learning_rate": 1.925020264793299e-06, "loss": 2.3057, "step": 555 }, { "epoch": 0.08, "learning_rate": 1.924344771683329e-06, "loss": 2.3865, "step": 560 }, { "epoch": 0.08, "learning_rate": 1.9236692785733583e-06, "loss": 2.2997, "step": 565 }, { "epoch": 0.08, "learning_rate": 1.9229937854633883e-06, "loss": 2.2846, "step": 570 }, { "epoch": 0.08, "learning_rate": 1.922318292353418e-06, "loss": 2.3284, "step": 575 }, { "epoch": 0.08, "learning_rate": 1.9216427992434477e-06, "loss": 2.258, "step": 580 }, { "epoch": 0.08, "learning_rate": 1.9209673061334777e-06, "loss": 2.3458, "step": 585 }, { "epoch": 0.08, "learning_rate": 1.920291813023507e-06, "loss": 2.276, "step": 590 }, { "epoch": 0.08, "learning_rate": 1.9196163199135367e-06, "loss": 2.2503, "step": 595 }, { "epoch": 0.08, "learning_rate": 1.9189408268035666e-06, "loss": 2.2727, "step": 600 }, { "epoch": 0.08, "learning_rate": 1.918265333693596e-06, "loss": 2.289, "step": 605 }, { "epoch": 0.08, "learning_rate": 1.917589840583626e-06, "loss": 2.2968, "step": 610 }, { "epoch": 0.08, "learning_rate": 1.9169143474736556e-06, "loss": 2.3512, "step": 615 }, { "epoch": 0.08, "learning_rate": 1.9162388543636855e-06, "loss": 2.3458, "step": 620 }, { "epoch": 0.08, "learning_rate": 1.915563361253715e-06, "loss": 2.2532, "step": 625 }, { "epoch": 0.09, "learning_rate": 1.914887868143745e-06, "loss": 2.2273, "step": 630 }, { "epoch": 0.09, "learning_rate": 1.9142123750337745e-06, "loss": 2.3108, "step": 635 }, { "epoch": 0.09, "learning_rate": 1.9135368819238044e-06, "loss": 2.3238, "step": 640 }, { "epoch": 0.09, "learning_rate": 1.912861388813834e-06, "loss": 2.2053, "step": 645 }, { "epoch": 0.09, "learning_rate": 1.9121858957038635e-06, "loss": 2.386, "step": 650 }, { "epoch": 0.09, "learning_rate": 1.9115104025938934e-06, "loss": 2.1914, "step": 655 }, { "epoch": 0.09, "learning_rate": 1.910834909483923e-06, "loss": 2.2638, "step": 660 }, { "epoch": 0.09, "learning_rate": 1.910159416373953e-06, "loss": 2.3571, "step": 665 }, { "epoch": 0.09, "learning_rate": 1.9094839232639824e-06, "loss": 2.344, "step": 670 }, { "epoch": 0.09, "learning_rate": 1.9088084301540123e-06, "loss": 2.2765, "step": 675 }, { "epoch": 0.09, "learning_rate": 1.908132937044042e-06, "loss": 2.2643, "step": 680 }, { "epoch": 0.09, "learning_rate": 1.9074574439340718e-06, "loss": 2.2843, "step": 685 }, { "epoch": 0.09, "learning_rate": 1.9067819508241015e-06, "loss": 2.3156, "step": 690 }, { "epoch": 0.09, "learning_rate": 1.9061064577141312e-06, "loss": 2.2365, "step": 695 }, { "epoch": 0.09, "learning_rate": 1.905430964604161e-06, "loss": 2.2396, "step": 700 }, { "epoch": 0.1, "learning_rate": 1.9047554714941907e-06, "loss": 2.3003, "step": 705 }, { "epoch": 0.1, "learning_rate": 1.9040799783842202e-06, "loss": 2.2414, "step": 710 }, { "epoch": 0.1, "learning_rate": 1.9034044852742501e-06, "loss": 2.2087, "step": 715 }, { "epoch": 0.1, "learning_rate": 1.9027289921642796e-06, "loss": 2.3138, "step": 720 }, { "epoch": 0.1, "learning_rate": 1.9020534990543096e-06, "loss": 2.2667, "step": 725 }, { "epoch": 0.1, "learning_rate": 1.9013780059443393e-06, "loss": 2.171, "step": 730 }, { "epoch": 0.1, "learning_rate": 1.900702512834369e-06, "loss": 2.2188, "step": 735 }, { "epoch": 0.1, "learning_rate": 1.9000270197243988e-06, "loss": 2.2388, "step": 740 }, { "epoch": 0.1, "learning_rate": 1.8993515266144285e-06, "loss": 2.2639, "step": 745 }, { "epoch": 0.1, "learning_rate": 1.8986760335044582e-06, "loss": 2.1737, "step": 750 }, { "epoch": 0.1, "learning_rate": 1.898000540394488e-06, "loss": 2.2252, "step": 755 }, { "epoch": 0.1, "learning_rate": 1.8973250472845177e-06, "loss": 2.1938, "step": 760 }, { "epoch": 0.1, "learning_rate": 1.8966495541745474e-06, "loss": 2.2139, "step": 765 }, { "epoch": 0.1, "learning_rate": 1.8959740610645771e-06, "loss": 2.2717, "step": 770 }, { "epoch": 0.1, "learning_rate": 1.8952985679546068e-06, "loss": 2.2766, "step": 775 }, { "epoch": 0.11, "learning_rate": 1.8946230748446366e-06, "loss": 2.2662, "step": 780 }, { "epoch": 0.11, "learning_rate": 1.893947581734666e-06, "loss": 2.1394, "step": 785 }, { "epoch": 0.11, "learning_rate": 1.893272088624696e-06, "loss": 2.187, "step": 790 }, { "epoch": 0.11, "learning_rate": 1.8925965955147255e-06, "loss": 2.2881, "step": 795 }, { "epoch": 0.11, "learning_rate": 1.8919211024047555e-06, "loss": 2.2189, "step": 800 }, { "epoch": 0.11, "eval_loss": 2.2260634899139404, "eval_runtime": 165.4618, "eval_samples_per_second": 3.602, "eval_steps_per_second": 0.453, "step": 800 }, { "epoch": 0.11, "learning_rate": 1.891245609294785e-06, "loss": 2.2459, "step": 805 }, { "epoch": 0.11, "learning_rate": 1.890570116184815e-06, "loss": 2.1488, "step": 810 }, { "epoch": 0.11, "learning_rate": 1.8898946230748444e-06, "loss": 2.2133, "step": 815 }, { "epoch": 0.11, "learning_rate": 1.8892191299648744e-06, "loss": 2.2117, "step": 820 }, { "epoch": 0.11, "learning_rate": 1.888543636854904e-06, "loss": 2.1795, "step": 825 }, { "epoch": 0.11, "learning_rate": 1.8878681437449338e-06, "loss": 2.2757, "step": 830 }, { "epoch": 0.11, "learning_rate": 1.8871926506349634e-06, "loss": 2.2471, "step": 835 }, { "epoch": 0.11, "learning_rate": 1.8865171575249933e-06, "loss": 2.3356, "step": 840 }, { "epoch": 0.11, "learning_rate": 1.8858416644150228e-06, "loss": 2.263, "step": 845 }, { "epoch": 0.11, "learning_rate": 1.8851661713050525e-06, "loss": 2.2264, "step": 850 }, { "epoch": 0.12, "learning_rate": 1.8844906781950823e-06, "loss": 2.2703, "step": 855 }, { "epoch": 0.12, "learning_rate": 1.883815185085112e-06, "loss": 2.2639, "step": 860 }, { "epoch": 0.12, "learning_rate": 1.8831396919751417e-06, "loss": 2.2495, "step": 865 }, { "epoch": 0.12, "learning_rate": 1.8824641988651714e-06, "loss": 2.2448, "step": 870 }, { "epoch": 0.12, "learning_rate": 1.8817887057552014e-06, "loss": 2.2363, "step": 875 }, { "epoch": 0.12, "learning_rate": 1.8811132126452309e-06, "loss": 2.1892, "step": 880 }, { "epoch": 0.12, "learning_rate": 1.8804377195352608e-06, "loss": 2.1822, "step": 885 }, { "epoch": 0.12, "learning_rate": 1.8797622264252903e-06, "loss": 2.1871, "step": 890 }, { "epoch": 0.12, "learning_rate": 1.8790867333153203e-06, "loss": 2.1882, "step": 895 }, { "epoch": 0.12, "learning_rate": 1.8784112402053498e-06, "loss": 2.1884, "step": 900 }, { "epoch": 0.12, "learning_rate": 1.8777357470953797e-06, "loss": 2.2064, "step": 905 }, { "epoch": 0.12, "learning_rate": 1.8770602539854092e-06, "loss": 2.249, "step": 910 }, { "epoch": 0.12, "learning_rate": 1.876384760875439e-06, "loss": 2.2484, "step": 915 }, { "epoch": 0.12, "learning_rate": 1.8757092677654687e-06, "loss": 2.1547, "step": 920 }, { "epoch": 0.12, "learning_rate": 1.8750337746554984e-06, "loss": 2.177, "step": 925 }, { "epoch": 0.13, "learning_rate": 1.8743582815455281e-06, "loss": 2.1332, "step": 930 }, { "epoch": 0.13, "learning_rate": 1.8736827884355579e-06, "loss": 2.2354, "step": 935 }, { "epoch": 0.13, "learning_rate": 1.8730072953255876e-06, "loss": 2.1353, "step": 940 }, { "epoch": 0.13, "learning_rate": 1.8723318022156173e-06, "loss": 2.2044, "step": 945 }, { "epoch": 0.13, "learning_rate": 1.871656309105647e-06, "loss": 2.2369, "step": 950 }, { "epoch": 0.13, "learning_rate": 1.8709808159956768e-06, "loss": 2.1856, "step": 955 }, { "epoch": 0.13, "learning_rate": 1.8703053228857065e-06, "loss": 2.1692, "step": 960 }, { "epoch": 0.13, "learning_rate": 1.8696298297757362e-06, "loss": 2.2238, "step": 965 }, { "epoch": 0.13, "learning_rate": 1.8689543366657657e-06, "loss": 2.2798, "step": 970 }, { "epoch": 0.13, "learning_rate": 1.8682788435557957e-06, "loss": 2.2296, "step": 975 }, { "epoch": 0.13, "learning_rate": 1.8676033504458252e-06, "loss": 2.2336, "step": 980 }, { "epoch": 0.13, "learning_rate": 1.8669278573358551e-06, "loss": 2.2287, "step": 985 }, { "epoch": 0.13, "learning_rate": 1.8662523642258847e-06, "loss": 2.244, "step": 990 }, { "epoch": 0.13, "learning_rate": 1.8655768711159146e-06, "loss": 2.1761, "step": 995 }, { "epoch": 0.14, "learning_rate": 1.8649013780059441e-06, "loss": 2.2065, "step": 1000 }, { "epoch": 0.14, "learning_rate": 1.864225884895974e-06, "loss": 2.1739, "step": 1005 }, { "epoch": 0.14, "learning_rate": 1.8635503917860036e-06, "loss": 2.1753, "step": 1010 }, { "epoch": 0.14, "learning_rate": 1.8628748986760335e-06, "loss": 2.3107, "step": 1015 }, { "epoch": 0.14, "learning_rate": 1.8621994055660632e-06, "loss": 2.214, "step": 1020 }, { "epoch": 0.14, "learning_rate": 1.861523912456093e-06, "loss": 2.1209, "step": 1025 }, { "epoch": 0.14, "learning_rate": 1.8608484193461227e-06, "loss": 2.1574, "step": 1030 }, { "epoch": 0.14, "learning_rate": 1.8601729262361522e-06, "loss": 2.2181, "step": 1035 }, { "epoch": 0.14, "learning_rate": 1.8594974331261821e-06, "loss": 2.2101, "step": 1040 }, { "epoch": 0.14, "learning_rate": 1.8588219400162116e-06, "loss": 2.196, "step": 1045 }, { "epoch": 0.14, "learning_rate": 1.8581464469062416e-06, "loss": 2.2107, "step": 1050 }, { "epoch": 0.14, "learning_rate": 1.857470953796271e-06, "loss": 2.1913, "step": 1055 }, { "epoch": 0.14, "learning_rate": 1.856795460686301e-06, "loss": 2.1373, "step": 1060 }, { "epoch": 0.14, "learning_rate": 1.8561199675763305e-06, "loss": 2.0939, "step": 1065 }, { "epoch": 0.14, "learning_rate": 1.8554444744663605e-06, "loss": 2.2936, "step": 1070 }, { "epoch": 0.15, "learning_rate": 1.85476898135639e-06, "loss": 2.11, "step": 1075 }, { "epoch": 0.15, "learning_rate": 1.85409348824642e-06, "loss": 2.2067, "step": 1080 }, { "epoch": 0.15, "learning_rate": 1.8534179951364495e-06, "loss": 2.0939, "step": 1085 }, { "epoch": 0.15, "learning_rate": 1.8527425020264794e-06, "loss": 2.1118, "step": 1090 }, { "epoch": 0.15, "learning_rate": 1.852067008916509e-06, "loss": 2.2529, "step": 1095 }, { "epoch": 0.15, "learning_rate": 1.8513915158065388e-06, "loss": 2.1717, "step": 1100 }, { "epoch": 0.15, "learning_rate": 1.8507160226965684e-06, "loss": 2.1909, "step": 1105 }, { "epoch": 0.15, "learning_rate": 1.850040529586598e-06, "loss": 2.1772, "step": 1110 }, { "epoch": 0.15, "learning_rate": 1.8493650364766278e-06, "loss": 2.1615, "step": 1115 }, { "epoch": 0.15, "learning_rate": 1.8486895433666575e-06, "loss": 2.1519, "step": 1120 }, { "epoch": 0.15, "learning_rate": 1.8480140502566873e-06, "loss": 2.1842, "step": 1125 }, { "epoch": 0.15, "learning_rate": 1.847338557146717e-06, "loss": 2.1566, "step": 1130 }, { "epoch": 0.15, "learning_rate": 1.8466630640367467e-06, "loss": 2.2503, "step": 1135 }, { "epoch": 0.15, "learning_rate": 1.8459875709267764e-06, "loss": 2.1565, "step": 1140 }, { "epoch": 0.15, "learning_rate": 1.8453120778168062e-06, "loss": 2.1437, "step": 1145 }, { "epoch": 0.16, "learning_rate": 1.844636584706836e-06, "loss": 2.1883, "step": 1150 }, { "epoch": 0.16, "learning_rate": 1.8439610915968656e-06, "loss": 2.1408, "step": 1155 }, { "epoch": 0.16, "learning_rate": 1.8432855984868953e-06, "loss": 2.2447, "step": 1160 }, { "epoch": 0.16, "learning_rate": 1.8426101053769249e-06, "loss": 2.1307, "step": 1165 }, { "epoch": 0.16, "learning_rate": 1.8419346122669548e-06, "loss": 2.132, "step": 1170 }, { "epoch": 0.16, "learning_rate": 1.8412591191569845e-06, "loss": 2.2312, "step": 1175 }, { "epoch": 0.16, "learning_rate": 1.8405836260470143e-06, "loss": 2.1841, "step": 1180 }, { "epoch": 0.16, "learning_rate": 1.839908132937044e-06, "loss": 2.1887, "step": 1185 }, { "epoch": 0.16, "learning_rate": 1.8392326398270737e-06, "loss": 2.0504, "step": 1190 }, { "epoch": 0.16, "learning_rate": 1.8385571467171034e-06, "loss": 2.167, "step": 1195 }, { "epoch": 0.16, "learning_rate": 1.8378816536071332e-06, "loss": 2.1824, "step": 1200 }, { "epoch": 0.16, "eval_loss": 2.1631150245666504, "eval_runtime": 165.4999, "eval_samples_per_second": 3.601, "eval_steps_per_second": 0.453, "step": 1200 }, { "epoch": 0.16, "learning_rate": 1.8372061604971629e-06, "loss": 2.2233, "step": 1205 }, { "epoch": 0.16, "learning_rate": 1.8365306673871926e-06, "loss": 2.2954, "step": 1210 }, { "epoch": 0.16, "learning_rate": 1.8358551742772223e-06, "loss": 2.2049, "step": 1215 }, { "epoch": 0.16, "learning_rate": 1.835179681167252e-06, "loss": 2.1491, "step": 1220 }, { "epoch": 0.17, "learning_rate": 1.8345041880572818e-06, "loss": 2.1174, "step": 1225 }, { "epoch": 0.17, "learning_rate": 1.8338286949473113e-06, "loss": 2.1562, "step": 1230 }, { "epoch": 0.17, "learning_rate": 1.8331532018373412e-06, "loss": 2.1639, "step": 1235 }, { "epoch": 0.17, "learning_rate": 1.8324777087273708e-06, "loss": 2.1512, "step": 1240 }, { "epoch": 0.17, "learning_rate": 1.8318022156174007e-06, "loss": 2.1613, "step": 1245 }, { "epoch": 0.17, "learning_rate": 1.8311267225074302e-06, "loss": 2.1649, "step": 1250 }, { "epoch": 0.17, "learning_rate": 1.8304512293974601e-06, "loss": 2.1872, "step": 1255 }, { "epoch": 0.17, "learning_rate": 1.8297757362874897e-06, "loss": 2.1949, "step": 1260 }, { "epoch": 0.17, "learning_rate": 1.8291002431775196e-06, "loss": 2.1656, "step": 1265 }, { "epoch": 0.17, "learning_rate": 1.8284247500675491e-06, "loss": 2.1845, "step": 1270 }, { "epoch": 0.17, "learning_rate": 1.827749256957579e-06, "loss": 2.1477, "step": 1275 }, { "epoch": 0.17, "learning_rate": 1.8270737638476086e-06, "loss": 2.2332, "step": 1280 }, { "epoch": 0.17, "learning_rate": 1.8263982707376385e-06, "loss": 2.1762, "step": 1285 }, { "epoch": 0.17, "learning_rate": 1.825722777627668e-06, "loss": 2.2039, "step": 1290 }, { "epoch": 0.17, "learning_rate": 1.8250472845176977e-06, "loss": 2.192, "step": 1295 }, { "epoch": 0.18, "learning_rate": 1.8243717914077275e-06, "loss": 2.2305, "step": 1300 }, { "epoch": 0.18, "learning_rate": 1.8236962982977572e-06, "loss": 2.063, "step": 1305 }, { "epoch": 0.18, "learning_rate": 1.823020805187787e-06, "loss": 2.1416, "step": 1310 }, { "epoch": 0.18, "learning_rate": 1.8223453120778166e-06, "loss": 2.082, "step": 1315 }, { "epoch": 0.18, "learning_rate": 1.8216698189678466e-06, "loss": 2.2055, "step": 1320 }, { "epoch": 0.18, "learning_rate": 1.820994325857876e-06, "loss": 2.2108, "step": 1325 }, { "epoch": 0.18, "learning_rate": 1.820318832747906e-06, "loss": 2.2147, "step": 1330 }, { "epoch": 0.18, "learning_rate": 1.8196433396379356e-06, "loss": 2.2602, "step": 1335 }, { "epoch": 0.18, "learning_rate": 1.8189678465279655e-06, "loss": 2.1456, "step": 1340 }, { "epoch": 0.18, "learning_rate": 1.818292353417995e-06, "loss": 2.1897, "step": 1345 }, { "epoch": 0.18, "learning_rate": 1.817616860308025e-06, "loss": 2.1242, "step": 1350 }, { "epoch": 0.18, "learning_rate": 1.8169413671980545e-06, "loss": 2.1346, "step": 1355 }, { "epoch": 0.18, "learning_rate": 1.8162658740880844e-06, "loss": 2.1532, "step": 1360 }, { "epoch": 0.18, "learning_rate": 1.815590380978114e-06, "loss": 2.1337, "step": 1365 }, { "epoch": 0.19, "learning_rate": 1.8149148878681436e-06, "loss": 2.1347, "step": 1370 }, { "epoch": 0.19, "learning_rate": 1.8142393947581734e-06, "loss": 2.1518, "step": 1375 }, { "epoch": 0.19, "learning_rate": 1.813563901648203e-06, "loss": 2.085, "step": 1380 }, { "epoch": 0.19, "learning_rate": 1.8128884085382328e-06, "loss": 2.2308, "step": 1385 }, { "epoch": 0.19, "learning_rate": 1.8122129154282625e-06, "loss": 2.189, "step": 1390 }, { "epoch": 0.19, "learning_rate": 1.8115374223182923e-06, "loss": 2.1436, "step": 1395 }, { "epoch": 0.19, "learning_rate": 1.810861929208322e-06, "loss": 2.1599, "step": 1400 }, { "epoch": 0.19, "learning_rate": 1.8101864360983517e-06, "loss": 2.1162, "step": 1405 }, { "epoch": 0.19, "learning_rate": 1.8095109429883814e-06, "loss": 2.1438, "step": 1410 }, { "epoch": 0.19, "learning_rate": 1.8088354498784112e-06, "loss": 2.1114, "step": 1415 }, { "epoch": 0.19, "learning_rate": 1.808159956768441e-06, "loss": 2.1274, "step": 1420 }, { "epoch": 0.19, "learning_rate": 1.8074844636584704e-06, "loss": 2.0931, "step": 1425 }, { "epoch": 0.19, "learning_rate": 1.8068089705485004e-06, "loss": 2.1427, "step": 1430 }, { "epoch": 0.19, "learning_rate": 1.8061334774385299e-06, "loss": 2.1255, "step": 1435 }, { "epoch": 0.19, "learning_rate": 1.8054579843285598e-06, "loss": 2.2059, "step": 1440 }, { "epoch": 0.2, "learning_rate": 1.8047824912185893e-06, "loss": 2.1694, "step": 1445 }, { "epoch": 0.2, "learning_rate": 1.8041069981086193e-06, "loss": 2.0896, "step": 1450 }, { "epoch": 0.2, "learning_rate": 1.8034315049986488e-06, "loss": 2.1429, "step": 1455 }, { "epoch": 0.2, "learning_rate": 1.8027560118886787e-06, "loss": 2.2017, "step": 1460 }, { "epoch": 0.2, "learning_rate": 1.8020805187787084e-06, "loss": 2.1535, "step": 1465 }, { "epoch": 0.2, "learning_rate": 1.8014050256687382e-06, "loss": 2.1342, "step": 1470 }, { "epoch": 0.2, "learning_rate": 1.8007295325587679e-06, "loss": 2.1528, "step": 1475 }, { "epoch": 0.2, "learning_rate": 1.8000540394487976e-06, "loss": 2.1151, "step": 1480 }, { "epoch": 0.2, "learning_rate": 1.7993785463388273e-06, "loss": 2.1482, "step": 1485 }, { "epoch": 0.2, "learning_rate": 1.7987030532288569e-06, "loss": 2.0806, "step": 1490 }, { "epoch": 0.2, "learning_rate": 1.7980275601188868e-06, "loss": 2.1843, "step": 1495 }, { "epoch": 0.2, "learning_rate": 1.7973520670089163e-06, "loss": 2.1359, "step": 1500 }, { "epoch": 0.2, "learning_rate": 1.7966765738989462e-06, "loss": 2.1303, "step": 1505 }, { "epoch": 0.2, "learning_rate": 1.7960010807889758e-06, "loss": 2.1373, "step": 1510 }, { "epoch": 0.2, "learning_rate": 1.7953255876790057e-06, "loss": 2.1413, "step": 1515 }, { "epoch": 0.21, "learning_rate": 1.7946500945690352e-06, "loss": 2.1424, "step": 1520 }, { "epoch": 0.21, "learning_rate": 1.7939746014590652e-06, "loss": 2.1073, "step": 1525 }, { "epoch": 0.21, "learning_rate": 1.7932991083490947e-06, "loss": 2.0807, "step": 1530 }, { "epoch": 0.21, "learning_rate": 1.7926236152391246e-06, "loss": 2.1708, "step": 1535 }, { "epoch": 0.21, "learning_rate": 1.7919481221291541e-06, "loss": 2.1456, "step": 1540 }, { "epoch": 0.21, "learning_rate": 1.791272629019184e-06, "loss": 2.2062, "step": 1545 }, { "epoch": 0.21, "learning_rate": 1.7905971359092136e-06, "loss": 2.1719, "step": 1550 }, { "epoch": 0.21, "learning_rate": 1.7899216427992433e-06, "loss": 2.1313, "step": 1555 }, { "epoch": 0.21, "learning_rate": 1.789246149689273e-06, "loss": 2.0904, "step": 1560 }, { "epoch": 0.21, "learning_rate": 1.7885706565793028e-06, "loss": 2.1181, "step": 1565 }, { "epoch": 0.21, "learning_rate": 1.7878951634693325e-06, "loss": 2.236, "step": 1570 }, { "epoch": 0.21, "learning_rate": 1.7872196703593622e-06, "loss": 2.2107, "step": 1575 }, { "epoch": 0.21, "learning_rate": 1.786544177249392e-06, "loss": 2.1614, "step": 1580 }, { "epoch": 0.21, "learning_rate": 1.7858686841394217e-06, "loss": 2.0653, "step": 1585 }, { "epoch": 0.21, "learning_rate": 1.7851931910294514e-06, "loss": 2.1127, "step": 1590 }, { "epoch": 0.22, "learning_rate": 1.7845176979194811e-06, "loss": 2.1073, "step": 1595 }, { "epoch": 0.22, "learning_rate": 1.7838422048095108e-06, "loss": 2.0995, "step": 1600 }, { "epoch": 0.22, "eval_loss": 2.1323931217193604, "eval_runtime": 165.5394, "eval_samples_per_second": 3.6, "eval_steps_per_second": 0.453, "step": 1600 }, { "epoch": 0.22, "learning_rate": 1.7831667116995406e-06, "loss": 2.1629, "step": 1605 }, { "epoch": 0.22, "learning_rate": 1.7824912185895705e-06, "loss": 2.1419, "step": 1610 }, { "epoch": 0.22, "learning_rate": 1.7818157254796e-06, "loss": 2.1061, "step": 1615 }, { "epoch": 0.22, "learning_rate": 1.78114023236963e-06, "loss": 2.1188, "step": 1620 }, { "epoch": 0.22, "learning_rate": 1.7804647392596595e-06, "loss": 2.1862, "step": 1625 }, { "epoch": 0.22, "learning_rate": 1.7797892461496892e-06, "loss": 2.153, "step": 1630 }, { "epoch": 0.22, "learning_rate": 1.779113753039719e-06, "loss": 2.0605, "step": 1635 }, { "epoch": 0.22, "learning_rate": 1.7784382599297486e-06, "loss": 2.2079, "step": 1640 }, { "epoch": 0.22, "learning_rate": 1.7777627668197784e-06, "loss": 2.1516, "step": 1645 }, { "epoch": 0.22, "learning_rate": 1.777087273709808e-06, "loss": 2.1234, "step": 1650 }, { "epoch": 0.22, "learning_rate": 1.7764117805998378e-06, "loss": 2.231, "step": 1655 }, { "epoch": 0.22, "learning_rate": 1.7757362874898676e-06, "loss": 2.1475, "step": 1660 }, { "epoch": 0.22, "learning_rate": 1.7750607943798973e-06, "loss": 2.1289, "step": 1665 }, { "epoch": 0.23, "learning_rate": 1.774385301269927e-06, "loss": 2.0901, "step": 1670 }, { "epoch": 0.23, "learning_rate": 1.7737098081599567e-06, "loss": 2.17, "step": 1675 }, { "epoch": 0.23, "learning_rate": 1.7730343150499865e-06, "loss": 2.2153, "step": 1680 }, { "epoch": 0.23, "learning_rate": 1.772358821940016e-06, "loss": 2.1964, "step": 1685 }, { "epoch": 0.23, "learning_rate": 1.771683328830046e-06, "loss": 2.2056, "step": 1690 }, { "epoch": 0.23, "learning_rate": 1.7710078357200754e-06, "loss": 2.0911, "step": 1695 }, { "epoch": 0.23, "learning_rate": 1.7703323426101054e-06, "loss": 2.1362, "step": 1700 }, { "epoch": 0.23, "learning_rate": 1.7696568495001349e-06, "loss": 2.1819, "step": 1705 }, { "epoch": 0.23, "learning_rate": 1.7689813563901648e-06, "loss": 2.155, "step": 1710 }, { "epoch": 0.23, "learning_rate": 1.7683058632801943e-06, "loss": 2.0689, "step": 1715 }, { "epoch": 0.23, "learning_rate": 1.7676303701702243e-06, "loss": 2.1661, "step": 1720 }, { "epoch": 0.23, "learning_rate": 1.7669548770602538e-06, "loss": 2.094, "step": 1725 }, { "epoch": 0.23, "learning_rate": 1.7662793839502837e-06, "loss": 2.061, "step": 1730 }, { "epoch": 0.23, "learning_rate": 1.7656038908403132e-06, "loss": 2.185, "step": 1735 }, { "epoch": 0.24, "learning_rate": 1.7649283977303432e-06, "loss": 2.1631, "step": 1740 }, { "epoch": 0.24, "learning_rate": 1.7642529046203727e-06, "loss": 2.1535, "step": 1745 }, { "epoch": 0.24, "learning_rate": 1.7635774115104024e-06, "loss": 2.2405, "step": 1750 }, { "epoch": 0.24, "learning_rate": 1.7629019184004324e-06, "loss": 2.1495, "step": 1755 }, { "epoch": 0.24, "learning_rate": 1.7622264252904619e-06, "loss": 2.1073, "step": 1760 }, { "epoch": 0.24, "learning_rate": 1.7615509321804918e-06, "loss": 2.1346, "step": 1765 }, { "epoch": 0.24, "learning_rate": 1.7608754390705213e-06, "loss": 2.0585, "step": 1770 }, { "epoch": 0.24, "learning_rate": 1.7601999459605513e-06, "loss": 2.1566, "step": 1775 }, { "epoch": 0.24, "learning_rate": 1.7595244528505808e-06, "loss": 2.1382, "step": 1780 }, { "epoch": 0.24, "learning_rate": 1.7588489597406107e-06, "loss": 2.1801, "step": 1785 }, { "epoch": 0.24, "learning_rate": 1.7581734666306402e-06, "loss": 2.1169, "step": 1790 }, { "epoch": 0.24, "learning_rate": 1.7574979735206702e-06, "loss": 2.1017, "step": 1795 }, { "epoch": 0.24, "learning_rate": 1.7568224804106997e-06, "loss": 2.1697, "step": 1800 }, { "epoch": 0.24, "learning_rate": 1.7561469873007296e-06, "loss": 2.0574, "step": 1805 }, { "epoch": 0.24, "learning_rate": 1.7554714941907591e-06, "loss": 2.1086, "step": 1810 }, { "epoch": 0.25, "learning_rate": 1.7547960010807889e-06, "loss": 2.1359, "step": 1815 }, { "epoch": 0.25, "learning_rate": 1.7541205079708186e-06, "loss": 2.178, "step": 1820 }, { "epoch": 0.25, "learning_rate": 1.7534450148608483e-06, "loss": 2.1483, "step": 1825 }, { "epoch": 0.25, "learning_rate": 1.752769521750878e-06, "loss": 2.1726, "step": 1830 }, { "epoch": 0.25, "learning_rate": 1.7520940286409078e-06, "loss": 2.0898, "step": 1835 }, { "epoch": 0.25, "learning_rate": 1.7514185355309375e-06, "loss": 2.0657, "step": 1840 }, { "epoch": 0.25, "learning_rate": 1.7507430424209672e-06, "loss": 2.1823, "step": 1845 }, { "epoch": 0.25, "learning_rate": 1.750067549310997e-06, "loss": 2.0883, "step": 1850 }, { "epoch": 0.25, "learning_rate": 1.7493920562010267e-06, "loss": 2.1784, "step": 1855 }, { "epoch": 0.25, "learning_rate": 1.7487165630910564e-06, "loss": 2.0749, "step": 1860 }, { "epoch": 0.25, "learning_rate": 1.7480410699810861e-06, "loss": 2.1461, "step": 1865 }, { "epoch": 0.25, "learning_rate": 1.7473655768711158e-06, "loss": 2.2457, "step": 1870 }, { "epoch": 0.25, "learning_rate": 1.7466900837611456e-06, "loss": 2.1692, "step": 1875 }, { "epoch": 0.25, "learning_rate": 1.746014590651175e-06, "loss": 2.1989, "step": 1880 }, { "epoch": 0.25, "learning_rate": 1.745339097541205e-06, "loss": 2.194, "step": 1885 }, { "epoch": 0.26, "learning_rate": 1.7446636044312345e-06, "loss": 2.1568, "step": 1890 }, { "epoch": 0.26, "learning_rate": 1.7439881113212645e-06, "loss": 2.0265, "step": 1895 }, { "epoch": 0.26, "learning_rate": 1.7433126182112942e-06, "loss": 2.165, "step": 1900 }, { "epoch": 0.26, "learning_rate": 1.742637125101324e-06, "loss": 2.1315, "step": 1905 }, { "epoch": 0.26, "learning_rate": 1.7419616319913537e-06, "loss": 2.052, "step": 1910 }, { "epoch": 0.26, "learning_rate": 1.7412861388813834e-06, "loss": 2.151, "step": 1915 }, { "epoch": 0.26, "learning_rate": 1.740610645771413e-06, "loss": 2.1216, "step": 1920 }, { "epoch": 0.26, "learning_rate": 1.7399351526614428e-06, "loss": 2.2173, "step": 1925 }, { "epoch": 0.26, "learning_rate": 1.7392596595514726e-06, "loss": 2.0417, "step": 1930 }, { "epoch": 0.26, "learning_rate": 1.7385841664415023e-06, "loss": 2.1326, "step": 1935 }, { "epoch": 0.26, "learning_rate": 1.737908673331532e-06, "loss": 2.0473, "step": 1940 }, { "epoch": 0.26, "learning_rate": 1.7372331802215615e-06, "loss": 2.1349, "step": 1945 }, { "epoch": 0.26, "learning_rate": 1.7365576871115915e-06, "loss": 2.0019, "step": 1950 }, { "epoch": 0.26, "learning_rate": 1.735882194001621e-06, "loss": 2.1083, "step": 1955 }, { "epoch": 0.26, "learning_rate": 1.735206700891651e-06, "loss": 2.0901, "step": 1960 }, { "epoch": 0.27, "learning_rate": 1.7345312077816804e-06, "loss": 2.1184, "step": 1965 }, { "epoch": 0.27, "learning_rate": 1.7338557146717104e-06, "loss": 2.121, "step": 1970 }, { "epoch": 0.27, "learning_rate": 1.7331802215617399e-06, "loss": 1.9879, "step": 1975 }, { "epoch": 0.27, "learning_rate": 1.7325047284517698e-06, "loss": 2.1164, "step": 1980 }, { "epoch": 0.27, "learning_rate": 1.7318292353417993e-06, "loss": 2.118, "step": 1985 }, { "epoch": 0.27, "learning_rate": 1.7311537422318293e-06, "loss": 2.1429, "step": 1990 }, { "epoch": 0.27, "learning_rate": 1.7304782491218588e-06, "loss": 2.1996, "step": 1995 }, { "epoch": 0.27, "learning_rate": 1.7298027560118887e-06, "loss": 2.1266, "step": 2000 }, { "epoch": 0.27, "eval_loss": 2.1100127696990967, "eval_runtime": 165.3022, "eval_samples_per_second": 3.606, "eval_steps_per_second": 0.454, "step": 2000 }, { "epoch": 0.27, "learning_rate": 1.7291272629019182e-06, "loss": 2.2143, "step": 2005 }, { "epoch": 0.27, "learning_rate": 1.728451769791948e-06, "loss": 2.1678, "step": 2010 }, { "epoch": 0.27, "learning_rate": 1.7277762766819777e-06, "loss": 2.1279, "step": 2015 }, { "epoch": 0.27, "learning_rate": 1.7271007835720074e-06, "loss": 2.2299, "step": 2020 }, { "epoch": 0.27, "learning_rate": 1.7264252904620371e-06, "loss": 2.1529, "step": 2025 }, { "epoch": 0.27, "learning_rate": 1.7257497973520669e-06, "loss": 2.1269, "step": 2030 }, { "epoch": 0.27, "learning_rate": 1.7250743042420966e-06, "loss": 2.1651, "step": 2035 }, { "epoch": 0.28, "learning_rate": 1.7243988111321263e-06, "loss": 2.0818, "step": 2040 }, { "epoch": 0.28, "learning_rate": 1.7237233180221563e-06, "loss": 2.0841, "step": 2045 }, { "epoch": 0.28, "learning_rate": 1.7230478249121858e-06, "loss": 2.1054, "step": 2050 }, { "epoch": 0.28, "learning_rate": 1.7223723318022157e-06, "loss": 2.1279, "step": 2055 }, { "epoch": 0.28, "learning_rate": 1.7216968386922452e-06, "loss": 2.1461, "step": 2060 }, { "epoch": 0.28, "learning_rate": 1.7210213455822752e-06, "loss": 2.2039, "step": 2065 }, { "epoch": 0.28, "learning_rate": 1.7203458524723047e-06, "loss": 2.2047, "step": 2070 }, { "epoch": 0.28, "learning_rate": 1.7196703593623344e-06, "loss": 2.11, "step": 2075 }, { "epoch": 0.28, "learning_rate": 1.7189948662523641e-06, "loss": 2.1663, "step": 2080 }, { "epoch": 0.28, "learning_rate": 1.7183193731423939e-06, "loss": 2.0122, "step": 2085 }, { "epoch": 0.28, "learning_rate": 1.7176438800324236e-06, "loss": 2.1258, "step": 2090 }, { "epoch": 0.28, "learning_rate": 1.7169683869224533e-06, "loss": 2.103, "step": 2095 }, { "epoch": 0.28, "learning_rate": 1.716292893812483e-06, "loss": 2.181, "step": 2100 }, { "epoch": 0.28, "learning_rate": 1.7156174007025128e-06, "loss": 2.1502, "step": 2105 }, { "epoch": 0.29, "learning_rate": 1.7149419075925425e-06, "loss": 2.1905, "step": 2110 }, { "epoch": 0.29, "learning_rate": 1.7142664144825722e-06, "loss": 2.1424, "step": 2115 }, { "epoch": 0.29, "learning_rate": 1.713590921372602e-06, "loss": 2.1408, "step": 2120 }, { "epoch": 0.29, "learning_rate": 1.7129154282626317e-06, "loss": 2.0876, "step": 2125 }, { "epoch": 0.29, "learning_rate": 1.7122399351526614e-06, "loss": 2.1308, "step": 2130 }, { "epoch": 0.29, "learning_rate": 1.7115644420426911e-06, "loss": 2.13, "step": 2135 }, { "epoch": 0.29, "learning_rate": 1.7108889489327206e-06, "loss": 2.1297, "step": 2140 }, { "epoch": 0.29, "learning_rate": 1.7102134558227506e-06, "loss": 2.146, "step": 2145 }, { "epoch": 0.29, "learning_rate": 1.70953796271278e-06, "loss": 2.1063, "step": 2150 }, { "epoch": 0.29, "learning_rate": 1.70886246960281e-06, "loss": 2.1605, "step": 2155 }, { "epoch": 0.29, "learning_rate": 1.7081869764928395e-06, "loss": 2.1093, "step": 2160 }, { "epoch": 0.29, "learning_rate": 1.7075114833828695e-06, "loss": 2.1206, "step": 2165 }, { "epoch": 0.29, "learning_rate": 1.706835990272899e-06, "loss": 2.076, "step": 2170 }, { "epoch": 0.29, "learning_rate": 1.706160497162929e-06, "loss": 2.1505, "step": 2175 }, { "epoch": 0.29, "learning_rate": 1.7054850040529584e-06, "loss": 2.1548, "step": 2180 }, { "epoch": 0.3, "learning_rate": 1.7048095109429884e-06, "loss": 2.098, "step": 2185 }, { "epoch": 0.3, "learning_rate": 1.7041340178330181e-06, "loss": 2.1067, "step": 2190 }, { "epoch": 0.3, "learning_rate": 1.7034585247230478e-06, "loss": 2.0598, "step": 2195 }, { "epoch": 0.3, "learning_rate": 1.7027830316130776e-06, "loss": 2.1151, "step": 2200 }, { "epoch": 0.3, "learning_rate": 1.702107538503107e-06, "loss": 2.0832, "step": 2205 }, { "epoch": 0.3, "learning_rate": 1.701432045393137e-06, "loss": 2.1308, "step": 2210 }, { "epoch": 0.3, "learning_rate": 1.7007565522831665e-06, "loss": 2.1276, "step": 2215 }, { "epoch": 0.3, "learning_rate": 1.7000810591731965e-06, "loss": 2.0819, "step": 2220 }, { "epoch": 0.3, "learning_rate": 1.699405566063226e-06, "loss": 2.113, "step": 2225 }, { "epoch": 0.3, "learning_rate": 1.698730072953256e-06, "loss": 2.1185, "step": 2230 }, { "epoch": 0.3, "learning_rate": 1.6980545798432854e-06, "loss": 2.1705, "step": 2235 }, { "epoch": 0.3, "learning_rate": 1.6973790867333154e-06, "loss": 2.1164, "step": 2240 }, { "epoch": 0.3, "learning_rate": 1.6967035936233449e-06, "loss": 2.0782, "step": 2245 }, { "epoch": 0.3, "learning_rate": 1.6960281005133748e-06, "loss": 2.1195, "step": 2250 }, { "epoch": 0.3, "learning_rate": 1.6953526074034043e-06, "loss": 2.0733, "step": 2255 }, { "epoch": 0.31, "learning_rate": 1.6946771142934343e-06, "loss": 2.1518, "step": 2260 }, { "epoch": 0.31, "learning_rate": 1.6940016211834638e-06, "loss": 2.0086, "step": 2265 }, { "epoch": 0.31, "learning_rate": 1.6933261280734935e-06, "loss": 2.0694, "step": 2270 }, { "epoch": 0.31, "learning_rate": 1.6926506349635232e-06, "loss": 2.1871, "step": 2275 }, { "epoch": 0.31, "learning_rate": 1.691975141853553e-06, "loss": 2.1151, "step": 2280 }, { "epoch": 0.31, "learning_rate": 1.6912996487435827e-06, "loss": 2.1302, "step": 2285 }, { "epoch": 0.31, "learning_rate": 1.6906241556336124e-06, "loss": 2.0942, "step": 2290 }, { "epoch": 0.31, "learning_rate": 1.6899486625236422e-06, "loss": 2.1311, "step": 2295 }, { "epoch": 0.31, "learning_rate": 1.6892731694136719e-06, "loss": 2.0993, "step": 2300 }, { "epoch": 0.31, "learning_rate": 1.6885976763037016e-06, "loss": 2.1066, "step": 2305 }, { "epoch": 0.31, "learning_rate": 1.6879221831937313e-06, "loss": 2.1216, "step": 2310 }, { "epoch": 0.31, "learning_rate": 1.687246690083761e-06, "loss": 2.0656, "step": 2315 }, { "epoch": 0.31, "learning_rate": 1.6865711969737908e-06, "loss": 2.0993, "step": 2320 }, { "epoch": 0.31, "learning_rate": 1.6858957038638205e-06, "loss": 2.0928, "step": 2325 }, { "epoch": 0.31, "learning_rate": 1.6852202107538502e-06, "loss": 2.1319, "step": 2330 }, { "epoch": 0.32, "learning_rate": 1.68454471764388e-06, "loss": 2.1125, "step": 2335 }, { "epoch": 0.32, "learning_rate": 1.6838692245339097e-06, "loss": 1.9319, "step": 2340 }, { "epoch": 0.32, "learning_rate": 1.6831937314239394e-06, "loss": 2.1942, "step": 2345 }, { "epoch": 0.32, "learning_rate": 1.6825182383139691e-06, "loss": 2.1001, "step": 2350 }, { "epoch": 0.32, "learning_rate": 1.6818427452039989e-06, "loss": 2.1189, "step": 2355 }, { "epoch": 0.32, "learning_rate": 1.6811672520940286e-06, "loss": 2.1774, "step": 2360 }, { "epoch": 0.32, "learning_rate": 1.6804917589840583e-06, "loss": 2.1566, "step": 2365 }, { "epoch": 0.32, "learning_rate": 1.679816265874088e-06, "loss": 2.1077, "step": 2370 }, { "epoch": 0.32, "learning_rate": 1.6791407727641178e-06, "loss": 1.994, "step": 2375 }, { "epoch": 0.32, "learning_rate": 1.6784652796541475e-06, "loss": 2.1114, "step": 2380 }, { "epoch": 0.32, "learning_rate": 1.6777897865441772e-06, "loss": 2.1286, "step": 2385 }, { "epoch": 0.32, "learning_rate": 1.677114293434207e-06, "loss": 2.1445, "step": 2390 }, { "epoch": 0.32, "learning_rate": 1.6764388003242367e-06, "loss": 2.0908, "step": 2395 }, { "epoch": 0.32, "learning_rate": 1.6757633072142662e-06, "loss": 2.0279, "step": 2400 }, { "epoch": 0.32, "eval_loss": 2.090486764907837, "eval_runtime": 165.4313, "eval_samples_per_second": 3.603, "eval_steps_per_second": 0.453, "step": 2400 }, { "epoch": 0.32, "learning_rate": 1.6750878141042961e-06, "loss": 2.0959, "step": 2405 }, { "epoch": 0.33, "learning_rate": 1.6744123209943256e-06, "loss": 2.1049, "step": 2410 }, { "epoch": 0.33, "learning_rate": 1.6737368278843556e-06, "loss": 2.1089, "step": 2415 }, { "epoch": 0.33, "learning_rate": 1.673061334774385e-06, "loss": 2.1387, "step": 2420 }, { "epoch": 0.33, "learning_rate": 1.672385841664415e-06, "loss": 2.1712, "step": 2425 }, { "epoch": 0.33, "learning_rate": 1.6717103485544446e-06, "loss": 2.0841, "step": 2430 }, { "epoch": 0.33, "learning_rate": 1.6710348554444745e-06, "loss": 2.1344, "step": 2435 }, { "epoch": 0.33, "learning_rate": 1.670359362334504e-06, "loss": 2.0958, "step": 2440 }, { "epoch": 0.33, "learning_rate": 1.669683869224534e-06, "loss": 2.0917, "step": 2445 }, { "epoch": 0.33, "learning_rate": 1.6690083761145635e-06, "loss": 2.0386, "step": 2450 }, { "epoch": 0.33, "learning_rate": 1.6683328830045934e-06, "loss": 2.0025, "step": 2455 }, { "epoch": 0.33, "learning_rate": 1.667657389894623e-06, "loss": 2.1355, "step": 2460 }, { "epoch": 0.33, "learning_rate": 1.6669818967846526e-06, "loss": 2.1008, "step": 2465 }, { "epoch": 0.33, "learning_rate": 1.6663064036746824e-06, "loss": 2.0849, "step": 2470 }, { "epoch": 0.33, "learning_rate": 1.665630910564712e-06, "loss": 2.1699, "step": 2475 }, { "epoch": 0.34, "learning_rate": 1.664955417454742e-06, "loss": 2.0264, "step": 2480 }, { "epoch": 0.34, "learning_rate": 1.6642799243447715e-06, "loss": 2.0421, "step": 2485 }, { "epoch": 0.34, "learning_rate": 1.6636044312348015e-06, "loss": 2.1003, "step": 2490 }, { "epoch": 0.34, "learning_rate": 1.662928938124831e-06, "loss": 2.1109, "step": 2495 }, { "epoch": 0.34, "learning_rate": 1.662253445014861e-06, "loss": 2.1525, "step": 2500 }, { "epoch": 0.34, "learning_rate": 1.6615779519048904e-06, "loss": 2.0433, "step": 2505 }, { "epoch": 0.34, "learning_rate": 1.6609024587949204e-06, "loss": 2.0748, "step": 2510 }, { "epoch": 0.34, "learning_rate": 1.66022696568495e-06, "loss": 2.1392, "step": 2515 }, { "epoch": 0.34, "learning_rate": 1.6595514725749798e-06, "loss": 2.0972, "step": 2520 }, { "epoch": 0.34, "learning_rate": 1.6588759794650093e-06, "loss": 2.1862, "step": 2525 }, { "epoch": 0.34, "learning_rate": 1.658200486355039e-06, "loss": 2.1236, "step": 2530 }, { "epoch": 0.34, "learning_rate": 1.6575249932450688e-06, "loss": 2.19, "step": 2535 }, { "epoch": 0.34, "learning_rate": 1.6568495001350985e-06, "loss": 2.1086, "step": 2540 }, { "epoch": 0.34, "learning_rate": 1.6561740070251283e-06, "loss": 2.133, "step": 2545 }, { "epoch": 0.34, "learning_rate": 1.655498513915158e-06, "loss": 2.0417, "step": 2550 }, { "epoch": 0.35, "learning_rate": 1.6548230208051877e-06, "loss": 2.1212, "step": 2555 }, { "epoch": 0.35, "learning_rate": 1.6541475276952174e-06, "loss": 2.0668, "step": 2560 }, { "epoch": 0.35, "learning_rate": 1.6534720345852472e-06, "loss": 2.1242, "step": 2565 }, { "epoch": 0.35, "learning_rate": 1.6527965414752769e-06, "loss": 2.0738, "step": 2570 }, { "epoch": 0.35, "learning_rate": 1.6521210483653066e-06, "loss": 2.1083, "step": 2575 }, { "epoch": 0.35, "learning_rate": 1.6514455552553363e-06, "loss": 2.0711, "step": 2580 }, { "epoch": 0.35, "learning_rate": 1.650770062145366e-06, "loss": 2.0547, "step": 2585 }, { "epoch": 0.35, "learning_rate": 1.6500945690353958e-06, "loss": 2.0984, "step": 2590 }, { "epoch": 0.35, "learning_rate": 1.6494190759254253e-06, "loss": 2.0657, "step": 2595 }, { "epoch": 0.35, "learning_rate": 1.6487435828154552e-06, "loss": 2.1342, "step": 2600 }, { "epoch": 0.35, "learning_rate": 1.6480680897054848e-06, "loss": 2.1238, "step": 2605 }, { "epoch": 0.35, "learning_rate": 1.6473925965955147e-06, "loss": 2.0945, "step": 2610 }, { "epoch": 0.35, "learning_rate": 1.6467171034855442e-06, "loss": 2.1569, "step": 2615 }, { "epoch": 0.35, "learning_rate": 1.6460416103755741e-06, "loss": 2.055, "step": 2620 }, { "epoch": 0.35, "learning_rate": 1.6453661172656039e-06, "loss": 2.1235, "step": 2625 }, { "epoch": 0.36, "learning_rate": 1.6446906241556336e-06, "loss": 2.0776, "step": 2630 }, { "epoch": 0.36, "learning_rate": 1.6440151310456633e-06, "loss": 2.1014, "step": 2635 }, { "epoch": 0.36, "learning_rate": 1.643339637935693e-06, "loss": 2.0771, "step": 2640 }, { "epoch": 0.36, "learning_rate": 1.6426641448257228e-06, "loss": 2.0169, "step": 2645 }, { "epoch": 0.36, "learning_rate": 1.6419886517157525e-06, "loss": 2.0718, "step": 2650 }, { "epoch": 0.36, "learning_rate": 1.6413131586057822e-06, "loss": 2.1339, "step": 2655 }, { "epoch": 0.36, "learning_rate": 1.6406376654958117e-06, "loss": 2.1413, "step": 2660 }, { "epoch": 0.36, "learning_rate": 1.6399621723858417e-06, "loss": 2.1967, "step": 2665 }, { "epoch": 0.36, "learning_rate": 1.6392866792758712e-06, "loss": 2.0654, "step": 2670 }, { "epoch": 0.36, "learning_rate": 1.6386111861659011e-06, "loss": 2.0067, "step": 2675 }, { "epoch": 0.36, "learning_rate": 1.6379356930559307e-06, "loss": 1.9827, "step": 2680 }, { "epoch": 0.36, "learning_rate": 1.6372601999459606e-06, "loss": 2.1348, "step": 2685 }, { "epoch": 0.36, "learning_rate": 1.63658470683599e-06, "loss": 2.0717, "step": 2690 }, { "epoch": 0.36, "learning_rate": 1.63590921372602e-06, "loss": 2.0616, "step": 2695 }, { "epoch": 0.36, "learning_rate": 1.6352337206160496e-06, "loss": 2.1373, "step": 2700 }, { "epoch": 0.37, "learning_rate": 1.6345582275060795e-06, "loss": 2.07, "step": 2705 }, { "epoch": 0.37, "learning_rate": 1.633882734396109e-06, "loss": 2.0335, "step": 2710 }, { "epoch": 0.37, "learning_rate": 1.633207241286139e-06, "loss": 2.119, "step": 2715 }, { "epoch": 0.37, "learning_rate": 1.6325317481761685e-06, "loss": 2.0989, "step": 2720 }, { "epoch": 0.37, "learning_rate": 1.6318562550661982e-06, "loss": 2.1273, "step": 2725 }, { "epoch": 0.37, "learning_rate": 1.631180761956228e-06, "loss": 2.0868, "step": 2730 }, { "epoch": 0.37, "learning_rate": 1.6305052688462576e-06, "loss": 2.0753, "step": 2735 }, { "epoch": 0.37, "learning_rate": 1.6298297757362874e-06, "loss": 2.0779, "step": 2740 }, { "epoch": 0.37, "learning_rate": 1.629154282626317e-06, "loss": 1.9635, "step": 2745 }, { "epoch": 0.37, "learning_rate": 1.6284787895163468e-06, "loss": 2.0501, "step": 2750 }, { "epoch": 0.37, "learning_rate": 1.6278032964063765e-06, "loss": 2.0689, "step": 2755 }, { "epoch": 0.37, "learning_rate": 1.6271278032964063e-06, "loss": 2.0779, "step": 2760 }, { "epoch": 0.37, "learning_rate": 1.626452310186436e-06, "loss": 2.0272, "step": 2765 }, { "epoch": 0.37, "learning_rate": 1.625776817076466e-06, "loss": 2.1481, "step": 2770 }, { "epoch": 0.37, "learning_rate": 1.6251013239664955e-06, "loss": 2.0285, "step": 2775 }, { "epoch": 0.38, "learning_rate": 1.6244258308565254e-06, "loss": 2.013, "step": 2780 }, { "epoch": 0.38, "learning_rate": 1.623750337746555e-06, "loss": 2.1225, "step": 2785 }, { "epoch": 0.38, "learning_rate": 1.6230748446365846e-06, "loss": 2.067, "step": 2790 }, { "epoch": 0.38, "learning_rate": 1.6223993515266144e-06, "loss": 2.1046, "step": 2795 }, { "epoch": 0.38, "learning_rate": 1.621723858416644e-06, "loss": 2.0699, "step": 2800 }, { "epoch": 0.38, "eval_loss": 2.0739200115203857, "eval_runtime": 165.5539, "eval_samples_per_second": 3.6, "eval_steps_per_second": 0.453, "step": 2800 }, { "epoch": 0.38, "learning_rate": 1.6210483653066738e-06, "loss": 2.119, "step": 2805 }, { "epoch": 0.38, "learning_rate": 1.6203728721967035e-06, "loss": 2.0534, "step": 2810 }, { "epoch": 0.38, "learning_rate": 1.6196973790867333e-06, "loss": 2.1377, "step": 2815 }, { "epoch": 0.38, "learning_rate": 1.619021885976763e-06, "loss": 2.0943, "step": 2820 }, { "epoch": 0.38, "learning_rate": 1.6183463928667927e-06, "loss": 2.1415, "step": 2825 }, { "epoch": 0.38, "learning_rate": 1.6176708997568224e-06, "loss": 2.0923, "step": 2830 }, { "epoch": 0.38, "learning_rate": 1.6169954066468522e-06, "loss": 2.0528, "step": 2835 }, { "epoch": 0.38, "learning_rate": 1.6163199135368819e-06, "loss": 2.0935, "step": 2840 }, { "epoch": 0.38, "learning_rate": 1.6156444204269116e-06, "loss": 2.1159, "step": 2845 }, { "epoch": 0.39, "learning_rate": 1.6149689273169413e-06, "loss": 2.0728, "step": 2850 }, { "epoch": 0.39, "learning_rate": 1.6142934342069709e-06, "loss": 2.0722, "step": 2855 }, { "epoch": 0.39, "learning_rate": 1.6136179410970008e-06, "loss": 2.0461, "step": 2860 }, { "epoch": 0.39, "learning_rate": 1.6129424479870303e-06, "loss": 2.1287, "step": 2865 }, { "epoch": 0.39, "learning_rate": 1.6122669548770603e-06, "loss": 2.0648, "step": 2870 }, { "epoch": 0.39, "learning_rate": 1.6115914617670898e-06, "loss": 2.0533, "step": 2875 }, { "epoch": 0.39, "learning_rate": 1.6109159686571197e-06, "loss": 2.0866, "step": 2880 }, { "epoch": 0.39, "learning_rate": 1.6102404755471492e-06, "loss": 2.1042, "step": 2885 }, { "epoch": 0.39, "learning_rate": 1.6095649824371792e-06, "loss": 2.1186, "step": 2890 }, { "epoch": 0.39, "learning_rate": 1.6088894893272087e-06, "loss": 2.1474, "step": 2895 }, { "epoch": 0.39, "learning_rate": 1.6082139962172386e-06, "loss": 2.1159, "step": 2900 }, { "epoch": 0.39, "learning_rate": 1.6075385031072681e-06, "loss": 2.0443, "step": 2905 }, { "epoch": 0.39, "learning_rate": 1.606863009997298e-06, "loss": 2.0029, "step": 2910 }, { "epoch": 0.39, "learning_rate": 1.6061875168873278e-06, "loss": 2.0684, "step": 2915 }, { "epoch": 0.39, "learning_rate": 1.6055120237773573e-06, "loss": 1.9991, "step": 2920 }, { "epoch": 0.4, "learning_rate": 1.6048365306673872e-06, "loss": 2.0945, "step": 2925 }, { "epoch": 0.4, "learning_rate": 1.6041610375574168e-06, "loss": 2.0717, "step": 2930 }, { "epoch": 0.4, "learning_rate": 1.6034855444474467e-06, "loss": 2.0839, "step": 2935 }, { "epoch": 0.4, "learning_rate": 1.6028100513374762e-06, "loss": 2.1329, "step": 2940 }, { "epoch": 0.4, "learning_rate": 1.6021345582275061e-06, "loss": 2.0077, "step": 2945 }, { "epoch": 0.4, "learning_rate": 1.6014590651175357e-06, "loss": 2.0517, "step": 2950 }, { "epoch": 0.4, "learning_rate": 1.6007835720075656e-06, "loss": 2.0322, "step": 2955 }, { "epoch": 0.4, "learning_rate": 1.6001080788975951e-06, "loss": 2.055, "step": 2960 }, { "epoch": 0.4, "learning_rate": 1.599432585787625e-06, "loss": 2.1037, "step": 2965 }, { "epoch": 0.4, "learning_rate": 1.5987570926776546e-06, "loss": 2.059, "step": 2970 }, { "epoch": 0.4, "learning_rate": 1.5980815995676845e-06, "loss": 2.0551, "step": 2975 }, { "epoch": 0.4, "learning_rate": 1.597406106457714e-06, "loss": 2.0848, "step": 2980 }, { "epoch": 0.4, "learning_rate": 1.5967306133477437e-06, "loss": 2.0801, "step": 2985 }, { "epoch": 0.4, "learning_rate": 1.5960551202377735e-06, "loss": 2.0898, "step": 2990 }, { "epoch": 0.4, "learning_rate": 1.5953796271278032e-06, "loss": 2.0975, "step": 2995 }, { "epoch": 0.41, "learning_rate": 1.594704134017833e-06, "loss": 2.0278, "step": 3000 }, { "epoch": 0.41, "learning_rate": 1.5940286409078626e-06, "loss": 2.0342, "step": 3005 }, { "epoch": 0.41, "learning_rate": 1.5933531477978924e-06, "loss": 2.068, "step": 3010 }, { "epoch": 0.41, "learning_rate": 1.592677654687922e-06, "loss": 2.0488, "step": 3015 }, { "epoch": 0.41, "learning_rate": 1.5920021615779518e-06, "loss": 2.0348, "step": 3020 }, { "epoch": 0.41, "learning_rate": 1.5913266684679816e-06, "loss": 2.0646, "step": 3025 }, { "epoch": 0.41, "learning_rate": 1.5906511753580113e-06, "loss": 2.0238, "step": 3030 }, { "epoch": 0.41, "learning_rate": 1.589975682248041e-06, "loss": 2.1214, "step": 3035 }, { "epoch": 0.41, "learning_rate": 1.5893001891380705e-06, "loss": 2.0474, "step": 3040 }, { "epoch": 0.41, "learning_rate": 1.5886246960281005e-06, "loss": 2.0923, "step": 3045 }, { "epoch": 0.41, "learning_rate": 1.58794920291813e-06, "loss": 2.0824, "step": 3050 }, { "epoch": 0.41, "learning_rate": 1.58727370980816e-06, "loss": 2.0408, "step": 3055 }, { "epoch": 0.41, "learning_rate": 1.5865982166981896e-06, "loss": 2.0531, "step": 3060 }, { "epoch": 0.41, "learning_rate": 1.5859227235882194e-06, "loss": 2.0216, "step": 3065 }, { "epoch": 0.41, "learning_rate": 1.585247230478249e-06, "loss": 2.1115, "step": 3070 }, { "epoch": 0.42, "learning_rate": 1.5845717373682788e-06, "loss": 2.0333, "step": 3075 }, { "epoch": 0.42, "learning_rate": 1.5838962442583085e-06, "loss": 2.0941, "step": 3080 }, { "epoch": 0.42, "learning_rate": 1.5832207511483383e-06, "loss": 2.0995, "step": 3085 }, { "epoch": 0.42, "learning_rate": 1.582545258038368e-06, "loss": 2.1471, "step": 3090 }, { "epoch": 0.42, "learning_rate": 1.5818697649283977e-06, "loss": 1.9764, "step": 3095 }, { "epoch": 0.42, "learning_rate": 1.5811942718184274e-06, "loss": 2.0089, "step": 3100 }, { "epoch": 0.42, "learning_rate": 1.580518778708457e-06, "loss": 2.1237, "step": 3105 }, { "epoch": 0.42, "learning_rate": 1.579843285598487e-06, "loss": 2.1105, "step": 3110 }, { "epoch": 0.42, "learning_rate": 1.5791677924885164e-06, "loss": 2.1469, "step": 3115 }, { "epoch": 0.42, "learning_rate": 1.5784922993785464e-06, "loss": 2.0442, "step": 3120 }, { "epoch": 0.42, "learning_rate": 1.5778168062685759e-06, "loss": 2.194, "step": 3125 }, { "epoch": 0.42, "learning_rate": 1.5771413131586058e-06, "loss": 2.0288, "step": 3130 }, { "epoch": 0.42, "learning_rate": 1.5764658200486353e-06, "loss": 2.0379, "step": 3135 }, { "epoch": 0.42, "learning_rate": 1.5757903269386653e-06, "loss": 2.07, "step": 3140 }, { "epoch": 0.42, "learning_rate": 1.5751148338286948e-06, "loss": 2.0844, "step": 3145 }, { "epoch": 0.43, "learning_rate": 1.5744393407187247e-06, "loss": 2.0518, "step": 3150 }, { "epoch": 0.43, "learning_rate": 1.5737638476087542e-06, "loss": 2.0537, "step": 3155 }, { "epoch": 0.43, "learning_rate": 1.5730883544987842e-06, "loss": 2.0722, "step": 3160 }, { "epoch": 0.43, "learning_rate": 1.5724128613888137e-06, "loss": 2.1186, "step": 3165 }, { "epoch": 0.43, "learning_rate": 1.5717373682788436e-06, "loss": 2.0531, "step": 3170 }, { "epoch": 0.43, "learning_rate": 1.5710618751688731e-06, "loss": 2.0955, "step": 3175 }, { "epoch": 0.43, "learning_rate": 1.5703863820589029e-06, "loss": 2.0703, "step": 3180 }, { "epoch": 0.43, "learning_rate": 1.5697108889489326e-06, "loss": 2.1432, "step": 3185 }, { "epoch": 0.43, "learning_rate": 1.5690353958389623e-06, "loss": 2.0921, "step": 3190 }, { "epoch": 0.43, "learning_rate": 1.568359902728992e-06, "loss": 2.0203, "step": 3195 }, { "epoch": 0.43, "learning_rate": 1.5676844096190218e-06, "loss": 2.048, "step": 3200 }, { "epoch": 0.43, "eval_loss": 2.060811758041382, "eval_runtime": 165.4321, "eval_samples_per_second": 3.603, "eval_steps_per_second": 0.453, "step": 3200 }, { "epoch": 0.43, "learning_rate": 1.5670089165090515e-06, "loss": 2.0813, "step": 3205 }, { "epoch": 0.43, "learning_rate": 1.5663334233990812e-06, "loss": 2.1681, "step": 3210 }, { "epoch": 0.43, "learning_rate": 1.5656579302891112e-06, "loss": 2.009, "step": 3215 }, { "epoch": 0.44, "learning_rate": 1.5649824371791407e-06, "loss": 2.1227, "step": 3220 }, { "epoch": 0.44, "learning_rate": 1.5643069440691706e-06, "loss": 1.9925, "step": 3225 }, { "epoch": 0.44, "learning_rate": 1.5636314509592001e-06, "loss": 2.0469, "step": 3230 }, { "epoch": 0.44, "learning_rate": 1.56295595784923e-06, "loss": 2.0438, "step": 3235 }, { "epoch": 0.44, "learning_rate": 1.5622804647392596e-06, "loss": 2.0806, "step": 3240 }, { "epoch": 0.44, "learning_rate": 1.5616049716292893e-06, "loss": 2.1108, "step": 3245 }, { "epoch": 0.44, "learning_rate": 1.560929478519319e-06, "loss": 2.0461, "step": 3250 }, { "epoch": 0.44, "learning_rate": 1.5602539854093488e-06, "loss": 2.13, "step": 3255 }, { "epoch": 0.44, "learning_rate": 1.5595784922993785e-06, "loss": 2.1125, "step": 3260 }, { "epoch": 0.44, "learning_rate": 1.5589029991894082e-06, "loss": 2.0172, "step": 3265 }, { "epoch": 0.44, "learning_rate": 1.558227506079438e-06, "loss": 2.1299, "step": 3270 }, { "epoch": 0.44, "learning_rate": 1.5575520129694677e-06, "loss": 2.0406, "step": 3275 }, { "epoch": 0.44, "learning_rate": 1.5568765198594974e-06, "loss": 2.0501, "step": 3280 }, { "epoch": 0.44, "learning_rate": 1.5562010267495271e-06, "loss": 2.0377, "step": 3285 }, { "epoch": 0.44, "learning_rate": 1.5555255336395568e-06, "loss": 2.0649, "step": 3290 }, { "epoch": 0.45, "learning_rate": 1.5548500405295866e-06, "loss": 2.0212, "step": 3295 }, { "epoch": 0.45, "learning_rate": 1.554174547419616e-06, "loss": 2.0358, "step": 3300 }, { "epoch": 0.45, "learning_rate": 1.553499054309646e-06, "loss": 2.1017, "step": 3305 }, { "epoch": 0.45, "learning_rate": 1.5528235611996755e-06, "loss": 1.9907, "step": 3310 }, { "epoch": 0.45, "learning_rate": 1.5521480680897055e-06, "loss": 2.0097, "step": 3315 }, { "epoch": 0.45, "learning_rate": 1.551472574979735e-06, "loss": 2.0266, "step": 3320 }, { "epoch": 0.45, "learning_rate": 1.550797081869765e-06, "loss": 2.1016, "step": 3325 }, { "epoch": 0.45, "learning_rate": 1.5501215887597944e-06, "loss": 2.162, "step": 3330 }, { "epoch": 0.45, "learning_rate": 1.5494460956498244e-06, "loss": 2.0395, "step": 3335 }, { "epoch": 0.45, "learning_rate": 1.5487706025398539e-06, "loss": 2.0559, "step": 3340 }, { "epoch": 0.45, "learning_rate": 1.5480951094298838e-06, "loss": 2.1894, "step": 3345 }, { "epoch": 0.45, "learning_rate": 1.5474196163199133e-06, "loss": 2.0609, "step": 3350 }, { "epoch": 0.45, "learning_rate": 1.5467441232099433e-06, "loss": 2.1246, "step": 3355 }, { "epoch": 0.45, "learning_rate": 1.546068630099973e-06, "loss": 2.125, "step": 3360 }, { "epoch": 0.45, "learning_rate": 1.5453931369900025e-06, "loss": 2.1067, "step": 3365 }, { "epoch": 0.46, "learning_rate": 1.5447176438800325e-06, "loss": 1.9872, "step": 3370 }, { "epoch": 0.46, "learning_rate": 1.544042150770062e-06, "loss": 2.1292, "step": 3375 }, { "epoch": 0.46, "learning_rate": 1.543366657660092e-06, "loss": 2.1035, "step": 3380 }, { "epoch": 0.46, "learning_rate": 1.5426911645501214e-06, "loss": 2.1686, "step": 3385 }, { "epoch": 0.46, "learning_rate": 1.5420156714401514e-06, "loss": 2.0928, "step": 3390 }, { "epoch": 0.46, "learning_rate": 1.5413401783301809e-06, "loss": 2.0584, "step": 3395 }, { "epoch": 0.46, "learning_rate": 1.5406646852202108e-06, "loss": 2.0046, "step": 3400 }, { "epoch": 0.46, "learning_rate": 1.5399891921102403e-06, "loss": 2.0863, "step": 3405 }, { "epoch": 0.46, "learning_rate": 1.5393136990002703e-06, "loss": 2.0954, "step": 3410 }, { "epoch": 0.46, "learning_rate": 1.5386382058902998e-06, "loss": 2.0727, "step": 3415 }, { "epoch": 0.46, "learning_rate": 1.5379627127803297e-06, "loss": 2.1576, "step": 3420 }, { "epoch": 0.46, "learning_rate": 1.5372872196703592e-06, "loss": 2.046, "step": 3425 }, { "epoch": 0.46, "learning_rate": 1.5366117265603892e-06, "loss": 2.0489, "step": 3430 }, { "epoch": 0.46, "learning_rate": 1.5359362334504187e-06, "loss": 2.0607, "step": 3435 }, { "epoch": 0.46, "learning_rate": 1.5352607403404484e-06, "loss": 1.995, "step": 3440 }, { "epoch": 0.47, "learning_rate": 1.5345852472304781e-06, "loss": 2.0571, "step": 3445 }, { "epoch": 0.47, "learning_rate": 1.5339097541205079e-06, "loss": 2.0897, "step": 3450 }, { "epoch": 0.47, "learning_rate": 1.5332342610105376e-06, "loss": 2.0928, "step": 3455 }, { "epoch": 0.47, "learning_rate": 1.5325587679005673e-06, "loss": 2.0929, "step": 3460 }, { "epoch": 0.47, "learning_rate": 1.531883274790597e-06, "loss": 2.123, "step": 3465 }, { "epoch": 0.47, "learning_rate": 1.5312077816806268e-06, "loss": 2.1284, "step": 3470 }, { "epoch": 0.47, "learning_rate": 1.5305322885706565e-06, "loss": 2.0668, "step": 3475 }, { "epoch": 0.47, "learning_rate": 1.5298567954606862e-06, "loss": 2.038, "step": 3480 }, { "epoch": 0.47, "learning_rate": 1.529181302350716e-06, "loss": 2.0609, "step": 3485 }, { "epoch": 0.47, "learning_rate": 1.5285058092407457e-06, "loss": 2.1102, "step": 3490 }, { "epoch": 0.47, "learning_rate": 1.5278303161307752e-06, "loss": 2.0901, "step": 3495 }, { "epoch": 0.47, "learning_rate": 1.5271548230208051e-06, "loss": 2.0164, "step": 3500 }, { "epoch": 0.47, "learning_rate": 1.5264793299108349e-06, "loss": 2.0328, "step": 3505 }, { "epoch": 0.47, "learning_rate": 1.5258038368008646e-06, "loss": 2.131, "step": 3510 }, { "epoch": 0.47, "learning_rate": 1.5251283436908943e-06, "loss": 2.1453, "step": 3515 }, { "epoch": 0.48, "learning_rate": 1.524452850580924e-06, "loss": 2.0078, "step": 3520 }, { "epoch": 0.48, "learning_rate": 1.5237773574709538e-06, "loss": 2.0995, "step": 3525 }, { "epoch": 0.48, "learning_rate": 1.5231018643609835e-06, "loss": 2.0006, "step": 3530 }, { "epoch": 0.48, "learning_rate": 1.5224263712510132e-06, "loss": 2.1118, "step": 3535 }, { "epoch": 0.48, "learning_rate": 1.521750878141043e-06, "loss": 2.1054, "step": 3540 }, { "epoch": 0.48, "learning_rate": 1.5210753850310727e-06, "loss": 2.0164, "step": 3545 }, { "epoch": 0.48, "learning_rate": 1.5203998919211024e-06, "loss": 2.146, "step": 3550 }, { "epoch": 0.48, "learning_rate": 1.5197243988111321e-06, "loss": 2.0721, "step": 3555 }, { "epoch": 0.48, "learning_rate": 1.5190489057011616e-06, "loss": 2.0081, "step": 3560 }, { "epoch": 0.48, "learning_rate": 1.5183734125911916e-06, "loss": 2.1044, "step": 3565 }, { "epoch": 0.48, "learning_rate": 1.517697919481221e-06, "loss": 2.097, "step": 3570 }, { "epoch": 0.48, "learning_rate": 1.517022426371251e-06, "loss": 2.0469, "step": 3575 }, { "epoch": 0.48, "learning_rate": 1.5163469332612805e-06, "loss": 2.0239, "step": 3580 }, { "epoch": 0.48, "learning_rate": 1.5156714401513105e-06, "loss": 2.0602, "step": 3585 }, { "epoch": 0.48, "learning_rate": 1.51499594704134e-06, "loss": 2.0721, "step": 3590 }, { "epoch": 0.49, "learning_rate": 1.51432045393137e-06, "loss": 2.0337, "step": 3595 }, { "epoch": 0.49, "learning_rate": 1.5136449608213994e-06, "loss": 1.8914, "step": 3600 }, { "epoch": 0.49, "eval_loss": 2.049622058868408, "eval_runtime": 165.1588, "eval_samples_per_second": 3.609, "eval_steps_per_second": 0.454, "step": 3600 }, { "epoch": 0.49, "learning_rate": 1.5129694677114294e-06, "loss": 1.9725, "step": 3605 }, { "epoch": 0.49, "learning_rate": 1.5122939746014589e-06, "loss": 1.9695, "step": 3610 }, { "epoch": 0.49, "learning_rate": 1.5116184814914888e-06, "loss": 2.0596, "step": 3615 }, { "epoch": 0.49, "learning_rate": 1.5109429883815183e-06, "loss": 1.993, "step": 3620 }, { "epoch": 0.49, "learning_rate": 1.510267495271548e-06, "loss": 2.0617, "step": 3625 }, { "epoch": 0.49, "learning_rate": 1.5095920021615778e-06, "loss": 2.0145, "step": 3630 }, { "epoch": 0.49, "learning_rate": 1.5089165090516075e-06, "loss": 2.1149, "step": 3635 }, { "epoch": 0.49, "learning_rate": 1.5082410159416373e-06, "loss": 2.0825, "step": 3640 }, { "epoch": 0.49, "learning_rate": 1.507565522831667e-06, "loss": 2.0762, "step": 3645 }, { "epoch": 0.49, "learning_rate": 1.506890029721697e-06, "loss": 2.1251, "step": 3650 }, { "epoch": 0.49, "learning_rate": 1.5062145366117264e-06, "loss": 2.0812, "step": 3655 }, { "epoch": 0.49, "learning_rate": 1.5055390435017564e-06, "loss": 2.0488, "step": 3660 }, { "epoch": 0.5, "learning_rate": 1.5048635503917859e-06, "loss": 2.0906, "step": 3665 }, { "epoch": 0.5, "learning_rate": 1.5041880572818158e-06, "loss": 2.0587, "step": 3670 }, { "epoch": 0.5, "learning_rate": 1.5035125641718453e-06, "loss": 2.1736, "step": 3675 }, { "epoch": 0.5, "learning_rate": 1.5028370710618753e-06, "loss": 2.0042, "step": 3680 }, { "epoch": 0.5, "learning_rate": 1.5021615779519048e-06, "loss": 2.0003, "step": 3685 }, { "epoch": 0.5, "learning_rate": 1.5014860848419347e-06, "loss": 2.0667, "step": 3690 }, { "epoch": 0.5, "learning_rate": 1.5008105917319642e-06, "loss": 2.111, "step": 3695 }, { "epoch": 0.5, "learning_rate": 1.500135098621994e-06, "loss": 2.0688, "step": 3700 }, { "epoch": 0.5, "learning_rate": 1.4994596055120237e-06, "loss": 1.9983, "step": 3705 }, { "epoch": 0.5, "learning_rate": 1.4987841124020534e-06, "loss": 2.0898, "step": 3710 }, { "epoch": 0.5, "learning_rate": 1.4981086192920831e-06, "loss": 2.0828, "step": 3715 }, { "epoch": 0.5, "learning_rate": 1.4974331261821129e-06, "loss": 2.0781, "step": 3720 }, { "epoch": 0.5, "learning_rate": 1.4967576330721426e-06, "loss": 2.0078, "step": 3725 }, { "epoch": 0.5, "learning_rate": 1.4960821399621723e-06, "loss": 2.0798, "step": 3730 }, { "epoch": 0.5, "learning_rate": 1.495406646852202e-06, "loss": 2.0405, "step": 3735 }, { "epoch": 0.51, "learning_rate": 1.4947311537422318e-06, "loss": 2.0513, "step": 3740 }, { "epoch": 0.51, "learning_rate": 1.4940556606322615e-06, "loss": 2.0381, "step": 3745 }, { "epoch": 0.51, "learning_rate": 1.4933801675222912e-06, "loss": 2.0066, "step": 3750 }, { "epoch": 0.51, "learning_rate": 1.4927046744123207e-06, "loss": 2.0412, "step": 3755 }, { "epoch": 0.51, "learning_rate": 1.4920291813023507e-06, "loss": 2.0252, "step": 3760 }, { "epoch": 0.51, "learning_rate": 1.4913536881923802e-06, "loss": 2.1475, "step": 3765 }, { "epoch": 0.51, "learning_rate": 1.4906781950824101e-06, "loss": 2.0673, "step": 3770 }, { "epoch": 0.51, "learning_rate": 1.4900027019724396e-06, "loss": 2.0293, "step": 3775 }, { "epoch": 0.51, "learning_rate": 1.4893272088624696e-06, "loss": 2.0817, "step": 3780 }, { "epoch": 0.51, "learning_rate": 1.488651715752499e-06, "loss": 2.08, "step": 3785 }, { "epoch": 0.51, "learning_rate": 1.487976222642529e-06, "loss": 2.044, "step": 3790 }, { "epoch": 0.51, "learning_rate": 1.4873007295325588e-06, "loss": 2.0545, "step": 3795 }, { "epoch": 0.51, "learning_rate": 1.4866252364225885e-06, "loss": 2.0712, "step": 3800 }, { "epoch": 0.51, "learning_rate": 1.4859497433126182e-06, "loss": 2.0178, "step": 3805 }, { "epoch": 0.51, "learning_rate": 1.485274250202648e-06, "loss": 2.0222, "step": 3810 }, { "epoch": 0.52, "learning_rate": 1.4845987570926777e-06, "loss": 2.0417, "step": 3815 }, { "epoch": 0.52, "learning_rate": 1.4839232639827072e-06, "loss": 2.0045, "step": 3820 }, { "epoch": 0.52, "learning_rate": 1.4832477708727371e-06, "loss": 2.048, "step": 3825 }, { "epoch": 0.52, "learning_rate": 1.4825722777627666e-06, "loss": 2.1043, "step": 3830 }, { "epoch": 0.52, "learning_rate": 1.4818967846527966e-06, "loss": 2.0242, "step": 3835 }, { "epoch": 0.52, "learning_rate": 1.481221291542826e-06, "loss": 2.1304, "step": 3840 }, { "epoch": 0.52, "learning_rate": 1.480545798432856e-06, "loss": 2.0436, "step": 3845 }, { "epoch": 0.52, "learning_rate": 1.4798703053228855e-06, "loss": 2.0306, "step": 3850 }, { "epoch": 0.52, "learning_rate": 1.4791948122129155e-06, "loss": 1.9944, "step": 3855 }, { "epoch": 0.52, "learning_rate": 1.478519319102945e-06, "loss": 2.0746, "step": 3860 }, { "epoch": 0.52, "learning_rate": 1.477843825992975e-06, "loss": 2.0468, "step": 3865 }, { "epoch": 0.52, "learning_rate": 1.4771683328830044e-06, "loss": 2.0807, "step": 3870 }, { "epoch": 0.52, "learning_rate": 1.4764928397730344e-06, "loss": 2.1531, "step": 3875 }, { "epoch": 0.52, "learning_rate": 1.475817346663064e-06, "loss": 2.0949, "step": 3880 }, { "epoch": 0.52, "learning_rate": 1.4751418535530936e-06, "loss": 1.9704, "step": 3885 }, { "epoch": 0.53, "learning_rate": 1.4744663604431234e-06, "loss": 1.9872, "step": 3890 }, { "epoch": 0.53, "learning_rate": 1.473790867333153e-06, "loss": 1.997, "step": 3895 }, { "epoch": 0.53, "learning_rate": 1.4731153742231828e-06, "loss": 2.0673, "step": 3900 }, { "epoch": 0.53, "learning_rate": 1.4724398811132125e-06, "loss": 2.0324, "step": 3905 }, { "epoch": 0.53, "learning_rate": 1.4717643880032423e-06, "loss": 2.0306, "step": 3910 }, { "epoch": 0.53, "learning_rate": 1.471088894893272e-06, "loss": 2.0718, "step": 3915 }, { "epoch": 0.53, "learning_rate": 1.4704134017833017e-06, "loss": 2.1204, "step": 3920 }, { "epoch": 0.53, "learning_rate": 1.4697379086733314e-06, "loss": 1.9834, "step": 3925 }, { "epoch": 0.53, "learning_rate": 1.4690624155633612e-06, "loss": 2.0658, "step": 3930 }, { "epoch": 0.53, "learning_rate": 1.4683869224533909e-06, "loss": 2.0699, "step": 3935 }, { "epoch": 0.53, "learning_rate": 1.4677114293434208e-06, "loss": 2.0704, "step": 3940 }, { "epoch": 0.53, "learning_rate": 1.4670359362334503e-06, "loss": 2.0563, "step": 3945 }, { "epoch": 0.53, "learning_rate": 1.46636044312348e-06, "loss": 2.0176, "step": 3950 }, { "epoch": 0.53, "learning_rate": 1.4656849500135098e-06, "loss": 2.067, "step": 3955 }, { "epoch": 0.53, "learning_rate": 1.4650094569035395e-06, "loss": 2.1244, "step": 3960 }, { "epoch": 0.54, "learning_rate": 1.4643339637935692e-06, "loss": 2.0181, "step": 3965 }, { "epoch": 0.54, "learning_rate": 1.463658470683599e-06, "loss": 2.0596, "step": 3970 }, { "epoch": 0.54, "learning_rate": 1.4629829775736287e-06, "loss": 2.0484, "step": 3975 }, { "epoch": 0.54, "learning_rate": 1.4623074844636584e-06, "loss": 2.0901, "step": 3980 }, { "epoch": 0.54, "learning_rate": 1.4616319913536882e-06, "loss": 1.9073, "step": 3985 }, { "epoch": 0.54, "learning_rate": 1.4609564982437179e-06, "loss": 2.0358, "step": 3990 }, { "epoch": 0.54, "learning_rate": 1.4602810051337476e-06, "loss": 2.0291, "step": 3995 }, { "epoch": 0.54, "learning_rate": 1.4596055120237773e-06, "loss": 2.0276, "step": 4000 }, { "epoch": 0.54, "eval_loss": 2.039377212524414, "eval_runtime": 165.26, "eval_samples_per_second": 3.606, "eval_steps_per_second": 0.454, "step": 4000 }, { "epoch": 0.54, "learning_rate": 1.458930018913807e-06, "loss": 2.1331, "step": 4005 }, { "epoch": 0.54, "learning_rate": 1.4582545258038368e-06, "loss": 2.0162, "step": 4010 }, { "epoch": 0.54, "learning_rate": 1.4575790326938663e-06, "loss": 1.9811, "step": 4015 }, { "epoch": 0.54, "learning_rate": 1.4569035395838962e-06, "loss": 1.9574, "step": 4020 }, { "epoch": 0.54, "learning_rate": 1.4562280464739258e-06, "loss": 1.9547, "step": 4025 }, { "epoch": 0.54, "learning_rate": 1.4555525533639557e-06, "loss": 2.0814, "step": 4030 }, { "epoch": 0.55, "learning_rate": 1.4548770602539852e-06, "loss": 2.0517, "step": 4035 }, { "epoch": 0.55, "learning_rate": 1.4542015671440151e-06, "loss": 2.0873, "step": 4040 }, { "epoch": 0.55, "learning_rate": 1.4535260740340447e-06, "loss": 2.0353, "step": 4045 }, { "epoch": 0.55, "learning_rate": 1.4528505809240746e-06, "loss": 2.0967, "step": 4050 }, { "epoch": 0.55, "learning_rate": 1.4521750878141041e-06, "loss": 1.9676, "step": 4055 }, { "epoch": 0.55, "learning_rate": 1.451499594704134e-06, "loss": 2.0532, "step": 4060 }, { "epoch": 0.55, "learning_rate": 1.4508241015941636e-06, "loss": 2.0872, "step": 4065 }, { "epoch": 0.55, "learning_rate": 1.4501486084841935e-06, "loss": 2.092, "step": 4070 }, { "epoch": 0.55, "learning_rate": 1.449473115374223e-06, "loss": 1.9767, "step": 4075 }, { "epoch": 0.55, "learning_rate": 1.4487976222642527e-06, "loss": 2.0484, "step": 4080 }, { "epoch": 0.55, "learning_rate": 1.4481221291542827e-06, "loss": 2.0448, "step": 4085 }, { "epoch": 0.55, "learning_rate": 1.4474466360443122e-06, "loss": 2.0531, "step": 4090 }, { "epoch": 0.55, "learning_rate": 1.4467711429343421e-06, "loss": 2.014, "step": 4095 }, { "epoch": 0.55, "learning_rate": 1.4460956498243716e-06, "loss": 2.0123, "step": 4100 }, { "epoch": 0.55, "learning_rate": 1.4454201567144016e-06, "loss": 2.0659, "step": 4105 }, { "epoch": 0.56, "learning_rate": 1.444744663604431e-06, "loss": 2.0284, "step": 4110 }, { "epoch": 0.56, "learning_rate": 1.444069170494461e-06, "loss": 2.1546, "step": 4115 }, { "epoch": 0.56, "learning_rate": 1.4433936773844905e-06, "loss": 2.0555, "step": 4120 }, { "epoch": 0.56, "learning_rate": 1.4427181842745205e-06, "loss": 2.0946, "step": 4125 }, { "epoch": 0.56, "learning_rate": 1.44204269116455e-06, "loss": 2.037, "step": 4130 }, { "epoch": 0.56, "learning_rate": 1.44136719805458e-06, "loss": 2.0163, "step": 4135 }, { "epoch": 0.56, "learning_rate": 1.4406917049446095e-06, "loss": 2.0224, "step": 4140 }, { "epoch": 0.56, "learning_rate": 1.4400162118346392e-06, "loss": 2.0542, "step": 4145 }, { "epoch": 0.56, "learning_rate": 1.439340718724669e-06, "loss": 2.0628, "step": 4150 }, { "epoch": 0.56, "learning_rate": 1.4386652256146986e-06, "loss": 2.0527, "step": 4155 }, { "epoch": 0.56, "learning_rate": 1.4379897325047284e-06, "loss": 1.9956, "step": 4160 }, { "epoch": 0.56, "learning_rate": 1.437314239394758e-06, "loss": 1.9983, "step": 4165 }, { "epoch": 0.56, "learning_rate": 1.4366387462847878e-06, "loss": 2.0351, "step": 4170 }, { "epoch": 0.56, "learning_rate": 1.4359632531748175e-06, "loss": 1.9065, "step": 4175 }, { "epoch": 0.56, "learning_rate": 1.4352877600648473e-06, "loss": 2.0242, "step": 4180 }, { "epoch": 0.57, "learning_rate": 1.434612266954877e-06, "loss": 2.0076, "step": 4185 }, { "epoch": 0.57, "learning_rate": 1.4339367738449067e-06, "loss": 2.0707, "step": 4190 }, { "epoch": 0.57, "learning_rate": 1.4332612807349364e-06, "loss": 2.0493, "step": 4195 }, { "epoch": 0.57, "learning_rate": 1.4325857876249662e-06, "loss": 2.0358, "step": 4200 }, { "epoch": 0.57, "learning_rate": 1.431910294514996e-06, "loss": 1.9821, "step": 4205 }, { "epoch": 0.57, "learning_rate": 1.4312348014050254e-06, "loss": 2.0288, "step": 4210 }, { "epoch": 0.57, "learning_rate": 1.4305593082950553e-06, "loss": 2.0614, "step": 4215 }, { "epoch": 0.57, "learning_rate": 1.4298838151850849e-06, "loss": 2.1316, "step": 4220 }, { "epoch": 0.57, "learning_rate": 1.4292083220751148e-06, "loss": 1.9865, "step": 4225 }, { "epoch": 0.57, "learning_rate": 1.4285328289651445e-06, "loss": 2.1231, "step": 4230 }, { "epoch": 0.57, "learning_rate": 1.4278573358551743e-06, "loss": 2.0609, "step": 4235 }, { "epoch": 0.57, "learning_rate": 1.427181842745204e-06, "loss": 1.995, "step": 4240 }, { "epoch": 0.57, "learning_rate": 1.4265063496352337e-06, "loss": 2.0347, "step": 4245 }, { "epoch": 0.57, "learning_rate": 1.4258308565252634e-06, "loss": 2.0714, "step": 4250 }, { "epoch": 0.57, "learning_rate": 1.4251553634152932e-06, "loss": 1.9985, "step": 4255 }, { "epoch": 0.58, "learning_rate": 1.4244798703053229e-06, "loss": 2.0541, "step": 4260 }, { "epoch": 0.58, "learning_rate": 1.4238043771953526e-06, "loss": 2.0663, "step": 4265 }, { "epoch": 0.58, "learning_rate": 1.4231288840853823e-06, "loss": 2.0048, "step": 4270 }, { "epoch": 0.58, "learning_rate": 1.4224533909754119e-06, "loss": 2.0742, "step": 4275 }, { "epoch": 0.58, "learning_rate": 1.4217778978654418e-06, "loss": 2.0085, "step": 4280 }, { "epoch": 0.58, "learning_rate": 1.4211024047554713e-06, "loss": 1.9951, "step": 4285 }, { "epoch": 0.58, "learning_rate": 1.4204269116455012e-06, "loss": 2.0308, "step": 4290 }, { "epoch": 0.58, "learning_rate": 1.4197514185355308e-06, "loss": 2.1408, "step": 4295 }, { "epoch": 0.58, "learning_rate": 1.4190759254255607e-06, "loss": 1.9765, "step": 4300 }, { "epoch": 0.58, "learning_rate": 1.4184004323155902e-06, "loss": 1.9744, "step": 4305 }, { "epoch": 0.58, "learning_rate": 1.4177249392056201e-06, "loss": 2.0597, "step": 4310 }, { "epoch": 0.58, "learning_rate": 1.4170494460956497e-06, "loss": 2.0787, "step": 4315 }, { "epoch": 0.58, "learning_rate": 1.4163739529856796e-06, "loss": 1.9937, "step": 4320 }, { "epoch": 0.58, "learning_rate": 1.4156984598757091e-06, "loss": 2.0415, "step": 4325 }, { "epoch": 0.58, "learning_rate": 1.415022966765739e-06, "loss": 1.9378, "step": 4330 }, { "epoch": 0.59, "learning_rate": 1.4143474736557686e-06, "loss": 2.0868, "step": 4335 }, { "epoch": 0.59, "learning_rate": 1.4136719805457983e-06, "loss": 2.0547, "step": 4340 }, { "epoch": 0.59, "learning_rate": 1.412996487435828e-06, "loss": 2.0683, "step": 4345 }, { "epoch": 0.59, "learning_rate": 1.4123209943258577e-06, "loss": 2.1531, "step": 4350 }, { "epoch": 0.59, "learning_rate": 1.4116455012158875e-06, "loss": 2.0331, "step": 4355 }, { "epoch": 0.59, "learning_rate": 1.4109700081059172e-06, "loss": 2.0184, "step": 4360 }, { "epoch": 0.59, "learning_rate": 1.410294514995947e-06, "loss": 2.0473, "step": 4365 }, { "epoch": 0.59, "learning_rate": 1.4096190218859767e-06, "loss": 1.9568, "step": 4370 }, { "epoch": 0.59, "learning_rate": 1.4089435287760066e-06, "loss": 2.031, "step": 4375 }, { "epoch": 0.59, "learning_rate": 1.408268035666036e-06, "loss": 1.9944, "step": 4380 }, { "epoch": 0.59, "learning_rate": 1.407592542556066e-06, "loss": 2.1136, "step": 4385 }, { "epoch": 0.59, "learning_rate": 1.4069170494460956e-06, "loss": 2.0197, "step": 4390 }, { "epoch": 0.59, "learning_rate": 1.4062415563361255e-06, "loss": 2.1447, "step": 4395 }, { "epoch": 0.59, "learning_rate": 1.405566063226155e-06, "loss": 1.9879, "step": 4400 }, { "epoch": 0.59, "eval_loss": 2.0300517082214355, "eval_runtime": 165.4272, "eval_samples_per_second": 3.603, "eval_steps_per_second": 0.453, "step": 4400 }, { "epoch": 0.6, "learning_rate": 1.4048905701161847e-06, "loss": 2.1061, "step": 4405 }, { "epoch": 0.6, "learning_rate": 1.4042150770062145e-06, "loss": 2.0397, "step": 4410 }, { "epoch": 0.6, "learning_rate": 1.4035395838962442e-06, "loss": 1.943, "step": 4415 }, { "epoch": 0.6, "learning_rate": 1.402864090786274e-06, "loss": 2.0362, "step": 4420 }, { "epoch": 0.6, "learning_rate": 1.4021885976763036e-06, "loss": 2.0861, "step": 4425 }, { "epoch": 0.6, "learning_rate": 1.4015131045663334e-06, "loss": 2.0146, "step": 4430 }, { "epoch": 0.6, "learning_rate": 1.400837611456363e-06, "loss": 2.0927, "step": 4435 }, { "epoch": 0.6, "learning_rate": 1.4001621183463928e-06, "loss": 2.0839, "step": 4440 }, { "epoch": 0.6, "learning_rate": 1.3994866252364225e-06, "loss": 2.0464, "step": 4445 }, { "epoch": 0.6, "learning_rate": 1.3988111321264523e-06, "loss": 2.0034, "step": 4450 }, { "epoch": 0.6, "learning_rate": 1.398135639016482e-06, "loss": 2.0405, "step": 4455 }, { "epoch": 0.6, "learning_rate": 1.3974601459065117e-06, "loss": 1.9895, "step": 4460 }, { "epoch": 0.6, "learning_rate": 1.3967846527965415e-06, "loss": 2.058, "step": 4465 }, { "epoch": 0.6, "learning_rate": 1.396109159686571e-06, "loss": 2.0722, "step": 4470 }, { "epoch": 0.6, "learning_rate": 1.395433666576601e-06, "loss": 1.9765, "step": 4475 }, { "epoch": 0.61, "learning_rate": 1.3947581734666304e-06, "loss": 1.9971, "step": 4480 }, { "epoch": 0.61, "learning_rate": 1.3940826803566604e-06, "loss": 2.0599, "step": 4485 }, { "epoch": 0.61, "learning_rate": 1.3934071872466899e-06, "loss": 2.0489, "step": 4490 }, { "epoch": 0.61, "learning_rate": 1.3927316941367198e-06, "loss": 1.9994, "step": 4495 }, { "epoch": 0.61, "learning_rate": 1.3920562010267493e-06, "loss": 2.0765, "step": 4500 }, { "epoch": 0.61, "learning_rate": 1.3913807079167793e-06, "loss": 2.1492, "step": 4505 }, { "epoch": 0.61, "learning_rate": 1.3907052148068088e-06, "loss": 2.0749, "step": 4510 }, { "epoch": 0.61, "learning_rate": 1.3900297216968387e-06, "loss": 1.9798, "step": 4515 }, { "epoch": 0.61, "learning_rate": 1.3893542285868684e-06, "loss": 2.0888, "step": 4520 }, { "epoch": 0.61, "learning_rate": 1.3886787354768982e-06, "loss": 1.9906, "step": 4525 }, { "epoch": 0.61, "learning_rate": 1.3880032423669279e-06, "loss": 2.0952, "step": 4530 }, { "epoch": 0.61, "learning_rate": 1.3873277492569574e-06, "loss": 2.0239, "step": 4535 }, { "epoch": 0.61, "learning_rate": 1.3866522561469873e-06, "loss": 2.0986, "step": 4540 }, { "epoch": 0.61, "learning_rate": 1.3859767630370169e-06, "loss": 2.1158, "step": 4545 }, { "epoch": 0.61, "learning_rate": 1.3853012699270468e-06, "loss": 1.9589, "step": 4550 }, { "epoch": 0.62, "learning_rate": 1.3846257768170763e-06, "loss": 2.0019, "step": 4555 }, { "epoch": 0.62, "learning_rate": 1.3839502837071062e-06, "loss": 2.0844, "step": 4560 }, { "epoch": 0.62, "learning_rate": 1.3832747905971358e-06, "loss": 2.0934, "step": 4565 }, { "epoch": 0.62, "learning_rate": 1.3825992974871657e-06, "loss": 2.007, "step": 4570 }, { "epoch": 0.62, "learning_rate": 1.3819238043771952e-06, "loss": 1.9538, "step": 4575 }, { "epoch": 0.62, "learning_rate": 1.3812483112672252e-06, "loss": 2.0167, "step": 4580 }, { "epoch": 0.62, "learning_rate": 1.3805728181572547e-06, "loss": 2.0112, "step": 4585 }, { "epoch": 0.62, "learning_rate": 1.3798973250472846e-06, "loss": 2.0415, "step": 4590 }, { "epoch": 0.62, "learning_rate": 1.3792218319373141e-06, "loss": 2.0616, "step": 4595 }, { "epoch": 0.62, "learning_rate": 1.3785463388273438e-06, "loss": 2.0704, "step": 4600 }, { "epoch": 0.62, "learning_rate": 1.3778708457173736e-06, "loss": 2.1166, "step": 4605 }, { "epoch": 0.62, "learning_rate": 1.3771953526074033e-06, "loss": 2.0665, "step": 4610 }, { "epoch": 0.62, "learning_rate": 1.376519859497433e-06, "loss": 2.0999, "step": 4615 }, { "epoch": 0.62, "learning_rate": 1.3758443663874628e-06, "loss": 2.0465, "step": 4620 }, { "epoch": 0.62, "learning_rate": 1.3751688732774925e-06, "loss": 2.0234, "step": 4625 }, { "epoch": 0.63, "learning_rate": 1.3744933801675222e-06, "loss": 2.0602, "step": 4630 }, { "epoch": 0.63, "learning_rate": 1.373817887057552e-06, "loss": 2.0471, "step": 4635 }, { "epoch": 0.63, "learning_rate": 1.3731423939475817e-06, "loss": 2.0579, "step": 4640 }, { "epoch": 0.63, "learning_rate": 1.3724669008376114e-06, "loss": 2.0118, "step": 4645 }, { "epoch": 0.63, "learning_rate": 1.3717914077276411e-06, "loss": 1.9907, "step": 4650 }, { "epoch": 0.63, "learning_rate": 1.3711159146176708e-06, "loss": 1.9865, "step": 4655 }, { "epoch": 0.63, "learning_rate": 1.3704404215077006e-06, "loss": 2.0415, "step": 4660 }, { "epoch": 0.63, "learning_rate": 1.3697649283977303e-06, "loss": 2.0445, "step": 4665 }, { "epoch": 0.63, "learning_rate": 1.36908943528776e-06, "loss": 1.9549, "step": 4670 }, { "epoch": 0.63, "learning_rate": 1.3684139421777897e-06, "loss": 1.9776, "step": 4675 }, { "epoch": 0.63, "learning_rate": 1.3677384490678195e-06, "loss": 2.0224, "step": 4680 }, { "epoch": 0.63, "learning_rate": 1.3670629559578492e-06, "loss": 1.9824, "step": 4685 }, { "epoch": 0.63, "learning_rate": 1.366387462847879e-06, "loss": 1.9642, "step": 4690 }, { "epoch": 0.63, "learning_rate": 1.3657119697379086e-06, "loss": 2.0882, "step": 4695 }, { "epoch": 0.63, "learning_rate": 1.3650364766279384e-06, "loss": 2.098, "step": 4700 }, { "epoch": 0.64, "learning_rate": 1.364360983517968e-06, "loss": 2.0735, "step": 4705 }, { "epoch": 0.64, "learning_rate": 1.3636854904079978e-06, "loss": 2.011, "step": 4710 }, { "epoch": 0.64, "learning_rate": 1.3630099972980276e-06, "loss": 1.9934, "step": 4715 }, { "epoch": 0.64, "learning_rate": 1.3623345041880573e-06, "loss": 2.0129, "step": 4720 }, { "epoch": 0.64, "learning_rate": 1.361659011078087e-06, "loss": 2.0225, "step": 4725 }, { "epoch": 0.64, "learning_rate": 1.3609835179681165e-06, "loss": 2.1099, "step": 4730 }, { "epoch": 0.64, "learning_rate": 1.3603080248581465e-06, "loss": 1.9598, "step": 4735 }, { "epoch": 0.64, "learning_rate": 1.359632531748176e-06, "loss": 1.9245, "step": 4740 }, { "epoch": 0.64, "learning_rate": 1.358957038638206e-06, "loss": 2.0642, "step": 4745 }, { "epoch": 0.64, "learning_rate": 1.3582815455282354e-06, "loss": 2.1169, "step": 4750 }, { "epoch": 0.64, "learning_rate": 1.3576060524182654e-06, "loss": 2.0505, "step": 4755 }, { "epoch": 0.64, "learning_rate": 1.3569305593082949e-06, "loss": 2.0364, "step": 4760 }, { "epoch": 0.64, "learning_rate": 1.3562550661983248e-06, "loss": 2.1029, "step": 4765 }, { "epoch": 0.64, "learning_rate": 1.3555795730883543e-06, "loss": 2.136, "step": 4770 }, { "epoch": 0.65, "learning_rate": 1.3549040799783843e-06, "loss": 1.9615, "step": 4775 }, { "epoch": 0.65, "learning_rate": 1.3542285868684138e-06, "loss": 1.9855, "step": 4780 }, { "epoch": 0.65, "learning_rate": 1.3535530937584437e-06, "loss": 1.9529, "step": 4785 }, { "epoch": 0.65, "learning_rate": 1.3528776006484732e-06, "loss": 2.0281, "step": 4790 }, { "epoch": 0.65, "learning_rate": 1.352202107538503e-06, "loss": 1.9702, "step": 4795 }, { "epoch": 0.65, "learning_rate": 1.3515266144285327e-06, "loss": 2.0656, "step": 4800 }, { "epoch": 0.65, "eval_loss": 2.021768808364868, "eval_runtime": 165.3452, "eval_samples_per_second": 3.605, "eval_steps_per_second": 0.454, "step": 4800 }, { "epoch": 0.65, "learning_rate": 1.3508511213185624e-06, "loss": 1.9761, "step": 4805 }, { "epoch": 0.65, "learning_rate": 1.3501756282085924e-06, "loss": 2.043, "step": 4810 }, { "epoch": 0.65, "learning_rate": 1.3495001350986219e-06, "loss": 2.0883, "step": 4815 }, { "epoch": 0.65, "learning_rate": 1.3488246419886518e-06, "loss": 2.0304, "step": 4820 }, { "epoch": 0.65, "learning_rate": 1.3481491488786813e-06, "loss": 2.0323, "step": 4825 }, { "epoch": 0.65, "learning_rate": 1.3474736557687113e-06, "loss": 2.032, "step": 4830 }, { "epoch": 0.65, "learning_rate": 1.3467981626587408e-06, "loss": 2.0289, "step": 4835 }, { "epoch": 0.65, "learning_rate": 1.3461226695487707e-06, "loss": 1.9711, "step": 4840 }, { "epoch": 0.65, "learning_rate": 1.3454471764388002e-06, "loss": 2.0748, "step": 4845 }, { "epoch": 0.66, "learning_rate": 1.3447716833288302e-06, "loss": 2.0305, "step": 4850 }, { "epoch": 0.66, "learning_rate": 1.3440961902188597e-06, "loss": 2.1066, "step": 4855 }, { "epoch": 0.66, "learning_rate": 1.3434206971088894e-06, "loss": 2.009, "step": 4860 }, { "epoch": 0.66, "learning_rate": 1.3427452039989191e-06, "loss": 1.9678, "step": 4865 }, { "epoch": 0.66, "learning_rate": 1.3420697108889489e-06, "loss": 2.1183, "step": 4870 }, { "epoch": 0.66, "learning_rate": 1.3413942177789786e-06, "loss": 2.0144, "step": 4875 }, { "epoch": 0.66, "learning_rate": 1.3407187246690083e-06, "loss": 2.1348, "step": 4880 }, { "epoch": 0.66, "learning_rate": 1.340043231559038e-06, "loss": 1.955, "step": 4885 }, { "epoch": 0.66, "learning_rate": 1.3393677384490678e-06, "loss": 1.9506, "step": 4890 }, { "epoch": 0.66, "learning_rate": 1.3386922453390975e-06, "loss": 2.0105, "step": 4895 }, { "epoch": 0.66, "learning_rate": 1.3380167522291272e-06, "loss": 1.9965, "step": 4900 }, { "epoch": 0.66, "learning_rate": 1.337341259119157e-06, "loss": 1.9777, "step": 4905 }, { "epoch": 0.66, "learning_rate": 1.3366657660091867e-06, "loss": 2.0761, "step": 4910 }, { "epoch": 0.66, "learning_rate": 1.3359902728992164e-06, "loss": 2.0911, "step": 4915 }, { "epoch": 0.66, "learning_rate": 1.3353147797892461e-06, "loss": 1.9092, "step": 4920 }, { "epoch": 0.67, "learning_rate": 1.3346392866792756e-06, "loss": 2.006, "step": 4925 }, { "epoch": 0.67, "learning_rate": 1.3339637935693056e-06, "loss": 1.9245, "step": 4930 }, { "epoch": 0.67, "learning_rate": 1.333288300459335e-06, "loss": 2.0058, "step": 4935 }, { "epoch": 0.67, "learning_rate": 1.332612807349365e-06, "loss": 2.0073, "step": 4940 }, { "epoch": 0.67, "learning_rate": 1.3319373142393945e-06, "loss": 1.9605, "step": 4945 }, { "epoch": 0.67, "learning_rate": 1.3312618211294245e-06, "loss": 1.9436, "step": 4950 }, { "epoch": 0.67, "learning_rate": 1.3305863280194542e-06, "loss": 2.0926, "step": 4955 }, { "epoch": 0.67, "learning_rate": 1.329910834909484e-06, "loss": 2.0783, "step": 4960 }, { "epoch": 0.67, "learning_rate": 1.3292353417995137e-06, "loss": 1.9943, "step": 4965 }, { "epoch": 0.67, "learning_rate": 1.3285598486895434e-06, "loss": 2.0203, "step": 4970 }, { "epoch": 0.67, "learning_rate": 1.3278843555795731e-06, "loss": 2.0214, "step": 4975 }, { "epoch": 0.67, "learning_rate": 1.3272088624696028e-06, "loss": 2.1185, "step": 4980 }, { "epoch": 0.67, "learning_rate": 1.3265333693596326e-06, "loss": 2.0785, "step": 4985 }, { "epoch": 0.67, "learning_rate": 1.325857876249662e-06, "loss": 1.9431, "step": 4990 }, { "epoch": 0.67, "learning_rate": 1.325182383139692e-06, "loss": 2.0834, "step": 4995 }, { "epoch": 0.68, "learning_rate": 1.3245068900297215e-06, "loss": 1.8992, "step": 5000 }, { "epoch": 0.68, "learning_rate": 1.3238313969197515e-06, "loss": 2.0508, "step": 5005 }, { "epoch": 0.68, "learning_rate": 1.323155903809781e-06, "loss": 2.0358, "step": 5010 }, { "epoch": 0.68, "learning_rate": 1.322480410699811e-06, "loss": 2.0482, "step": 5015 }, { "epoch": 0.68, "learning_rate": 1.3218049175898404e-06, "loss": 1.9552, "step": 5020 }, { "epoch": 0.68, "learning_rate": 1.3211294244798704e-06, "loss": 1.9477, "step": 5025 }, { "epoch": 0.68, "learning_rate": 1.3204539313698999e-06, "loss": 2.0353, "step": 5030 }, { "epoch": 0.68, "learning_rate": 1.3197784382599298e-06, "loss": 1.9951, "step": 5035 }, { "epoch": 0.68, "learning_rate": 1.3191029451499593e-06, "loss": 2.0656, "step": 5040 }, { "epoch": 0.68, "learning_rate": 1.3184274520399893e-06, "loss": 2.0767, "step": 5045 }, { "epoch": 0.68, "learning_rate": 1.3177519589300188e-06, "loss": 2.0943, "step": 5050 }, { "epoch": 0.68, "learning_rate": 1.3170764658200485e-06, "loss": 2.0578, "step": 5055 }, { "epoch": 0.68, "learning_rate": 1.3164009727100782e-06, "loss": 2.0385, "step": 5060 }, { "epoch": 0.68, "learning_rate": 1.315725479600108e-06, "loss": 1.9904, "step": 5065 }, { "epoch": 0.68, "learning_rate": 1.3150499864901377e-06, "loss": 1.9545, "step": 5070 }, { "epoch": 0.69, "learning_rate": 1.3143744933801674e-06, "loss": 2.0289, "step": 5075 }, { "epoch": 0.69, "learning_rate": 1.3136990002701971e-06, "loss": 1.9972, "step": 5080 }, { "epoch": 0.69, "learning_rate": 1.3130235071602269e-06, "loss": 1.9992, "step": 5085 }, { "epoch": 0.69, "learning_rate": 1.3123480140502566e-06, "loss": 2.0014, "step": 5090 }, { "epoch": 0.69, "learning_rate": 1.3116725209402863e-06, "loss": 2.0328, "step": 5095 }, { "epoch": 0.69, "learning_rate": 1.3109970278303163e-06, "loss": 2.0877, "step": 5100 }, { "epoch": 0.69, "learning_rate": 1.3103215347203458e-06, "loss": 2.0689, "step": 5105 }, { "epoch": 0.69, "learning_rate": 1.3096460416103757e-06, "loss": 1.9704, "step": 5110 }, { "epoch": 0.69, "learning_rate": 1.3089705485004052e-06, "loss": 1.9833, "step": 5115 }, { "epoch": 0.69, "learning_rate": 1.308295055390435e-06, "loss": 2.0617, "step": 5120 }, { "epoch": 0.69, "learning_rate": 1.3076195622804647e-06, "loss": 1.995, "step": 5125 }, { "epoch": 0.69, "learning_rate": 1.3069440691704944e-06, "loss": 1.9955, "step": 5130 }, { "epoch": 0.69, "learning_rate": 1.3062685760605241e-06, "loss": 2.0641, "step": 5135 }, { "epoch": 0.69, "learning_rate": 1.3055930829505539e-06, "loss": 2.0867, "step": 5140 }, { "epoch": 0.7, "learning_rate": 1.3049175898405836e-06, "loss": 2.017, "step": 5145 }, { "epoch": 0.7, "learning_rate": 1.3042420967306133e-06, "loss": 2.114, "step": 5150 }, { "epoch": 0.7, "learning_rate": 1.303566603620643e-06, "loss": 2.0112, "step": 5155 }, { "epoch": 0.7, "learning_rate": 1.3028911105106728e-06, "loss": 2.037, "step": 5160 }, { "epoch": 0.7, "learning_rate": 1.3022156174007025e-06, "loss": 2.0592, "step": 5165 }, { "epoch": 0.7, "learning_rate": 1.3015401242907322e-06, "loss": 2.0331, "step": 5170 }, { "epoch": 0.7, "learning_rate": 1.3008646311807617e-06, "loss": 2.0045, "step": 5175 }, { "epoch": 0.7, "learning_rate": 1.3001891380707917e-06, "loss": 2.0046, "step": 5180 }, { "epoch": 0.7, "learning_rate": 1.2995136449608212e-06, "loss": 2.0658, "step": 5185 }, { "epoch": 0.7, "learning_rate": 1.2988381518508511e-06, "loss": 2.038, "step": 5190 }, { "epoch": 0.7, "learning_rate": 1.2981626587408806e-06, "loss": 1.9584, "step": 5195 }, { "epoch": 0.7, "learning_rate": 1.2974871656309106e-06, "loss": 2.1201, "step": 5200 }, { "epoch": 0.7, "eval_loss": 2.0141761302948, "eval_runtime": 165.1921, "eval_samples_per_second": 3.608, "eval_steps_per_second": 0.454, "step": 5200 }, { "epoch": 0.7, "learning_rate": 1.29681167252094e-06, "loss": 1.9123, "step": 5205 }, { "epoch": 0.7, "learning_rate": 1.29613617941097e-06, "loss": 1.9701, "step": 5210 }, { "epoch": 0.7, "learning_rate": 1.2954606863009995e-06, "loss": 2.0273, "step": 5215 }, { "epoch": 0.71, "learning_rate": 1.2947851931910295e-06, "loss": 2.0312, "step": 5220 }, { "epoch": 0.71, "learning_rate": 1.294109700081059e-06, "loss": 2.0207, "step": 5225 }, { "epoch": 0.71, "learning_rate": 1.293434206971089e-06, "loss": 2.0797, "step": 5230 }, { "epoch": 0.71, "learning_rate": 1.2927587138611185e-06, "loss": 1.9935, "step": 5235 }, { "epoch": 0.71, "learning_rate": 1.2920832207511484e-06, "loss": 2.0066, "step": 5240 }, { "epoch": 0.71, "learning_rate": 1.291407727641178e-06, "loss": 2.0568, "step": 5245 }, { "epoch": 0.71, "learning_rate": 1.2907322345312076e-06, "loss": 2.1398, "step": 5250 }, { "epoch": 0.71, "learning_rate": 1.2900567414212376e-06, "loss": 1.9884, "step": 5255 }, { "epoch": 0.71, "learning_rate": 1.289381248311267e-06, "loss": 2.0353, "step": 5260 }, { "epoch": 0.71, "learning_rate": 1.288705755201297e-06, "loss": 1.9529, "step": 5265 }, { "epoch": 0.71, "learning_rate": 1.2880302620913265e-06, "loss": 2.0423, "step": 5270 }, { "epoch": 0.71, "learning_rate": 1.2873547689813565e-06, "loss": 2.0891, "step": 5275 }, { "epoch": 0.71, "learning_rate": 1.286679275871386e-06, "loss": 2.0047, "step": 5280 }, { "epoch": 0.71, "learning_rate": 1.286003782761416e-06, "loss": 2.0324, "step": 5285 }, { "epoch": 0.71, "learning_rate": 1.2853282896514454e-06, "loss": 2.0379, "step": 5290 }, { "epoch": 0.72, "learning_rate": 1.2846527965414754e-06, "loss": 2.0345, "step": 5295 }, { "epoch": 0.72, "learning_rate": 1.2839773034315049e-06, "loss": 1.9509, "step": 5300 }, { "epoch": 0.72, "learning_rate": 1.2833018103215348e-06, "loss": 2.076, "step": 5305 }, { "epoch": 0.72, "learning_rate": 1.2826263172115643e-06, "loss": 2.0107, "step": 5310 }, { "epoch": 0.72, "learning_rate": 1.281950824101594e-06, "loss": 1.9542, "step": 5315 }, { "epoch": 0.72, "learning_rate": 1.2812753309916238e-06, "loss": 1.9767, "step": 5320 }, { "epoch": 0.72, "learning_rate": 1.2805998378816535e-06, "loss": 1.9998, "step": 5325 }, { "epoch": 0.72, "learning_rate": 1.2799243447716832e-06, "loss": 1.9633, "step": 5330 }, { "epoch": 0.72, "learning_rate": 1.279248851661713e-06, "loss": 2.006, "step": 5335 }, { "epoch": 0.72, "learning_rate": 1.2785733585517427e-06, "loss": 2.0871, "step": 5340 }, { "epoch": 0.72, "learning_rate": 1.2778978654417724e-06, "loss": 2.0124, "step": 5345 }, { "epoch": 0.72, "learning_rate": 1.2772223723318022e-06, "loss": 1.9771, "step": 5350 }, { "epoch": 0.72, "learning_rate": 1.2765468792218319e-06, "loss": 2.0389, "step": 5355 }, { "epoch": 0.72, "learning_rate": 1.2758713861118616e-06, "loss": 2.1118, "step": 5360 }, { "epoch": 0.72, "learning_rate": 1.2751958930018913e-06, "loss": 2.0358, "step": 5365 }, { "epoch": 0.73, "learning_rate": 1.2745203998919208e-06, "loss": 2.0398, "step": 5370 }, { "epoch": 0.73, "learning_rate": 1.2738449067819508e-06, "loss": 1.9582, "step": 5375 }, { "epoch": 0.73, "learning_rate": 1.2731694136719803e-06, "loss": 2.0692, "step": 5380 }, { "epoch": 0.73, "learning_rate": 1.2724939205620102e-06, "loss": 2.0527, "step": 5385 }, { "epoch": 0.73, "learning_rate": 1.2718184274520398e-06, "loss": 2.077, "step": 5390 }, { "epoch": 0.73, "learning_rate": 1.2711429343420697e-06, "loss": 2.0804, "step": 5395 }, { "epoch": 0.73, "learning_rate": 1.2704674412320994e-06, "loss": 1.9849, "step": 5400 }, { "epoch": 0.73, "learning_rate": 1.2697919481221291e-06, "loss": 2.0286, "step": 5405 }, { "epoch": 0.73, "learning_rate": 1.2691164550121589e-06, "loss": 2.0492, "step": 5410 }, { "epoch": 0.73, "learning_rate": 1.2684409619021886e-06, "loss": 1.9443, "step": 5415 }, { "epoch": 0.73, "learning_rate": 1.2677654687922183e-06, "loss": 1.9515, "step": 5420 }, { "epoch": 0.73, "learning_rate": 1.267089975682248e-06, "loss": 2.044, "step": 5425 }, { "epoch": 0.73, "learning_rate": 1.2664144825722778e-06, "loss": 2.0567, "step": 5430 }, { "epoch": 0.73, "learning_rate": 1.2657389894623073e-06, "loss": 1.9627, "step": 5435 }, { "epoch": 0.73, "learning_rate": 1.2650634963523372e-06, "loss": 2.0867, "step": 5440 }, { "epoch": 0.74, "learning_rate": 1.2643880032423667e-06, "loss": 1.9958, "step": 5445 }, { "epoch": 0.74, "learning_rate": 1.2637125101323967e-06, "loss": 1.9795, "step": 5450 }, { "epoch": 0.74, "learning_rate": 1.2630370170224262e-06, "loss": 2.1329, "step": 5455 }, { "epoch": 0.74, "learning_rate": 1.2623615239124561e-06, "loss": 1.9491, "step": 5460 }, { "epoch": 0.74, "learning_rate": 1.2616860308024856e-06, "loss": 1.9966, "step": 5465 }, { "epoch": 0.74, "learning_rate": 1.2610105376925156e-06, "loss": 2.0714, "step": 5470 }, { "epoch": 0.74, "learning_rate": 1.260335044582545e-06, "loss": 2.047, "step": 5475 }, { "epoch": 0.74, "learning_rate": 1.259659551472575e-06, "loss": 1.9376, "step": 5480 }, { "epoch": 0.74, "learning_rate": 1.2589840583626046e-06, "loss": 2.012, "step": 5485 }, { "epoch": 0.74, "learning_rate": 1.2583085652526345e-06, "loss": 2.0617, "step": 5490 }, { "epoch": 0.74, "learning_rate": 1.257633072142664e-06, "loss": 1.9983, "step": 5495 }, { "epoch": 0.74, "learning_rate": 1.256957579032694e-06, "loss": 2.0385, "step": 5500 }, { "epoch": 0.74, "learning_rate": 1.2562820859227235e-06, "loss": 1.9452, "step": 5505 }, { "epoch": 0.74, "learning_rate": 1.2556065928127532e-06, "loss": 2.0028, "step": 5510 }, { "epoch": 0.75, "learning_rate": 1.254931099702783e-06, "loss": 2.0326, "step": 5515 }, { "epoch": 0.75, "learning_rate": 1.2542556065928126e-06, "loss": 1.9992, "step": 5520 }, { "epoch": 0.75, "learning_rate": 1.2535801134828424e-06, "loss": 1.9506, "step": 5525 }, { "epoch": 0.75, "learning_rate": 1.252904620372872e-06, "loss": 2.039, "step": 5530 }, { "epoch": 0.75, "learning_rate": 1.2522291272629018e-06, "loss": 2.072, "step": 5535 }, { "epoch": 0.75, "learning_rate": 1.2515536341529315e-06, "loss": 2.0191, "step": 5540 }, { "epoch": 0.75, "learning_rate": 1.2508781410429615e-06, "loss": 2.0545, "step": 5545 }, { "epoch": 0.75, "learning_rate": 1.250202647932991e-06, "loss": 1.9955, "step": 5550 }, { "epoch": 0.75, "learning_rate": 1.249527154823021e-06, "loss": 1.9908, "step": 5555 }, { "epoch": 0.75, "learning_rate": 1.2488516617130504e-06, "loss": 1.9835, "step": 5560 }, { "epoch": 0.75, "learning_rate": 1.2481761686030804e-06, "loss": 2.054, "step": 5565 }, { "epoch": 0.75, "learning_rate": 1.24750067549311e-06, "loss": 2.0544, "step": 5570 }, { "epoch": 0.75, "learning_rate": 1.2468251823831396e-06, "loss": 2.0558, "step": 5575 }, { "epoch": 0.75, "learning_rate": 1.2461496892731694e-06, "loss": 1.9398, "step": 5580 }, { "epoch": 0.75, "learning_rate": 1.245474196163199e-06, "loss": 2.1002, "step": 5585 }, { "epoch": 0.76, "learning_rate": 1.2447987030532288e-06, "loss": 2.1542, "step": 5590 }, { "epoch": 0.76, "learning_rate": 1.2441232099432585e-06, "loss": 2.0554, "step": 5595 }, { "epoch": 0.76, "learning_rate": 1.2434477168332883e-06, "loss": 2.0143, "step": 5600 }, { "epoch": 0.76, "eval_loss": 2.007206916809082, "eval_runtime": 165.3065, "eval_samples_per_second": 3.605, "eval_steps_per_second": 0.454, "step": 5600 }, { "epoch": 0.76, "learning_rate": 1.242772223723318e-06, "loss": 2.093, "step": 5605 }, { "epoch": 0.76, "learning_rate": 1.2420967306133477e-06, "loss": 1.9739, "step": 5610 }, { "epoch": 0.76, "learning_rate": 1.2414212375033774e-06, "loss": 2.1094, "step": 5615 }, { "epoch": 0.76, "learning_rate": 1.2407457443934072e-06, "loss": 2.1694, "step": 5620 }, { "epoch": 0.76, "learning_rate": 1.2400702512834369e-06, "loss": 2.0056, "step": 5625 }, { "epoch": 0.76, "learning_rate": 1.2393947581734664e-06, "loss": 1.9875, "step": 5630 }, { "epoch": 0.76, "learning_rate": 1.2387192650634963e-06, "loss": 2.0466, "step": 5635 }, { "epoch": 0.76, "learning_rate": 1.2380437719535259e-06, "loss": 1.9781, "step": 5640 }, { "epoch": 0.76, "learning_rate": 1.2373682788435558e-06, "loss": 2.044, "step": 5645 }, { "epoch": 0.76, "learning_rate": 1.2366927857335853e-06, "loss": 1.9391, "step": 5650 }, { "epoch": 0.76, "learning_rate": 1.2360172926236152e-06, "loss": 1.9503, "step": 5655 }, { "epoch": 0.76, "learning_rate": 1.2353417995136448e-06, "loss": 2.1022, "step": 5660 }, { "epoch": 0.77, "learning_rate": 1.2346663064036747e-06, "loss": 2.1214, "step": 5665 }, { "epoch": 0.77, "learning_rate": 1.2339908132937042e-06, "loss": 2.0368, "step": 5670 }, { "epoch": 0.77, "learning_rate": 1.2333153201837342e-06, "loss": 2.0938, "step": 5675 }, { "epoch": 0.77, "learning_rate": 1.2326398270737637e-06, "loss": 1.9393, "step": 5680 }, { "epoch": 0.77, "learning_rate": 1.2319643339637936e-06, "loss": 1.9626, "step": 5685 }, { "epoch": 0.77, "learning_rate": 1.2312888408538233e-06, "loss": 2.0078, "step": 5690 }, { "epoch": 0.77, "learning_rate": 1.2306133477438528e-06, "loss": 1.9386, "step": 5695 }, { "epoch": 0.77, "learning_rate": 1.2299378546338828e-06, "loss": 2.0948, "step": 5700 }, { "epoch": 0.77, "learning_rate": 1.2292623615239123e-06, "loss": 2.013, "step": 5705 }, { "epoch": 0.77, "learning_rate": 1.2285868684139422e-06, "loss": 2.0048, "step": 5710 }, { "epoch": 0.77, "learning_rate": 1.2279113753039717e-06, "loss": 2.0154, "step": 5715 }, { "epoch": 0.77, "learning_rate": 1.2272358821940017e-06, "loss": 2.0012, "step": 5720 }, { "epoch": 0.77, "learning_rate": 1.2265603890840312e-06, "loss": 1.9525, "step": 5725 }, { "epoch": 0.77, "learning_rate": 1.2258848959740611e-06, "loss": 2.0631, "step": 5730 }, { "epoch": 0.77, "learning_rate": 1.2252094028640907e-06, "loss": 2.0146, "step": 5735 }, { "epoch": 0.78, "learning_rate": 1.2245339097541206e-06, "loss": 1.9761, "step": 5740 }, { "epoch": 0.78, "learning_rate": 1.2238584166441501e-06, "loss": 2.0205, "step": 5745 }, { "epoch": 0.78, "learning_rate": 1.22318292353418e-06, "loss": 2.0304, "step": 5750 }, { "epoch": 0.78, "learning_rate": 1.2225074304242096e-06, "loss": 2.0341, "step": 5755 }, { "epoch": 0.78, "learning_rate": 1.2218319373142395e-06, "loss": 2.0402, "step": 5760 }, { "epoch": 0.78, "learning_rate": 1.221156444204269e-06, "loss": 2.0297, "step": 5765 }, { "epoch": 0.78, "learning_rate": 1.2204809510942987e-06, "loss": 2.0116, "step": 5770 }, { "epoch": 0.78, "learning_rate": 1.2198054579843285e-06, "loss": 2.0157, "step": 5775 }, { "epoch": 0.78, "learning_rate": 1.2191299648743582e-06, "loss": 2.0291, "step": 5780 }, { "epoch": 0.78, "learning_rate": 1.218454471764388e-06, "loss": 2.0338, "step": 5785 }, { "epoch": 0.78, "learning_rate": 1.2177789786544176e-06, "loss": 2.0364, "step": 5790 }, { "epoch": 0.78, "learning_rate": 1.2171034855444474e-06, "loss": 1.9964, "step": 5795 }, { "epoch": 0.78, "learning_rate": 1.216427992434477e-06, "loss": 2.135, "step": 5800 }, { "epoch": 0.78, "learning_rate": 1.2157524993245068e-06, "loss": 1.8928, "step": 5805 }, { "epoch": 0.78, "learning_rate": 1.2150770062145365e-06, "loss": 1.9855, "step": 5810 }, { "epoch": 0.79, "learning_rate": 1.2144015131045663e-06, "loss": 1.9905, "step": 5815 }, { "epoch": 0.79, "learning_rate": 1.213726019994596e-06, "loss": 1.9884, "step": 5820 }, { "epoch": 0.79, "learning_rate": 1.2130505268846255e-06, "loss": 2.0023, "step": 5825 }, { "epoch": 0.79, "learning_rate": 1.2123750337746555e-06, "loss": 2.0608, "step": 5830 }, { "epoch": 0.79, "learning_rate": 1.2116995406646852e-06, "loss": 2.0036, "step": 5835 }, { "epoch": 0.79, "learning_rate": 1.211024047554715e-06, "loss": 2.0105, "step": 5840 }, { "epoch": 0.79, "learning_rate": 1.2103485544447446e-06, "loss": 1.9843, "step": 5845 }, { "epoch": 0.79, "learning_rate": 1.2096730613347744e-06, "loss": 1.9958, "step": 5850 }, { "epoch": 0.79, "learning_rate": 1.208997568224804e-06, "loss": 2.0423, "step": 5855 }, { "epoch": 0.79, "learning_rate": 1.2083220751148338e-06, "loss": 1.9651, "step": 5860 }, { "epoch": 0.79, "learning_rate": 1.2076465820048635e-06, "loss": 2.0644, "step": 5865 }, { "epoch": 0.79, "learning_rate": 1.2069710888948933e-06, "loss": 1.9557, "step": 5870 }, { "epoch": 0.79, "learning_rate": 1.206295595784923e-06, "loss": 2.0243, "step": 5875 }, { "epoch": 0.79, "learning_rate": 1.2056201026749527e-06, "loss": 1.9895, "step": 5880 }, { "epoch": 0.8, "learning_rate": 1.2049446095649824e-06, "loss": 1.8585, "step": 5885 }, { "epoch": 0.8, "learning_rate": 1.204269116455012e-06, "loss": 2.0616, "step": 5890 }, { "epoch": 0.8, "learning_rate": 1.203593623345042e-06, "loss": 2.0316, "step": 5895 }, { "epoch": 0.8, "learning_rate": 1.2029181302350714e-06, "loss": 2.0547, "step": 5900 }, { "epoch": 0.8, "learning_rate": 1.2022426371251013e-06, "loss": 2.0177, "step": 5905 }, { "epoch": 0.8, "learning_rate": 1.2015671440151309e-06, "loss": 1.9315, "step": 5910 }, { "epoch": 0.8, "learning_rate": 1.2008916509051608e-06, "loss": 2.0497, "step": 5915 }, { "epoch": 0.8, "learning_rate": 1.2002161577951903e-06, "loss": 2.0856, "step": 5920 }, { "epoch": 0.8, "learning_rate": 1.1995406646852203e-06, "loss": 1.9031, "step": 5925 }, { "epoch": 0.8, "learning_rate": 1.1988651715752498e-06, "loss": 2.0394, "step": 5930 }, { "epoch": 0.8, "learning_rate": 1.1981896784652797e-06, "loss": 1.9831, "step": 5935 }, { "epoch": 0.8, "learning_rate": 1.1975141853553092e-06, "loss": 1.9886, "step": 5940 }, { "epoch": 0.8, "learning_rate": 1.1968386922453392e-06, "loss": 2.0037, "step": 5945 }, { "epoch": 0.8, "learning_rate": 1.1961631991353687e-06, "loss": 2.018, "step": 5950 }, { "epoch": 0.8, "learning_rate": 1.1954877060253984e-06, "loss": 2.1561, "step": 5955 }, { "epoch": 0.81, "learning_rate": 1.1948122129154281e-06, "loss": 2.0146, "step": 5960 }, { "epoch": 0.81, "learning_rate": 1.1941367198054579e-06, "loss": 2.0204, "step": 5965 }, { "epoch": 0.81, "learning_rate": 1.1934612266954876e-06, "loss": 1.9847, "step": 5970 }, { "epoch": 0.81, "learning_rate": 1.1927857335855173e-06, "loss": 1.9988, "step": 5975 }, { "epoch": 0.81, "learning_rate": 1.1921102404755472e-06, "loss": 2.0426, "step": 5980 }, { "epoch": 0.81, "learning_rate": 1.1914347473655768e-06, "loss": 1.9307, "step": 5985 }, { "epoch": 0.81, "learning_rate": 1.1907592542556067e-06, "loss": 1.9322, "step": 5990 }, { "epoch": 0.81, "learning_rate": 1.1900837611456362e-06, "loss": 2.0565, "step": 5995 }, { "epoch": 0.81, "learning_rate": 1.1894082680356661e-06, "loss": 1.9926, "step": 6000 }, { "epoch": 0.81, "eval_loss": 2.0011448860168457, "eval_runtime": 165.1917, "eval_samples_per_second": 3.608, "eval_steps_per_second": 0.454, "step": 6000 }, { "epoch": 0.81, "learning_rate": 1.1887327749256957e-06, "loss": 2.0437, "step": 6005 }, { "epoch": 0.81, "learning_rate": 1.1880572818157256e-06, "loss": 1.925, "step": 6010 }, { "epoch": 0.81, "learning_rate": 1.1873817887057551e-06, "loss": 2.0602, "step": 6015 }, { "epoch": 0.81, "learning_rate": 1.1867062955957848e-06, "loss": 2.025, "step": 6020 }, { "epoch": 0.81, "learning_rate": 1.1860308024858146e-06, "loss": 1.9937, "step": 6025 }, { "epoch": 0.81, "learning_rate": 1.1853553093758443e-06, "loss": 2.0074, "step": 6030 }, { "epoch": 0.82, "learning_rate": 1.184679816265874e-06, "loss": 2.0228, "step": 6035 }, { "epoch": 0.82, "learning_rate": 1.1840043231559037e-06, "loss": 2.0472, "step": 6040 }, { "epoch": 0.82, "learning_rate": 1.1833288300459335e-06, "loss": 2.0226, "step": 6045 }, { "epoch": 0.82, "learning_rate": 1.1826533369359632e-06, "loss": 1.9945, "step": 6050 }, { "epoch": 0.82, "learning_rate": 1.181977843825993e-06, "loss": 1.9856, "step": 6055 }, { "epoch": 0.82, "learning_rate": 1.1813023507160227e-06, "loss": 2.0037, "step": 6060 }, { "epoch": 0.82, "learning_rate": 1.1806268576060524e-06, "loss": 1.9873, "step": 6065 }, { "epoch": 0.82, "learning_rate": 1.179951364496082e-06, "loss": 2.0066, "step": 6070 }, { "epoch": 0.82, "learning_rate": 1.1792758713861118e-06, "loss": 2.0181, "step": 6075 }, { "epoch": 0.82, "learning_rate": 1.1786003782761416e-06, "loss": 1.9531, "step": 6080 }, { "epoch": 0.82, "learning_rate": 1.177924885166171e-06, "loss": 2.0259, "step": 6085 }, { "epoch": 0.82, "learning_rate": 1.177249392056201e-06, "loss": 1.9526, "step": 6090 }, { "epoch": 0.82, "learning_rate": 1.1765738989462305e-06, "loss": 1.9071, "step": 6095 }, { "epoch": 0.82, "learning_rate": 1.1758984058362605e-06, "loss": 2.0754, "step": 6100 }, { "epoch": 0.82, "learning_rate": 1.17522291272629e-06, "loss": 2.0043, "step": 6105 }, { "epoch": 0.83, "learning_rate": 1.17454741961632e-06, "loss": 1.986, "step": 6110 }, { "epoch": 0.83, "learning_rate": 1.1738719265063494e-06, "loss": 2.0672, "step": 6115 }, { "epoch": 0.83, "learning_rate": 1.1731964333963794e-06, "loss": 2.0446, "step": 6120 }, { "epoch": 0.83, "learning_rate": 1.172520940286409e-06, "loss": 2.0913, "step": 6125 }, { "epoch": 0.83, "learning_rate": 1.1718454471764388e-06, "loss": 2.0568, "step": 6130 }, { "epoch": 0.83, "learning_rate": 1.1711699540664685e-06, "loss": 1.9685, "step": 6135 }, { "epoch": 0.83, "learning_rate": 1.1704944609564983e-06, "loss": 2.033, "step": 6140 }, { "epoch": 0.83, "learning_rate": 1.169818967846528e-06, "loss": 1.9966, "step": 6145 }, { "epoch": 0.83, "learning_rate": 1.1691434747365575e-06, "loss": 1.9999, "step": 6150 }, { "epoch": 0.83, "learning_rate": 1.1684679816265874e-06, "loss": 2.0394, "step": 6155 }, { "epoch": 0.83, "learning_rate": 1.167792488516617e-06, "loss": 1.9596, "step": 6160 }, { "epoch": 0.83, "learning_rate": 1.167116995406647e-06, "loss": 2.0409, "step": 6165 }, { "epoch": 0.83, "learning_rate": 1.1664415022966764e-06, "loss": 1.9961, "step": 6170 }, { "epoch": 0.83, "learning_rate": 1.1657660091867064e-06, "loss": 2.025, "step": 6175 }, { "epoch": 0.83, "learning_rate": 1.1650905160767359e-06, "loss": 2.0427, "step": 6180 }, { "epoch": 0.84, "learning_rate": 1.1644150229667658e-06, "loss": 1.9939, "step": 6185 }, { "epoch": 0.84, "learning_rate": 1.1637395298567953e-06, "loss": 2.061, "step": 6190 }, { "epoch": 0.84, "learning_rate": 1.1630640367468253e-06, "loss": 2.0645, "step": 6195 }, { "epoch": 0.84, "learning_rate": 1.1623885436368548e-06, "loss": 2.0212, "step": 6200 }, { "epoch": 0.84, "learning_rate": 1.1617130505268847e-06, "loss": 2.0376, "step": 6205 }, { "epoch": 0.84, "learning_rate": 1.1610375574169142e-06, "loss": 1.9781, "step": 6210 }, { "epoch": 0.84, "learning_rate": 1.160362064306944e-06, "loss": 2.0583, "step": 6215 }, { "epoch": 0.84, "learning_rate": 1.1596865711969737e-06, "loss": 2.0362, "step": 6220 }, { "epoch": 0.84, "learning_rate": 1.1590110780870034e-06, "loss": 2.0105, "step": 6225 }, { "epoch": 0.84, "learning_rate": 1.1583355849770331e-06, "loss": 2.0859, "step": 6230 }, { "epoch": 0.84, "learning_rate": 1.1576600918670629e-06, "loss": 1.9903, "step": 6235 }, { "epoch": 0.84, "learning_rate": 1.1569845987570926e-06, "loss": 1.9948, "step": 6240 }, { "epoch": 0.84, "learning_rate": 1.1563091056471223e-06, "loss": 1.9782, "step": 6245 }, { "epoch": 0.84, "learning_rate": 1.155633612537152e-06, "loss": 1.9493, "step": 6250 }, { "epoch": 0.85, "learning_rate": 1.1549581194271818e-06, "loss": 2.0101, "step": 6255 }, { "epoch": 0.85, "learning_rate": 1.1542826263172115e-06, "loss": 2.0458, "step": 6260 }, { "epoch": 0.85, "learning_rate": 1.1536071332072412e-06, "loss": 1.8974, "step": 6265 }, { "epoch": 0.85, "learning_rate": 1.1529316400972712e-06, "loss": 1.9465, "step": 6270 }, { "epoch": 0.85, "learning_rate": 1.1522561469873007e-06, "loss": 1.9905, "step": 6275 }, { "epoch": 0.85, "learning_rate": 1.1515806538773304e-06, "loss": 2.0912, "step": 6280 }, { "epoch": 0.85, "learning_rate": 1.1509051607673601e-06, "loss": 1.9464, "step": 6285 }, { "epoch": 0.85, "learning_rate": 1.1502296676573898e-06, "loss": 1.9497, "step": 6290 }, { "epoch": 0.85, "learning_rate": 1.1495541745474196e-06, "loss": 2.0405, "step": 6295 }, { "epoch": 0.85, "learning_rate": 1.1488786814374493e-06, "loss": 1.9306, "step": 6300 }, { "epoch": 0.85, "learning_rate": 1.148203188327479e-06, "loss": 2.1404, "step": 6305 }, { "epoch": 0.85, "learning_rate": 1.1475276952175088e-06, "loss": 1.906, "step": 6310 }, { "epoch": 0.85, "learning_rate": 1.1468522021075385e-06, "loss": 2.0527, "step": 6315 }, { "epoch": 0.85, "learning_rate": 1.1461767089975682e-06, "loss": 2.057, "step": 6320 }, { "epoch": 0.85, "learning_rate": 1.145501215887598e-06, "loss": 1.948, "step": 6325 }, { "epoch": 0.86, "learning_rate": 1.1448257227776277e-06, "loss": 2.0469, "step": 6330 }, { "epoch": 0.86, "learning_rate": 1.1441502296676574e-06, "loss": 2.0718, "step": 6335 }, { "epoch": 0.86, "learning_rate": 1.1434747365576871e-06, "loss": 2.0162, "step": 6340 }, { "epoch": 0.86, "learning_rate": 1.1427992434477166e-06, "loss": 2.0367, "step": 6345 }, { "epoch": 0.86, "learning_rate": 1.1421237503377466e-06, "loss": 1.9805, "step": 6350 }, { "epoch": 0.86, "learning_rate": 1.141448257227776e-06, "loss": 1.975, "step": 6355 }, { "epoch": 0.86, "learning_rate": 1.140772764117806e-06, "loss": 2.1027, "step": 6360 }, { "epoch": 0.86, "learning_rate": 1.1400972710078355e-06, "loss": 2.0534, "step": 6365 }, { "epoch": 0.86, "learning_rate": 1.1394217778978655e-06, "loss": 2.0852, "step": 6370 }, { "epoch": 0.86, "learning_rate": 1.138746284787895e-06, "loss": 2.0087, "step": 6375 }, { "epoch": 0.86, "learning_rate": 1.138070791677925e-06, "loss": 1.9716, "step": 6380 }, { "epoch": 0.86, "learning_rate": 1.1373952985679544e-06, "loss": 1.9873, "step": 6385 }, { "epoch": 0.86, "learning_rate": 1.1367198054579844e-06, "loss": 2.0416, "step": 6390 }, { "epoch": 0.86, "learning_rate": 1.1360443123480139e-06, "loss": 2.0387, "step": 6395 }, { "epoch": 0.86, "learning_rate": 1.1353688192380438e-06, "loss": 1.9927, "step": 6400 }, { "epoch": 0.86, "eval_loss": 1.9952620267868042, "eval_runtime": 165.2045, "eval_samples_per_second": 3.608, "eval_steps_per_second": 0.454, "step": 6400 }, { "epoch": 0.87, "learning_rate": 1.1346933261280733e-06, "loss": 2.0118, "step": 6405 }, { "epoch": 0.87, "learning_rate": 1.134017833018103e-06, "loss": 1.9994, "step": 6410 }, { "epoch": 0.87, "learning_rate": 1.133342339908133e-06, "loss": 2.0647, "step": 6415 }, { "epoch": 0.87, "learning_rate": 1.1326668467981625e-06, "loss": 2.0085, "step": 6420 }, { "epoch": 0.87, "learning_rate": 1.1319913536881925e-06, "loss": 2.0304, "step": 6425 }, { "epoch": 0.87, "learning_rate": 1.131315860578222e-06, "loss": 2.0116, "step": 6430 }, { "epoch": 0.87, "learning_rate": 1.130640367468252e-06, "loss": 1.9938, "step": 6435 }, { "epoch": 0.87, "learning_rate": 1.1299648743582814e-06, "loss": 1.9892, "step": 6440 }, { "epoch": 0.87, "learning_rate": 1.1292893812483114e-06, "loss": 1.9668, "step": 6445 }, { "epoch": 0.87, "learning_rate": 1.1286138881383409e-06, "loss": 1.9952, "step": 6450 }, { "epoch": 0.87, "learning_rate": 1.1279383950283708e-06, "loss": 2.02, "step": 6455 }, { "epoch": 0.87, "learning_rate": 1.1272629019184003e-06, "loss": 1.9425, "step": 6460 }, { "epoch": 0.87, "learning_rate": 1.1265874088084303e-06, "loss": 2.1208, "step": 6465 }, { "epoch": 0.87, "learning_rate": 1.1259119156984598e-06, "loss": 1.9683, "step": 6470 }, { "epoch": 0.87, "learning_rate": 1.1252364225884895e-06, "loss": 1.9708, "step": 6475 }, { "epoch": 0.88, "learning_rate": 1.1245609294785192e-06, "loss": 2.0613, "step": 6480 }, { "epoch": 0.88, "learning_rate": 1.123885436368549e-06, "loss": 1.9685, "step": 6485 }, { "epoch": 0.88, "learning_rate": 1.1232099432585787e-06, "loss": 2.028, "step": 6490 }, { "epoch": 0.88, "learning_rate": 1.1225344501486084e-06, "loss": 2.0237, "step": 6495 }, { "epoch": 0.88, "learning_rate": 1.1218589570386381e-06, "loss": 2.007, "step": 6500 }, { "epoch": 0.88, "learning_rate": 1.1211834639286679e-06, "loss": 2.1089, "step": 6505 }, { "epoch": 0.88, "learning_rate": 1.1205079708186976e-06, "loss": 1.9852, "step": 6510 }, { "epoch": 0.88, "learning_rate": 1.1198324777087273e-06, "loss": 2.005, "step": 6515 }, { "epoch": 0.88, "learning_rate": 1.119156984598757e-06, "loss": 1.8925, "step": 6520 }, { "epoch": 0.88, "learning_rate": 1.1184814914887868e-06, "loss": 1.9801, "step": 6525 }, { "epoch": 0.88, "learning_rate": 1.1178059983788165e-06, "loss": 1.9388, "step": 6530 }, { "epoch": 0.88, "learning_rate": 1.1171305052688462e-06, "loss": 2.0295, "step": 6535 }, { "epoch": 0.88, "learning_rate": 1.1164550121588757e-06, "loss": 1.9851, "step": 6540 }, { "epoch": 0.88, "learning_rate": 1.1157795190489057e-06, "loss": 1.9958, "step": 6545 }, { "epoch": 0.88, "learning_rate": 1.1151040259389352e-06, "loss": 2.0137, "step": 6550 }, { "epoch": 0.89, "learning_rate": 1.1144285328289651e-06, "loss": 1.9999, "step": 6555 }, { "epoch": 0.89, "learning_rate": 1.1137530397189949e-06, "loss": 2.0079, "step": 6560 }, { "epoch": 0.89, "learning_rate": 1.1130775466090246e-06, "loss": 2.0772, "step": 6565 }, { "epoch": 0.89, "learning_rate": 1.1124020534990543e-06, "loss": 1.9449, "step": 6570 }, { "epoch": 0.89, "learning_rate": 1.111726560389084e-06, "loss": 2.0685, "step": 6575 }, { "epoch": 0.89, "learning_rate": 1.1110510672791138e-06, "loss": 2.0436, "step": 6580 }, { "epoch": 0.89, "learning_rate": 1.1103755741691435e-06, "loss": 1.9999, "step": 6585 }, { "epoch": 0.89, "learning_rate": 1.1097000810591732e-06, "loss": 1.9621, "step": 6590 }, { "epoch": 0.89, "learning_rate": 1.109024587949203e-06, "loss": 2.018, "step": 6595 }, { "epoch": 0.89, "learning_rate": 1.1083490948392327e-06, "loss": 1.9571, "step": 6600 }, { "epoch": 0.89, "learning_rate": 1.1076736017292622e-06, "loss": 1.9302, "step": 6605 }, { "epoch": 0.89, "learning_rate": 1.1069981086192921e-06, "loss": 1.9668, "step": 6610 }, { "epoch": 0.89, "learning_rate": 1.1063226155093216e-06, "loss": 2.0856, "step": 6615 }, { "epoch": 0.89, "learning_rate": 1.1056471223993516e-06, "loss": 1.9749, "step": 6620 }, { "epoch": 0.89, "learning_rate": 1.104971629289381e-06, "loss": 1.9511, "step": 6625 }, { "epoch": 0.9, "learning_rate": 1.104296136179411e-06, "loss": 2.0511, "step": 6630 }, { "epoch": 0.9, "learning_rate": 1.1036206430694405e-06, "loss": 2.0733, "step": 6635 }, { "epoch": 0.9, "learning_rate": 1.1029451499594705e-06, "loss": 1.9882, "step": 6640 }, { "epoch": 0.9, "learning_rate": 1.1022696568495e-06, "loss": 2.0366, "step": 6645 }, { "epoch": 0.9, "learning_rate": 1.10159416373953e-06, "loss": 1.9377, "step": 6650 }, { "epoch": 0.9, "learning_rate": 1.1009186706295594e-06, "loss": 1.9914, "step": 6655 }, { "epoch": 0.9, "learning_rate": 1.1002431775195894e-06, "loss": 2.035, "step": 6660 }, { "epoch": 0.9, "learning_rate": 1.099567684409619e-06, "loss": 2.0213, "step": 6665 }, { "epoch": 0.9, "learning_rate": 1.0988921912996486e-06, "loss": 2.0916, "step": 6670 }, { "epoch": 0.9, "learning_rate": 1.0982166981896783e-06, "loss": 1.9638, "step": 6675 }, { "epoch": 0.9, "learning_rate": 1.097541205079708e-06, "loss": 2.0284, "step": 6680 }, { "epoch": 0.9, "learning_rate": 1.0968657119697378e-06, "loss": 1.9684, "step": 6685 }, { "epoch": 0.9, "learning_rate": 1.0961902188597675e-06, "loss": 2.0286, "step": 6690 }, { "epoch": 0.9, "learning_rate": 1.0955147257497973e-06, "loss": 1.94, "step": 6695 }, { "epoch": 0.91, "learning_rate": 1.094839232639827e-06, "loss": 2.0547, "step": 6700 }, { "epoch": 0.91, "learning_rate": 1.094163739529857e-06, "loss": 1.9955, "step": 6705 }, { "epoch": 0.91, "learning_rate": 1.0934882464198864e-06, "loss": 2.0647, "step": 6710 }, { "epoch": 0.91, "learning_rate": 1.0928127533099164e-06, "loss": 1.955, "step": 6715 }, { "epoch": 0.91, "learning_rate": 1.0921372601999459e-06, "loss": 2.0865, "step": 6720 }, { "epoch": 0.91, "learning_rate": 1.0914617670899758e-06, "loss": 1.9343, "step": 6725 }, { "epoch": 0.91, "learning_rate": 1.0907862739800053e-06, "loss": 1.9694, "step": 6730 }, { "epoch": 0.91, "learning_rate": 1.090110780870035e-06, "loss": 2.071, "step": 6735 }, { "epoch": 0.91, "learning_rate": 1.0894352877600648e-06, "loss": 2.0049, "step": 6740 }, { "epoch": 0.91, "learning_rate": 1.0887597946500945e-06, "loss": 1.9945, "step": 6745 }, { "epoch": 0.91, "learning_rate": 1.0880843015401242e-06, "loss": 2.0204, "step": 6750 }, { "epoch": 0.91, "learning_rate": 1.087408808430154e-06, "loss": 1.9284, "step": 6755 }, { "epoch": 0.91, "learning_rate": 1.0867333153201837e-06, "loss": 1.9173, "step": 6760 }, { "epoch": 0.91, "learning_rate": 1.0860578222102134e-06, "loss": 1.9321, "step": 6765 }, { "epoch": 0.91, "learning_rate": 1.0853823291002431e-06, "loss": 2.0363, "step": 6770 }, { "epoch": 0.92, "learning_rate": 1.0847068359902729e-06, "loss": 2.053, "step": 6775 }, { "epoch": 0.92, "learning_rate": 1.0840313428803026e-06, "loss": 1.9384, "step": 6780 }, { "epoch": 0.92, "learning_rate": 1.0833558497703323e-06, "loss": 1.9329, "step": 6785 }, { "epoch": 0.92, "learning_rate": 1.082680356660362e-06, "loss": 2.0216, "step": 6790 }, { "epoch": 0.92, "learning_rate": 1.0820048635503918e-06, "loss": 2.0901, "step": 6795 }, { "epoch": 0.92, "learning_rate": 1.0813293704404213e-06, "loss": 2.0983, "step": 6800 }, { "epoch": 0.92, "eval_loss": 1.989758014678955, "eval_runtime": 165.4469, "eval_samples_per_second": 3.602, "eval_steps_per_second": 0.453, "step": 6800 }, { "epoch": 0.92, "learning_rate": 1.0806538773304512e-06, "loss": 2.0018, "step": 6805 }, { "epoch": 0.92, "learning_rate": 1.0799783842204807e-06, "loss": 1.9857, "step": 6810 }, { "epoch": 0.92, "learning_rate": 1.0793028911105107e-06, "loss": 2.0529, "step": 6815 }, { "epoch": 0.92, "learning_rate": 1.0786273980005402e-06, "loss": 1.9866, "step": 6820 }, { "epoch": 0.92, "learning_rate": 1.0779519048905701e-06, "loss": 2.0546, "step": 6825 }, { "epoch": 0.92, "learning_rate": 1.0772764117805996e-06, "loss": 1.9727, "step": 6830 }, { "epoch": 0.92, "learning_rate": 1.0766009186706296e-06, "loss": 2.0949, "step": 6835 }, { "epoch": 0.92, "learning_rate": 1.075925425560659e-06, "loss": 2.049, "step": 6840 }, { "epoch": 0.92, "learning_rate": 1.075249932450689e-06, "loss": 2.0513, "step": 6845 }, { "epoch": 0.93, "learning_rate": 1.0745744393407188e-06, "loss": 1.9751, "step": 6850 }, { "epoch": 0.93, "learning_rate": 1.0738989462307485e-06, "loss": 2.0021, "step": 6855 }, { "epoch": 0.93, "learning_rate": 1.0732234531207782e-06, "loss": 2.0116, "step": 6860 }, { "epoch": 0.93, "learning_rate": 1.0725479600108077e-06, "loss": 1.9602, "step": 6865 }, { "epoch": 0.93, "learning_rate": 1.0718724669008377e-06, "loss": 1.9296, "step": 6870 }, { "epoch": 0.93, "learning_rate": 1.0711969737908672e-06, "loss": 1.9961, "step": 6875 }, { "epoch": 0.93, "learning_rate": 1.0705214806808971e-06, "loss": 1.9395, "step": 6880 }, { "epoch": 0.93, "learning_rate": 1.0698459875709266e-06, "loss": 1.9828, "step": 6885 }, { "epoch": 0.93, "learning_rate": 1.0691704944609566e-06, "loss": 1.9577, "step": 6890 }, { "epoch": 0.93, "learning_rate": 1.068495001350986e-06, "loss": 1.9432, "step": 6895 }, { "epoch": 0.93, "learning_rate": 1.067819508241016e-06, "loss": 1.953, "step": 6900 }, { "epoch": 0.93, "learning_rate": 1.0671440151310455e-06, "loss": 2.0855, "step": 6905 }, { "epoch": 0.93, "learning_rate": 1.0664685220210755e-06, "loss": 2.021, "step": 6910 }, { "epoch": 0.93, "learning_rate": 1.065793028911105e-06, "loss": 2.1253, "step": 6915 }, { "epoch": 0.93, "learning_rate": 1.065117535801135e-06, "loss": 1.9398, "step": 6920 }, { "epoch": 0.94, "learning_rate": 1.0644420426911644e-06, "loss": 1.8746, "step": 6925 }, { "epoch": 0.94, "learning_rate": 1.0637665495811942e-06, "loss": 1.9618, "step": 6930 }, { "epoch": 0.94, "learning_rate": 1.063091056471224e-06, "loss": 1.9594, "step": 6935 }, { "epoch": 0.94, "learning_rate": 1.0624155633612536e-06, "loss": 1.9582, "step": 6940 }, { "epoch": 0.94, "learning_rate": 1.0617400702512834e-06, "loss": 2.0177, "step": 6945 }, { "epoch": 0.94, "learning_rate": 1.061064577141313e-06, "loss": 2.0466, "step": 6950 }, { "epoch": 0.94, "learning_rate": 1.0603890840313428e-06, "loss": 1.9999, "step": 6955 }, { "epoch": 0.94, "learning_rate": 1.0597135909213725e-06, "loss": 1.9628, "step": 6960 }, { "epoch": 0.94, "learning_rate": 1.0590380978114023e-06, "loss": 2.0182, "step": 6965 }, { "epoch": 0.94, "learning_rate": 1.058362604701432e-06, "loss": 2.0239, "step": 6970 }, { "epoch": 0.94, "learning_rate": 1.0576871115914617e-06, "loss": 1.915, "step": 6975 }, { "epoch": 0.94, "learning_rate": 1.0570116184814914e-06, "loss": 2.0042, "step": 6980 }, { "epoch": 0.94, "learning_rate": 1.056336125371521e-06, "loss": 1.9469, "step": 6985 }, { "epoch": 0.94, "learning_rate": 1.0556606322615509e-06, "loss": 2.1206, "step": 6990 }, { "epoch": 0.94, "learning_rate": 1.0549851391515806e-06, "loss": 2.0603, "step": 6995 }, { "epoch": 0.95, "learning_rate": 1.0543096460416103e-06, "loss": 1.9043, "step": 7000 }, { "epoch": 0.95, "learning_rate": 1.05363415293164e-06, "loss": 1.9525, "step": 7005 }, { "epoch": 0.95, "learning_rate": 1.0529586598216698e-06, "loss": 2.0047, "step": 7010 }, { "epoch": 0.95, "learning_rate": 1.0522831667116995e-06, "loss": 2.0078, "step": 7015 }, { "epoch": 0.95, "learning_rate": 1.0516076736017292e-06, "loss": 2.0401, "step": 7020 }, { "epoch": 0.95, "learning_rate": 1.050932180491759e-06, "loss": 2.0235, "step": 7025 }, { "epoch": 0.95, "learning_rate": 1.0502566873817887e-06, "loss": 1.8742, "step": 7030 }, { "epoch": 0.95, "learning_rate": 1.0495811942718184e-06, "loss": 1.9213, "step": 7035 }, { "epoch": 0.95, "learning_rate": 1.0489057011618482e-06, "loss": 2.0103, "step": 7040 }, { "epoch": 0.95, "learning_rate": 1.0482302080518779e-06, "loss": 1.9676, "step": 7045 }, { "epoch": 0.95, "learning_rate": 1.0475547149419076e-06, "loss": 2.0613, "step": 7050 }, { "epoch": 0.95, "learning_rate": 1.0468792218319373e-06, "loss": 1.9115, "step": 7055 }, { "epoch": 0.95, "learning_rate": 1.0462037287219668e-06, "loss": 2.1336, "step": 7060 }, { "epoch": 0.95, "learning_rate": 1.0455282356119968e-06, "loss": 2.0362, "step": 7065 }, { "epoch": 0.96, "learning_rate": 1.0448527425020263e-06, "loss": 1.9231, "step": 7070 }, { "epoch": 0.96, "learning_rate": 1.0441772493920562e-06, "loss": 2.0279, "step": 7075 }, { "epoch": 0.96, "learning_rate": 1.0435017562820858e-06, "loss": 1.9745, "step": 7080 }, { "epoch": 0.96, "learning_rate": 1.0428262631721157e-06, "loss": 1.934, "step": 7085 }, { "epoch": 0.96, "learning_rate": 1.0421507700621452e-06, "loss": 1.9617, "step": 7090 }, { "epoch": 0.96, "learning_rate": 1.0414752769521751e-06, "loss": 1.9954, "step": 7095 }, { "epoch": 0.96, "learning_rate": 1.0407997838422047e-06, "loss": 1.9847, "step": 7100 }, { "epoch": 0.96, "learning_rate": 1.0401242907322346e-06, "loss": 2.0425, "step": 7105 }, { "epoch": 0.96, "learning_rate": 1.0394487976222641e-06, "loss": 1.9804, "step": 7110 }, { "epoch": 0.96, "learning_rate": 1.038773304512294e-06, "loss": 1.9689, "step": 7115 }, { "epoch": 0.96, "learning_rate": 1.0380978114023236e-06, "loss": 2.0032, "step": 7120 }, { "epoch": 0.96, "learning_rate": 1.0374223182923533e-06, "loss": 2.0059, "step": 7125 }, { "epoch": 0.96, "learning_rate": 1.036746825182383e-06, "loss": 2.0135, "step": 7130 }, { "epoch": 0.96, "learning_rate": 1.0360713320724127e-06, "loss": 1.94, "step": 7135 }, { "epoch": 0.96, "learning_rate": 1.0353958389624427e-06, "loss": 1.9191, "step": 7140 }, { "epoch": 0.97, "learning_rate": 1.0347203458524722e-06, "loss": 1.9087, "step": 7145 }, { "epoch": 0.97, "learning_rate": 1.0340448527425021e-06, "loss": 1.9729, "step": 7150 }, { "epoch": 0.97, "learning_rate": 1.0333693596325316e-06, "loss": 2.003, "step": 7155 }, { "epoch": 0.97, "learning_rate": 1.0326938665225616e-06, "loss": 2.0825, "step": 7160 }, { "epoch": 0.97, "learning_rate": 1.032018373412591e-06, "loss": 1.9802, "step": 7165 }, { "epoch": 0.97, "learning_rate": 1.031342880302621e-06, "loss": 1.9544, "step": 7170 }, { "epoch": 0.97, "learning_rate": 1.0306673871926506e-06, "loss": 1.9041, "step": 7175 }, { "epoch": 0.97, "learning_rate": 1.0299918940826805e-06, "loss": 1.8864, "step": 7180 }, { "epoch": 0.97, "learning_rate": 1.02931640097271e-06, "loss": 2.0309, "step": 7185 }, { "epoch": 0.97, "learning_rate": 1.0286409078627397e-06, "loss": 1.9711, "step": 7190 }, { "epoch": 0.97, "learning_rate": 1.0279654147527695e-06, "loss": 1.9598, "step": 7195 }, { "epoch": 0.97, "learning_rate": 1.0272899216427992e-06, "loss": 1.9517, "step": 7200 }, { "epoch": 0.97, "eval_loss": 1.985114336013794, "eval_runtime": 165.362, "eval_samples_per_second": 3.604, "eval_steps_per_second": 0.454, "step": 7200 }, { "epoch": 0.97, "learning_rate": 1.026614428532829e-06, "loss": 1.8818, "step": 7205 }, { "epoch": 0.97, "learning_rate": 1.0259389354228586e-06, "loss": 1.9379, "step": 7210 }, { "epoch": 0.97, "learning_rate": 1.0252634423128884e-06, "loss": 1.9673, "step": 7215 }, { "epoch": 0.98, "learning_rate": 1.024587949202918e-06, "loss": 2.0222, "step": 7220 }, { "epoch": 0.98, "learning_rate": 1.0239124560929478e-06, "loss": 1.9747, "step": 7225 }, { "epoch": 0.98, "learning_rate": 1.0232369629829775e-06, "loss": 2.088, "step": 7230 }, { "epoch": 0.98, "learning_rate": 1.0225614698730073e-06, "loss": 2.0251, "step": 7235 }, { "epoch": 0.98, "learning_rate": 1.021885976763037e-06, "loss": 2.1025, "step": 7240 }, { "epoch": 0.98, "learning_rate": 1.0212104836530665e-06, "loss": 1.9793, "step": 7245 }, { "epoch": 0.98, "learning_rate": 1.0205349905430964e-06, "loss": 2.0546, "step": 7250 }, { "epoch": 0.98, "learning_rate": 1.019859497433126e-06, "loss": 2.0635, "step": 7255 }, { "epoch": 0.98, "learning_rate": 1.019184004323156e-06, "loss": 1.9224, "step": 7260 }, { "epoch": 0.98, "learning_rate": 1.0185085112131854e-06, "loss": 2.0158, "step": 7265 }, { "epoch": 0.98, "learning_rate": 1.0178330181032154e-06, "loss": 2.0352, "step": 7270 }, { "epoch": 0.98, "learning_rate": 1.0171575249932449e-06, "loss": 2.0249, "step": 7275 }, { "epoch": 0.98, "learning_rate": 1.0164820318832748e-06, "loss": 1.9468, "step": 7280 }, { "epoch": 0.98, "learning_rate": 1.0158065387733043e-06, "loss": 1.9318, "step": 7285 }, { "epoch": 0.98, "learning_rate": 1.0151310456633343e-06, "loss": 2.0633, "step": 7290 }, { "epoch": 0.99, "learning_rate": 1.014455552553364e-06, "loss": 2.0459, "step": 7295 }, { "epoch": 0.99, "learning_rate": 1.0137800594433937e-06, "loss": 2.0734, "step": 7300 }, { "epoch": 0.99, "learning_rate": 1.0131045663334234e-06, "loss": 2.06, "step": 7305 }, { "epoch": 0.99, "learning_rate": 1.0124290732234532e-06, "loss": 1.947, "step": 7310 }, { "epoch": 0.99, "learning_rate": 1.0117535801134829e-06, "loss": 2.0348, "step": 7315 }, { "epoch": 0.99, "learning_rate": 1.0110780870035124e-06, "loss": 2.0692, "step": 7320 }, { "epoch": 0.99, "learning_rate": 1.0104025938935423e-06, "loss": 2.02, "step": 7325 }, { "epoch": 0.99, "learning_rate": 1.0097271007835719e-06, "loss": 1.9659, "step": 7330 }, { "epoch": 0.99, "learning_rate": 1.0090516076736018e-06, "loss": 1.9602, "step": 7335 }, { "epoch": 0.99, "learning_rate": 1.0083761145636313e-06, "loss": 1.9835, "step": 7340 }, { "epoch": 0.99, "learning_rate": 1.0077006214536612e-06, "loss": 1.9815, "step": 7345 }, { "epoch": 0.99, "learning_rate": 1.0070251283436908e-06, "loss": 1.9859, "step": 7350 }, { "epoch": 0.99, "learning_rate": 1.0063496352337207e-06, "loss": 2.1161, "step": 7355 }, { "epoch": 0.99, "learning_rate": 1.0056741421237502e-06, "loss": 2.0387, "step": 7360 }, { "epoch": 0.99, "learning_rate": 1.0049986490137801e-06, "loss": 2.0205, "step": 7365 }, { "epoch": 1.0, "learning_rate": 1.0043231559038097e-06, "loss": 2.0478, "step": 7370 }, { "epoch": 1.0, "learning_rate": 1.0036476627938396e-06, "loss": 2.023, "step": 7375 }, { "epoch": 1.0, "learning_rate": 1.0029721696838691e-06, "loss": 2.0349, "step": 7380 }, { "epoch": 1.0, "learning_rate": 1.0022966765738988e-06, "loss": 2.0054, "step": 7385 }, { "epoch": 1.0, "learning_rate": 1.0016211834639286e-06, "loss": 2.1175, "step": 7390 }, { "epoch": 1.0, "learning_rate": 1.0009456903539583e-06, "loss": 2.0055, "step": 7395 }, { "epoch": 1.0, "learning_rate": 1.000270197243988e-06, "loss": 2.0236, "step": 7400 }, { "epoch": 1.0, "learning_rate": 9.995947041340177e-07, "loss": 2.073, "step": 7405 }, { "epoch": 1.0, "learning_rate": 9.989192110240475e-07, "loss": 1.9406, "step": 7410 }, { "epoch": 1.0, "learning_rate": 9.982437179140772e-07, "loss": 2.0151, "step": 7415 }, { "epoch": 1.0, "learning_rate": 9.97568224804107e-07, "loss": 1.9841, "step": 7420 }, { "epoch": 1.0, "learning_rate": 9.968927316941367e-07, "loss": 1.8764, "step": 7425 }, { "epoch": 1.0, "learning_rate": 9.962172385841664e-07, "loss": 1.982, "step": 7430 }, { "epoch": 1.0, "learning_rate": 9.95541745474196e-07, "loss": 1.9922, "step": 7435 }, { "epoch": 1.01, "learning_rate": 9.948662523642258e-07, "loss": 2.0233, "step": 7440 }, { "epoch": 1.01, "learning_rate": 9.941907592542556e-07, "loss": 1.9742, "step": 7445 }, { "epoch": 1.01, "learning_rate": 9.935152661442853e-07, "loss": 2.048, "step": 7450 }, { "epoch": 1.01, "learning_rate": 9.92839773034315e-07, "loss": 1.9523, "step": 7455 }, { "epoch": 1.01, "learning_rate": 9.921642799243447e-07, "loss": 2.0822, "step": 7460 }, { "epoch": 1.01, "learning_rate": 9.914887868143745e-07, "loss": 2.0152, "step": 7465 }, { "epoch": 1.01, "learning_rate": 9.908132937044042e-07, "loss": 2.0208, "step": 7470 }, { "epoch": 1.01, "learning_rate": 9.90137800594434e-07, "loss": 1.9042, "step": 7475 }, { "epoch": 1.01, "learning_rate": 9.894623074844636e-07, "loss": 2.0153, "step": 7480 }, { "epoch": 1.01, "learning_rate": 9.887868143744934e-07, "loss": 1.9946, "step": 7485 }, { "epoch": 1.01, "learning_rate": 9.88111321264523e-07, "loss": 1.9934, "step": 7490 }, { "epoch": 1.01, "learning_rate": 9.874358281545528e-07, "loss": 2.0187, "step": 7495 }, { "epoch": 1.01, "learning_rate": 9.867603350445825e-07, "loss": 1.9518, "step": 7500 }, { "epoch": 1.01, "learning_rate": 9.860848419346123e-07, "loss": 2.0422, "step": 7505 }, { "epoch": 1.01, "learning_rate": 9.85409348824642e-07, "loss": 2.0009, "step": 7510 }, { "epoch": 1.02, "learning_rate": 9.847338557146717e-07, "loss": 2.0596, "step": 7515 }, { "epoch": 1.02, "learning_rate": 9.840583626047015e-07, "loss": 2.0612, "step": 7520 }, { "epoch": 1.02, "learning_rate": 9.833828694947312e-07, "loss": 1.9486, "step": 7525 }, { "epoch": 1.02, "learning_rate": 9.82707376384761e-07, "loss": 2.0392, "step": 7530 }, { "epoch": 1.02, "learning_rate": 9.820318832747906e-07, "loss": 1.9368, "step": 7535 }, { "epoch": 1.02, "learning_rate": 9.813563901648204e-07, "loss": 2.0314, "step": 7540 }, { "epoch": 1.02, "learning_rate": 9.8068089705485e-07, "loss": 1.9035, "step": 7545 }, { "epoch": 1.02, "learning_rate": 9.800054039448798e-07, "loss": 2.0037, "step": 7550 }, { "epoch": 1.02, "learning_rate": 9.793299108349095e-07, "loss": 2.0193, "step": 7555 }, { "epoch": 1.02, "learning_rate": 9.786544177249393e-07, "loss": 2.0409, "step": 7560 }, { "epoch": 1.02, "learning_rate": 9.77978924614969e-07, "loss": 2.056, "step": 7565 }, { "epoch": 1.02, "learning_rate": 9.773034315049987e-07, "loss": 1.9435, "step": 7570 }, { "epoch": 1.02, "learning_rate": 9.766279383950282e-07, "loss": 1.9287, "step": 7575 }, { "epoch": 1.02, "learning_rate": 9.75952445285058e-07, "loss": 1.9647, "step": 7580 }, { "epoch": 1.02, "learning_rate": 9.752769521750877e-07, "loss": 1.8393, "step": 7585 }, { "epoch": 1.03, "learning_rate": 9.746014590651174e-07, "loss": 1.9353, "step": 7590 }, { "epoch": 1.03, "learning_rate": 9.739259659551471e-07, "loss": 2.0181, "step": 7595 }, { "epoch": 1.03, "learning_rate": 9.732504728451769e-07, "loss": 1.9621, "step": 7600 }, { "epoch": 1.03, "eval_loss": 1.9808038473129272, "eval_runtime": 165.1869, "eval_samples_per_second": 3.608, "eval_steps_per_second": 0.454, "step": 7600 }, { "epoch": 1.03, "learning_rate": 9.725749797352066e-07, "loss": 2.0002, "step": 7605 }, { "epoch": 1.03, "learning_rate": 9.718994866252363e-07, "loss": 1.9619, "step": 7610 }, { "epoch": 1.03, "learning_rate": 9.71223993515266e-07, "loss": 2.032, "step": 7615 }, { "epoch": 1.03, "learning_rate": 9.705485004052958e-07, "loss": 1.8952, "step": 7620 }, { "epoch": 1.03, "learning_rate": 9.698730072953255e-07, "loss": 2.0198, "step": 7625 }, { "epoch": 1.03, "learning_rate": 9.691975141853552e-07, "loss": 1.9973, "step": 7630 }, { "epoch": 1.03, "learning_rate": 9.68522021075385e-07, "loss": 2.0368, "step": 7635 }, { "epoch": 1.03, "learning_rate": 9.678465279654147e-07, "loss": 1.913, "step": 7640 }, { "epoch": 1.03, "learning_rate": 9.671710348554444e-07, "loss": 1.9466, "step": 7645 }, { "epoch": 1.03, "learning_rate": 9.664955417454741e-07, "loss": 1.9923, "step": 7650 }, { "epoch": 1.03, "learning_rate": 9.658200486355039e-07, "loss": 2.055, "step": 7655 }, { "epoch": 1.03, "learning_rate": 9.651445555255336e-07, "loss": 1.9588, "step": 7660 }, { "epoch": 1.04, "learning_rate": 9.644690624155633e-07, "loss": 1.9861, "step": 7665 }, { "epoch": 1.04, "learning_rate": 9.63793569305593e-07, "loss": 2.0062, "step": 7670 }, { "epoch": 1.04, "learning_rate": 9.631180761956228e-07, "loss": 1.992, "step": 7675 }, { "epoch": 1.04, "learning_rate": 9.624425830856525e-07, "loss": 1.9221, "step": 7680 }, { "epoch": 1.04, "learning_rate": 9.617670899756822e-07, "loss": 1.9701, "step": 7685 }, { "epoch": 1.04, "learning_rate": 9.61091596865712e-07, "loss": 2.0102, "step": 7690 }, { "epoch": 1.04, "learning_rate": 9.604161037557417e-07, "loss": 1.9687, "step": 7695 }, { "epoch": 1.04, "learning_rate": 9.597406106457714e-07, "loss": 1.98, "step": 7700 }, { "epoch": 1.04, "learning_rate": 9.590651175358011e-07, "loss": 1.9699, "step": 7705 }, { "epoch": 1.04, "learning_rate": 9.583896244258308e-07, "loss": 1.9465, "step": 7710 }, { "epoch": 1.04, "learning_rate": 9.577141313158606e-07, "loss": 2.04, "step": 7715 }, { "epoch": 1.04, "learning_rate": 9.570386382058903e-07, "loss": 2.0439, "step": 7720 }, { "epoch": 1.04, "learning_rate": 9.5636314509592e-07, "loss": 1.9968, "step": 7725 }, { "epoch": 1.04, "learning_rate": 9.556876519859497e-07, "loss": 2.0141, "step": 7730 }, { "epoch": 1.04, "learning_rate": 9.550121588759795e-07, "loss": 1.9623, "step": 7735 }, { "epoch": 1.05, "learning_rate": 9.543366657660092e-07, "loss": 2.0191, "step": 7740 }, { "epoch": 1.05, "learning_rate": 9.536611726560388e-07, "loss": 1.9723, "step": 7745 }, { "epoch": 1.05, "learning_rate": 9.529856795460685e-07, "loss": 2.0218, "step": 7750 }, { "epoch": 1.05, "learning_rate": 9.523101864360983e-07, "loss": 1.8727, "step": 7755 }, { "epoch": 1.05, "learning_rate": 9.51634693326128e-07, "loss": 1.8031, "step": 7760 }, { "epoch": 1.05, "learning_rate": 9.509592002161577e-07, "loss": 2.0312, "step": 7765 }, { "epoch": 1.05, "learning_rate": 9.502837071061874e-07, "loss": 2.0651, "step": 7770 }, { "epoch": 1.05, "learning_rate": 9.496082139962172e-07, "loss": 2.0122, "step": 7775 }, { "epoch": 1.05, "learning_rate": 9.489327208862469e-07, "loss": 1.8839, "step": 7780 }, { "epoch": 1.05, "learning_rate": 9.482572277762766e-07, "loss": 2.0005, "step": 7785 }, { "epoch": 1.05, "learning_rate": 9.475817346663064e-07, "loss": 2.0028, "step": 7790 }, { "epoch": 1.05, "learning_rate": 9.469062415563361e-07, "loss": 1.9506, "step": 7795 }, { "epoch": 1.05, "learning_rate": 9.462307484463658e-07, "loss": 2.0455, "step": 7800 }, { "epoch": 1.05, "learning_rate": 9.455552553363955e-07, "loss": 1.9396, "step": 7805 }, { "epoch": 1.06, "learning_rate": 9.448797622264253e-07, "loss": 2.0283, "step": 7810 }, { "epoch": 1.06, "learning_rate": 9.44204269116455e-07, "loss": 2.0555, "step": 7815 }, { "epoch": 1.06, "learning_rate": 9.435287760064847e-07, "loss": 2.0782, "step": 7820 }, { "epoch": 1.06, "learning_rate": 9.428532828965144e-07, "loss": 2.0381, "step": 7825 }, { "epoch": 1.06, "learning_rate": 9.421777897865442e-07, "loss": 2.0217, "step": 7830 }, { "epoch": 1.06, "learning_rate": 9.415022966765739e-07, "loss": 1.9138, "step": 7835 }, { "epoch": 1.06, "learning_rate": 9.408268035666036e-07, "loss": 2.0361, "step": 7840 }, { "epoch": 1.06, "learning_rate": 9.401513104566333e-07, "loss": 1.9504, "step": 7845 }, { "epoch": 1.06, "learning_rate": 9.394758173466631e-07, "loss": 1.9862, "step": 7850 }, { "epoch": 1.06, "learning_rate": 9.388003242366928e-07, "loss": 1.923, "step": 7855 }, { "epoch": 1.06, "learning_rate": 9.381248311267225e-07, "loss": 1.9324, "step": 7860 }, { "epoch": 1.06, "learning_rate": 9.374493380167521e-07, "loss": 2.098, "step": 7865 }, { "epoch": 1.06, "learning_rate": 9.367738449067819e-07, "loss": 1.949, "step": 7870 }, { "epoch": 1.06, "learning_rate": 9.360983517968116e-07, "loss": 2.0113, "step": 7875 }, { "epoch": 1.06, "learning_rate": 9.354228586868413e-07, "loss": 1.9433, "step": 7880 }, { "epoch": 1.07, "learning_rate": 9.34747365576871e-07, "loss": 2.0162, "step": 7885 }, { "epoch": 1.07, "learning_rate": 9.340718724669008e-07, "loss": 1.9516, "step": 7890 }, { "epoch": 1.07, "learning_rate": 9.333963793569305e-07, "loss": 1.9885, "step": 7895 }, { "epoch": 1.07, "learning_rate": 9.327208862469602e-07, "loss": 1.9776, "step": 7900 }, { "epoch": 1.07, "learning_rate": 9.3204539313699e-07, "loss": 2.0417, "step": 7905 }, { "epoch": 1.07, "learning_rate": 9.313699000270197e-07, "loss": 2.0449, "step": 7910 }, { "epoch": 1.07, "learning_rate": 9.306944069170494e-07, "loss": 2.0715, "step": 7915 }, { "epoch": 1.07, "learning_rate": 9.300189138070791e-07, "loss": 1.9786, "step": 7920 }, { "epoch": 1.07, "learning_rate": 9.293434206971088e-07, "loss": 1.9668, "step": 7925 }, { "epoch": 1.07, "learning_rate": 9.286679275871385e-07, "loss": 2.0532, "step": 7930 }, { "epoch": 1.07, "learning_rate": 9.279924344771682e-07, "loss": 2.0171, "step": 7935 }, { "epoch": 1.07, "learning_rate": 9.273169413671979e-07, "loss": 2.0196, "step": 7940 }, { "epoch": 1.07, "learning_rate": 9.266414482572278e-07, "loss": 1.9405, "step": 7945 }, { "epoch": 1.07, "learning_rate": 9.259659551472575e-07, "loss": 1.9522, "step": 7950 }, { "epoch": 1.07, "learning_rate": 9.252904620372872e-07, "loss": 1.9491, "step": 7955 }, { "epoch": 1.08, "learning_rate": 9.246149689273169e-07, "loss": 1.9541, "step": 7960 }, { "epoch": 1.08, "learning_rate": 9.239394758173467e-07, "loss": 2.0164, "step": 7965 }, { "epoch": 1.08, "learning_rate": 9.232639827073764e-07, "loss": 1.9922, "step": 7970 }, { "epoch": 1.08, "learning_rate": 9.225884895974061e-07, "loss": 1.9706, "step": 7975 }, { "epoch": 1.08, "learning_rate": 9.219129964874358e-07, "loss": 1.9756, "step": 7980 }, { "epoch": 1.08, "learning_rate": 9.212375033774656e-07, "loss": 2.0676, "step": 7985 }, { "epoch": 1.08, "learning_rate": 9.205620102674953e-07, "loss": 2.0105, "step": 7990 }, { "epoch": 1.08, "learning_rate": 9.198865171575249e-07, "loss": 1.9872, "step": 7995 }, { "epoch": 1.08, "learning_rate": 9.192110240475546e-07, "loss": 1.9072, "step": 8000 }, { "epoch": 1.08, "eval_loss": 1.9768513441085815, "eval_runtime": 165.2166, "eval_samples_per_second": 3.607, "eval_steps_per_second": 0.454, "step": 8000 }, { "epoch": 1.08, "learning_rate": 9.185355309375844e-07, "loss": 2.021, "step": 8005 }, { "epoch": 1.08, "learning_rate": 9.178600378276141e-07, "loss": 2.0312, "step": 8010 }, { "epoch": 1.08, "learning_rate": 9.171845447176438e-07, "loss": 2.0465, "step": 8015 }, { "epoch": 1.08, "learning_rate": 9.165090516076736e-07, "loss": 2.0647, "step": 8020 }, { "epoch": 1.08, "learning_rate": 9.158335584977033e-07, "loss": 1.9135, "step": 8025 }, { "epoch": 1.08, "learning_rate": 9.15158065387733e-07, "loss": 1.9873, "step": 8030 }, { "epoch": 1.09, "learning_rate": 9.144825722777627e-07, "loss": 2.0321, "step": 8035 }, { "epoch": 1.09, "learning_rate": 9.138070791677925e-07, "loss": 1.8527, "step": 8040 }, { "epoch": 1.09, "learning_rate": 9.131315860578222e-07, "loss": 1.9411, "step": 8045 }, { "epoch": 1.09, "learning_rate": 9.124560929478519e-07, "loss": 2.0264, "step": 8050 }, { "epoch": 1.09, "learning_rate": 9.117805998378815e-07, "loss": 2.0049, "step": 8055 }, { "epoch": 1.09, "learning_rate": 9.111051067279113e-07, "loss": 1.9286, "step": 8060 }, { "epoch": 1.09, "learning_rate": 9.10429613617941e-07, "loss": 2.0032, "step": 8065 }, { "epoch": 1.09, "learning_rate": 9.097541205079707e-07, "loss": 1.9846, "step": 8070 }, { "epoch": 1.09, "learning_rate": 9.090786273980004e-07, "loss": 2.0288, "step": 8075 }, { "epoch": 1.09, "learning_rate": 9.084031342880302e-07, "loss": 2.0061, "step": 8080 }, { "epoch": 1.09, "learning_rate": 9.077276411780599e-07, "loss": 1.9841, "step": 8085 }, { "epoch": 1.09, "learning_rate": 9.070521480680897e-07, "loss": 2.0113, "step": 8090 }, { "epoch": 1.09, "learning_rate": 9.063766549581194e-07, "loss": 1.9356, "step": 8095 }, { "epoch": 1.09, "learning_rate": 9.057011618481492e-07, "loss": 1.9294, "step": 8100 }, { "epoch": 1.09, "learning_rate": 9.050256687381789e-07, "loss": 1.9696, "step": 8105 }, { "epoch": 1.1, "learning_rate": 9.043501756282086e-07, "loss": 1.9592, "step": 8110 }, { "epoch": 1.1, "learning_rate": 9.036746825182383e-07, "loss": 2.0304, "step": 8115 }, { "epoch": 1.1, "learning_rate": 9.029991894082681e-07, "loss": 1.9916, "step": 8120 }, { "epoch": 1.1, "learning_rate": 9.023236962982977e-07, "loss": 2.0205, "step": 8125 }, { "epoch": 1.1, "learning_rate": 9.016482031883274e-07, "loss": 1.9852, "step": 8130 }, { "epoch": 1.1, "learning_rate": 9.009727100783571e-07, "loss": 1.9217, "step": 8135 }, { "epoch": 1.1, "learning_rate": 9.002972169683869e-07, "loss": 1.929, "step": 8140 }, { "epoch": 1.1, "learning_rate": 8.996217238584166e-07, "loss": 1.9051, "step": 8145 }, { "epoch": 1.1, "learning_rate": 8.989462307484463e-07, "loss": 1.9446, "step": 8150 }, { "epoch": 1.1, "learning_rate": 8.982707376384761e-07, "loss": 1.9425, "step": 8155 }, { "epoch": 1.1, "learning_rate": 8.975952445285058e-07, "loss": 1.9975, "step": 8160 }, { "epoch": 1.1, "learning_rate": 8.969197514185355e-07, "loss": 1.988, "step": 8165 }, { "epoch": 1.1, "learning_rate": 8.962442583085652e-07, "loss": 2.0169, "step": 8170 }, { "epoch": 1.1, "learning_rate": 8.95568765198595e-07, "loss": 2.0097, "step": 8175 }, { "epoch": 1.11, "learning_rate": 8.948932720886247e-07, "loss": 2.0233, "step": 8180 }, { "epoch": 1.11, "learning_rate": 8.942177789786543e-07, "loss": 1.9843, "step": 8185 }, { "epoch": 1.11, "learning_rate": 8.93542285868684e-07, "loss": 1.901, "step": 8190 }, { "epoch": 1.11, "learning_rate": 8.928667927587138e-07, "loss": 1.9648, "step": 8195 }, { "epoch": 1.11, "learning_rate": 8.921912996487435e-07, "loss": 1.9876, "step": 8200 }, { "epoch": 1.11, "learning_rate": 8.915158065387732e-07, "loss": 1.9283, "step": 8205 }, { "epoch": 1.11, "learning_rate": 8.908403134288029e-07, "loss": 1.9734, "step": 8210 }, { "epoch": 1.11, "learning_rate": 8.901648203188327e-07, "loss": 1.9392, "step": 8215 }, { "epoch": 1.11, "learning_rate": 8.894893272088624e-07, "loss": 1.9981, "step": 8220 }, { "epoch": 1.11, "learning_rate": 8.888138340988921e-07, "loss": 1.9668, "step": 8225 }, { "epoch": 1.11, "learning_rate": 8.881383409889218e-07, "loss": 2.1008, "step": 8230 }, { "epoch": 1.11, "learning_rate": 8.874628478789517e-07, "loss": 1.8967, "step": 8235 }, { "epoch": 1.11, "learning_rate": 8.867873547689814e-07, "loss": 1.9444, "step": 8240 }, { "epoch": 1.11, "learning_rate": 8.861118616590111e-07, "loss": 1.9424, "step": 8245 }, { "epoch": 1.11, "learning_rate": 8.854363685490409e-07, "loss": 2.0454, "step": 8250 }, { "epoch": 1.12, "learning_rate": 8.847608754390705e-07, "loss": 2.0193, "step": 8255 }, { "epoch": 1.12, "learning_rate": 8.840853823291002e-07, "loss": 1.9897, "step": 8260 }, { "epoch": 1.12, "learning_rate": 8.834098892191299e-07, "loss": 1.9687, "step": 8265 }, { "epoch": 1.12, "learning_rate": 8.827343961091597e-07, "loss": 2.0078, "step": 8270 }, { "epoch": 1.12, "learning_rate": 8.820589029991894e-07, "loss": 2.022, "step": 8275 }, { "epoch": 1.12, "learning_rate": 8.813834098892191e-07, "loss": 1.9253, "step": 8280 }, { "epoch": 1.12, "learning_rate": 8.807079167792488e-07, "loss": 1.9977, "step": 8285 }, { "epoch": 1.12, "learning_rate": 8.800324236692786e-07, "loss": 1.9738, "step": 8290 }, { "epoch": 1.12, "learning_rate": 8.793569305593083e-07, "loss": 1.9692, "step": 8295 }, { "epoch": 1.12, "learning_rate": 8.78681437449338e-07, "loss": 1.9502, "step": 8300 }, { "epoch": 1.12, "learning_rate": 8.780059443393677e-07, "loss": 1.9975, "step": 8305 }, { "epoch": 1.12, "learning_rate": 8.773304512293975e-07, "loss": 2.0219, "step": 8310 }, { "epoch": 1.12, "learning_rate": 8.766549581194271e-07, "loss": 1.9972, "step": 8315 }, { "epoch": 1.12, "learning_rate": 8.759794650094568e-07, "loss": 2.0072, "step": 8320 }, { "epoch": 1.12, "learning_rate": 8.753039718994865e-07, "loss": 1.9763, "step": 8325 }, { "epoch": 1.13, "learning_rate": 8.746284787895163e-07, "loss": 2.0124, "step": 8330 }, { "epoch": 1.13, "learning_rate": 8.73952985679546e-07, "loss": 1.9558, "step": 8335 }, { "epoch": 1.13, "learning_rate": 8.732774925695757e-07, "loss": 1.971, "step": 8340 }, { "epoch": 1.13, "learning_rate": 8.726019994596054e-07, "loss": 2.0614, "step": 8345 }, { "epoch": 1.13, "learning_rate": 8.719265063496352e-07, "loss": 2.0482, "step": 8350 }, { "epoch": 1.13, "learning_rate": 8.712510132396649e-07, "loss": 1.9451, "step": 8355 }, { "epoch": 1.13, "learning_rate": 8.705755201296946e-07, "loss": 1.9871, "step": 8360 }, { "epoch": 1.13, "learning_rate": 8.699000270197243e-07, "loss": 1.9311, "step": 8365 }, { "epoch": 1.13, "learning_rate": 8.692245339097541e-07, "loss": 1.9456, "step": 8370 }, { "epoch": 1.13, "learning_rate": 8.685490407997838e-07, "loss": 2.0216, "step": 8375 }, { "epoch": 1.13, "learning_rate": 8.678735476898136e-07, "loss": 1.9703, "step": 8380 }, { "epoch": 1.13, "learning_rate": 8.671980545798433e-07, "loss": 1.9589, "step": 8385 }, { "epoch": 1.13, "learning_rate": 8.66522561469873e-07, "loss": 2.006, "step": 8390 }, { "epoch": 1.13, "learning_rate": 8.658470683599027e-07, "loss": 1.9825, "step": 8395 }, { "epoch": 1.13, "learning_rate": 8.651715752499324e-07, "loss": 2.0099, "step": 8400 }, { "epoch": 1.13, "eval_loss": 1.9731146097183228, "eval_runtime": 165.3898, "eval_samples_per_second": 3.604, "eval_steps_per_second": 0.453, "step": 8400 }, { "epoch": 1.14, "learning_rate": 8.644960821399622e-07, "loss": 1.9268, "step": 8405 }, { "epoch": 1.14, "learning_rate": 8.638205890299919e-07, "loss": 2.0344, "step": 8410 }, { "epoch": 1.14, "learning_rate": 8.631450959200216e-07, "loss": 1.961, "step": 8415 }, { "epoch": 1.14, "learning_rate": 8.624696028100513e-07, "loss": 2.0161, "step": 8420 }, { "epoch": 1.14, "learning_rate": 8.617941097000811e-07, "loss": 2.0132, "step": 8425 }, { "epoch": 1.14, "learning_rate": 8.611186165901108e-07, "loss": 1.9307, "step": 8430 }, { "epoch": 1.14, "learning_rate": 8.604431234801405e-07, "loss": 2.0199, "step": 8435 }, { "epoch": 1.14, "learning_rate": 8.597676303701702e-07, "loss": 1.9297, "step": 8440 }, { "epoch": 1.14, "learning_rate": 8.590921372601999e-07, "loss": 1.9283, "step": 8445 }, { "epoch": 1.14, "learning_rate": 8.584166441502296e-07, "loss": 2.0167, "step": 8450 }, { "epoch": 1.14, "learning_rate": 8.577411510402593e-07, "loss": 1.9519, "step": 8455 }, { "epoch": 1.14, "learning_rate": 8.57065657930289e-07, "loss": 1.9769, "step": 8460 }, { "epoch": 1.14, "learning_rate": 8.563901648203188e-07, "loss": 1.9857, "step": 8465 }, { "epoch": 1.14, "learning_rate": 8.557146717103485e-07, "loss": 1.9416, "step": 8470 }, { "epoch": 1.14, "learning_rate": 8.550391786003782e-07, "loss": 1.989, "step": 8475 }, { "epoch": 1.15, "learning_rate": 8.543636854904079e-07, "loss": 1.9692, "step": 8480 }, { "epoch": 1.15, "learning_rate": 8.536881923804377e-07, "loss": 1.9521, "step": 8485 }, { "epoch": 1.15, "learning_rate": 8.530126992704674e-07, "loss": 2.012, "step": 8490 }, { "epoch": 1.15, "learning_rate": 8.523372061604971e-07, "loss": 2.0318, "step": 8495 }, { "epoch": 1.15, "learning_rate": 8.516617130505268e-07, "loss": 2.0502, "step": 8500 }, { "epoch": 1.15, "learning_rate": 8.509862199405566e-07, "loss": 2.0098, "step": 8505 }, { "epoch": 1.15, "learning_rate": 8.503107268305862e-07, "loss": 1.9996, "step": 8510 }, { "epoch": 1.15, "learning_rate": 8.496352337206159e-07, "loss": 2.0079, "step": 8515 }, { "epoch": 1.15, "learning_rate": 8.489597406106456e-07, "loss": 1.9978, "step": 8520 }, { "epoch": 1.15, "learning_rate": 8.482842475006755e-07, "loss": 1.9966, "step": 8525 }, { "epoch": 1.15, "learning_rate": 8.476087543907052e-07, "loss": 1.9699, "step": 8530 }, { "epoch": 1.15, "learning_rate": 8.469332612807349e-07, "loss": 2.0915, "step": 8535 }, { "epoch": 1.15, "learning_rate": 8.462577681707647e-07, "loss": 1.9294, "step": 8540 }, { "epoch": 1.15, "learning_rate": 8.455822750607944e-07, "loss": 1.9252, "step": 8545 }, { "epoch": 1.16, "learning_rate": 8.449067819508241e-07, "loss": 1.9872, "step": 8550 }, { "epoch": 1.16, "learning_rate": 8.442312888408538e-07, "loss": 1.9805, "step": 8555 }, { "epoch": 1.16, "learning_rate": 8.435557957308836e-07, "loss": 2.0113, "step": 8560 }, { "epoch": 1.16, "learning_rate": 8.428803026209133e-07, "loss": 1.9835, "step": 8565 }, { "epoch": 1.16, "learning_rate": 8.42204809510943e-07, "loss": 2.0882, "step": 8570 }, { "epoch": 1.16, "learning_rate": 8.415293164009726e-07, "loss": 2.0025, "step": 8575 }, { "epoch": 1.16, "learning_rate": 8.408538232910024e-07, "loss": 1.9061, "step": 8580 }, { "epoch": 1.16, "learning_rate": 8.401783301810321e-07, "loss": 1.9768, "step": 8585 }, { "epoch": 1.16, "learning_rate": 8.395028370710618e-07, "loss": 2.023, "step": 8590 }, { "epoch": 1.16, "learning_rate": 8.388273439610915e-07, "loss": 2.0458, "step": 8595 }, { "epoch": 1.16, "learning_rate": 8.381518508511213e-07, "loss": 2.0728, "step": 8600 }, { "epoch": 1.16, "learning_rate": 8.37476357741151e-07, "loss": 2.0385, "step": 8605 }, { "epoch": 1.16, "learning_rate": 8.368008646311807e-07, "loss": 1.9309, "step": 8610 }, { "epoch": 1.16, "learning_rate": 8.361253715212104e-07, "loss": 1.9162, "step": 8615 }, { "epoch": 1.16, "learning_rate": 8.354498784112402e-07, "loss": 1.9476, "step": 8620 }, { "epoch": 1.17, "learning_rate": 8.347743853012699e-07, "loss": 2.0305, "step": 8625 }, { "epoch": 1.17, "learning_rate": 8.340988921912996e-07, "loss": 2.0251, "step": 8630 }, { "epoch": 1.17, "learning_rate": 8.334233990813294e-07, "loss": 1.9413, "step": 8635 }, { "epoch": 1.17, "learning_rate": 8.32747905971359e-07, "loss": 1.8424, "step": 8640 }, { "epoch": 1.17, "learning_rate": 8.320724128613887e-07, "loss": 1.9744, "step": 8645 }, { "epoch": 1.17, "learning_rate": 8.313969197514184e-07, "loss": 1.9232, "step": 8650 }, { "epoch": 1.17, "learning_rate": 8.307214266414482e-07, "loss": 2.0065, "step": 8655 }, { "epoch": 1.17, "learning_rate": 8.300459335314779e-07, "loss": 1.8987, "step": 8660 }, { "epoch": 1.17, "learning_rate": 8.293704404215076e-07, "loss": 1.9654, "step": 8665 }, { "epoch": 1.17, "learning_rate": 8.286949473115374e-07, "loss": 2.0159, "step": 8670 }, { "epoch": 1.17, "learning_rate": 8.280194542015672e-07, "loss": 1.9236, "step": 8675 }, { "epoch": 1.17, "learning_rate": 8.273439610915969e-07, "loss": 1.9523, "step": 8680 }, { "epoch": 1.17, "learning_rate": 8.266684679816266e-07, "loss": 1.9601, "step": 8685 }, { "epoch": 1.17, "learning_rate": 8.259929748716563e-07, "loss": 1.8293, "step": 8690 }, { "epoch": 1.17, "learning_rate": 8.253174817616861e-07, "loss": 1.99, "step": 8695 }, { "epoch": 1.18, "learning_rate": 8.246419886517158e-07, "loss": 1.9628, "step": 8700 }, { "epoch": 1.18, "learning_rate": 8.239664955417454e-07, "loss": 1.9766, "step": 8705 }, { "epoch": 1.18, "learning_rate": 8.232910024317751e-07, "loss": 1.8718, "step": 8710 }, { "epoch": 1.18, "learning_rate": 8.226155093218049e-07, "loss": 1.9567, "step": 8715 }, { "epoch": 1.18, "learning_rate": 8.219400162118346e-07, "loss": 1.8965, "step": 8720 }, { "epoch": 1.18, "learning_rate": 8.212645231018643e-07, "loss": 1.9987, "step": 8725 }, { "epoch": 1.18, "learning_rate": 8.20589029991894e-07, "loss": 1.9691, "step": 8730 }, { "epoch": 1.18, "learning_rate": 8.199135368819238e-07, "loss": 1.9517, "step": 8735 }, { "epoch": 1.18, "learning_rate": 8.192380437719535e-07, "loss": 2.0101, "step": 8740 }, { "epoch": 1.18, "learning_rate": 8.185625506619832e-07, "loss": 1.9377, "step": 8745 }, { "epoch": 1.18, "learning_rate": 8.17887057552013e-07, "loss": 2.0027, "step": 8750 }, { "epoch": 1.18, "learning_rate": 8.172115644420427e-07, "loss": 1.939, "step": 8755 }, { "epoch": 1.18, "learning_rate": 8.165360713320724e-07, "loss": 2.0467, "step": 8760 }, { "epoch": 1.18, "learning_rate": 8.158605782221021e-07, "loss": 1.9963, "step": 8765 }, { "epoch": 1.18, "learning_rate": 8.151850851121318e-07, "loss": 1.9886, "step": 8770 }, { "epoch": 1.19, "learning_rate": 8.145095920021615e-07, "loss": 1.944, "step": 8775 }, { "epoch": 1.19, "learning_rate": 8.138340988921912e-07, "loss": 1.8703, "step": 8780 }, { "epoch": 1.19, "learning_rate": 8.131586057822209e-07, "loss": 1.886, "step": 8785 }, { "epoch": 1.19, "learning_rate": 8.124831126722507e-07, "loss": 1.979, "step": 8790 }, { "epoch": 1.19, "learning_rate": 8.118076195622804e-07, "loss": 1.9356, "step": 8795 }, { "epoch": 1.19, "learning_rate": 8.111321264523101e-07, "loss": 1.9588, "step": 8800 }, { "epoch": 1.19, "eval_loss": 1.9695961475372314, "eval_runtime": 165.3874, "eval_samples_per_second": 3.604, "eval_steps_per_second": 0.453, "step": 8800 }, { "epoch": 1.19, "learning_rate": 8.104566333423398e-07, "loss": 1.9797, "step": 8805 }, { "epoch": 1.19, "learning_rate": 8.097811402323696e-07, "loss": 1.9845, "step": 8810 }, { "epoch": 1.19, "learning_rate": 8.091056471223993e-07, "loss": 1.9642, "step": 8815 }, { "epoch": 1.19, "learning_rate": 8.084301540124291e-07, "loss": 1.9069, "step": 8820 }, { "epoch": 1.19, "learning_rate": 8.077546609024588e-07, "loss": 2.1011, "step": 8825 }, { "epoch": 1.19, "learning_rate": 8.070791677924886e-07, "loss": 2.0182, "step": 8830 }, { "epoch": 1.19, "learning_rate": 8.064036746825182e-07, "loss": 1.8241, "step": 8835 }, { "epoch": 1.19, "learning_rate": 8.057281815725479e-07, "loss": 1.9258, "step": 8840 }, { "epoch": 1.19, "learning_rate": 8.050526884625776e-07, "loss": 1.9356, "step": 8845 }, { "epoch": 1.2, "learning_rate": 8.043771953526074e-07, "loss": 1.959, "step": 8850 }, { "epoch": 1.2, "learning_rate": 8.037017022426371e-07, "loss": 1.9901, "step": 8855 }, { "epoch": 1.2, "learning_rate": 8.030262091326668e-07, "loss": 1.9471, "step": 8860 }, { "epoch": 1.2, "learning_rate": 8.023507160226966e-07, "loss": 2.0966, "step": 8865 }, { "epoch": 1.2, "learning_rate": 8.016752229127263e-07, "loss": 1.9141, "step": 8870 }, { "epoch": 1.2, "learning_rate": 8.00999729802756e-07, "loss": 2.0125, "step": 8875 }, { "epoch": 1.2, "learning_rate": 8.003242366927857e-07, "loss": 1.9031, "step": 8880 }, { "epoch": 1.2, "learning_rate": 7.996487435828155e-07, "loss": 2.0249, "step": 8885 }, { "epoch": 1.2, "learning_rate": 7.989732504728452e-07, "loss": 1.9745, "step": 8890 }, { "epoch": 1.2, "learning_rate": 7.982977573628749e-07, "loss": 1.9759, "step": 8895 }, { "epoch": 1.2, "learning_rate": 7.976222642529045e-07, "loss": 1.9258, "step": 8900 }, { "epoch": 1.2, "learning_rate": 7.969467711429343e-07, "loss": 2.013, "step": 8905 }, { "epoch": 1.2, "learning_rate": 7.96271278032964e-07, "loss": 1.9858, "step": 8910 }, { "epoch": 1.2, "learning_rate": 7.955957849229937e-07, "loss": 1.9655, "step": 8915 }, { "epoch": 1.21, "learning_rate": 7.949202918130234e-07, "loss": 2.0186, "step": 8920 }, { "epoch": 1.21, "learning_rate": 7.942447987030532e-07, "loss": 1.9713, "step": 8925 }, { "epoch": 1.21, "learning_rate": 7.935693055930829e-07, "loss": 1.9275, "step": 8930 }, { "epoch": 1.21, "learning_rate": 7.928938124831126e-07, "loss": 1.8962, "step": 8935 }, { "epoch": 1.21, "learning_rate": 7.922183193731423e-07, "loss": 2.063, "step": 8940 }, { "epoch": 1.21, "learning_rate": 7.915428262631721e-07, "loss": 1.8896, "step": 8945 }, { "epoch": 1.21, "learning_rate": 7.908673331532018e-07, "loss": 1.966, "step": 8950 }, { "epoch": 1.21, "learning_rate": 7.901918400432315e-07, "loss": 1.9818, "step": 8955 }, { "epoch": 1.21, "learning_rate": 7.895163469332611e-07, "loss": 2.0381, "step": 8960 }, { "epoch": 1.21, "learning_rate": 7.88840853823291e-07, "loss": 1.9868, "step": 8965 }, { "epoch": 1.21, "learning_rate": 7.881653607133207e-07, "loss": 1.9937, "step": 8970 }, { "epoch": 1.21, "learning_rate": 7.874898676033504e-07, "loss": 1.9347, "step": 8975 }, { "epoch": 1.21, "learning_rate": 7.868143744933801e-07, "loss": 1.9733, "step": 8980 }, { "epoch": 1.21, "learning_rate": 7.861388813834099e-07, "loss": 1.9135, "step": 8985 }, { "epoch": 1.21, "learning_rate": 7.854633882734396e-07, "loss": 1.9111, "step": 8990 }, { "epoch": 1.22, "learning_rate": 7.847878951634693e-07, "loss": 2.0278, "step": 8995 }, { "epoch": 1.22, "learning_rate": 7.841124020534991e-07, "loss": 2.0025, "step": 9000 }, { "epoch": 1.22, "learning_rate": 7.834369089435288e-07, "loss": 1.9607, "step": 9005 }, { "epoch": 1.22, "learning_rate": 7.827614158335585e-07, "loss": 1.966, "step": 9010 }, { "epoch": 1.22, "learning_rate": 7.820859227235882e-07, "loss": 2.0316, "step": 9015 }, { "epoch": 1.22, "learning_rate": 7.81410429613618e-07, "loss": 2.0147, "step": 9020 }, { "epoch": 1.22, "learning_rate": 7.807349365036477e-07, "loss": 1.9391, "step": 9025 }, { "epoch": 1.22, "learning_rate": 7.800594433936773e-07, "loss": 2.0185, "step": 9030 }, { "epoch": 1.22, "learning_rate": 7.79383950283707e-07, "loss": 1.9343, "step": 9035 }, { "epoch": 1.22, "learning_rate": 7.787084571737368e-07, "loss": 1.9053, "step": 9040 }, { "epoch": 1.22, "learning_rate": 7.780329640637665e-07, "loss": 1.9645, "step": 9045 }, { "epoch": 1.22, "learning_rate": 7.773574709537962e-07, "loss": 1.9119, "step": 9050 }, { "epoch": 1.22, "learning_rate": 7.766819778438259e-07, "loss": 1.9916, "step": 9055 }, { "epoch": 1.22, "learning_rate": 7.760064847338557e-07, "loss": 1.978, "step": 9060 }, { "epoch": 1.22, "learning_rate": 7.753309916238854e-07, "loss": 1.9838, "step": 9065 }, { "epoch": 1.23, "learning_rate": 7.746554985139151e-07, "loss": 2.0075, "step": 9070 }, { "epoch": 1.23, "learning_rate": 7.739800054039448e-07, "loss": 2.0059, "step": 9075 }, { "epoch": 1.23, "learning_rate": 7.733045122939746e-07, "loss": 1.9458, "step": 9080 }, { "epoch": 1.23, "learning_rate": 7.726290191840043e-07, "loss": 2.0189, "step": 9085 }, { "epoch": 1.23, "learning_rate": 7.719535260740339e-07, "loss": 1.907, "step": 9090 }, { "epoch": 1.23, "learning_rate": 7.712780329640636e-07, "loss": 2.0197, "step": 9095 }, { "epoch": 1.23, "learning_rate": 7.706025398540934e-07, "loss": 1.9569, "step": 9100 }, { "epoch": 1.23, "learning_rate": 7.699270467441231e-07, "loss": 2.0457, "step": 9105 }, { "epoch": 1.23, "learning_rate": 7.692515536341529e-07, "loss": 1.9715, "step": 9110 }, { "epoch": 1.23, "learning_rate": 7.685760605241827e-07, "loss": 1.9375, "step": 9115 }, { "epoch": 1.23, "learning_rate": 7.679005674142124e-07, "loss": 1.9681, "step": 9120 }, { "epoch": 1.23, "learning_rate": 7.672250743042421e-07, "loss": 2.0124, "step": 9125 }, { "epoch": 1.23, "learning_rate": 7.665495811942718e-07, "loss": 1.9938, "step": 9130 }, { "epoch": 1.23, "learning_rate": 7.658740880843016e-07, "loss": 1.9986, "step": 9135 }, { "epoch": 1.23, "learning_rate": 7.651985949743313e-07, "loss": 2.0119, "step": 9140 }, { "epoch": 1.24, "learning_rate": 7.64523101864361e-07, "loss": 1.989, "step": 9145 }, { "epoch": 1.24, "learning_rate": 7.638476087543907e-07, "loss": 1.9794, "step": 9150 }, { "epoch": 1.24, "learning_rate": 7.631721156444205e-07, "loss": 1.9841, "step": 9155 }, { "epoch": 1.24, "learning_rate": 7.624966225344501e-07, "loss": 2.0012, "step": 9160 }, { "epoch": 1.24, "learning_rate": 7.618211294244798e-07, "loss": 2.0727, "step": 9165 }, { "epoch": 1.24, "learning_rate": 7.611456363145095e-07, "loss": 2.0328, "step": 9170 }, { "epoch": 1.24, "learning_rate": 7.604701432045393e-07, "loss": 1.9844, "step": 9175 }, { "epoch": 1.24, "learning_rate": 7.59794650094569e-07, "loss": 2.0104, "step": 9180 }, { "epoch": 1.24, "learning_rate": 7.591191569845987e-07, "loss": 1.9992, "step": 9185 }, { "epoch": 1.24, "learning_rate": 7.584436638746284e-07, "loss": 2.0314, "step": 9190 }, { "epoch": 1.24, "learning_rate": 7.577681707646582e-07, "loss": 1.9967, "step": 9195 }, { "epoch": 1.24, "learning_rate": 7.570926776546879e-07, "loss": 2.0098, "step": 9200 }, { "epoch": 1.24, "eval_loss": 1.96656334400177, "eval_runtime": 165.5096, "eval_samples_per_second": 3.601, "eval_steps_per_second": 0.453, "step": 9200 }, { "epoch": 1.24, "learning_rate": 7.564171845447176e-07, "loss": 1.9014, "step": 9205 }, { "epoch": 1.24, "learning_rate": 7.557416914347473e-07, "loss": 2.0112, "step": 9210 }, { "epoch": 1.24, "learning_rate": 7.550661983247771e-07, "loss": 1.981, "step": 9215 }, { "epoch": 1.25, "learning_rate": 7.543907052148067e-07, "loss": 2.0282, "step": 9220 }, { "epoch": 1.25, "learning_rate": 7.537152121048364e-07, "loss": 1.888, "step": 9225 }, { "epoch": 1.25, "learning_rate": 7.530397189948661e-07, "loss": 1.957, "step": 9230 }, { "epoch": 1.25, "learning_rate": 7.523642258848959e-07, "loss": 2.0638, "step": 9235 }, { "epoch": 1.25, "learning_rate": 7.516887327749256e-07, "loss": 2.0383, "step": 9240 }, { "epoch": 1.25, "learning_rate": 7.510132396649553e-07, "loss": 2.1005, "step": 9245 }, { "epoch": 1.25, "learning_rate": 7.50337746554985e-07, "loss": 1.9186, "step": 9250 }, { "epoch": 1.25, "learning_rate": 7.496622534450149e-07, "loss": 1.9245, "step": 9255 }, { "epoch": 1.25, "learning_rate": 7.489867603350446e-07, "loss": 1.9879, "step": 9260 }, { "epoch": 1.25, "learning_rate": 7.483112672250743e-07, "loss": 1.9247, "step": 9265 }, { "epoch": 1.25, "learning_rate": 7.476357741151041e-07, "loss": 1.9621, "step": 9270 }, { "epoch": 1.25, "learning_rate": 7.469602810051338e-07, "loss": 1.8959, "step": 9275 }, { "epoch": 1.25, "learning_rate": 7.462847878951635e-07, "loss": 2.0015, "step": 9280 }, { "epoch": 1.25, "learning_rate": 7.456092947851932e-07, "loss": 2.0016, "step": 9285 }, { "epoch": 1.26, "learning_rate": 7.449338016752229e-07, "loss": 2.0414, "step": 9290 }, { "epoch": 1.26, "learning_rate": 7.442583085652526e-07, "loss": 1.8762, "step": 9295 }, { "epoch": 1.26, "learning_rate": 7.435828154552823e-07, "loss": 1.9855, "step": 9300 }, { "epoch": 1.26, "learning_rate": 7.42907322345312e-07, "loss": 1.8853, "step": 9305 }, { "epoch": 1.26, "learning_rate": 7.422318292353418e-07, "loss": 1.9256, "step": 9310 }, { "epoch": 1.26, "learning_rate": 7.415563361253715e-07, "loss": 1.9988, "step": 9315 }, { "epoch": 1.26, "learning_rate": 7.408808430154012e-07, "loss": 1.9554, "step": 9320 }, { "epoch": 1.26, "learning_rate": 7.402053499054309e-07, "loss": 1.9522, "step": 9325 }, { "epoch": 1.26, "learning_rate": 7.395298567954607e-07, "loss": 1.9611, "step": 9330 }, { "epoch": 1.26, "learning_rate": 7.388543636854904e-07, "loss": 1.9107, "step": 9335 }, { "epoch": 1.26, "learning_rate": 7.381788705755201e-07, "loss": 1.9087, "step": 9340 }, { "epoch": 1.26, "learning_rate": 7.375033774655498e-07, "loss": 1.8861, "step": 9345 }, { "epoch": 1.26, "learning_rate": 7.368278843555795e-07, "loss": 1.9844, "step": 9350 }, { "epoch": 1.26, "learning_rate": 7.361523912456092e-07, "loss": 1.9853, "step": 9355 }, { "epoch": 1.26, "learning_rate": 7.354768981356389e-07, "loss": 1.9048, "step": 9360 }, { "epoch": 1.27, "learning_rate": 7.348014050256686e-07, "loss": 1.9164, "step": 9365 }, { "epoch": 1.27, "learning_rate": 7.341259119156984e-07, "loss": 1.9002, "step": 9370 }, { "epoch": 1.27, "learning_rate": 7.334504188057281e-07, "loss": 1.9329, "step": 9375 }, { "epoch": 1.27, "learning_rate": 7.327749256957578e-07, "loss": 1.8667, "step": 9380 }, { "epoch": 1.27, "learning_rate": 7.320994325857876e-07, "loss": 1.9071, "step": 9385 }, { "epoch": 1.27, "learning_rate": 7.314239394758173e-07, "loss": 1.9801, "step": 9390 }, { "epoch": 1.27, "learning_rate": 7.30748446365847e-07, "loss": 1.8903, "step": 9395 }, { "epoch": 1.27, "learning_rate": 7.300729532558768e-07, "loss": 2.0193, "step": 9400 }, { "epoch": 1.27, "learning_rate": 7.293974601459066e-07, "loss": 1.9583, "step": 9405 }, { "epoch": 1.27, "learning_rate": 7.287219670359363e-07, "loss": 1.9964, "step": 9410 }, { "epoch": 1.27, "learning_rate": 7.28046473925966e-07, "loss": 1.9577, "step": 9415 }, { "epoch": 1.27, "learning_rate": 7.273709808159956e-07, "loss": 1.9443, "step": 9420 }, { "epoch": 1.27, "learning_rate": 7.266954877060254e-07, "loss": 2.057, "step": 9425 }, { "epoch": 1.27, "learning_rate": 7.260199945960551e-07, "loss": 2.0193, "step": 9430 }, { "epoch": 1.27, "learning_rate": 7.253445014860848e-07, "loss": 2.0085, "step": 9435 }, { "epoch": 1.28, "learning_rate": 7.246690083761145e-07, "loss": 1.9692, "step": 9440 }, { "epoch": 1.28, "learning_rate": 7.239935152661443e-07, "loss": 1.9368, "step": 9445 }, { "epoch": 1.28, "learning_rate": 7.23318022156174e-07, "loss": 1.9555, "step": 9450 }, { "epoch": 1.28, "learning_rate": 7.226425290462037e-07, "loss": 1.9812, "step": 9455 }, { "epoch": 1.28, "learning_rate": 7.219670359362334e-07, "loss": 1.9442, "step": 9460 }, { "epoch": 1.28, "learning_rate": 7.212915428262632e-07, "loss": 1.9647, "step": 9465 }, { "epoch": 1.28, "learning_rate": 7.206160497162929e-07, "loss": 2.0252, "step": 9470 }, { "epoch": 1.28, "learning_rate": 7.199405566063226e-07, "loss": 1.9675, "step": 9475 }, { "epoch": 1.28, "learning_rate": 7.192650634963522e-07, "loss": 1.9984, "step": 9480 }, { "epoch": 1.28, "learning_rate": 7.18589570386382e-07, "loss": 2.0282, "step": 9485 }, { "epoch": 1.28, "learning_rate": 7.179140772764117e-07, "loss": 1.9355, "step": 9490 }, { "epoch": 1.28, "learning_rate": 7.172385841664414e-07, "loss": 2.0768, "step": 9495 }, { "epoch": 1.28, "learning_rate": 7.165630910564712e-07, "loss": 2.0453, "step": 9500 }, { "epoch": 1.28, "learning_rate": 7.158875979465009e-07, "loss": 1.9795, "step": 9505 }, { "epoch": 1.28, "learning_rate": 7.152121048365306e-07, "loss": 1.9689, "step": 9510 }, { "epoch": 1.29, "learning_rate": 7.145366117265603e-07, "loss": 2.0186, "step": 9515 }, { "epoch": 1.29, "learning_rate": 7.138611186165901e-07, "loss": 2.0045, "step": 9520 }, { "epoch": 1.29, "learning_rate": 7.131856255066198e-07, "loss": 2.0813, "step": 9525 }, { "epoch": 1.29, "learning_rate": 7.125101323966495e-07, "loss": 1.9417, "step": 9530 }, { "epoch": 1.29, "learning_rate": 7.118346392866792e-07, "loss": 1.8859, "step": 9535 }, { "epoch": 1.29, "learning_rate": 7.11159146176709e-07, "loss": 1.9949, "step": 9540 }, { "epoch": 1.29, "learning_rate": 7.104836530667388e-07, "loss": 2.0046, "step": 9545 }, { "epoch": 1.29, "learning_rate": 7.098081599567684e-07, "loss": 1.9243, "step": 9550 }, { "epoch": 1.29, "learning_rate": 7.091326668467981e-07, "loss": 1.9855, "step": 9555 }, { "epoch": 1.29, "learning_rate": 7.084571737368279e-07, "loss": 1.9326, "step": 9560 }, { "epoch": 1.29, "learning_rate": 7.077816806268576e-07, "loss": 2.0766, "step": 9565 }, { "epoch": 1.29, "learning_rate": 7.071061875168873e-07, "loss": 2.016, "step": 9570 }, { "epoch": 1.29, "learning_rate": 7.06430694406917e-07, "loss": 2.012, "step": 9575 }, { "epoch": 1.29, "learning_rate": 7.057552012969468e-07, "loss": 1.9261, "step": 9580 }, { "epoch": 1.29, "learning_rate": 7.050797081869765e-07, "loss": 2.047, "step": 9585 }, { "epoch": 1.3, "learning_rate": 7.044042150770062e-07, "loss": 1.9532, "step": 9590 }, { "epoch": 1.3, "learning_rate": 7.03728721967036e-07, "loss": 2.0149, "step": 9595 }, { "epoch": 1.3, "learning_rate": 7.030532288570657e-07, "loss": 1.9688, "step": 9600 }, { "epoch": 1.3, "eval_loss": 1.963936448097229, "eval_runtime": 165.4614, "eval_samples_per_second": 3.602, "eval_steps_per_second": 0.453, "step": 9600 }, { "epoch": 1.3, "learning_rate": 7.023777357470954e-07, "loss": 1.9251, "step": 9605 }, { "epoch": 1.3, "learning_rate": 7.01702242637125e-07, "loss": 2.0004, "step": 9610 }, { "epoch": 1.3, "learning_rate": 7.010267495271548e-07, "loss": 1.903, "step": 9615 }, { "epoch": 1.3, "learning_rate": 7.003512564171845e-07, "loss": 2.0285, "step": 9620 }, { "epoch": 1.3, "learning_rate": 6.996757633072142e-07, "loss": 1.9815, "step": 9625 }, { "epoch": 1.3, "learning_rate": 6.990002701972439e-07, "loss": 1.8968, "step": 9630 }, { "epoch": 1.3, "learning_rate": 6.983247770872737e-07, "loss": 1.9809, "step": 9635 }, { "epoch": 1.3, "learning_rate": 6.976492839773034e-07, "loss": 1.9713, "step": 9640 }, { "epoch": 1.3, "learning_rate": 6.969737908673331e-07, "loss": 1.9472, "step": 9645 }, { "epoch": 1.3, "learning_rate": 6.962982977573628e-07, "loss": 1.9328, "step": 9650 }, { "epoch": 1.3, "learning_rate": 6.956228046473926e-07, "loss": 1.9631, "step": 9655 }, { "epoch": 1.31, "learning_rate": 6.949473115374223e-07, "loss": 2.0051, "step": 9660 }, { "epoch": 1.31, "learning_rate": 6.94271818427452e-07, "loss": 1.9866, "step": 9665 }, { "epoch": 1.31, "learning_rate": 6.935963253174817e-07, "loss": 1.9494, "step": 9670 }, { "epoch": 1.31, "learning_rate": 6.929208322075114e-07, "loss": 1.9731, "step": 9675 }, { "epoch": 1.31, "learning_rate": 6.922453390975411e-07, "loss": 1.9718, "step": 9680 }, { "epoch": 1.31, "learning_rate": 6.915698459875708e-07, "loss": 1.9197, "step": 9685 }, { "epoch": 1.31, "learning_rate": 6.908943528776006e-07, "loss": 2.0505, "step": 9690 }, { "epoch": 1.31, "learning_rate": 6.902188597676304e-07, "loss": 2.0586, "step": 9695 }, { "epoch": 1.31, "learning_rate": 6.895433666576601e-07, "loss": 1.9232, "step": 9700 }, { "epoch": 1.31, "learning_rate": 6.888678735476898e-07, "loss": 1.9559, "step": 9705 }, { "epoch": 1.31, "learning_rate": 6.881923804377195e-07, "loss": 2.0318, "step": 9710 }, { "epoch": 1.31, "learning_rate": 6.875168873277493e-07, "loss": 1.8752, "step": 9715 }, { "epoch": 1.31, "learning_rate": 6.86841394217779e-07, "loss": 1.9969, "step": 9720 }, { "epoch": 1.31, "learning_rate": 6.861659011078087e-07, "loss": 1.9531, "step": 9725 }, { "epoch": 1.31, "learning_rate": 6.854904079978385e-07, "loss": 2.0319, "step": 9730 }, { "epoch": 1.32, "learning_rate": 6.848149148878682e-07, "loss": 1.8927, "step": 9735 }, { "epoch": 1.32, "learning_rate": 6.841394217778978e-07, "loss": 1.8673, "step": 9740 }, { "epoch": 1.32, "learning_rate": 6.834639286679275e-07, "loss": 2.0187, "step": 9745 }, { "epoch": 1.32, "learning_rate": 6.827884355579573e-07, "loss": 2.0092, "step": 9750 }, { "epoch": 1.32, "learning_rate": 6.82112942447987e-07, "loss": 2.0315, "step": 9755 }, { "epoch": 1.32, "learning_rate": 6.814374493380167e-07, "loss": 1.9539, "step": 9760 }, { "epoch": 1.32, "learning_rate": 6.807619562280464e-07, "loss": 1.8357, "step": 9765 }, { "epoch": 1.32, "learning_rate": 6.800864631180762e-07, "loss": 2.0681, "step": 9770 }, { "epoch": 1.32, "learning_rate": 6.794109700081059e-07, "loss": 1.9327, "step": 9775 }, { "epoch": 1.32, "learning_rate": 6.787354768981356e-07, "loss": 1.916, "step": 9780 }, { "epoch": 1.32, "learning_rate": 6.780599837881653e-07, "loss": 1.9925, "step": 9785 }, { "epoch": 1.32, "learning_rate": 6.773844906781951e-07, "loss": 1.9407, "step": 9790 }, { "epoch": 1.32, "learning_rate": 6.767089975682248e-07, "loss": 1.9569, "step": 9795 }, { "epoch": 1.32, "learning_rate": 6.760335044582545e-07, "loss": 1.9141, "step": 9800 }, { "epoch": 1.32, "learning_rate": 6.753580113482841e-07, "loss": 1.9845, "step": 9805 }, { "epoch": 1.33, "learning_rate": 6.746825182383139e-07, "loss": 2.0271, "step": 9810 }, { "epoch": 1.33, "learning_rate": 6.740070251283436e-07, "loss": 1.9355, "step": 9815 }, { "epoch": 1.33, "learning_rate": 6.733315320183733e-07, "loss": 1.9558, "step": 9820 }, { "epoch": 1.33, "learning_rate": 6.72656038908403e-07, "loss": 1.9754, "step": 9825 }, { "epoch": 1.33, "learning_rate": 6.719805457984328e-07, "loss": 2.0408, "step": 9830 }, { "epoch": 1.33, "learning_rate": 6.713050526884625e-07, "loss": 1.9422, "step": 9835 }, { "epoch": 1.33, "learning_rate": 6.706295595784923e-07, "loss": 2.0819, "step": 9840 }, { "epoch": 1.33, "learning_rate": 6.699540664685221e-07, "loss": 1.9963, "step": 9845 }, { "epoch": 1.33, "learning_rate": 6.692785733585518e-07, "loss": 1.9835, "step": 9850 }, { "epoch": 1.33, "learning_rate": 6.686030802485815e-07, "loss": 1.9566, "step": 9855 }, { "epoch": 1.33, "learning_rate": 6.679275871386112e-07, "loss": 2.0353, "step": 9860 }, { "epoch": 1.33, "learning_rate": 6.67252094028641e-07, "loss": 2.0683, "step": 9865 }, { "epoch": 1.33, "learning_rate": 6.665766009186706e-07, "loss": 1.9563, "step": 9870 }, { "epoch": 1.33, "learning_rate": 6.659011078087003e-07, "loss": 1.8663, "step": 9875 }, { "epoch": 1.33, "learning_rate": 6.6522561469873e-07, "loss": 1.9794, "step": 9880 }, { "epoch": 1.34, "learning_rate": 6.645501215887598e-07, "loss": 1.9824, "step": 9885 }, { "epoch": 1.34, "learning_rate": 6.638746284787895e-07, "loss": 1.9124, "step": 9890 }, { "epoch": 1.34, "learning_rate": 6.631991353688192e-07, "loss": 1.9687, "step": 9895 }, { "epoch": 1.34, "learning_rate": 6.625236422588489e-07, "loss": 2.0313, "step": 9900 }, { "epoch": 1.34, "learning_rate": 6.618481491488787e-07, "loss": 1.943, "step": 9905 }, { "epoch": 1.34, "learning_rate": 6.611726560389084e-07, "loss": 1.9226, "step": 9910 }, { "epoch": 1.34, "learning_rate": 6.604971629289381e-07, "loss": 2.0105, "step": 9915 }, { "epoch": 1.34, "learning_rate": 6.598216698189678e-07, "loss": 2.0327, "step": 9920 }, { "epoch": 1.34, "learning_rate": 6.591461767089976e-07, "loss": 1.9641, "step": 9925 }, { "epoch": 1.34, "learning_rate": 6.584706835990273e-07, "loss": 1.9631, "step": 9930 }, { "epoch": 1.34, "learning_rate": 6.577951904890569e-07, "loss": 1.9812, "step": 9935 }, { "epoch": 1.34, "learning_rate": 6.571196973790866e-07, "loss": 2.0004, "step": 9940 }, { "epoch": 1.34, "learning_rate": 6.564442042691164e-07, "loss": 1.8798, "step": 9945 }, { "epoch": 1.34, "learning_rate": 6.557687111591461e-07, "loss": 2.0338, "step": 9950 }, { "epoch": 1.34, "learning_rate": 6.550932180491758e-07, "loss": 1.9378, "step": 9955 }, { "epoch": 1.35, "learning_rate": 6.544177249392055e-07, "loss": 1.8363, "step": 9960 }, { "epoch": 1.35, "learning_rate": 6.537422318292353e-07, "loss": 1.9079, "step": 9965 }, { "epoch": 1.35, "learning_rate": 6.53066738719265e-07, "loss": 1.8645, "step": 9970 }, { "epoch": 1.35, "learning_rate": 6.523912456092947e-07, "loss": 1.9543, "step": 9975 }, { "epoch": 1.35, "learning_rate": 6.517157524993245e-07, "loss": 1.942, "step": 9980 }, { "epoch": 1.35, "learning_rate": 6.510402593893543e-07, "loss": 1.9462, "step": 9985 }, { "epoch": 1.35, "learning_rate": 6.50364766279384e-07, "loss": 2.1435, "step": 9990 }, { "epoch": 1.35, "learning_rate": 6.496892731694137e-07, "loss": 2.0368, "step": 9995 }, { "epoch": 1.35, "learning_rate": 6.490137800594434e-07, "loss": 1.9291, "step": 10000 }, { "epoch": 1.35, "eval_loss": 1.961105465888977, "eval_runtime": 165.405, "eval_samples_per_second": 3.603, "eval_steps_per_second": 0.453, "step": 10000 }, { "epoch": 1.35, "learning_rate": 6.483382869494731e-07, "loss": 1.9377, "step": 10005 }, { "epoch": 1.35, "learning_rate": 6.476627938395028e-07, "loss": 1.9865, "step": 10010 }, { "epoch": 1.35, "learning_rate": 6.469873007295325e-07, "loss": 1.9385, "step": 10015 }, { "epoch": 1.35, "learning_rate": 6.463118076195623e-07, "loss": 1.9832, "step": 10020 }, { "epoch": 1.35, "learning_rate": 6.45636314509592e-07, "loss": 2.0501, "step": 10025 }, { "epoch": 1.35, "learning_rate": 6.449608213996217e-07, "loss": 1.931, "step": 10030 }, { "epoch": 1.36, "learning_rate": 6.442853282896514e-07, "loss": 1.9716, "step": 10035 }, { "epoch": 1.36, "learning_rate": 6.436098351796812e-07, "loss": 2.1156, "step": 10040 }, { "epoch": 1.36, "learning_rate": 6.429343420697109e-07, "loss": 1.9412, "step": 10045 }, { "epoch": 1.36, "learning_rate": 6.422588489597406e-07, "loss": 1.9237, "step": 10050 }, { "epoch": 1.36, "learning_rate": 6.415833558497703e-07, "loss": 1.9539, "step": 10055 }, { "epoch": 1.36, "learning_rate": 6.409078627398001e-07, "loss": 1.9547, "step": 10060 }, { "epoch": 1.36, "learning_rate": 6.402323696298297e-07, "loss": 1.9443, "step": 10065 }, { "epoch": 1.36, "learning_rate": 6.395568765198594e-07, "loss": 1.8834, "step": 10070 }, { "epoch": 1.36, "learning_rate": 6.388813834098891e-07, "loss": 1.996, "step": 10075 }, { "epoch": 1.36, "learning_rate": 6.382058902999189e-07, "loss": 1.9791, "step": 10080 }, { "epoch": 1.36, "learning_rate": 6.375303971899486e-07, "loss": 2.0237, "step": 10085 }, { "epoch": 1.36, "learning_rate": 6.368549040799783e-07, "loss": 1.955, "step": 10090 }, { "epoch": 1.36, "learning_rate": 6.36179410970008e-07, "loss": 2.0109, "step": 10095 }, { "epoch": 1.36, "learning_rate": 6.355039178600378e-07, "loss": 1.9346, "step": 10100 }, { "epoch": 1.37, "learning_rate": 6.348284247500675e-07, "loss": 2.0298, "step": 10105 }, { "epoch": 1.37, "learning_rate": 6.341529316400972e-07, "loss": 2.0031, "step": 10110 }, { "epoch": 1.37, "learning_rate": 6.33477438530127e-07, "loss": 1.887, "step": 10115 }, { "epoch": 1.37, "learning_rate": 6.328019454201567e-07, "loss": 1.955, "step": 10120 }, { "epoch": 1.37, "learning_rate": 6.321264523101863e-07, "loss": 1.9275, "step": 10125 }, { "epoch": 1.37, "learning_rate": 6.314509592002161e-07, "loss": 1.8876, "step": 10130 }, { "epoch": 1.37, "learning_rate": 6.307754660902459e-07, "loss": 1.9722, "step": 10135 }, { "epoch": 1.37, "learning_rate": 6.300999729802756e-07, "loss": 1.9802, "step": 10140 }, { "epoch": 1.37, "learning_rate": 6.294244798703053e-07, "loss": 2.0026, "step": 10145 }, { "epoch": 1.37, "learning_rate": 6.28748986760335e-07, "loss": 2.0283, "step": 10150 }, { "epoch": 1.37, "learning_rate": 6.280734936503648e-07, "loss": 1.9919, "step": 10155 }, { "epoch": 1.37, "learning_rate": 6.273980005403945e-07, "loss": 1.915, "step": 10160 }, { "epoch": 1.37, "learning_rate": 6.267225074304242e-07, "loss": 1.9717, "step": 10165 }, { "epoch": 1.37, "learning_rate": 6.260470143204539e-07, "loss": 1.9684, "step": 10170 }, { "epoch": 1.37, "learning_rate": 6.253715212104837e-07, "loss": 2.023, "step": 10175 }, { "epoch": 1.38, "learning_rate": 6.246960281005134e-07, "loss": 1.8704, "step": 10180 }, { "epoch": 1.38, "learning_rate": 6.240205349905431e-07, "loss": 1.9476, "step": 10185 }, { "epoch": 1.38, "learning_rate": 6.233450418805728e-07, "loss": 1.975, "step": 10190 }, { "epoch": 1.38, "learning_rate": 6.226695487706025e-07, "loss": 1.8549, "step": 10195 }, { "epoch": 1.38, "learning_rate": 6.219940556606322e-07, "loss": 2.0041, "step": 10200 }, { "epoch": 1.38, "learning_rate": 6.213185625506619e-07, "loss": 1.9219, "step": 10205 }, { "epoch": 1.38, "learning_rate": 6.206430694406916e-07, "loss": 2.0199, "step": 10210 }, { "epoch": 1.38, "learning_rate": 6.199675763307214e-07, "loss": 2.0977, "step": 10215 }, { "epoch": 1.38, "learning_rate": 6.192920832207511e-07, "loss": 1.9502, "step": 10220 }, { "epoch": 1.38, "learning_rate": 6.186165901107808e-07, "loss": 2.0295, "step": 10225 }, { "epoch": 1.38, "learning_rate": 6.179410970008106e-07, "loss": 1.8866, "step": 10230 }, { "epoch": 1.38, "learning_rate": 6.172656038908403e-07, "loss": 1.9987, "step": 10235 }, { "epoch": 1.38, "learning_rate": 6.1659011078087e-07, "loss": 2.0693, "step": 10240 }, { "epoch": 1.38, "learning_rate": 6.159146176708997e-07, "loss": 1.9641, "step": 10245 }, { "epoch": 1.38, "learning_rate": 6.152391245609295e-07, "loss": 1.8745, "step": 10250 }, { "epoch": 1.39, "learning_rate": 6.145636314509591e-07, "loss": 1.9384, "step": 10255 }, { "epoch": 1.39, "learning_rate": 6.138881383409888e-07, "loss": 1.9278, "step": 10260 }, { "epoch": 1.39, "learning_rate": 6.132126452310185e-07, "loss": 2.0378, "step": 10265 }, { "epoch": 1.39, "learning_rate": 6.125371521210483e-07, "loss": 2.0384, "step": 10270 }, { "epoch": 1.39, "learning_rate": 6.118616590110781e-07, "loss": 1.9244, "step": 10275 }, { "epoch": 1.39, "learning_rate": 6.111861659011078e-07, "loss": 1.9505, "step": 10280 }, { "epoch": 1.39, "learning_rate": 6.105106727911375e-07, "loss": 1.9794, "step": 10285 }, { "epoch": 1.39, "learning_rate": 6.098351796811673e-07, "loss": 1.9922, "step": 10290 }, { "epoch": 1.39, "learning_rate": 6.09159686571197e-07, "loss": 1.9708, "step": 10295 }, { "epoch": 1.39, "learning_rate": 6.084841934612267e-07, "loss": 2.0161, "step": 10300 }, { "epoch": 1.39, "learning_rate": 6.078087003512564e-07, "loss": 1.9192, "step": 10305 }, { "epoch": 1.39, "learning_rate": 6.071332072412862e-07, "loss": 1.91, "step": 10310 }, { "epoch": 1.39, "learning_rate": 6.064577141313159e-07, "loss": 2.0866, "step": 10315 }, { "epoch": 1.39, "learning_rate": 6.057822210213456e-07, "loss": 2.0044, "step": 10320 }, { "epoch": 1.39, "learning_rate": 6.051067279113752e-07, "loss": 2.1199, "step": 10325 }, { "epoch": 1.4, "learning_rate": 6.04431234801405e-07, "loss": 1.996, "step": 10330 }, { "epoch": 1.4, "learning_rate": 6.037557416914347e-07, "loss": 1.9303, "step": 10335 }, { "epoch": 1.4, "learning_rate": 6.030802485814644e-07, "loss": 1.9297, "step": 10340 }, { "epoch": 1.4, "learning_rate": 6.024047554714942e-07, "loss": 1.9069, "step": 10345 }, { "epoch": 1.4, "learning_rate": 6.017292623615239e-07, "loss": 2.0062, "step": 10350 }, { "epoch": 1.4, "learning_rate": 6.010537692515536e-07, "loss": 2.0051, "step": 10355 }, { "epoch": 1.4, "learning_rate": 6.003782761415833e-07, "loss": 1.9996, "step": 10360 }, { "epoch": 1.4, "learning_rate": 5.997027830316131e-07, "loss": 1.9947, "step": 10365 }, { "epoch": 1.4, "learning_rate": 5.990272899216428e-07, "loss": 1.984, "step": 10370 }, { "epoch": 1.4, "learning_rate": 5.983517968116725e-07, "loss": 1.9509, "step": 10375 }, { "epoch": 1.4, "learning_rate": 5.976763037017022e-07, "loss": 1.9455, "step": 10380 }, { "epoch": 1.4, "learning_rate": 5.970008105917319e-07, "loss": 1.9383, "step": 10385 }, { "epoch": 1.4, "learning_rate": 5.963253174817616e-07, "loss": 1.8293, "step": 10390 }, { "epoch": 1.4, "learning_rate": 5.956498243717913e-07, "loss": 1.9228, "step": 10395 }, { "epoch": 1.4, "learning_rate": 5.94974331261821e-07, "loss": 2.0348, "step": 10400 }, { "epoch": 1.4, "eval_loss": 1.9586541652679443, "eval_runtime": 165.5067, "eval_samples_per_second": 3.601, "eval_steps_per_second": 0.453, "step": 10400 }, { "epoch": 1.41, "learning_rate": 5.942988381518508e-07, "loss": 1.9632, "step": 10405 }, { "epoch": 1.41, "learning_rate": 5.936233450418805e-07, "loss": 1.8818, "step": 10410 }, { "epoch": 1.41, "learning_rate": 5.929478519319102e-07, "loss": 1.9375, "step": 10415 }, { "epoch": 1.41, "learning_rate": 5.9227235882194e-07, "loss": 1.9466, "step": 10420 }, { "epoch": 1.41, "learning_rate": 5.915968657119698e-07, "loss": 2.0166, "step": 10425 }, { "epoch": 1.41, "learning_rate": 5.909213726019995e-07, "loss": 1.9846, "step": 10430 }, { "epoch": 1.41, "learning_rate": 5.902458794920292e-07, "loss": 1.9175, "step": 10435 }, { "epoch": 1.41, "learning_rate": 5.89570386382059e-07, "loss": 1.8928, "step": 10440 }, { "epoch": 1.41, "learning_rate": 5.888948932720887e-07, "loss": 1.9582, "step": 10445 }, { "epoch": 1.41, "learning_rate": 5.882194001621184e-07, "loss": 1.9586, "step": 10450 }, { "epoch": 1.41, "learning_rate": 5.87543907052148e-07, "loss": 1.9719, "step": 10455 }, { "epoch": 1.41, "learning_rate": 5.868684139421778e-07, "loss": 1.9996, "step": 10460 }, { "epoch": 1.41, "learning_rate": 5.861929208322075e-07, "loss": 1.935, "step": 10465 }, { "epoch": 1.41, "learning_rate": 5.855174277222372e-07, "loss": 1.9258, "step": 10470 }, { "epoch": 1.42, "learning_rate": 5.848419346122669e-07, "loss": 2.0089, "step": 10475 }, { "epoch": 1.42, "learning_rate": 5.841664415022967e-07, "loss": 1.9161, "step": 10480 }, { "epoch": 1.42, "learning_rate": 5.834909483923264e-07, "loss": 1.9469, "step": 10485 }, { "epoch": 1.42, "learning_rate": 5.828154552823561e-07, "loss": 1.9688, "step": 10490 }, { "epoch": 1.42, "learning_rate": 5.821399621723858e-07, "loss": 1.9424, "step": 10495 }, { "epoch": 1.42, "learning_rate": 5.814644690624156e-07, "loss": 1.9893, "step": 10500 }, { "epoch": 1.42, "learning_rate": 5.807889759524453e-07, "loss": 1.9072, "step": 10505 }, { "epoch": 1.42, "learning_rate": 5.80113482842475e-07, "loss": 2.0403, "step": 10510 }, { "epoch": 1.42, "learning_rate": 5.794379897325046e-07, "loss": 1.9779, "step": 10515 }, { "epoch": 1.42, "learning_rate": 5.787624966225344e-07, "loss": 1.9289, "step": 10520 }, { "epoch": 1.42, "learning_rate": 5.780870035125641e-07, "loss": 2.047, "step": 10525 }, { "epoch": 1.42, "learning_rate": 5.774115104025938e-07, "loss": 2.0729, "step": 10530 }, { "epoch": 1.42, "learning_rate": 5.767360172926235e-07, "loss": 1.9825, "step": 10535 }, { "epoch": 1.42, "learning_rate": 5.760605241826533e-07, "loss": 1.9138, "step": 10540 }, { "epoch": 1.42, "learning_rate": 5.75385031072683e-07, "loss": 1.9638, "step": 10545 }, { "epoch": 1.43, "learning_rate": 5.747095379627127e-07, "loss": 2.0517, "step": 10550 }, { "epoch": 1.43, "learning_rate": 5.740340448527424e-07, "loss": 2.0771, "step": 10555 }, { "epoch": 1.43, "learning_rate": 5.733585517427722e-07, "loss": 1.9698, "step": 10560 }, { "epoch": 1.43, "learning_rate": 5.72683058632802e-07, "loss": 1.9299, "step": 10565 }, { "epoch": 1.43, "learning_rate": 5.720075655228317e-07, "loss": 2.0264, "step": 10570 }, { "epoch": 1.43, "learning_rate": 5.713320724128615e-07, "loss": 2.0212, "step": 10575 }, { "epoch": 1.43, "learning_rate": 5.706565793028912e-07, "loss": 1.9875, "step": 10580 }, { "epoch": 1.43, "learning_rate": 5.699810861929208e-07, "loss": 1.9172, "step": 10585 }, { "epoch": 1.43, "learning_rate": 5.693055930829505e-07, "loss": 2.0435, "step": 10590 }, { "epoch": 1.43, "learning_rate": 5.686300999729803e-07, "loss": 2.0022, "step": 10595 }, { "epoch": 1.43, "learning_rate": 5.6795460686301e-07, "loss": 2.0239, "step": 10600 }, { "epoch": 1.43, "learning_rate": 5.672791137530397e-07, "loss": 1.9575, "step": 10605 }, { "epoch": 1.43, "learning_rate": 5.666036206430694e-07, "loss": 2.0316, "step": 10610 }, { "epoch": 1.43, "learning_rate": 5.659281275330992e-07, "loss": 1.9751, "step": 10615 }, { "epoch": 1.43, "learning_rate": 5.652526344231289e-07, "loss": 1.9895, "step": 10620 }, { "epoch": 1.44, "learning_rate": 5.645771413131586e-07, "loss": 1.9672, "step": 10625 }, { "epoch": 1.44, "learning_rate": 5.639016482031883e-07, "loss": 1.9807, "step": 10630 }, { "epoch": 1.44, "learning_rate": 5.632261550932181e-07, "loss": 1.9333, "step": 10635 }, { "epoch": 1.44, "learning_rate": 5.625506619832478e-07, "loss": 1.835, "step": 10640 }, { "epoch": 1.44, "learning_rate": 5.618751688732774e-07, "loss": 1.9753, "step": 10645 }, { "epoch": 1.44, "learning_rate": 5.611996757633071e-07, "loss": 1.9376, "step": 10650 }, { "epoch": 1.44, "learning_rate": 5.605241826533369e-07, "loss": 1.9732, "step": 10655 }, { "epoch": 1.44, "learning_rate": 5.598486895433666e-07, "loss": 2.0749, "step": 10660 }, { "epoch": 1.44, "learning_rate": 5.591731964333963e-07, "loss": 1.934, "step": 10665 }, { "epoch": 1.44, "learning_rate": 5.58497703323426e-07, "loss": 1.9208, "step": 10670 }, { "epoch": 1.44, "learning_rate": 5.578222102134558e-07, "loss": 1.9562, "step": 10675 }, { "epoch": 1.44, "learning_rate": 5.571467171034855e-07, "loss": 2.082, "step": 10680 }, { "epoch": 1.44, "learning_rate": 5.564712239935152e-07, "loss": 2.0432, "step": 10685 }, { "epoch": 1.44, "learning_rate": 5.557957308835449e-07, "loss": 1.9236, "step": 10690 }, { "epoch": 1.44, "learning_rate": 5.551202377735747e-07, "loss": 2.0632, "step": 10695 }, { "epoch": 1.45, "learning_rate": 5.544447446636044e-07, "loss": 1.9083, "step": 10700 }, { "epoch": 1.45, "learning_rate": 5.537692515536341e-07, "loss": 2.0383, "step": 10705 }, { "epoch": 1.45, "learning_rate": 5.530937584436639e-07, "loss": 1.9063, "step": 10710 }, { "epoch": 1.45, "learning_rate": 5.524182653336936e-07, "loss": 2.0845, "step": 10715 }, { "epoch": 1.45, "learning_rate": 5.517427722237233e-07, "loss": 1.9743, "step": 10720 }, { "epoch": 1.45, "learning_rate": 5.51067279113753e-07, "loss": 2.0553, "step": 10725 }, { "epoch": 1.45, "learning_rate": 5.503917860037828e-07, "loss": 2.0907, "step": 10730 }, { "epoch": 1.45, "learning_rate": 5.497162928938125e-07, "loss": 1.9165, "step": 10735 }, { "epoch": 1.45, "learning_rate": 5.490407997838422e-07, "loss": 1.9457, "step": 10740 }, { "epoch": 1.45, "learning_rate": 5.483653066738719e-07, "loss": 1.9602, "step": 10745 }, { "epoch": 1.45, "learning_rate": 5.476898135639017e-07, "loss": 1.9842, "step": 10750 }, { "epoch": 1.45, "learning_rate": 5.470143204539314e-07, "loss": 1.9074, "step": 10755 }, { "epoch": 1.45, "learning_rate": 5.463388273439611e-07, "loss": 1.9034, "step": 10760 }, { "epoch": 1.45, "learning_rate": 5.456633342339908e-07, "loss": 1.966, "step": 10765 }, { "epoch": 1.45, "learning_rate": 5.449878411240206e-07, "loss": 2.0049, "step": 10770 }, { "epoch": 1.46, "learning_rate": 5.443123480140502e-07, "loss": 1.9493, "step": 10775 }, { "epoch": 1.46, "learning_rate": 5.436368549040799e-07, "loss": 1.9264, "step": 10780 }, { "epoch": 1.46, "learning_rate": 5.429613617941096e-07, "loss": 1.9516, "step": 10785 }, { "epoch": 1.46, "learning_rate": 5.422858686841394e-07, "loss": 2.0036, "step": 10790 }, { "epoch": 1.46, "learning_rate": 5.416103755741691e-07, "loss": 1.9817, "step": 10795 }, { "epoch": 1.46, "learning_rate": 5.409348824641988e-07, "loss": 1.9724, "step": 10800 }, { "epoch": 1.46, "eval_loss": 1.9568144083023071, "eval_runtime": 165.314, "eval_samples_per_second": 3.605, "eval_steps_per_second": 0.454, "step": 10800 }, { "epoch": 1.46, "learning_rate": 5.402593893542285e-07, "loss": 2.0044, "step": 10805 }, { "epoch": 1.46, "learning_rate": 5.395838962442583e-07, "loss": 1.9504, "step": 10810 }, { "epoch": 1.46, "learning_rate": 5.38908403134288e-07, "loss": 1.9536, "step": 10815 }, { "epoch": 1.46, "learning_rate": 5.382329100243177e-07, "loss": 1.9608, "step": 10820 }, { "epoch": 1.46, "learning_rate": 5.375574169143475e-07, "loss": 2.0382, "step": 10825 }, { "epoch": 1.46, "learning_rate": 5.368819238043772e-07, "loss": 1.9799, "step": 10830 }, { "epoch": 1.46, "learning_rate": 5.362064306944069e-07, "loss": 1.9551, "step": 10835 }, { "epoch": 1.46, "learning_rate": 5.355309375844365e-07, "loss": 1.9896, "step": 10840 }, { "epoch": 1.47, "learning_rate": 5.348554444744662e-07, "loss": 2.039, "step": 10845 }, { "epoch": 1.47, "learning_rate": 5.34179951364496e-07, "loss": 1.9283, "step": 10850 }, { "epoch": 1.47, "learning_rate": 5.335044582545257e-07, "loss": 1.924, "step": 10855 }, { "epoch": 1.47, "learning_rate": 5.328289651445555e-07, "loss": 2.0038, "step": 10860 }, { "epoch": 1.47, "learning_rate": 5.321534720345853e-07, "loss": 1.9953, "step": 10865 }, { "epoch": 1.47, "learning_rate": 5.31477978924615e-07, "loss": 2.0623, "step": 10870 }, { "epoch": 1.47, "learning_rate": 5.308024858146447e-07, "loss": 2.0246, "step": 10875 }, { "epoch": 1.47, "learning_rate": 5.301269927046744e-07, "loss": 1.9648, "step": 10880 }, { "epoch": 1.47, "learning_rate": 5.294514995947042e-07, "loss": 1.9202, "step": 10885 }, { "epoch": 1.47, "learning_rate": 5.287760064847339e-07, "loss": 2.0132, "step": 10890 }, { "epoch": 1.47, "learning_rate": 5.281005133747636e-07, "loss": 2.0106, "step": 10895 }, { "epoch": 1.47, "learning_rate": 5.274250202647933e-07, "loss": 2.006, "step": 10900 }, { "epoch": 1.47, "learning_rate": 5.26749527154823e-07, "loss": 1.9925, "step": 10905 }, { "epoch": 1.47, "learning_rate": 5.260740340448527e-07, "loss": 2.018, "step": 10910 }, { "epoch": 1.47, "learning_rate": 5.253985409348824e-07, "loss": 1.9322, "step": 10915 }, { "epoch": 1.48, "learning_rate": 5.247230478249121e-07, "loss": 1.9453, "step": 10920 }, { "epoch": 1.48, "learning_rate": 5.240475547149419e-07, "loss": 1.965, "step": 10925 }, { "epoch": 1.48, "learning_rate": 5.233720616049716e-07, "loss": 1.9446, "step": 10930 }, { "epoch": 1.48, "learning_rate": 5.226965684950013e-07, "loss": 1.9942, "step": 10935 }, { "epoch": 1.48, "learning_rate": 5.22021075385031e-07, "loss": 1.9138, "step": 10940 }, { "epoch": 1.48, "learning_rate": 5.213455822750608e-07, "loss": 1.9764, "step": 10945 }, { "epoch": 1.48, "learning_rate": 5.206700891650905e-07, "loss": 1.9884, "step": 10950 }, { "epoch": 1.48, "learning_rate": 5.199945960551202e-07, "loss": 1.9135, "step": 10955 }, { "epoch": 1.48, "learning_rate": 5.1931910294515e-07, "loss": 2.0193, "step": 10960 }, { "epoch": 1.48, "learning_rate": 5.186436098351797e-07, "loss": 1.9446, "step": 10965 }, { "epoch": 1.48, "learning_rate": 5.179681167252093e-07, "loss": 2.0596, "step": 10970 }, { "epoch": 1.48, "learning_rate": 5.17292623615239e-07, "loss": 1.9912, "step": 10975 }, { "epoch": 1.48, "learning_rate": 5.166171305052688e-07, "loss": 1.9624, "step": 10980 }, { "epoch": 1.48, "learning_rate": 5.159416373952985e-07, "loss": 1.9297, "step": 10985 }, { "epoch": 1.48, "learning_rate": 5.152661442853282e-07, "loss": 1.9808, "step": 10990 }, { "epoch": 1.49, "learning_rate": 5.145906511753579e-07, "loss": 1.9933, "step": 10995 }, { "epoch": 1.49, "learning_rate": 5.139151580653877e-07, "loss": 1.9767, "step": 11000 }, { "epoch": 1.49, "learning_rate": 5.132396649554175e-07, "loss": 1.9229, "step": 11005 }, { "epoch": 1.49, "learning_rate": 5.125641718454472e-07, "loss": 1.9596, "step": 11010 }, { "epoch": 1.49, "learning_rate": 5.118886787354769e-07, "loss": 1.9631, "step": 11015 }, { "epoch": 1.49, "learning_rate": 5.112131856255067e-07, "loss": 1.9485, "step": 11020 }, { "epoch": 1.49, "learning_rate": 5.105376925155364e-07, "loss": 1.9808, "step": 11025 }, { "epoch": 1.49, "learning_rate": 5.098621994055661e-07, "loss": 1.925, "step": 11030 }, { "epoch": 1.49, "learning_rate": 5.091867062955957e-07, "loss": 1.9282, "step": 11035 }, { "epoch": 1.49, "learning_rate": 5.085112131856255e-07, "loss": 1.9395, "step": 11040 }, { "epoch": 1.49, "learning_rate": 5.078357200756552e-07, "loss": 1.9219, "step": 11045 }, { "epoch": 1.49, "learning_rate": 5.071602269656849e-07, "loss": 1.8854, "step": 11050 }, { "epoch": 1.49, "learning_rate": 5.064847338557146e-07, "loss": 1.9981, "step": 11055 }, { "epoch": 1.49, "learning_rate": 5.058092407457444e-07, "loss": 1.9279, "step": 11060 }, { "epoch": 1.49, "learning_rate": 5.051337476357741e-07, "loss": 2.0484, "step": 11065 }, { "epoch": 1.5, "learning_rate": 5.044582545258038e-07, "loss": 1.971, "step": 11070 }, { "epoch": 1.5, "learning_rate": 5.037827614158336e-07, "loss": 1.914, "step": 11075 }, { "epoch": 1.5, "learning_rate": 5.031072683058633e-07, "loss": 1.9193, "step": 11080 }, { "epoch": 1.5, "learning_rate": 5.02431775195893e-07, "loss": 1.9318, "step": 11085 }, { "epoch": 1.5, "learning_rate": 5.017562820859227e-07, "loss": 1.9414, "step": 11090 }, { "epoch": 1.5, "learning_rate": 5.010807889759525e-07, "loss": 1.9664, "step": 11095 }, { "epoch": 1.5, "learning_rate": 5.004052958659821e-07, "loss": 2.0118, "step": 11100 }, { "epoch": 1.5, "learning_rate": 4.997298027560118e-07, "loss": 1.9932, "step": 11105 }, { "epoch": 1.5, "learning_rate": 4.990543096460415e-07, "loss": 1.9015, "step": 11110 }, { "epoch": 1.5, "learning_rate": 4.983788165360714e-07, "loss": 1.9539, "step": 11115 }, { "epoch": 1.5, "learning_rate": 4.977033234261011e-07, "loss": 2.1365, "step": 11120 }, { "epoch": 1.5, "learning_rate": 4.970278303161308e-07, "loss": 1.966, "step": 11125 }, { "epoch": 1.5, "learning_rate": 4.963523372061604e-07, "loss": 2.0074, "step": 11130 }, { "epoch": 1.5, "learning_rate": 4.956768440961902e-07, "loss": 2.0762, "step": 11135 }, { "epoch": 1.5, "learning_rate": 4.950013509862199e-07, "loss": 1.8631, "step": 11140 }, { "epoch": 1.51, "learning_rate": 4.943258578762496e-07, "loss": 1.9708, "step": 11145 }, { "epoch": 1.51, "learning_rate": 4.936503647662793e-07, "loss": 1.9905, "step": 11150 }, { "epoch": 1.51, "learning_rate": 4.929748716563091e-07, "loss": 1.9543, "step": 11155 }, { "epoch": 1.51, "learning_rate": 4.922993785463388e-07, "loss": 1.9418, "step": 11160 }, { "epoch": 1.51, "learning_rate": 4.916238854363685e-07, "loss": 1.9517, "step": 11165 }, { "epoch": 1.51, "learning_rate": 4.909483923263982e-07, "loss": 1.8429, "step": 11170 }, { "epoch": 1.51, "learning_rate": 4.90272899216428e-07, "loss": 1.9754, "step": 11175 }, { "epoch": 1.51, "learning_rate": 4.895974061064577e-07, "loss": 1.8974, "step": 11180 }, { "epoch": 1.51, "learning_rate": 4.889219129964874e-07, "loss": 1.9996, "step": 11185 }, { "epoch": 1.51, "learning_rate": 4.882464198865172e-07, "loss": 2.0173, "step": 11190 }, { "epoch": 1.51, "learning_rate": 4.875709267765469e-07, "loss": 1.9683, "step": 11195 }, { "epoch": 1.51, "learning_rate": 4.868954336665766e-07, "loss": 1.9497, "step": 11200 }, { "epoch": 1.51, "eval_loss": 1.955120325088501, "eval_runtime": 165.2289, "eval_samples_per_second": 3.607, "eval_steps_per_second": 0.454, "step": 11200 }, { "epoch": 1.51, "learning_rate": 4.862199405566063e-07, "loss": 1.9811, "step": 11205 }, { "epoch": 1.51, "learning_rate": 4.855444474466361e-07, "loss": 1.9879, "step": 11210 }, { "epoch": 1.52, "learning_rate": 4.848689543366658e-07, "loss": 1.9957, "step": 11215 }, { "epoch": 1.52, "learning_rate": 4.841934612266955e-07, "loss": 1.8997, "step": 11220 }, { "epoch": 1.52, "learning_rate": 4.835179681167252e-07, "loss": 1.9366, "step": 11225 }, { "epoch": 1.52, "learning_rate": 4.828424750067549e-07, "loss": 1.9237, "step": 11230 }, { "epoch": 1.52, "learning_rate": 4.821669818967846e-07, "loss": 1.9348, "step": 11235 }, { "epoch": 1.52, "learning_rate": 4.814914887868143e-07, "loss": 1.8665, "step": 11240 }, { "epoch": 1.52, "learning_rate": 4.80815995676844e-07, "loss": 1.9615, "step": 11245 }, { "epoch": 1.52, "learning_rate": 4.801405025668738e-07, "loss": 2.0397, "step": 11250 }, { "epoch": 1.52, "learning_rate": 4.794650094569035e-07, "loss": 2.0333, "step": 11255 }, { "epoch": 1.52, "learning_rate": 4.787895163469332e-07, "loss": 1.996, "step": 11260 }, { "epoch": 1.52, "learning_rate": 4.781140232369629e-07, "loss": 1.971, "step": 11265 }, { "epoch": 1.52, "learning_rate": 4.774385301269927e-07, "loss": 2.0498, "step": 11270 }, { "epoch": 1.52, "learning_rate": 4.767630370170224e-07, "loss": 2.0136, "step": 11275 }, { "epoch": 1.52, "learning_rate": 4.760875439070521e-07, "loss": 1.9929, "step": 11280 }, { "epoch": 1.52, "learning_rate": 4.7541205079708184e-07, "loss": 1.9976, "step": 11285 }, { "epoch": 1.53, "learning_rate": 4.7473655768711157e-07, "loss": 1.8972, "step": 11290 }, { "epoch": 1.53, "learning_rate": 4.740610645771413e-07, "loss": 2.0062, "step": 11295 }, { "epoch": 1.53, "learning_rate": 4.73385571467171e-07, "loss": 2.0605, "step": 11300 }, { "epoch": 1.53, "learning_rate": 4.7271007835720075e-07, "loss": 1.9654, "step": 11305 }, { "epoch": 1.53, "learning_rate": 4.720345852472304e-07, "loss": 2.0147, "step": 11310 }, { "epoch": 1.53, "learning_rate": 4.7135909213726015e-07, "loss": 1.9944, "step": 11315 }, { "epoch": 1.53, "learning_rate": 4.706835990272899e-07, "loss": 1.9651, "step": 11320 }, { "epoch": 1.53, "learning_rate": 4.700081059173196e-07, "loss": 1.9287, "step": 11325 }, { "epoch": 1.53, "learning_rate": 4.693326128073494e-07, "loss": 2.0328, "step": 11330 }, { "epoch": 1.53, "learning_rate": 4.686571196973791e-07, "loss": 2.0085, "step": 11335 }, { "epoch": 1.53, "learning_rate": 4.679816265874088e-07, "loss": 1.969, "step": 11340 }, { "epoch": 1.53, "learning_rate": 4.673061334774385e-07, "loss": 1.8905, "step": 11345 }, { "epoch": 1.53, "learning_rate": 4.6663064036746823e-07, "loss": 1.9451, "step": 11350 }, { "epoch": 1.53, "learning_rate": 4.6595514725749796e-07, "loss": 1.9741, "step": 11355 }, { "epoch": 1.53, "learning_rate": 4.652796541475277e-07, "loss": 1.9929, "step": 11360 }, { "epoch": 1.54, "learning_rate": 4.646041610375574e-07, "loss": 1.9585, "step": 11365 }, { "epoch": 1.54, "learning_rate": 4.6392866792758714e-07, "loss": 1.9383, "step": 11370 }, { "epoch": 1.54, "learning_rate": 4.632531748176168e-07, "loss": 1.9451, "step": 11375 }, { "epoch": 1.54, "learning_rate": 4.6257768170764654e-07, "loss": 1.9353, "step": 11380 }, { "epoch": 1.54, "learning_rate": 4.6190218859767626e-07, "loss": 1.9131, "step": 11385 }, { "epoch": 1.54, "learning_rate": 4.61226695487706e-07, "loss": 2.0578, "step": 11390 }, { "epoch": 1.54, "learning_rate": 4.605512023777357e-07, "loss": 1.9363, "step": 11395 }, { "epoch": 1.54, "learning_rate": 4.5987570926776544e-07, "loss": 1.9979, "step": 11400 }, { "epoch": 1.54, "learning_rate": 4.5920021615779517e-07, "loss": 1.9738, "step": 11405 }, { "epoch": 1.54, "learning_rate": 4.585247230478249e-07, "loss": 1.9685, "step": 11410 }, { "epoch": 1.54, "learning_rate": 4.578492299378546e-07, "loss": 1.9402, "step": 11415 }, { "epoch": 1.54, "learning_rate": 4.5717373682788435e-07, "loss": 1.9906, "step": 11420 }, { "epoch": 1.54, "learning_rate": 4.564982437179141e-07, "loss": 1.8393, "step": 11425 }, { "epoch": 1.54, "learning_rate": 4.558227506079438e-07, "loss": 1.988, "step": 11430 }, { "epoch": 1.54, "learning_rate": 4.5514725749797353e-07, "loss": 1.9981, "step": 11435 }, { "epoch": 1.55, "learning_rate": 4.544717643880032e-07, "loss": 1.9823, "step": 11440 }, { "epoch": 1.55, "learning_rate": 4.537962712780329e-07, "loss": 1.9773, "step": 11445 }, { "epoch": 1.55, "learning_rate": 4.5312077816806265e-07, "loss": 1.9343, "step": 11450 }, { "epoch": 1.55, "learning_rate": 4.524452850580924e-07, "loss": 1.949, "step": 11455 }, { "epoch": 1.55, "learning_rate": 4.517697919481221e-07, "loss": 1.9503, "step": 11460 }, { "epoch": 1.55, "learning_rate": 4.5109429883815183e-07, "loss": 1.9878, "step": 11465 }, { "epoch": 1.55, "learning_rate": 4.504188057281815e-07, "loss": 2.0098, "step": 11470 }, { "epoch": 1.55, "learning_rate": 4.497433126182113e-07, "loss": 1.9646, "step": 11475 }, { "epoch": 1.55, "learning_rate": 4.49067819508241e-07, "loss": 2.0194, "step": 11480 }, { "epoch": 1.55, "learning_rate": 4.4839232639827074e-07, "loss": 1.9737, "step": 11485 }, { "epoch": 1.55, "learning_rate": 4.4771683328830046e-07, "loss": 1.9999, "step": 11490 }, { "epoch": 1.55, "learning_rate": 4.470413401783302e-07, "loss": 1.9922, "step": 11495 }, { "epoch": 1.55, "learning_rate": 4.463658470683599e-07, "loss": 1.9929, "step": 11500 }, { "epoch": 1.55, "learning_rate": 4.456903539583896e-07, "loss": 2.0408, "step": 11505 }, { "epoch": 1.55, "learning_rate": 4.450148608484193e-07, "loss": 1.9746, "step": 11510 }, { "epoch": 1.56, "learning_rate": 4.4433936773844904e-07, "loss": 2.0071, "step": 11515 }, { "epoch": 1.56, "learning_rate": 4.4366387462847877e-07, "loss": 1.9578, "step": 11520 }, { "epoch": 1.56, "learning_rate": 4.429883815185085e-07, "loss": 1.9269, "step": 11525 }, { "epoch": 1.56, "learning_rate": 4.423128884085382e-07, "loss": 1.9608, "step": 11530 }, { "epoch": 1.56, "learning_rate": 4.416373952985679e-07, "loss": 1.9224, "step": 11535 }, { "epoch": 1.56, "learning_rate": 4.409619021885976e-07, "loss": 1.9858, "step": 11540 }, { "epoch": 1.56, "learning_rate": 4.4028640907862735e-07, "loss": 1.9847, "step": 11545 }, { "epoch": 1.56, "learning_rate": 4.396109159686571e-07, "loss": 2.0205, "step": 11550 }, { "epoch": 1.56, "learning_rate": 4.3893542285868685e-07, "loss": 1.9723, "step": 11555 }, { "epoch": 1.56, "learning_rate": 4.382599297487166e-07, "loss": 2.043, "step": 11560 }, { "epoch": 1.56, "learning_rate": 4.375844366387463e-07, "loss": 1.9467, "step": 11565 }, { "epoch": 1.56, "learning_rate": 4.36908943528776e-07, "loss": 1.9304, "step": 11570 }, { "epoch": 1.56, "learning_rate": 4.362334504188057e-07, "loss": 2.0435, "step": 11575 }, { "epoch": 1.56, "learning_rate": 4.3555795730883543e-07, "loss": 1.9946, "step": 11580 }, { "epoch": 1.57, "learning_rate": 4.3488246419886516e-07, "loss": 1.9891, "step": 11585 }, { "epoch": 1.57, "learning_rate": 4.342069710888949e-07, "loss": 1.9875, "step": 11590 }, { "epoch": 1.57, "learning_rate": 4.335314779789246e-07, "loss": 2.0281, "step": 11595 }, { "epoch": 1.57, "learning_rate": 4.328559848689543e-07, "loss": 1.8857, "step": 11600 }, { "epoch": 1.57, "eval_loss": 1.9535282850265503, "eval_runtime": 165.4539, "eval_samples_per_second": 3.602, "eval_steps_per_second": 0.453, "step": 11600 }, { "epoch": 1.57, "learning_rate": 4.32180491758984e-07, "loss": 1.9551, "step": 11605 }, { "epoch": 1.57, "learning_rate": 4.3150499864901373e-07, "loss": 1.9194, "step": 11610 }, { "epoch": 1.57, "learning_rate": 4.3082950553904346e-07, "loss": 1.9471, "step": 11615 }, { "epoch": 1.57, "learning_rate": 4.301540124290732e-07, "loss": 1.9033, "step": 11620 }, { "epoch": 1.57, "learning_rate": 4.2947851931910297e-07, "loss": 1.9445, "step": 11625 }, { "epoch": 1.57, "learning_rate": 4.288030262091327e-07, "loss": 1.9713, "step": 11630 }, { "epoch": 1.57, "learning_rate": 4.2812753309916237e-07, "loss": 1.9252, "step": 11635 }, { "epoch": 1.57, "learning_rate": 4.274520399891921e-07, "loss": 1.88, "step": 11640 }, { "epoch": 1.57, "learning_rate": 4.267765468792218e-07, "loss": 1.9462, "step": 11645 }, { "epoch": 1.57, "learning_rate": 4.2610105376925154e-07, "loss": 2.0004, "step": 11650 }, { "epoch": 1.57, "learning_rate": 4.2542556065928127e-07, "loss": 1.9346, "step": 11655 }, { "epoch": 1.58, "learning_rate": 4.24750067549311e-07, "loss": 2.0424, "step": 11660 }, { "epoch": 1.58, "learning_rate": 4.2407457443934067e-07, "loss": 1.966, "step": 11665 }, { "epoch": 1.58, "learning_rate": 4.233990813293704e-07, "loss": 2.016, "step": 11670 }, { "epoch": 1.58, "learning_rate": 4.227235882194001e-07, "loss": 1.9669, "step": 11675 }, { "epoch": 1.58, "learning_rate": 4.2204809510942985e-07, "loss": 1.9423, "step": 11680 }, { "epoch": 1.58, "learning_rate": 4.213726019994596e-07, "loss": 1.9125, "step": 11685 }, { "epoch": 1.58, "learning_rate": 4.206971088894893e-07, "loss": 1.8883, "step": 11690 }, { "epoch": 1.58, "learning_rate": 4.200216157795191e-07, "loss": 1.9714, "step": 11695 }, { "epoch": 1.58, "learning_rate": 4.1934612266954875e-07, "loss": 1.9444, "step": 11700 }, { "epoch": 1.58, "learning_rate": 4.186706295595785e-07, "loss": 1.9369, "step": 11705 }, { "epoch": 1.58, "learning_rate": 4.179951364496082e-07, "loss": 1.9787, "step": 11710 }, { "epoch": 1.58, "learning_rate": 4.1731964333963793e-07, "loss": 1.8535, "step": 11715 }, { "epoch": 1.58, "learning_rate": 4.1664415022966766e-07, "loss": 1.9828, "step": 11720 }, { "epoch": 1.58, "learning_rate": 4.159686571196974e-07, "loss": 1.9836, "step": 11725 }, { "epoch": 1.58, "learning_rate": 4.1529316400972706e-07, "loss": 1.9408, "step": 11730 }, { "epoch": 1.59, "learning_rate": 4.146176708997568e-07, "loss": 1.9519, "step": 11735 }, { "epoch": 1.59, "learning_rate": 4.139421777897865e-07, "loss": 2.0592, "step": 11740 }, { "epoch": 1.59, "learning_rate": 4.1326668467981624e-07, "loss": 1.9832, "step": 11745 }, { "epoch": 1.59, "learning_rate": 4.1259119156984596e-07, "loss": 1.9843, "step": 11750 }, { "epoch": 1.59, "learning_rate": 4.119156984598757e-07, "loss": 1.947, "step": 11755 }, { "epoch": 1.59, "learning_rate": 4.1124020534990536e-07, "loss": 1.9285, "step": 11760 }, { "epoch": 1.59, "learning_rate": 4.105647122399351e-07, "loss": 1.8873, "step": 11765 }, { "epoch": 1.59, "learning_rate": 4.0988921912996487e-07, "loss": 1.9133, "step": 11770 }, { "epoch": 1.59, "learning_rate": 4.092137260199946e-07, "loss": 2.0237, "step": 11775 }, { "epoch": 1.59, "learning_rate": 4.085382329100243e-07, "loss": 1.9161, "step": 11780 }, { "epoch": 1.59, "learning_rate": 4.0786273980005405e-07, "loss": 1.8488, "step": 11785 }, { "epoch": 1.59, "learning_rate": 4.071872466900838e-07, "loss": 2.0393, "step": 11790 }, { "epoch": 1.59, "learning_rate": 4.0651175358011345e-07, "loss": 1.9536, "step": 11795 }, { "epoch": 1.59, "learning_rate": 4.058362604701432e-07, "loss": 1.97, "step": 11800 }, { "epoch": 1.59, "learning_rate": 4.051607673601729e-07, "loss": 2.003, "step": 11805 }, { "epoch": 1.6, "learning_rate": 4.0448527425020263e-07, "loss": 1.9425, "step": 11810 }, { "epoch": 1.6, "learning_rate": 4.0380978114023235e-07, "loss": 1.9305, "step": 11815 }, { "epoch": 1.6, "learning_rate": 4.031342880302621e-07, "loss": 1.8838, "step": 11820 }, { "epoch": 1.6, "learning_rate": 4.0245879492029175e-07, "loss": 2.0721, "step": 11825 }, { "epoch": 1.6, "learning_rate": 4.017833018103215e-07, "loss": 1.8947, "step": 11830 }, { "epoch": 1.6, "learning_rate": 4.011078087003512e-07, "loss": 1.9371, "step": 11835 }, { "epoch": 1.6, "learning_rate": 4.00432315590381e-07, "loss": 1.9521, "step": 11840 }, { "epoch": 1.6, "learning_rate": 3.997568224804107e-07, "loss": 1.9498, "step": 11845 }, { "epoch": 1.6, "learning_rate": 3.9908132937044044e-07, "loss": 1.9536, "step": 11850 }, { "epoch": 1.6, "learning_rate": 3.9840583626047016e-07, "loss": 1.9916, "step": 11855 }, { "epoch": 1.6, "learning_rate": 3.9773034315049984e-07, "loss": 1.9506, "step": 11860 }, { "epoch": 1.6, "learning_rate": 3.9705485004052956e-07, "loss": 1.9623, "step": 11865 }, { "epoch": 1.6, "learning_rate": 3.963793569305593e-07, "loss": 1.9731, "step": 11870 }, { "epoch": 1.6, "learning_rate": 3.95703863820589e-07, "loss": 1.9713, "step": 11875 }, { "epoch": 1.6, "learning_rate": 3.9502837071061874e-07, "loss": 2.0692, "step": 11880 }, { "epoch": 1.61, "learning_rate": 3.9435287760064847e-07, "loss": 1.9771, "step": 11885 }, { "epoch": 1.61, "learning_rate": 3.9367738449067814e-07, "loss": 2.0068, "step": 11890 }, { "epoch": 1.61, "learning_rate": 3.9300189138070787e-07, "loss": 1.8429, "step": 11895 }, { "epoch": 1.61, "learning_rate": 3.923263982707376e-07, "loss": 2.0443, "step": 11900 }, { "epoch": 1.61, "learning_rate": 3.916509051607673e-07, "loss": 1.9551, "step": 11905 }, { "epoch": 1.61, "learning_rate": 3.9097541205079705e-07, "loss": 1.8964, "step": 11910 }, { "epoch": 1.61, "learning_rate": 3.902999189408268e-07, "loss": 1.9391, "step": 11915 }, { "epoch": 1.61, "learning_rate": 3.8962442583085655e-07, "loss": 2.0496, "step": 11920 }, { "epoch": 1.61, "learning_rate": 3.889489327208862e-07, "loss": 2.0249, "step": 11925 }, { "epoch": 1.61, "learning_rate": 3.8827343961091595e-07, "loss": 2.0152, "step": 11930 }, { "epoch": 1.61, "learning_rate": 3.875979465009457e-07, "loss": 1.9238, "step": 11935 }, { "epoch": 1.61, "learning_rate": 3.869224533909754e-07, "loss": 2.0196, "step": 11940 }, { "epoch": 1.61, "learning_rate": 3.8624696028100513e-07, "loss": 1.9307, "step": 11945 }, { "epoch": 1.61, "learning_rate": 3.8557146717103486e-07, "loss": 1.9859, "step": 11950 }, { "epoch": 1.62, "learning_rate": 3.8489597406106453e-07, "loss": 1.9542, "step": 11955 }, { "epoch": 1.62, "learning_rate": 3.8422048095109426e-07, "loss": 1.8884, "step": 11960 }, { "epoch": 1.62, "learning_rate": 3.83544987841124e-07, "loss": 2.0328, "step": 11965 }, { "epoch": 1.62, "learning_rate": 3.828694947311537e-07, "loss": 1.9707, "step": 11970 }, { "epoch": 1.62, "learning_rate": 3.8219400162118344e-07, "loss": 1.9789, "step": 11975 }, { "epoch": 1.62, "learning_rate": 3.8151850851121316e-07, "loss": 1.964, "step": 11980 }, { "epoch": 1.62, "learning_rate": 3.8084301540124294e-07, "loss": 1.9321, "step": 11985 }, { "epoch": 1.62, "learning_rate": 3.801675222912726e-07, "loss": 1.916, "step": 11990 }, { "epoch": 1.62, "learning_rate": 3.7949202918130234e-07, "loss": 1.8828, "step": 11995 }, { "epoch": 1.62, "learning_rate": 3.7881653607133207e-07, "loss": 1.9949, "step": 12000 }, { "epoch": 1.62, "eval_loss": 1.952235221862793, "eval_runtime": 165.3673, "eval_samples_per_second": 3.604, "eval_steps_per_second": 0.454, "step": 12000 }, { "epoch": 1.62, "learning_rate": 3.781410429613618e-07, "loss": 1.8852, "step": 12005 }, { "epoch": 1.62, "learning_rate": 3.774655498513915e-07, "loss": 1.9413, "step": 12010 }, { "epoch": 1.62, "learning_rate": 3.7679005674142125e-07, "loss": 2.0267, "step": 12015 }, { "epoch": 1.62, "learning_rate": 3.761145636314509e-07, "loss": 1.8799, "step": 12020 }, { "epoch": 1.62, "learning_rate": 3.7543907052148065e-07, "loss": 1.9891, "step": 12025 }, { "epoch": 1.63, "learning_rate": 3.7476357741151037e-07, "loss": 1.9889, "step": 12030 }, { "epoch": 1.63, "learning_rate": 3.740880843015401e-07, "loss": 1.984, "step": 12035 }, { "epoch": 1.63, "learning_rate": 3.734125911915698e-07, "loss": 1.8569, "step": 12040 }, { "epoch": 1.63, "learning_rate": 3.7273709808159955e-07, "loss": 1.9308, "step": 12045 }, { "epoch": 1.63, "learning_rate": 3.720616049716293e-07, "loss": 1.9315, "step": 12050 }, { "epoch": 1.63, "learning_rate": 3.7138611186165895e-07, "loss": 1.9194, "step": 12055 }, { "epoch": 1.63, "learning_rate": 3.7071061875168873e-07, "loss": 1.9769, "step": 12060 }, { "epoch": 1.63, "learning_rate": 3.7003512564171846e-07, "loss": 1.9433, "step": 12065 }, { "epoch": 1.63, "learning_rate": 3.693596325317482e-07, "loss": 1.8922, "step": 12070 }, { "epoch": 1.63, "learning_rate": 3.686841394217779e-07, "loss": 2.0268, "step": 12075 }, { "epoch": 1.63, "learning_rate": 3.6800864631180763e-07, "loss": 1.9862, "step": 12080 }, { "epoch": 1.63, "learning_rate": 3.673331532018373e-07, "loss": 2.0022, "step": 12085 }, { "epoch": 1.63, "learning_rate": 3.6665766009186703e-07, "loss": 1.9151, "step": 12090 }, { "epoch": 1.63, "learning_rate": 3.6598216698189676e-07, "loss": 2.0128, "step": 12095 }, { "epoch": 1.63, "learning_rate": 3.653066738719265e-07, "loss": 1.9876, "step": 12100 }, { "epoch": 1.64, "learning_rate": 3.646311807619562e-07, "loss": 2.0628, "step": 12105 }, { "epoch": 1.64, "learning_rate": 3.6395568765198594e-07, "loss": 1.9863, "step": 12110 }, { "epoch": 1.64, "learning_rate": 3.6328019454201567e-07, "loss": 1.9916, "step": 12115 }, { "epoch": 1.64, "learning_rate": 3.6260470143204534e-07, "loss": 2.0298, "step": 12120 }, { "epoch": 1.64, "learning_rate": 3.6192920832207506e-07, "loss": 2.0356, "step": 12125 }, { "epoch": 1.64, "learning_rate": 3.612537152121048e-07, "loss": 1.927, "step": 12130 }, { "epoch": 1.64, "learning_rate": 3.6057822210213457e-07, "loss": 1.9696, "step": 12135 }, { "epoch": 1.64, "learning_rate": 3.599027289921643e-07, "loss": 2.0689, "step": 12140 }, { "epoch": 1.64, "learning_rate": 3.59227235882194e-07, "loss": 1.9127, "step": 12145 }, { "epoch": 1.64, "learning_rate": 3.585517427722237e-07, "loss": 1.9824, "step": 12150 }, { "epoch": 1.64, "learning_rate": 3.578762496622534e-07, "loss": 2.0156, "step": 12155 }, { "epoch": 1.64, "learning_rate": 3.5720075655228315e-07, "loss": 1.9376, "step": 12160 }, { "epoch": 1.64, "learning_rate": 3.565252634423129e-07, "loss": 1.8839, "step": 12165 }, { "epoch": 1.64, "learning_rate": 3.558497703323426e-07, "loss": 1.986, "step": 12170 }, { "epoch": 1.64, "learning_rate": 3.5517427722237233e-07, "loss": 1.9391, "step": 12175 }, { "epoch": 1.65, "learning_rate": 3.5449878411240205e-07, "loss": 1.9136, "step": 12180 }, { "epoch": 1.65, "learning_rate": 3.5382329100243173e-07, "loss": 1.9457, "step": 12185 }, { "epoch": 1.65, "learning_rate": 3.5314779789246145e-07, "loss": 2.0062, "step": 12190 }, { "epoch": 1.65, "learning_rate": 3.524723047824912e-07, "loss": 1.9796, "step": 12195 }, { "epoch": 1.65, "learning_rate": 3.517968116725209e-07, "loss": 2.0649, "step": 12200 }, { "epoch": 1.65, "learning_rate": 3.511213185625507e-07, "loss": 2.0202, "step": 12205 }, { "epoch": 1.65, "learning_rate": 3.504458254525804e-07, "loss": 1.9865, "step": 12210 }, { "epoch": 1.65, "learning_rate": 3.497703323426101e-07, "loss": 1.9317, "step": 12215 }, { "epoch": 1.65, "learning_rate": 3.490948392326398e-07, "loss": 1.9266, "step": 12220 }, { "epoch": 1.65, "learning_rate": 3.4841934612266954e-07, "loss": 1.9678, "step": 12225 }, { "epoch": 1.65, "learning_rate": 3.4774385301269926e-07, "loss": 2.0067, "step": 12230 }, { "epoch": 1.65, "learning_rate": 3.47068359902729e-07, "loss": 1.9964, "step": 12235 }, { "epoch": 1.65, "learning_rate": 3.463928667927587e-07, "loss": 2.0902, "step": 12240 }, { "epoch": 1.65, "learning_rate": 3.457173736827884e-07, "loss": 1.9893, "step": 12245 }, { "epoch": 1.65, "learning_rate": 3.450418805728181e-07, "loss": 1.9439, "step": 12250 }, { "epoch": 1.66, "learning_rate": 3.4436638746284784e-07, "loss": 1.9843, "step": 12255 }, { "epoch": 1.66, "learning_rate": 3.4369089435287757e-07, "loss": 2.0811, "step": 12260 }, { "epoch": 1.66, "learning_rate": 3.430154012429073e-07, "loss": 1.8165, "step": 12265 }, { "epoch": 1.66, "learning_rate": 3.42339908132937e-07, "loss": 2.0074, "step": 12270 }, { "epoch": 1.66, "learning_rate": 3.4166441502296675e-07, "loss": 1.8722, "step": 12275 }, { "epoch": 1.66, "learning_rate": 3.4098892191299647e-07, "loss": 1.9534, "step": 12280 }, { "epoch": 1.66, "learning_rate": 3.403134288030262e-07, "loss": 2.0328, "step": 12285 }, { "epoch": 1.66, "learning_rate": 3.396379356930559e-07, "loss": 2.0205, "step": 12290 }, { "epoch": 1.66, "learning_rate": 3.3896244258308565e-07, "loss": 1.9499, "step": 12295 }, { "epoch": 1.66, "learning_rate": 3.382869494731154e-07, "loss": 1.9113, "step": 12300 }, { "epoch": 1.66, "learning_rate": 3.376114563631451e-07, "loss": 1.9177, "step": 12305 }, { "epoch": 1.66, "learning_rate": 3.369359632531748e-07, "loss": 1.8988, "step": 12310 }, { "epoch": 1.66, "learning_rate": 3.362604701432045e-07, "loss": 1.9675, "step": 12315 }, { "epoch": 1.66, "learning_rate": 3.3558497703323423e-07, "loss": 1.9732, "step": 12320 }, { "epoch": 1.67, "learning_rate": 3.3490948392326396e-07, "loss": 1.987, "step": 12325 }, { "epoch": 1.67, "learning_rate": 3.342339908132937e-07, "loss": 1.9473, "step": 12330 }, { "epoch": 1.67, "learning_rate": 3.335584977033234e-07, "loss": 1.9229, "step": 12335 }, { "epoch": 1.67, "learning_rate": 3.3288300459335314e-07, "loss": 1.9963, "step": 12340 }, { "epoch": 1.67, "learning_rate": 3.322075114833828e-07, "loss": 1.8348, "step": 12345 }, { "epoch": 1.67, "learning_rate": 3.315320183734126e-07, "loss": 1.9457, "step": 12350 }, { "epoch": 1.67, "learning_rate": 3.308565252634423e-07, "loss": 1.9232, "step": 12355 }, { "epoch": 1.67, "learning_rate": 3.3018103215347204e-07, "loss": 1.9743, "step": 12360 }, { "epoch": 1.67, "learning_rate": 3.2950553904350177e-07, "loss": 1.9761, "step": 12365 }, { "epoch": 1.67, "learning_rate": 3.288300459335315e-07, "loss": 1.9756, "step": 12370 }, { "epoch": 1.67, "learning_rate": 3.2815455282356117e-07, "loss": 1.8987, "step": 12375 }, { "epoch": 1.67, "learning_rate": 3.274790597135909e-07, "loss": 2.0811, "step": 12380 }, { "epoch": 1.67, "learning_rate": 3.268035666036206e-07, "loss": 1.9758, "step": 12385 }, { "epoch": 1.67, "learning_rate": 3.2612807349365035e-07, "loss": 1.9949, "step": 12390 }, { "epoch": 1.67, "learning_rate": 3.2545258038368007e-07, "loss": 1.8838, "step": 12395 }, { "epoch": 1.68, "learning_rate": 3.247770872737098e-07, "loss": 1.9292, "step": 12400 }, { "epoch": 1.68, "eval_loss": 1.9511464834213257, "eval_runtime": 165.4983, "eval_samples_per_second": 3.601, "eval_steps_per_second": 0.453, "step": 12400 }, { "epoch": 1.68, "learning_rate": 3.241015941637395e-07, "loss": 1.9073, "step": 12405 }, { "epoch": 1.68, "learning_rate": 3.234261010537692e-07, "loss": 1.9725, "step": 12410 }, { "epoch": 1.68, "learning_rate": 3.227506079437989e-07, "loss": 1.8673, "step": 12415 }, { "epoch": 1.68, "learning_rate": 3.2207511483382865e-07, "loss": 2.0299, "step": 12420 }, { "epoch": 1.68, "learning_rate": 3.2139962172385843e-07, "loss": 1.9586, "step": 12425 }, { "epoch": 1.68, "learning_rate": 3.2072412861388816e-07, "loss": 1.9822, "step": 12430 }, { "epoch": 1.68, "learning_rate": 3.200486355039179e-07, "loss": 2.0641, "step": 12435 }, { "epoch": 1.68, "learning_rate": 3.1937314239394756e-07, "loss": 2.0372, "step": 12440 }, { "epoch": 1.68, "learning_rate": 3.186976492839773e-07, "loss": 1.9135, "step": 12445 }, { "epoch": 1.68, "learning_rate": 3.18022156174007e-07, "loss": 1.9977, "step": 12450 }, { "epoch": 1.68, "learning_rate": 3.1734666306403673e-07, "loss": 1.9981, "step": 12455 }, { "epoch": 1.68, "learning_rate": 3.1667116995406646e-07, "loss": 2.0354, "step": 12460 }, { "epoch": 1.68, "learning_rate": 3.159956768440962e-07, "loss": 1.9714, "step": 12465 }, { "epoch": 1.68, "learning_rate": 3.153201837341259e-07, "loss": 2.0264, "step": 12470 }, { "epoch": 1.69, "learning_rate": 3.146446906241556e-07, "loss": 1.9805, "step": 12475 }, { "epoch": 1.69, "learning_rate": 3.139691975141853e-07, "loss": 2.0613, "step": 12480 }, { "epoch": 1.69, "learning_rate": 3.1329370440421504e-07, "loss": 1.8951, "step": 12485 }, { "epoch": 1.69, "learning_rate": 3.1261821129424477e-07, "loss": 1.9624, "step": 12490 }, { "epoch": 1.69, "learning_rate": 3.1194271818427454e-07, "loss": 1.9934, "step": 12495 }, { "epoch": 1.69, "learning_rate": 3.1126722507430427e-07, "loss": 1.9149, "step": 12500 }, { "epoch": 1.69, "learning_rate": 3.1059173196433394e-07, "loss": 1.9528, "step": 12505 }, { "epoch": 1.69, "learning_rate": 3.0991623885436367e-07, "loss": 1.9816, "step": 12510 }, { "epoch": 1.69, "learning_rate": 3.092407457443934e-07, "loss": 1.9338, "step": 12515 }, { "epoch": 1.69, "learning_rate": 3.085652526344231e-07, "loss": 1.9294, "step": 12520 }, { "epoch": 1.69, "learning_rate": 3.0788975952445285e-07, "loss": 1.9416, "step": 12525 }, { "epoch": 1.69, "learning_rate": 3.072142664144826e-07, "loss": 2.0216, "step": 12530 }, { "epoch": 1.69, "learning_rate": 3.065387733045123e-07, "loss": 1.9174, "step": 12535 }, { "epoch": 1.69, "learning_rate": 3.05863280194542e-07, "loss": 1.8646, "step": 12540 }, { "epoch": 1.69, "learning_rate": 3.051877870845717e-07, "loss": 2.0483, "step": 12545 }, { "epoch": 1.7, "learning_rate": 3.0451229397460143e-07, "loss": 1.9749, "step": 12550 }, { "epoch": 1.7, "learning_rate": 3.0383680086463115e-07, "loss": 2.0014, "step": 12555 }, { "epoch": 1.7, "learning_rate": 3.031613077546609e-07, "loss": 1.9313, "step": 12560 }, { "epoch": 1.7, "learning_rate": 3.024858146446906e-07, "loss": 1.8786, "step": 12565 }, { "epoch": 1.7, "learning_rate": 3.0181032153472033e-07, "loss": 1.8782, "step": 12570 }, { "epoch": 1.7, "learning_rate": 3.0113482842475006e-07, "loss": 1.9038, "step": 12575 }, { "epoch": 1.7, "learning_rate": 3.004593353147798e-07, "loss": 1.9865, "step": 12580 }, { "epoch": 1.7, "learning_rate": 2.997838422048095e-07, "loss": 1.9386, "step": 12585 }, { "epoch": 1.7, "learning_rate": 2.9910834909483924e-07, "loss": 1.884, "step": 12590 }, { "epoch": 1.7, "learning_rate": 2.9843285598486896e-07, "loss": 1.9116, "step": 12595 }, { "epoch": 1.7, "learning_rate": 2.977573628748987e-07, "loss": 1.9599, "step": 12600 }, { "epoch": 1.7, "learning_rate": 2.9708186976492836e-07, "loss": 1.966, "step": 12605 }, { "epoch": 1.7, "learning_rate": 2.964063766549581e-07, "loss": 1.9358, "step": 12610 }, { "epoch": 1.7, "learning_rate": 2.957308835449878e-07, "loss": 2.0271, "step": 12615 }, { "epoch": 1.7, "learning_rate": 2.9505539043501754e-07, "loss": 1.9708, "step": 12620 }, { "epoch": 1.71, "learning_rate": 2.9437989732504727e-07, "loss": 1.8144, "step": 12625 }, { "epoch": 1.71, "learning_rate": 2.93704404215077e-07, "loss": 2.0278, "step": 12630 }, { "epoch": 1.71, "learning_rate": 2.9302891110510667e-07, "loss": 1.9605, "step": 12635 }, { "epoch": 1.71, "learning_rate": 2.923534179951364e-07, "loss": 1.9958, "step": 12640 }, { "epoch": 1.71, "learning_rate": 2.916779248851662e-07, "loss": 1.9269, "step": 12645 }, { "epoch": 1.71, "learning_rate": 2.910024317751959e-07, "loss": 1.9597, "step": 12650 }, { "epoch": 1.71, "learning_rate": 2.903269386652256e-07, "loss": 1.9553, "step": 12655 }, { "epoch": 1.71, "learning_rate": 2.8965144555525535e-07, "loss": 1.9797, "step": 12660 }, { "epoch": 1.71, "learning_rate": 2.889759524452851e-07, "loss": 1.9698, "step": 12665 }, { "epoch": 1.71, "learning_rate": 2.8830045933531475e-07, "loss": 2.0515, "step": 12670 }, { "epoch": 1.71, "learning_rate": 2.876249662253445e-07, "loss": 1.9132, "step": 12675 }, { "epoch": 1.71, "learning_rate": 2.869494731153742e-07, "loss": 1.9046, "step": 12680 }, { "epoch": 1.71, "learning_rate": 2.8627398000540393e-07, "loss": 1.9502, "step": 12685 }, { "epoch": 1.71, "learning_rate": 2.8559848689543366e-07, "loss": 2.0462, "step": 12690 }, { "epoch": 1.72, "learning_rate": 2.849229937854634e-07, "loss": 2.0225, "step": 12695 }, { "epoch": 1.72, "learning_rate": 2.8424750067549306e-07, "loss": 1.9613, "step": 12700 }, { "epoch": 1.72, "learning_rate": 2.835720075655228e-07, "loss": 1.9458, "step": 12705 }, { "epoch": 1.72, "learning_rate": 2.828965144555525e-07, "loss": 1.9629, "step": 12710 }, { "epoch": 1.72, "learning_rate": 2.822210213455823e-07, "loss": 1.9982, "step": 12715 }, { "epoch": 1.72, "learning_rate": 2.81545528235612e-07, "loss": 1.9519, "step": 12720 }, { "epoch": 1.72, "learning_rate": 2.8087003512564174e-07, "loss": 2.0149, "step": 12725 }, { "epoch": 1.72, "learning_rate": 2.8019454201567147e-07, "loss": 1.9178, "step": 12730 }, { "epoch": 1.72, "learning_rate": 2.7951904890570114e-07, "loss": 1.9672, "step": 12735 }, { "epoch": 1.72, "learning_rate": 2.7884355579573087e-07, "loss": 1.9545, "step": 12740 }, { "epoch": 1.72, "learning_rate": 2.781680626857606e-07, "loss": 1.9199, "step": 12745 }, { "epoch": 1.72, "learning_rate": 2.774925695757903e-07, "loss": 1.962, "step": 12750 }, { "epoch": 1.72, "learning_rate": 2.7681707646582005e-07, "loss": 1.9126, "step": 12755 }, { "epoch": 1.72, "learning_rate": 2.7614158335584977e-07, "loss": 1.9452, "step": 12760 }, { "epoch": 1.72, "learning_rate": 2.7546609024587945e-07, "loss": 2.0155, "step": 12765 }, { "epoch": 1.73, "learning_rate": 2.7479059713590917e-07, "loss": 1.9335, "step": 12770 }, { "epoch": 1.73, "learning_rate": 2.741151040259389e-07, "loss": 1.8992, "step": 12775 }, { "epoch": 1.73, "learning_rate": 2.734396109159686e-07, "loss": 1.9687, "step": 12780 }, { "epoch": 1.73, "learning_rate": 2.7276411780599835e-07, "loss": 1.9148, "step": 12785 }, { "epoch": 1.73, "learning_rate": 2.7208862469602813e-07, "loss": 2.0001, "step": 12790 }, { "epoch": 1.73, "learning_rate": 2.7141313158605786e-07, "loss": 1.9786, "step": 12795 }, { "epoch": 1.73, "learning_rate": 2.7073763847608753e-07, "loss": 1.937, "step": 12800 }, { "epoch": 1.73, "eval_loss": 1.95003080368042, "eval_runtime": 165.3799, "eval_samples_per_second": 3.604, "eval_steps_per_second": 0.454, "step": 12800 }, { "epoch": 1.73, "learning_rate": 2.7006214536611726e-07, "loss": 1.9034, "step": 12805 }, { "epoch": 1.73, "learning_rate": 2.69386652256147e-07, "loss": 2.009, "step": 12810 }, { "epoch": 1.73, "learning_rate": 2.687111591461767e-07, "loss": 1.957, "step": 12815 }, { "epoch": 1.73, "learning_rate": 2.6803566603620643e-07, "loss": 2.0366, "step": 12820 }, { "epoch": 1.73, "learning_rate": 2.6736017292623616e-07, "loss": 1.9522, "step": 12825 }, { "epoch": 1.73, "learning_rate": 2.6668467981626583e-07, "loss": 1.9014, "step": 12830 }, { "epoch": 1.73, "learning_rate": 2.6600918670629556e-07, "loss": 1.8543, "step": 12835 }, { "epoch": 1.73, "learning_rate": 2.653336935963253e-07, "loss": 1.9508, "step": 12840 }, { "epoch": 1.74, "learning_rate": 2.64658200486355e-07, "loss": 1.9476, "step": 12845 }, { "epoch": 1.74, "learning_rate": 2.6398270737638474e-07, "loss": 1.9962, "step": 12850 }, { "epoch": 1.74, "learning_rate": 2.6330721426641447e-07, "loss": 1.8131, "step": 12855 }, { "epoch": 1.74, "learning_rate": 2.6263172115644424e-07, "loss": 1.8921, "step": 12860 }, { "epoch": 1.74, "learning_rate": 2.619562280464739e-07, "loss": 1.9959, "step": 12865 }, { "epoch": 1.74, "learning_rate": 2.6128073493650364e-07, "loss": 1.9768, "step": 12870 }, { "epoch": 1.74, "learning_rate": 2.6060524182653337e-07, "loss": 1.9248, "step": 12875 }, { "epoch": 1.74, "learning_rate": 2.599297487165631e-07, "loss": 1.9049, "step": 12880 }, { "epoch": 1.74, "learning_rate": 2.592542556065928e-07, "loss": 1.9095, "step": 12885 }, { "epoch": 1.74, "learning_rate": 2.5857876249662255e-07, "loss": 1.8965, "step": 12890 }, { "epoch": 1.74, "learning_rate": 2.579032693866522e-07, "loss": 1.9665, "step": 12895 }, { "epoch": 1.74, "learning_rate": 2.5722777627668195e-07, "loss": 1.9266, "step": 12900 }, { "epoch": 1.74, "learning_rate": 2.565522831667117e-07, "loss": 2.0833, "step": 12905 }, { "epoch": 1.74, "learning_rate": 2.558767900567414e-07, "loss": 1.9587, "step": 12910 }, { "epoch": 1.74, "learning_rate": 2.5520129694677113e-07, "loss": 2.1045, "step": 12915 }, { "epoch": 1.75, "learning_rate": 2.5452580383680085e-07, "loss": 2.0159, "step": 12920 }, { "epoch": 1.75, "learning_rate": 2.5385031072683053e-07, "loss": 1.9911, "step": 12925 }, { "epoch": 1.75, "learning_rate": 2.5317481761686025e-07, "loss": 1.9373, "step": 12930 }, { "epoch": 1.75, "learning_rate": 2.5249932450689003e-07, "loss": 2.012, "step": 12935 }, { "epoch": 1.75, "learning_rate": 2.5182383139691976e-07, "loss": 1.9334, "step": 12940 }, { "epoch": 1.75, "learning_rate": 2.511483382869495e-07, "loss": 2.0098, "step": 12945 }, { "epoch": 1.75, "learning_rate": 2.504728451769792e-07, "loss": 1.8779, "step": 12950 }, { "epoch": 1.75, "learning_rate": 2.4979735206700894e-07, "loss": 1.8818, "step": 12955 }, { "epoch": 1.75, "learning_rate": 2.491218589570386e-07, "loss": 1.9791, "step": 12960 }, { "epoch": 1.75, "learning_rate": 2.4844636584706834e-07, "loss": 1.9753, "step": 12965 }, { "epoch": 1.75, "learning_rate": 2.4777087273709806e-07, "loss": 1.9361, "step": 12970 }, { "epoch": 1.75, "learning_rate": 2.470953796271278e-07, "loss": 1.8509, "step": 12975 }, { "epoch": 1.75, "learning_rate": 2.464198865171575e-07, "loss": 2.0183, "step": 12980 }, { "epoch": 1.75, "learning_rate": 2.4574439340718724e-07, "loss": 1.9777, "step": 12985 }, { "epoch": 1.75, "learning_rate": 2.4506890029721697e-07, "loss": 1.9786, "step": 12990 }, { "epoch": 1.76, "learning_rate": 2.443934071872467e-07, "loss": 1.9021, "step": 12995 }, { "epoch": 1.76, "learning_rate": 2.437179140772764e-07, "loss": 2.0193, "step": 13000 }, { "epoch": 1.76, "learning_rate": 2.4304242096730615e-07, "loss": 1.8966, "step": 13005 }, { "epoch": 1.76, "learning_rate": 2.423669278573358e-07, "loss": 2.0073, "step": 13010 }, { "epoch": 1.76, "learning_rate": 2.4169143474736555e-07, "loss": 1.9552, "step": 13015 }, { "epoch": 1.76, "learning_rate": 2.410159416373953e-07, "loss": 1.9251, "step": 13020 }, { "epoch": 1.76, "learning_rate": 2.40340448527425e-07, "loss": 2.0429, "step": 13025 }, { "epoch": 1.76, "learning_rate": 2.3966495541745473e-07, "loss": 2.0433, "step": 13030 }, { "epoch": 1.76, "learning_rate": 2.3898946230748445e-07, "loss": 1.9169, "step": 13035 }, { "epoch": 1.76, "learning_rate": 2.3831396919751418e-07, "loss": 1.9662, "step": 13040 }, { "epoch": 1.76, "learning_rate": 2.376384760875439e-07, "loss": 1.9404, "step": 13045 }, { "epoch": 1.76, "learning_rate": 2.369629829775736e-07, "loss": 1.9799, "step": 13050 }, { "epoch": 1.76, "learning_rate": 2.3628748986760333e-07, "loss": 1.8214, "step": 13055 }, { "epoch": 1.76, "learning_rate": 2.3561199675763308e-07, "loss": 1.9932, "step": 13060 }, { "epoch": 1.77, "learning_rate": 2.3493650364766278e-07, "loss": 1.9733, "step": 13065 }, { "epoch": 1.77, "learning_rate": 2.342610105376925e-07, "loss": 1.9566, "step": 13070 }, { "epoch": 1.77, "learning_rate": 2.3358551742772224e-07, "loss": 1.897, "step": 13075 }, { "epoch": 1.77, "learning_rate": 2.3291002431775194e-07, "loss": 1.9646, "step": 13080 }, { "epoch": 1.77, "learning_rate": 2.3223453120778166e-07, "loss": 2.0013, "step": 13085 }, { "epoch": 1.77, "learning_rate": 2.315590380978114e-07, "loss": 1.8838, "step": 13090 }, { "epoch": 1.77, "learning_rate": 2.308835449878411e-07, "loss": 2.0112, "step": 13095 }, { "epoch": 1.77, "learning_rate": 2.3020805187787084e-07, "loss": 1.9287, "step": 13100 }, { "epoch": 1.77, "learning_rate": 2.2953255876790057e-07, "loss": 2.0699, "step": 13105 }, { "epoch": 1.77, "learning_rate": 2.288570656579303e-07, "loss": 1.8843, "step": 13110 }, { "epoch": 1.77, "learning_rate": 2.2818157254796e-07, "loss": 1.9018, "step": 13115 }, { "epoch": 1.77, "learning_rate": 2.2750607943798972e-07, "loss": 1.9611, "step": 13120 }, { "epoch": 1.77, "learning_rate": 2.2683058632801945e-07, "loss": 1.9831, "step": 13125 }, { "epoch": 1.77, "learning_rate": 2.2615509321804915e-07, "loss": 1.9069, "step": 13130 }, { "epoch": 1.77, "learning_rate": 2.254796001080789e-07, "loss": 1.9747, "step": 13135 }, { "epoch": 1.78, "learning_rate": 2.2480410699810863e-07, "loss": 1.9792, "step": 13140 }, { "epoch": 1.78, "learning_rate": 2.2412861388813833e-07, "loss": 2.0183, "step": 13145 }, { "epoch": 1.78, "learning_rate": 2.2345312077816805e-07, "loss": 1.9591, "step": 13150 }, { "epoch": 1.78, "learning_rate": 2.2277762766819778e-07, "loss": 1.9085, "step": 13155 }, { "epoch": 1.78, "learning_rate": 2.2210213455822748e-07, "loss": 2.0675, "step": 13160 }, { "epoch": 1.78, "learning_rate": 2.214266414482572e-07, "loss": 1.9204, "step": 13165 }, { "epoch": 1.78, "learning_rate": 2.2075114833828696e-07, "loss": 2.035, "step": 13170 }, { "epoch": 1.78, "learning_rate": 2.2007565522831668e-07, "loss": 2.0147, "step": 13175 }, { "epoch": 1.78, "learning_rate": 2.1940016211834638e-07, "loss": 1.917, "step": 13180 }, { "epoch": 1.78, "learning_rate": 2.187246690083761e-07, "loss": 1.9055, "step": 13185 }, { "epoch": 1.78, "learning_rate": 2.1804917589840584e-07, "loss": 1.9916, "step": 13190 }, { "epoch": 1.78, "learning_rate": 2.1737368278843553e-07, "loss": 1.9845, "step": 13195 }, { "epoch": 1.78, "learning_rate": 2.1669818967846526e-07, "loss": 2.0573, "step": 13200 }, { "epoch": 1.78, "eval_loss": 1.9494786262512207, "eval_runtime": 165.1665, "eval_samples_per_second": 3.608, "eval_steps_per_second": 0.454, "step": 13200 }, { "epoch": 1.78, "learning_rate": 2.1602269656849501e-07, "loss": 2.0237, "step": 13205 }, { "epoch": 1.78, "learning_rate": 2.1534720345852471e-07, "loss": 1.9262, "step": 13210 }, { "epoch": 1.79, "learning_rate": 2.1467171034855444e-07, "loss": 1.9819, "step": 13215 }, { "epoch": 1.79, "learning_rate": 2.1399621723858417e-07, "loss": 1.9537, "step": 13220 }, { "epoch": 1.79, "learning_rate": 2.1332072412861387e-07, "loss": 1.8926, "step": 13225 }, { "epoch": 1.79, "learning_rate": 2.126452310186436e-07, "loss": 1.9364, "step": 13230 }, { "epoch": 1.79, "learning_rate": 2.1196973790867332e-07, "loss": 1.9544, "step": 13235 }, { "epoch": 1.79, "learning_rate": 2.1129424479870302e-07, "loss": 1.9288, "step": 13240 }, { "epoch": 1.79, "learning_rate": 2.1061875168873277e-07, "loss": 1.9206, "step": 13245 }, { "epoch": 1.79, "learning_rate": 2.099432585787625e-07, "loss": 1.9185, "step": 13250 }, { "epoch": 1.79, "learning_rate": 2.0926776546879222e-07, "loss": 1.9306, "step": 13255 }, { "epoch": 1.79, "learning_rate": 2.0859227235882192e-07, "loss": 1.9818, "step": 13260 }, { "epoch": 1.79, "learning_rate": 2.0791677924885165e-07, "loss": 1.9057, "step": 13265 }, { "epoch": 1.79, "learning_rate": 2.0724128613888138e-07, "loss": 1.9167, "step": 13270 }, { "epoch": 1.79, "learning_rate": 2.0656579302891108e-07, "loss": 1.9288, "step": 13275 }, { "epoch": 1.79, "learning_rate": 2.0589029991894083e-07, "loss": 2.0249, "step": 13280 }, { "epoch": 1.79, "learning_rate": 2.0521480680897055e-07, "loss": 2.0134, "step": 13285 }, { "epoch": 1.8, "learning_rate": 2.0453931369900025e-07, "loss": 2.0235, "step": 13290 }, { "epoch": 1.8, "learning_rate": 2.0386382058902998e-07, "loss": 1.8421, "step": 13295 }, { "epoch": 1.8, "learning_rate": 2.031883274790597e-07, "loss": 1.9947, "step": 13300 }, { "epoch": 1.8, "learning_rate": 2.025128343690894e-07, "loss": 2.0185, "step": 13305 }, { "epoch": 1.8, "learning_rate": 2.0183734125911913e-07, "loss": 1.955, "step": 13310 }, { "epoch": 1.8, "learning_rate": 2.0116184814914889e-07, "loss": 2.025, "step": 13315 }, { "epoch": 1.8, "learning_rate": 2.004863550391786e-07, "loss": 2.0244, "step": 13320 }, { "epoch": 1.8, "learning_rate": 1.998108619292083e-07, "loss": 1.9509, "step": 13325 }, { "epoch": 1.8, "learning_rate": 1.9913536881923804e-07, "loss": 2.064, "step": 13330 }, { "epoch": 1.8, "learning_rate": 1.9845987570926776e-07, "loss": 1.8872, "step": 13335 }, { "epoch": 1.8, "learning_rate": 1.9778438259929746e-07, "loss": 1.9271, "step": 13340 }, { "epoch": 1.8, "learning_rate": 1.971088894893272e-07, "loss": 1.9738, "step": 13345 }, { "epoch": 1.8, "learning_rate": 1.9643339637935692e-07, "loss": 1.9089, "step": 13350 }, { "epoch": 1.8, "learning_rate": 1.9575790326938664e-07, "loss": 1.9611, "step": 13355 }, { "epoch": 1.8, "learning_rate": 1.9508241015941637e-07, "loss": 1.8635, "step": 13360 }, { "epoch": 1.81, "learning_rate": 1.944069170494461e-07, "loss": 2.0379, "step": 13365 }, { "epoch": 1.81, "learning_rate": 1.937314239394758e-07, "loss": 1.9776, "step": 13370 }, { "epoch": 1.81, "learning_rate": 1.9305593082950552e-07, "loss": 1.8788, "step": 13375 }, { "epoch": 1.81, "learning_rate": 1.9238043771953525e-07, "loss": 1.9518, "step": 13380 }, { "epoch": 1.81, "learning_rate": 1.9170494460956497e-07, "loss": 2.0224, "step": 13385 }, { "epoch": 1.81, "learning_rate": 1.910294514995947e-07, "loss": 1.8859, "step": 13390 }, { "epoch": 1.81, "learning_rate": 1.9035395838962443e-07, "loss": 1.9991, "step": 13395 }, { "epoch": 1.81, "learning_rate": 1.8967846527965415e-07, "loss": 1.9029, "step": 13400 }, { "epoch": 1.81, "learning_rate": 1.8900297216968385e-07, "loss": 1.9768, "step": 13405 }, { "epoch": 1.81, "learning_rate": 1.8832747905971358e-07, "loss": 1.8818, "step": 13410 }, { "epoch": 1.81, "learning_rate": 1.876519859497433e-07, "loss": 1.8601, "step": 13415 }, { "epoch": 1.81, "learning_rate": 1.86976492839773e-07, "loss": 1.9527, "step": 13420 }, { "epoch": 1.81, "learning_rate": 1.8630099972980276e-07, "loss": 1.9236, "step": 13425 }, { "epoch": 1.81, "learning_rate": 1.8562550661983248e-07, "loss": 1.8954, "step": 13430 }, { "epoch": 1.81, "learning_rate": 1.8495001350986218e-07, "loss": 2.0441, "step": 13435 }, { "epoch": 1.82, "learning_rate": 1.842745203998919e-07, "loss": 1.9821, "step": 13440 }, { "epoch": 1.82, "learning_rate": 1.8359902728992164e-07, "loss": 1.958, "step": 13445 }, { "epoch": 1.82, "learning_rate": 1.8292353417995134e-07, "loss": 1.8381, "step": 13450 }, { "epoch": 1.82, "learning_rate": 1.8224804106998106e-07, "loss": 1.9425, "step": 13455 }, { "epoch": 1.82, "learning_rate": 1.8157254796001082e-07, "loss": 1.9096, "step": 13460 }, { "epoch": 1.82, "learning_rate": 1.8089705485004054e-07, "loss": 1.9968, "step": 13465 }, { "epoch": 1.82, "learning_rate": 1.8022156174007024e-07, "loss": 1.8728, "step": 13470 }, { "epoch": 1.82, "learning_rate": 1.7954606863009997e-07, "loss": 1.9997, "step": 13475 }, { "epoch": 1.82, "learning_rate": 1.788705755201297e-07, "loss": 1.931, "step": 13480 }, { "epoch": 1.82, "learning_rate": 1.781950824101594e-07, "loss": 1.9951, "step": 13485 }, { "epoch": 1.82, "learning_rate": 1.7751958930018912e-07, "loss": 1.9485, "step": 13490 }, { "epoch": 1.82, "learning_rate": 1.7684409619021885e-07, "loss": 2.0155, "step": 13495 }, { "epoch": 1.82, "learning_rate": 1.7616860308024857e-07, "loss": 1.9448, "step": 13500 }, { "epoch": 1.82, "learning_rate": 1.754931099702783e-07, "loss": 2.0389, "step": 13505 }, { "epoch": 1.83, "learning_rate": 1.7481761686030803e-07, "loss": 1.9617, "step": 13510 }, { "epoch": 1.83, "learning_rate": 1.7414212375033773e-07, "loss": 2.0045, "step": 13515 }, { "epoch": 1.83, "learning_rate": 1.7346663064036745e-07, "loss": 1.8886, "step": 13520 }, { "epoch": 1.83, "learning_rate": 1.7279113753039718e-07, "loss": 2.0266, "step": 13525 }, { "epoch": 1.83, "learning_rate": 1.721156444204269e-07, "loss": 2.0236, "step": 13530 }, { "epoch": 1.83, "learning_rate": 1.7144015131045663e-07, "loss": 1.9988, "step": 13535 }, { "epoch": 1.83, "learning_rate": 1.7076465820048636e-07, "loss": 1.8733, "step": 13540 }, { "epoch": 1.83, "learning_rate": 1.7008916509051608e-07, "loss": 1.886, "step": 13545 }, { "epoch": 1.83, "learning_rate": 1.6941367198054578e-07, "loss": 2.0076, "step": 13550 }, { "epoch": 1.83, "learning_rate": 1.687381788705755e-07, "loss": 1.9322, "step": 13555 }, { "epoch": 1.83, "learning_rate": 1.6806268576060524e-07, "loss": 1.955, "step": 13560 }, { "epoch": 1.83, "learning_rate": 1.6738719265063494e-07, "loss": 1.9884, "step": 13565 }, { "epoch": 1.83, "learning_rate": 1.667116995406647e-07, "loss": 1.9705, "step": 13570 }, { "epoch": 1.83, "learning_rate": 1.6603620643069441e-07, "loss": 2.0089, "step": 13575 }, { "epoch": 1.83, "learning_rate": 1.6536071332072411e-07, "loss": 2.0441, "step": 13580 }, { "epoch": 1.84, "learning_rate": 1.6468522021075384e-07, "loss": 1.9654, "step": 13585 }, { "epoch": 1.84, "learning_rate": 1.6400972710078357e-07, "loss": 1.919, "step": 13590 }, { "epoch": 1.84, "learning_rate": 1.633342339908133e-07, "loss": 2.0188, "step": 13595 }, { "epoch": 1.84, "learning_rate": 1.62658740880843e-07, "loss": 2.0181, "step": 13600 }, { "epoch": 1.84, "eval_loss": 1.9488131999969482, "eval_runtime": 165.4599, "eval_samples_per_second": 3.602, "eval_steps_per_second": 0.453, "step": 13600 }, { "epoch": 1.84, "learning_rate": 1.6198324777087272e-07, "loss": 1.8996, "step": 13605 }, { "epoch": 1.84, "learning_rate": 1.6130775466090247e-07, "loss": 1.9036, "step": 13610 }, { "epoch": 1.84, "learning_rate": 1.6063226155093217e-07, "loss": 1.9854, "step": 13615 }, { "epoch": 1.84, "learning_rate": 1.599567684409619e-07, "loss": 2.0518, "step": 13620 }, { "epoch": 1.84, "learning_rate": 1.5928127533099162e-07, "loss": 1.9395, "step": 13625 }, { "epoch": 1.84, "learning_rate": 1.5860578222102132e-07, "loss": 1.941, "step": 13630 }, { "epoch": 1.84, "learning_rate": 1.5793028911105105e-07, "loss": 1.8729, "step": 13635 }, { "epoch": 1.84, "learning_rate": 1.5725479600108078e-07, "loss": 2.0032, "step": 13640 }, { "epoch": 1.84, "learning_rate": 1.565793028911105e-07, "loss": 1.8852, "step": 13645 }, { "epoch": 1.84, "learning_rate": 1.5590380978114023e-07, "loss": 2.0203, "step": 13650 }, { "epoch": 1.84, "learning_rate": 1.5522831667116996e-07, "loss": 2.0542, "step": 13655 }, { "epoch": 1.85, "learning_rate": 1.5455282356119968e-07, "loss": 1.979, "step": 13660 }, { "epoch": 1.85, "learning_rate": 1.5387733045122938e-07, "loss": 1.9459, "step": 13665 }, { "epoch": 1.85, "learning_rate": 1.532018373412591e-07, "loss": 1.9584, "step": 13670 }, { "epoch": 1.85, "learning_rate": 1.5252634423128883e-07, "loss": 2.0211, "step": 13675 }, { "epoch": 1.85, "learning_rate": 1.5185085112131856e-07, "loss": 1.9053, "step": 13680 }, { "epoch": 1.85, "learning_rate": 1.5117535801134829e-07, "loss": 1.9061, "step": 13685 }, { "epoch": 1.85, "learning_rate": 1.50499864901378e-07, "loss": 1.963, "step": 13690 }, { "epoch": 1.85, "learning_rate": 1.498243717914077e-07, "loss": 1.9455, "step": 13695 }, { "epoch": 1.85, "learning_rate": 1.4914887868143744e-07, "loss": 1.9384, "step": 13700 }, { "epoch": 1.85, "learning_rate": 1.4847338557146717e-07, "loss": 1.974, "step": 13705 }, { "epoch": 1.85, "learning_rate": 1.4779789246149686e-07, "loss": 2.0507, "step": 13710 }, { "epoch": 1.85, "learning_rate": 1.4712239935152662e-07, "loss": 1.8949, "step": 13715 }, { "epoch": 1.85, "learning_rate": 1.4644690624155634e-07, "loss": 1.9455, "step": 13720 }, { "epoch": 1.85, "learning_rate": 1.4577141313158607e-07, "loss": 1.8905, "step": 13725 }, { "epoch": 1.85, "learning_rate": 1.4509592002161577e-07, "loss": 1.9834, "step": 13730 }, { "epoch": 1.86, "learning_rate": 1.444204269116455e-07, "loss": 1.9444, "step": 13735 }, { "epoch": 1.86, "learning_rate": 1.4374493380167522e-07, "loss": 1.8684, "step": 13740 }, { "epoch": 1.86, "learning_rate": 1.4306944069170492e-07, "loss": 1.9758, "step": 13745 }, { "epoch": 1.86, "learning_rate": 1.4239394758173465e-07, "loss": 1.9717, "step": 13750 }, { "epoch": 1.86, "learning_rate": 1.417184544717644e-07, "loss": 2.0405, "step": 13755 }, { "epoch": 1.86, "learning_rate": 1.410429613617941e-07, "loss": 1.8387, "step": 13760 }, { "epoch": 1.86, "learning_rate": 1.4036746825182383e-07, "loss": 1.8584, "step": 13765 }, { "epoch": 1.86, "learning_rate": 1.3969197514185355e-07, "loss": 2.015, "step": 13770 }, { "epoch": 1.86, "learning_rate": 1.3901648203188325e-07, "loss": 1.9601, "step": 13775 }, { "epoch": 1.86, "learning_rate": 1.3834098892191298e-07, "loss": 2.0155, "step": 13780 }, { "epoch": 1.86, "learning_rate": 1.376654958119427e-07, "loss": 1.9741, "step": 13785 }, { "epoch": 1.86, "learning_rate": 1.3699000270197246e-07, "loss": 1.9371, "step": 13790 }, { "epoch": 1.86, "learning_rate": 1.3631450959200216e-07, "loss": 2.0218, "step": 13795 }, { "epoch": 1.86, "learning_rate": 1.3563901648203188e-07, "loss": 2.0401, "step": 13800 }, { "epoch": 1.86, "learning_rate": 1.349635233720616e-07, "loss": 2.0355, "step": 13805 }, { "epoch": 1.87, "learning_rate": 1.342880302620913e-07, "loss": 2.0323, "step": 13810 }, { "epoch": 1.87, "learning_rate": 1.3361253715212104e-07, "loss": 1.9805, "step": 13815 }, { "epoch": 1.87, "learning_rate": 1.3293704404215076e-07, "loss": 1.9251, "step": 13820 }, { "epoch": 1.87, "learning_rate": 1.322615509321805e-07, "loss": 1.9105, "step": 13825 }, { "epoch": 1.87, "learning_rate": 1.3158605782221022e-07, "loss": 1.9058, "step": 13830 }, { "epoch": 1.87, "learning_rate": 1.3091056471223994e-07, "loss": 1.99, "step": 13835 }, { "epoch": 1.87, "learning_rate": 1.3023507160226964e-07, "loss": 2.0117, "step": 13840 }, { "epoch": 1.87, "learning_rate": 1.2955957849229937e-07, "loss": 1.9481, "step": 13845 }, { "epoch": 1.87, "learning_rate": 1.288840853823291e-07, "loss": 1.9639, "step": 13850 }, { "epoch": 1.87, "learning_rate": 1.282085922723588e-07, "loss": 1.8786, "step": 13855 }, { "epoch": 1.87, "learning_rate": 1.2753309916238852e-07, "loss": 1.9065, "step": 13860 }, { "epoch": 1.87, "learning_rate": 1.2685760605241827e-07, "loss": 2.0212, "step": 13865 }, { "epoch": 1.87, "learning_rate": 1.26182112942448e-07, "loss": 2.0138, "step": 13870 }, { "epoch": 1.87, "learning_rate": 1.255066198324777e-07, "loss": 1.8938, "step": 13875 }, { "epoch": 1.88, "learning_rate": 1.2483112672250743e-07, "loss": 2.0057, "step": 13880 }, { "epoch": 1.88, "learning_rate": 1.2415563361253715e-07, "loss": 1.9609, "step": 13885 }, { "epoch": 1.88, "learning_rate": 1.2348014050256688e-07, "loss": 1.9497, "step": 13890 }, { "epoch": 1.88, "learning_rate": 1.2280464739259658e-07, "loss": 1.9263, "step": 13895 }, { "epoch": 1.88, "learning_rate": 1.221291542826263e-07, "loss": 1.9301, "step": 13900 }, { "epoch": 1.88, "learning_rate": 1.2145366117265603e-07, "loss": 1.9621, "step": 13905 }, { "epoch": 1.88, "learning_rate": 1.2077816806268576e-07, "loss": 1.9314, "step": 13910 }, { "epoch": 1.88, "learning_rate": 1.2010267495271548e-07, "loss": 1.9993, "step": 13915 }, { "epoch": 1.88, "learning_rate": 1.1942718184274518e-07, "loss": 1.9101, "step": 13920 }, { "epoch": 1.88, "learning_rate": 1.1875168873277492e-07, "loss": 1.9138, "step": 13925 }, { "epoch": 1.88, "learning_rate": 1.1807619562280465e-07, "loss": 2.0387, "step": 13930 }, { "epoch": 1.88, "learning_rate": 1.1740070251283436e-07, "loss": 1.9822, "step": 13935 }, { "epoch": 1.88, "learning_rate": 1.1672520940286408e-07, "loss": 1.8946, "step": 13940 }, { "epoch": 1.88, "learning_rate": 1.1604971629289381e-07, "loss": 1.9925, "step": 13945 }, { "epoch": 1.88, "learning_rate": 1.1537422318292353e-07, "loss": 1.9311, "step": 13950 }, { "epoch": 1.89, "learning_rate": 1.1469873007295325e-07, "loss": 1.9261, "step": 13955 }, { "epoch": 1.89, "learning_rate": 1.1402323696298297e-07, "loss": 2.0045, "step": 13960 }, { "epoch": 1.89, "learning_rate": 1.1334774385301269e-07, "loss": 1.932, "step": 13965 }, { "epoch": 1.89, "learning_rate": 1.1267225074304242e-07, "loss": 1.9065, "step": 13970 }, { "epoch": 1.89, "learning_rate": 1.1199675763307213e-07, "loss": 1.8788, "step": 13975 }, { "epoch": 1.89, "learning_rate": 1.1132126452310187e-07, "loss": 1.9045, "step": 13980 }, { "epoch": 1.89, "learning_rate": 1.1064577141313159e-07, "loss": 1.9846, "step": 13985 }, { "epoch": 1.89, "learning_rate": 1.099702783031613e-07, "loss": 1.9391, "step": 13990 }, { "epoch": 1.89, "learning_rate": 1.0929478519319102e-07, "loss": 2.0232, "step": 13995 }, { "epoch": 1.89, "learning_rate": 1.0861929208322075e-07, "loss": 1.9595, "step": 14000 }, { "epoch": 1.89, "eval_loss": 1.9482014179229736, "eval_runtime": 165.3869, "eval_samples_per_second": 3.604, "eval_steps_per_second": 0.453, "step": 14000 }, { "epoch": 1.89, "learning_rate": 1.0794379897325046e-07, "loss": 1.995, "step": 14005 }, { "epoch": 1.89, "learning_rate": 1.0726830586328019e-07, "loss": 1.9835, "step": 14010 }, { "epoch": 1.89, "learning_rate": 1.065928127533099e-07, "loss": 1.9466, "step": 14015 }, { "epoch": 1.89, "learning_rate": 1.0591731964333964e-07, "loss": 2.0073, "step": 14020 }, { "epoch": 1.89, "learning_rate": 1.0524182653336936e-07, "loss": 2.0322, "step": 14025 }, { "epoch": 1.9, "learning_rate": 1.0456633342339907e-07, "loss": 1.9171, "step": 14030 }, { "epoch": 1.9, "learning_rate": 1.0389084031342881e-07, "loss": 1.9881, "step": 14035 }, { "epoch": 1.9, "learning_rate": 1.0321534720345852e-07, "loss": 2.0138, "step": 14040 }, { "epoch": 1.9, "learning_rate": 1.0253985409348823e-07, "loss": 2.0584, "step": 14045 }, { "epoch": 1.9, "learning_rate": 1.0186436098351796e-07, "loss": 1.926, "step": 14050 }, { "epoch": 1.9, "learning_rate": 1.0118886787354769e-07, "loss": 1.9551, "step": 14055 }, { "epoch": 1.9, "learning_rate": 1.0051337476357741e-07, "loss": 1.9745, "step": 14060 }, { "epoch": 1.9, "learning_rate": 9.983788165360713e-08, "loss": 2.0285, "step": 14065 }, { "epoch": 1.9, "learning_rate": 9.916238854363685e-08, "loss": 2.0206, "step": 14070 }, { "epoch": 1.9, "learning_rate": 9.848689543366658e-08, "loss": 1.9034, "step": 14075 }, { "epoch": 1.9, "learning_rate": 9.781140232369629e-08, "loss": 1.9332, "step": 14080 }, { "epoch": 1.9, "learning_rate": 9.7135909213726e-08, "loss": 1.8562, "step": 14085 }, { "epoch": 1.9, "learning_rate": 9.646041610375574e-08, "loss": 1.9742, "step": 14090 }, { "epoch": 1.9, "learning_rate": 9.578492299378546e-08, "loss": 1.9395, "step": 14095 }, { "epoch": 1.9, "learning_rate": 9.510942988381518e-08, "loss": 2.0896, "step": 14100 }, { "epoch": 1.91, "learning_rate": 9.44339367738449e-08, "loss": 1.9833, "step": 14105 }, { "epoch": 1.91, "learning_rate": 9.375844366387462e-08, "loss": 1.909, "step": 14110 }, { "epoch": 1.91, "learning_rate": 9.308295055390435e-08, "loss": 1.8967, "step": 14115 }, { "epoch": 1.91, "learning_rate": 9.240745744393406e-08, "loss": 1.9244, "step": 14120 }, { "epoch": 1.91, "learning_rate": 9.17319643339638e-08, "loss": 1.8847, "step": 14125 }, { "epoch": 1.91, "learning_rate": 9.105647122399351e-08, "loss": 1.9759, "step": 14130 }, { "epoch": 1.91, "learning_rate": 9.038097811402323e-08, "loss": 2.0605, "step": 14135 }, { "epoch": 1.91, "learning_rate": 8.970548500405295e-08, "loss": 2.0337, "step": 14140 }, { "epoch": 1.91, "learning_rate": 8.902999189408268e-08, "loss": 2.0754, "step": 14145 }, { "epoch": 1.91, "learning_rate": 8.835449878411239e-08, "loss": 2.1058, "step": 14150 }, { "epoch": 1.91, "learning_rate": 8.767900567414212e-08, "loss": 1.9074, "step": 14155 }, { "epoch": 1.91, "learning_rate": 8.700351256417183e-08, "loss": 1.9853, "step": 14160 }, { "epoch": 1.91, "learning_rate": 8.632801945420157e-08, "loss": 1.9416, "step": 14165 }, { "epoch": 1.91, "learning_rate": 8.565252634423129e-08, "loss": 1.9635, "step": 14170 }, { "epoch": 1.91, "learning_rate": 8.4977033234261e-08, "loss": 1.9167, "step": 14175 }, { "epoch": 1.92, "learning_rate": 8.430154012429074e-08, "loss": 2.0947, "step": 14180 }, { "epoch": 1.92, "learning_rate": 8.362604701432045e-08, "loss": 1.9955, "step": 14185 }, { "epoch": 1.92, "learning_rate": 8.295055390435018e-08, "loss": 1.9231, "step": 14190 }, { "epoch": 1.92, "learning_rate": 8.227506079437989e-08, "loss": 1.9107, "step": 14195 }, { "epoch": 1.92, "learning_rate": 8.159956768440962e-08, "loss": 1.9193, "step": 14200 }, { "epoch": 1.92, "learning_rate": 8.092407457443934e-08, "loss": 1.955, "step": 14205 }, { "epoch": 1.92, "learning_rate": 8.024858146446906e-08, "loss": 2.0003, "step": 14210 }, { "epoch": 1.92, "learning_rate": 7.957308835449877e-08, "loss": 1.993, "step": 14215 }, { "epoch": 1.92, "learning_rate": 7.889759524452851e-08, "loss": 1.9449, "step": 14220 }, { "epoch": 1.92, "learning_rate": 7.822210213455822e-08, "loss": 2.0275, "step": 14225 }, { "epoch": 1.92, "learning_rate": 7.754660902458795e-08, "loss": 1.955, "step": 14230 }, { "epoch": 1.92, "learning_rate": 7.687111591461767e-08, "loss": 1.9618, "step": 14235 }, { "epoch": 1.92, "learning_rate": 7.619562280464739e-08, "loss": 1.9257, "step": 14240 }, { "epoch": 1.92, "learning_rate": 7.552012969467711e-08, "loss": 1.9296, "step": 14245 }, { "epoch": 1.93, "learning_rate": 7.484463658470683e-08, "loss": 2.0142, "step": 14250 }, { "epoch": 1.93, "learning_rate": 7.416914347473655e-08, "loss": 1.9446, "step": 14255 }, { "epoch": 1.93, "learning_rate": 7.349365036476628e-08, "loss": 1.9638, "step": 14260 }, { "epoch": 1.93, "learning_rate": 7.281815725479599e-08, "loss": 1.9541, "step": 14265 }, { "epoch": 1.93, "learning_rate": 7.214266414482572e-08, "loss": 1.9324, "step": 14270 }, { "epoch": 1.93, "learning_rate": 7.146717103485544e-08, "loss": 1.9464, "step": 14275 }, { "epoch": 1.93, "learning_rate": 7.079167792488516e-08, "loss": 1.8618, "step": 14280 }, { "epoch": 1.93, "learning_rate": 7.011618481491488e-08, "loss": 1.8554, "step": 14285 }, { "epoch": 1.93, "learning_rate": 6.944069170494461e-08, "loss": 1.8643, "step": 14290 }, { "epoch": 1.93, "learning_rate": 6.876519859497434e-08, "loss": 1.8951, "step": 14295 }, { "epoch": 1.93, "learning_rate": 6.808970548500405e-08, "loss": 2.0413, "step": 14300 }, { "epoch": 1.93, "learning_rate": 6.741421237503376e-08, "loss": 2.0529, "step": 14305 }, { "epoch": 1.93, "learning_rate": 6.67387192650635e-08, "loss": 1.9858, "step": 14310 }, { "epoch": 1.93, "learning_rate": 6.606322615509321e-08, "loss": 1.9525, "step": 14315 }, { "epoch": 1.93, "learning_rate": 6.538773304512293e-08, "loss": 1.9431, "step": 14320 }, { "epoch": 1.94, "learning_rate": 6.471223993515267e-08, "loss": 2.0377, "step": 14325 }, { "epoch": 1.94, "learning_rate": 6.403674682518238e-08, "loss": 1.804, "step": 14330 }, { "epoch": 1.94, "learning_rate": 6.336125371521211e-08, "loss": 2.0369, "step": 14335 }, { "epoch": 1.94, "learning_rate": 6.268576060524182e-08, "loss": 1.9369, "step": 14340 }, { "epoch": 1.94, "learning_rate": 6.201026749527155e-08, "loss": 1.9237, "step": 14345 }, { "epoch": 1.94, "learning_rate": 6.133477438530127e-08, "loss": 1.9972, "step": 14350 }, { "epoch": 1.94, "learning_rate": 6.065928127533099e-08, "loss": 1.9778, "step": 14355 }, { "epoch": 1.94, "learning_rate": 5.998378816536071e-08, "loss": 1.9137, "step": 14360 }, { "epoch": 1.94, "learning_rate": 5.930829505539043e-08, "loss": 1.9997, "step": 14365 }, { "epoch": 1.94, "learning_rate": 5.863280194542016e-08, "loss": 1.9587, "step": 14370 }, { "epoch": 1.94, "learning_rate": 5.795730883544987e-08, "loss": 2.0108, "step": 14375 }, { "epoch": 1.94, "learning_rate": 5.72818157254796e-08, "loss": 1.8876, "step": 14380 }, { "epoch": 1.94, "learning_rate": 5.660632261550932e-08, "loss": 2.042, "step": 14385 }, { "epoch": 1.94, "learning_rate": 5.593082950553904e-08, "loss": 1.9656, "step": 14390 }, { "epoch": 1.94, "learning_rate": 5.525533639556876e-08, "loss": 1.9729, "step": 14395 }, { "epoch": 1.95, "learning_rate": 5.457984328559848e-08, "loss": 2.0976, "step": 14400 }, { "epoch": 1.95, "eval_loss": 1.9481086730957031, "eval_runtime": 165.5103, "eval_samples_per_second": 3.601, "eval_steps_per_second": 0.453, "step": 14400 }, { "epoch": 1.95, "learning_rate": 5.390435017562821e-08, "loss": 1.8936, "step": 14405 }, { "epoch": 1.95, "learning_rate": 5.322885706565793e-08, "loss": 1.9298, "step": 14410 }, { "epoch": 1.95, "learning_rate": 5.255336395568765e-08, "loss": 1.9587, "step": 14415 }, { "epoch": 1.95, "learning_rate": 5.187787084571737e-08, "loss": 1.8686, "step": 14420 }, { "epoch": 1.95, "learning_rate": 5.1202377735747094e-08, "loss": 1.92, "step": 14425 }, { "epoch": 1.95, "learning_rate": 5.052688462577682e-08, "loss": 1.9898, "step": 14430 }, { "epoch": 1.95, "learning_rate": 4.985139151580653e-08, "loss": 1.9962, "step": 14435 }, { "epoch": 1.95, "learning_rate": 4.917589840583626e-08, "loss": 1.9581, "step": 14440 }, { "epoch": 1.95, "learning_rate": 4.850040529586598e-08, "loss": 2.0457, "step": 14445 }, { "epoch": 1.95, "learning_rate": 4.7824912185895705e-08, "loss": 2.0811, "step": 14450 }, { "epoch": 1.95, "learning_rate": 4.714941907592542e-08, "loss": 1.9705, "step": 14455 }, { "epoch": 1.95, "learning_rate": 4.6473925965955145e-08, "loss": 1.8892, "step": 14460 }, { "epoch": 1.95, "learning_rate": 4.5798432855984864e-08, "loss": 1.9186, "step": 14465 }, { "epoch": 1.95, "learning_rate": 4.512293974601459e-08, "loss": 2.0552, "step": 14470 }, { "epoch": 1.96, "learning_rate": 4.444744663604431e-08, "loss": 1.9579, "step": 14475 }, { "epoch": 1.96, "learning_rate": 4.377195352607403e-08, "loss": 1.9762, "step": 14480 }, { "epoch": 1.96, "learning_rate": 4.3096460416103756e-08, "loss": 2.0308, "step": 14485 }, { "epoch": 1.96, "learning_rate": 4.2420967306133476e-08, "loss": 1.9251, "step": 14490 }, { "epoch": 1.96, "learning_rate": 4.17454741961632e-08, "loss": 1.9119, "step": 14495 }, { "epoch": 1.96, "learning_rate": 4.1069981086192915e-08, "loss": 1.9872, "step": 14500 }, { "epoch": 1.96, "learning_rate": 4.039448797622264e-08, "loss": 1.9612, "step": 14505 }, { "epoch": 1.96, "learning_rate": 3.971899486625236e-08, "loss": 1.8368, "step": 14510 }, { "epoch": 1.96, "learning_rate": 3.904350175628209e-08, "loss": 2.0556, "step": 14515 }, { "epoch": 1.96, "learning_rate": 3.83680086463118e-08, "loss": 1.9518, "step": 14520 }, { "epoch": 1.96, "learning_rate": 3.7692515536341527e-08, "loss": 1.9561, "step": 14525 }, { "epoch": 1.96, "learning_rate": 3.701702242637125e-08, "loss": 2.0511, "step": 14530 }, { "epoch": 1.96, "learning_rate": 3.634152931640097e-08, "loss": 2.0264, "step": 14535 }, { "epoch": 1.96, "learning_rate": 3.56660362064307e-08, "loss": 1.9518, "step": 14540 }, { "epoch": 1.96, "learning_rate": 3.499054309646041e-08, "loss": 1.958, "step": 14545 }, { "epoch": 1.97, "learning_rate": 3.431504998649014e-08, "loss": 1.8629, "step": 14550 }, { "epoch": 1.97, "learning_rate": 3.363955687651986e-08, "loss": 1.9151, "step": 14555 }, { "epoch": 1.97, "learning_rate": 3.2964063766549584e-08, "loss": 2.046, "step": 14560 }, { "epoch": 1.97, "learning_rate": 3.22885706565793e-08, "loss": 1.9737, "step": 14565 }, { "epoch": 1.97, "learning_rate": 3.1613077546609023e-08, "loss": 1.9286, "step": 14570 }, { "epoch": 1.97, "learning_rate": 3.093758443663874e-08, "loss": 1.8991, "step": 14575 }, { "epoch": 1.97, "learning_rate": 3.026209132666847e-08, "loss": 1.9493, "step": 14580 }, { "epoch": 1.97, "learning_rate": 2.958659821669819e-08, "loss": 1.9246, "step": 14585 }, { "epoch": 1.97, "learning_rate": 2.891110510672791e-08, "loss": 2.0081, "step": 14590 }, { "epoch": 1.97, "learning_rate": 2.823561199675763e-08, "loss": 1.8692, "step": 14595 }, { "epoch": 1.97, "learning_rate": 2.756011888678735e-08, "loss": 1.9802, "step": 14600 }, { "epoch": 1.97, "learning_rate": 2.6884625776817074e-08, "loss": 1.9681, "step": 14605 }, { "epoch": 1.97, "learning_rate": 2.6209132666846797e-08, "loss": 1.9555, "step": 14610 }, { "epoch": 1.97, "learning_rate": 2.553363955687652e-08, "loss": 2.0044, "step": 14615 }, { "epoch": 1.98, "learning_rate": 2.485814644690624e-08, "loss": 1.9247, "step": 14620 }, { "epoch": 1.98, "learning_rate": 2.4182653336935963e-08, "loss": 1.9702, "step": 14625 }, { "epoch": 1.98, "learning_rate": 2.3507160226965682e-08, "loss": 1.9554, "step": 14630 }, { "epoch": 1.98, "learning_rate": 2.2831667116995405e-08, "loss": 2.0136, "step": 14635 }, { "epoch": 1.98, "learning_rate": 2.215617400702513e-08, "loss": 1.9539, "step": 14640 }, { "epoch": 1.98, "learning_rate": 2.1480680897054848e-08, "loss": 2.0292, "step": 14645 }, { "epoch": 1.98, "learning_rate": 2.080518778708457e-08, "loss": 1.9981, "step": 14650 }, { "epoch": 1.98, "learning_rate": 2.012969467711429e-08, "loss": 2.0681, "step": 14655 }, { "epoch": 1.98, "learning_rate": 1.9454201567144017e-08, "loss": 2.0592, "step": 14660 }, { "epoch": 1.98, "learning_rate": 1.8778708457173737e-08, "loss": 1.9643, "step": 14665 }, { "epoch": 1.98, "learning_rate": 1.810321534720346e-08, "loss": 2.0253, "step": 14670 }, { "epoch": 1.98, "learning_rate": 1.742772223723318e-08, "loss": 2.0889, "step": 14675 }, { "epoch": 1.98, "learning_rate": 1.6752229127262902e-08, "loss": 1.9485, "step": 14680 }, { "epoch": 1.98, "learning_rate": 1.6076736017292622e-08, "loss": 1.8839, "step": 14685 }, { "epoch": 1.98, "learning_rate": 1.5401242907322345e-08, "loss": 1.9396, "step": 14690 }, { "epoch": 1.99, "learning_rate": 1.4725749797352068e-08, "loss": 1.9704, "step": 14695 }, { "epoch": 1.99, "learning_rate": 1.4050256687381789e-08, "loss": 1.9368, "step": 14700 }, { "epoch": 1.99, "learning_rate": 1.337476357741151e-08, "loss": 1.9722, "step": 14705 }, { "epoch": 1.99, "learning_rate": 1.2699270467441232e-08, "loss": 2.0104, "step": 14710 }, { "epoch": 1.99, "learning_rate": 1.2023777357470953e-08, "loss": 1.9681, "step": 14715 }, { "epoch": 1.99, "learning_rate": 1.1348284247500676e-08, "loss": 1.9765, "step": 14720 }, { "epoch": 1.99, "learning_rate": 1.0672791137530397e-08, "loss": 1.9395, "step": 14725 }, { "epoch": 1.99, "learning_rate": 9.997298027560119e-09, "loss": 1.9876, "step": 14730 }, { "epoch": 1.99, "learning_rate": 9.32180491758984e-09, "loss": 1.9795, "step": 14735 }, { "epoch": 1.99, "learning_rate": 8.646311807619561e-09, "loss": 1.9714, "step": 14740 }, { "epoch": 1.99, "learning_rate": 7.970818697649283e-09, "loss": 1.9901, "step": 14745 }, { "epoch": 1.99, "learning_rate": 7.295325587679005e-09, "loss": 2.0808, "step": 14750 }, { "epoch": 1.99, "learning_rate": 6.619832477708728e-09, "loss": 1.9317, "step": 14755 }, { "epoch": 1.99, "learning_rate": 5.944339367738449e-09, "loss": 1.9596, "step": 14760 }, { "epoch": 1.99, "learning_rate": 5.268846257768171e-09, "loss": 1.9245, "step": 14765 }, { "epoch": 2.0, "learning_rate": 4.5933531477978924e-09, "loss": 2.0051, "step": 14770 }, { "epoch": 2.0, "learning_rate": 3.917860037827614e-09, "loss": 1.9284, "step": 14775 }, { "epoch": 2.0, "learning_rate": 3.2423669278573355e-09, "loss": 1.9367, "step": 14780 }, { "epoch": 2.0, "learning_rate": 2.566873817887057e-09, "loss": 2.0314, "step": 14785 }, { "epoch": 2.0, "learning_rate": 1.8913807079167793e-09, "loss": 1.8842, "step": 14790 }, { "epoch": 2.0, "learning_rate": 1.215887597946501e-09, "loss": 1.9025, "step": 14795 }, { "epoch": 2.0, "learning_rate": 5.403944879762226e-10, "loss": 1.9204, "step": 14800 }, { "epoch": 2.0, "eval_loss": 1.9478894472122192, "eval_runtime": 165.3905, "eval_samples_per_second": 3.604, "eval_steps_per_second": 0.453, "step": 14800 }, { "epoch": 2.0, "step": 14804, "total_flos": 8.826710223564841e+18, "train_loss": 2.036540643423514, "train_runtime": 233902.6153, "train_samples_per_second": 1.013, "train_steps_per_second": 0.063 } ], "logging_steps": 5, "max_steps": 14804, "num_train_epochs": 2, "save_steps": 2000, "total_flos": 8.826710223564841e+18, "trial_name": null, "trial_params": null }