| { |
| "best_metric": 1.1169735193252563, |
| "best_model_checkpoint": "outputs/checkpoint-8000", |
| "epoch": 2.56, |
| "global_step": 8000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0, |
| "learning_rate": 1e-05, |
| "loss": 1.8184, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 2e-05, |
| "loss": 1.7858, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 3e-05, |
| "loss": 1.7218, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4e-05, |
| "loss": 1.7218, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 5e-05, |
| "loss": 1.7009, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 6e-05, |
| "loss": 1.6714, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 7e-05, |
| "loss": 1.6582, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 8e-05, |
| "loss": 1.5659, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 9e-05, |
| "loss": 1.5343, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.0001, |
| "loss": 1.5252, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.00011000000000000002, |
| "loss": 1.414, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.00012, |
| "loss": 1.3919, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.00013000000000000002, |
| "loss": 1.352, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.00014, |
| "loss": 1.3762, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.00015000000000000001, |
| "loss": 1.3238, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.00016, |
| "loss": 1.3306, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.00017, |
| "loss": 1.3423, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.00018, |
| "loss": 1.3199, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.00019, |
| "loss": 1.3506, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.0002, |
| "loss": 1.3315, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.0001998921832884097, |
| "loss": 1.3142, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.00019978436657681943, |
| "loss": 1.2994, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.00019967654986522912, |
| "loss": 1.2972, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.00019956873315363883, |
| "loss": 1.3135, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.00019946091644204851, |
| "loss": 1.2903, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.00019935309973045823, |
| "loss": 1.2783, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.00019924528301886794, |
| "loss": 1.3044, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.00019913746630727762, |
| "loss": 1.2947, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 0.00019902964959568736, |
| "loss": 1.2759, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 0.00019892183288409705, |
| "loss": 1.2746, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 0.00019881401617250676, |
| "loss": 1.2869, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 0.00019870619946091644, |
| "loss": 1.26, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 0.00019859838274932616, |
| "loss": 1.258, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 0.00019849056603773587, |
| "loss": 1.2649, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 0.00019838274932614555, |
| "loss": 1.2549, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 0.00019827493261455526, |
| "loss": 1.2859, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 0.00019816711590296498, |
| "loss": 1.2934, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 0.00019805929919137466, |
| "loss": 1.3011, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 0.00019795148247978437, |
| "loss": 1.2304, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 0.00019784366576819408, |
| "loss": 1.2849, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.06, |
| "eval_loss": 1.2345943450927734, |
| "eval_runtime": 710.2534, |
| "eval_samples_per_second": 7.04, |
| "eval_steps_per_second": 0.88, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 0.0001977358490566038, |
| "loss": 1.2726, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 0.00019762803234501348, |
| "loss": 1.2996, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 0.0001975202156334232, |
| "loss": 1.2456, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 0.0001974123989218329, |
| "loss": 1.2794, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 0.0001973045822102426, |
| "loss": 1.2637, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 0.0001971967654986523, |
| "loss": 1.252, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 0.000197088948787062, |
| "loss": 1.2492, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 0.0001969811320754717, |
| "loss": 1.2972, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 0.0001968733153638814, |
| "loss": 1.3023, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 0.00019676549865229112, |
| "loss": 1.2682, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 0.00019665768194070083, |
| "loss": 1.2981, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 0.00019654986522911052, |
| "loss": 1.3084, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 0.00019644204851752023, |
| "loss": 1.2666, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 0.00019633423180592994, |
| "loss": 1.2623, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 0.00019622641509433963, |
| "loss": 1.2406, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 0.00019611859838274934, |
| "loss": 1.2623, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 0.00019601078167115905, |
| "loss": 1.2562, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 0.00019590296495956873, |
| "loss": 1.2166, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 0.00019579514824797845, |
| "loss": 1.2931, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 0.00019568733153638813, |
| "loss": 1.2354, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 0.00019557951482479787, |
| "loss": 1.2246, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 0.00019547169811320755, |
| "loss": 1.2236, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 0.00019536388140161727, |
| "loss": 1.2442, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 0.00019525606469002698, |
| "loss": 1.2779, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 0.00019514824797843666, |
| "loss": 1.2285, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 0.00019504043126684637, |
| "loss": 1.2157, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 0.00019493261455525606, |
| "loss": 1.2405, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 0.0001948247978436658, |
| "loss": 1.2335, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 0.00019471698113207548, |
| "loss": 1.2401, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 0.00019460916442048517, |
| "loss": 1.2521, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 0.0001945013477088949, |
| "loss": 1.2386, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 0.0001943935309973046, |
| "loss": 1.2196, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 0.0001942857142857143, |
| "loss": 1.192, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 0.000194177897574124, |
| "loss": 1.1964, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 0.0001940700808625337, |
| "loss": 1.2876, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 0.0001939622641509434, |
| "loss": 1.2622, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 0.0001938544474393531, |
| "loss": 1.2183, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 0.00019374663072776284, |
| "loss": 1.206, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 0.00019363881401617252, |
| "loss": 1.2697, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 0.0001935309973045822, |
| "loss": 1.2515, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.13, |
| "eval_loss": 1.207701563835144, |
| "eval_runtime": 709.7794, |
| "eval_samples_per_second": 7.044, |
| "eval_steps_per_second": 0.881, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 0.00019342318059299192, |
| "loss": 1.2596, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 0.00019331536388140163, |
| "loss": 1.2537, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 0.00019320754716981134, |
| "loss": 1.2545, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 0.00019309973045822103, |
| "loss": 1.2473, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 0.00019299191374663074, |
| "loss": 1.2308, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 0.00019288409703504045, |
| "loss": 1.2482, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 0.00019277628032345013, |
| "loss": 1.2342, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 0.00019266846361185985, |
| "loss": 1.2101, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 0.00019256064690026956, |
| "loss": 1.233, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 0.00019245283018867927, |
| "loss": 1.2401, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 0.00019234501347708895, |
| "loss": 1.2088, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 0.00019223719676549864, |
| "loss": 1.2205, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 0.00019212938005390838, |
| "loss": 1.218, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 0.00019202156334231806, |
| "loss": 1.2208, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 0.00019191374663072777, |
| "loss": 1.2783, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 0.00019180592991913749, |
| "loss": 1.1886, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 0.00019169811320754717, |
| "loss": 1.1939, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 0.00019159029649595688, |
| "loss": 1.2079, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 0.00019148247978436657, |
| "loss": 1.2473, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 0.0001913746630727763, |
| "loss": 1.2295, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 0.000191266846361186, |
| "loss": 1.2064, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 0.00019115902964959568, |
| "loss": 1.2138, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 0.00019105121293800541, |
| "loss": 1.2618, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 0.0001909433962264151, |
| "loss": 1.2495, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 0.0001908355795148248, |
| "loss": 1.2553, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 0.0001907277628032345, |
| "loss": 1.2334, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 0.0001906199460916442, |
| "loss": 1.2442, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 0.00019051212938005392, |
| "loss": 1.2301, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 0.0001904043126684636, |
| "loss": 1.2678, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 0.00019029649595687334, |
| "loss": 1.2116, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 0.00019018867924528303, |
| "loss": 1.2556, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 0.00019008086253369274, |
| "loss": 1.2427, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 0.00018997304582210242, |
| "loss": 1.2138, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 0.00018986522911051214, |
| "loss": 1.248, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 0.00018975741239892185, |
| "loss": 1.2415, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 0.00018964959568733153, |
| "loss": 1.2303, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 0.00018954177897574125, |
| "loss": 1.1939, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 0.00018943396226415096, |
| "loss": 1.1927, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 0.00018932614555256064, |
| "loss": 1.1982, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 0.00018921832884097035, |
| "loss": 1.1859, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.19, |
| "eval_loss": 1.1948214769363403, |
| "eval_runtime": 709.3061, |
| "eval_samples_per_second": 7.049, |
| "eval_steps_per_second": 0.881, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 0.00018911051212938007, |
| "loss": 1.2265, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 0.00018900269541778978, |
| "loss": 1.2402, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 0.00018889487870619946, |
| "loss": 1.2261, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 0.00018878706199460917, |
| "loss": 1.2264, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 0.00018867924528301889, |
| "loss": 1.2044, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 0.00018857142857142857, |
| "loss": 1.2203, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 0.00018846361185983828, |
| "loss": 1.2258, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 0.000188355795148248, |
| "loss": 1.2475, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 0.00018824797843665768, |
| "loss": 1.239, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 0.0001881401617250674, |
| "loss": 1.2374, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 0.0001880323450134771, |
| "loss": 1.2472, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 0.00018792452830188681, |
| "loss": 1.2411, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 0.0001878167115902965, |
| "loss": 1.1916, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 0.0001877088948787062, |
| "loss": 1.2043, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 0.00018760107816711592, |
| "loss": 1.2087, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 0.0001874932614555256, |
| "loss": 1.2147, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 0.00018738544474393532, |
| "loss": 1.1999, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 0.00018727762803234503, |
| "loss": 1.2031, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 0.00018716981132075472, |
| "loss": 1.268, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 0.00018706199460916443, |
| "loss": 1.2181, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 0.0001869541778975741, |
| "loss": 1.2187, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 0.00018684636118598385, |
| "loss": 1.2475, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 0.00018673854447439354, |
| "loss": 1.2043, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 0.00018663072776280325, |
| "loss": 1.1833, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 0.00018652291105121296, |
| "loss": 1.1899, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 0.00018641509433962264, |
| "loss": 1.232, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 0.00018630727762803236, |
| "loss": 1.1885, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 0.00018619946091644204, |
| "loss": 1.2087, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 0.00018609164420485178, |
| "loss": 1.2433, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 0.00018598382749326146, |
| "loss": 1.2233, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 0.00018587601078167115, |
| "loss": 1.2348, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 0.0001857681940700809, |
| "loss": 1.1667, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 0.00018566037735849057, |
| "loss": 1.2952, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 0.00018555256064690029, |
| "loss": 1.1939, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 0.00018544474393530997, |
| "loss": 1.1646, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 0.00018533692722371968, |
| "loss": 1.2055, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 0.0001852291105121294, |
| "loss": 1.183, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 0.00018512129380053908, |
| "loss": 1.2367, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 0.00018501347708894882, |
| "loss": 1.2265, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 0.0001849056603773585, |
| "loss": 1.2007, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.26, |
| "eval_loss": 1.184471845626831, |
| "eval_runtime": 708.4336, |
| "eval_samples_per_second": 7.058, |
| "eval_steps_per_second": 0.882, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 0.0001847978436657682, |
| "loss": 1.204, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 0.0001846900269541779, |
| "loss": 1.2379, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 0.0001845822102425876, |
| "loss": 1.157, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 0.00018447439353099732, |
| "loss": 1.2072, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 0.000184366576819407, |
| "loss": 1.1764, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 0.00018425876010781672, |
| "loss": 1.21, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 0.00018415094339622643, |
| "loss": 1.19, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 0.00018404312668463612, |
| "loss": 1.1923, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 0.00018393530997304583, |
| "loss": 1.2367, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 0.00018382749326145554, |
| "loss": 1.2394, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 0.00018371967654986525, |
| "loss": 1.2275, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 0.00018361185983827494, |
| "loss": 1.2206, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 0.00018350404312668465, |
| "loss": 1.2017, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 0.00018339622641509436, |
| "loss": 1.2277, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 0.00018328840970350404, |
| "loss": 1.1958, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 0.00018318059299191376, |
| "loss": 1.2317, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 0.00018307277628032347, |
| "loss": 1.2339, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 0.00018296495956873315, |
| "loss": 1.2408, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 0.00018285714285714286, |
| "loss": 1.1818, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 0.00018274932614555258, |
| "loss": 1.2051, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 0.0001826415094339623, |
| "loss": 1.2304, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 0.00018253369272237197, |
| "loss": 1.1717, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 0.00018242587601078168, |
| "loss": 1.1811, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 0.0001823180592991914, |
| "loss": 1.2127, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 0.00018221024258760108, |
| "loss": 1.2278, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 0.0001821024258760108, |
| "loss": 1.223, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 0.0001819946091644205, |
| "loss": 1.233, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 0.0001818867924528302, |
| "loss": 1.2321, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 0.0001817789757412399, |
| "loss": 1.1894, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 0.00018167115902964959, |
| "loss": 1.2213, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 0.00018156334231805933, |
| "loss": 1.2444, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 0.000181455525606469, |
| "loss": 1.1988, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 0.00018134770889487872, |
| "loss": 1.2039, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 0.00018123989218328843, |
| "loss": 1.1972, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 0.00018113207547169812, |
| "loss": 1.2146, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 0.00018102425876010783, |
| "loss": 1.2346, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 0.00018091644204851751, |
| "loss": 1.2283, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 0.00018080862533692723, |
| "loss": 1.201, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 0.00018070080862533694, |
| "loss": 1.2149, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 0.00018059299191374662, |
| "loss": 1.2085, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.32, |
| "eval_loss": 1.176331877708435, |
| "eval_runtime": 709.1565, |
| "eval_samples_per_second": 7.051, |
| "eval_steps_per_second": 0.881, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 0.00018048517520215636, |
| "loss": 1.2484, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 0.00018037735849056605, |
| "loss": 1.2193, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 0.00018026954177897576, |
| "loss": 1.1733, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 0.00018016172506738544, |
| "loss": 1.2152, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 0.00018005390835579516, |
| "loss": 1.1688, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 0.00017994609164420487, |
| "loss": 1.1893, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 0.00017983827493261455, |
| "loss": 1.1483, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 0.0001797304582210243, |
| "loss": 1.189, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 0.00017962264150943398, |
| "loss": 1.1873, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 0.00017951482479784366, |
| "loss": 1.1883, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 0.00017940700808625337, |
| "loss": 1.1741, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 0.00017929919137466308, |
| "loss": 1.2546, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 0.0001791913746630728, |
| "loss": 1.2194, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 0.00017908355795148248, |
| "loss": 1.1945, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 0.0001789757412398922, |
| "loss": 1.2011, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 0.0001788679245283019, |
| "loss": 1.1836, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 0.0001787601078167116, |
| "loss": 1.2116, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 0.0001786522911051213, |
| "loss": 1.1983, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 0.000178544474393531, |
| "loss": 1.2095, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 0.0001784366576819407, |
| "loss": 1.2007, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 0.0001783288409703504, |
| "loss": 1.1936, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 0.0001782210242587601, |
| "loss": 1.2207, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 0.00017811320754716983, |
| "loss": 1.2016, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 0.00017800539083557952, |
| "loss": 1.2013, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 0.00017789757412398923, |
| "loss": 1.1794, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 0.00017778975741239894, |
| "loss": 1.235, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 0.00017768194070080863, |
| "loss": 1.1865, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 0.00017757412398921834, |
| "loss": 1.2159, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 0.00017746630727762802, |
| "loss": 1.1887, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 0.00017735849056603776, |
| "loss": 1.1923, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 0.00017725067385444745, |
| "loss": 1.1894, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 0.00017714285714285713, |
| "loss": 1.1863, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 0.00017703504043126687, |
| "loss": 1.2128, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 0.00017692722371967655, |
| "loss": 1.214, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 0.00017681940700808627, |
| "loss": 1.2083, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 0.00017671159029649595, |
| "loss": 1.2252, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 0.00017660377358490566, |
| "loss": 1.1818, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 0.00017649595687331538, |
| "loss": 1.2334, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 0.00017638814016172506, |
| "loss": 1.1752, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 0.0001762803234501348, |
| "loss": 1.2036, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.38, |
| "eval_loss": 1.1710153818130493, |
| "eval_runtime": 708.8537, |
| "eval_samples_per_second": 7.054, |
| "eval_steps_per_second": 0.882, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 0.00017617250673854448, |
| "loss": 1.2283, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 0.0001760646900269542, |
| "loss": 1.2464, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 0.00017595687331536388, |
| "loss": 1.2126, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 0.0001758490566037736, |
| "loss": 1.2217, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 0.0001757412398921833, |
| "loss": 1.2245, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 0.000175633423180593, |
| "loss": 1.1941, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 0.0001755256064690027, |
| "loss": 1.2041, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 0.0001754177897574124, |
| "loss": 1.2076, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 0.0001753099730458221, |
| "loss": 1.2154, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 0.0001752021563342318, |
| "loss": 1.2247, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 0.00017509433962264152, |
| "loss": 1.1822, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 0.00017498652291105123, |
| "loss": 1.2015, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 0.00017487870619946092, |
| "loss": 1.2111, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 0.00017477088948787063, |
| "loss": 1.1946, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 0.00017466307277628034, |
| "loss": 1.1365, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 0.00017455525606469003, |
| "loss": 1.1742, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 0.00017444743935309974, |
| "loss": 1.2442, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 0.00017433962264150945, |
| "loss": 1.2095, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 0.00017423180592991913, |
| "loss": 1.1727, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 0.00017412398921832885, |
| "loss": 1.1948, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 0.00017401617250673856, |
| "loss": 1.1737, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 0.00017390835579514827, |
| "loss": 1.198, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 0.00017380053908355795, |
| "loss": 1.2152, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 0.00017369272237196767, |
| "loss": 1.2061, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 0.00017358490566037738, |
| "loss": 1.1925, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 0.00017347708894878706, |
| "loss": 1.2067, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 0.00017336927223719677, |
| "loss": 1.1964, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 0.0001732614555256065, |
| "loss": 1.2062, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 0.00017315363881401617, |
| "loss": 1.2208, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 0.00017304582210242588, |
| "loss": 1.167, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 0.00017293800539083557, |
| "loss": 1.1802, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 0.0001728301886792453, |
| "loss": 1.2193, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 0.000172722371967655, |
| "loss": 1.2015, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 0.0001726145552560647, |
| "loss": 1.2183, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 0.00017250673854447442, |
| "loss": 1.1986, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 0.0001723989218328841, |
| "loss": 1.1575, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 0.0001722911051212938, |
| "loss": 1.1949, |
| "step": 1385 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 0.0001721832884097035, |
| "loss": 1.2103, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 0.00017207547169811324, |
| "loss": 1.2141, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 0.00017196765498652292, |
| "loss": 1.2134, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.45, |
| "eval_loss": 1.1655155420303345, |
| "eval_runtime": 709.3424, |
| "eval_samples_per_second": 7.049, |
| "eval_steps_per_second": 0.881, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 0.0001718598382749326, |
| "loss": 1.1573, |
| "step": 1405 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 0.00017175202156334234, |
| "loss": 1.1694, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 0.00017164420485175203, |
| "loss": 1.226, |
| "step": 1415 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 0.00017153638814016174, |
| "loss": 1.1758, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 0.00017142857142857143, |
| "loss": 1.185, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 0.00017132075471698114, |
| "loss": 1.1878, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 0.00017121293800539085, |
| "loss": 1.2108, |
| "step": 1435 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 0.00017110512129380053, |
| "loss": 1.254, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 0.00017099730458221027, |
| "loss": 1.2071, |
| "step": 1445 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 0.00017088948787061996, |
| "loss": 1.1914, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 0.00017078167115902964, |
| "loss": 1.1936, |
| "step": 1455 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 0.00017067385444743935, |
| "loss": 1.2369, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 0.00017056603773584907, |
| "loss": 1.1627, |
| "step": 1465 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 0.00017045822102425878, |
| "loss": 1.216, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 0.00017035040431266846, |
| "loss": 1.1931, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 0.00017024258760107817, |
| "loss": 1.1859, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 0.00017013477088948789, |
| "loss": 1.1769, |
| "step": 1485 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 0.00017002695417789757, |
| "loss": 1.1376, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 0.00016991913746630728, |
| "loss": 1.1692, |
| "step": 1495 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 0.000169811320754717, |
| "loss": 1.1948, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 0.0001697035040431267, |
| "loss": 1.183, |
| "step": 1505 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 0.0001695956873315364, |
| "loss": 1.1687, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 0.0001694878706199461, |
| "loss": 1.1754, |
| "step": 1515 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 0.00016938005390835581, |
| "loss": 1.2051, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 0.0001692722371967655, |
| "loss": 1.1862, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 0.0001691644204851752, |
| "loss": 1.2014, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 0.00016905660377358492, |
| "loss": 1.1992, |
| "step": 1535 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 0.0001689487870619946, |
| "loss": 1.2253, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 0.00016884097035040432, |
| "loss": 1.2125, |
| "step": 1545 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 0.00016873315363881403, |
| "loss": 1.1838, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 0.00016862533692722374, |
| "loss": 1.2048, |
| "step": 1555 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 0.00016851752021563343, |
| "loss": 1.1855, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 0.0001684097035040431, |
| "loss": 1.1646, |
| "step": 1565 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 0.00016830188679245285, |
| "loss": 1.2024, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 0.00016819407008086254, |
| "loss": 1.2162, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 0.00016808625336927225, |
| "loss": 1.1909, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 0.00016797843665768196, |
| "loss": 1.1807, |
| "step": 1585 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 0.00016787061994609164, |
| "loss": 1.1803, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 0.00016776280323450136, |
| "loss": 1.1894, |
| "step": 1595 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 0.00016765498652291104, |
| "loss": 1.1869, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.51, |
| "eval_loss": 1.1610257625579834, |
| "eval_runtime": 709.4343, |
| "eval_samples_per_second": 7.048, |
| "eval_steps_per_second": 0.881, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 0.00016754716981132078, |
| "loss": 1.1832, |
| "step": 1605 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 0.00016743935309973047, |
| "loss": 1.1553, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 0.00016733153638814018, |
| "loss": 1.1788, |
| "step": 1615 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 0.0001672237196765499, |
| "loss": 1.1896, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 0.00016711590296495957, |
| "loss": 1.1839, |
| "step": 1625 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 0.00016700808625336929, |
| "loss": 1.2063, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 0.00016690026954177897, |
| "loss": 1.1756, |
| "step": 1635 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 0.00016679245283018868, |
| "loss": 1.1903, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 0.0001666846361185984, |
| "loss": 1.1901, |
| "step": 1645 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 0.00016657681940700808, |
| "loss": 1.1852, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 0.00016646900269541782, |
| "loss": 1.1824, |
| "step": 1655 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 0.0001663611859838275, |
| "loss": 1.1514, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 0.00016625336927223721, |
| "loss": 1.1835, |
| "step": 1665 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 0.0001661455525606469, |
| "loss": 1.1564, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 0.0001660377358490566, |
| "loss": 1.2315, |
| "step": 1675 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 0.00016592991913746632, |
| "loss": 1.2016, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 0.000165822102425876, |
| "loss": 1.2058, |
| "step": 1685 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 0.00016571428571428575, |
| "loss": 1.2109, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 0.00016560646900269543, |
| "loss": 1.2114, |
| "step": 1695 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 0.00016549865229110512, |
| "loss": 1.1754, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 0.00016539083557951483, |
| "loss": 1.1659, |
| "step": 1705 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 0.00016528301886792454, |
| "loss": 1.2003, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 0.00016517520215633425, |
| "loss": 1.2209, |
| "step": 1715 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 0.00016506738544474394, |
| "loss": 1.173, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 0.00016495956873315365, |
| "loss": 1.2318, |
| "step": 1725 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 0.00016485175202156336, |
| "loss": 1.2312, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 0.00016474393530997304, |
| "loss": 1.1828, |
| "step": 1735 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 0.00016463611859838276, |
| "loss": 1.1994, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 0.00016452830188679247, |
| "loss": 1.1953, |
| "step": 1745 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 0.00016442048517520215, |
| "loss": 1.1821, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 0.00016431266846361186, |
| "loss": 1.2484, |
| "step": 1755 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 0.00016420485175202155, |
| "loss": 1.1838, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 0.0001640970350404313, |
| "loss": 1.1851, |
| "step": 1765 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 0.00016398921832884097, |
| "loss": 1.2036, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 0.00016388140161725068, |
| "loss": 1.2104, |
| "step": 1775 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 0.0001637735849056604, |
| "loss": 1.1789, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 0.00016366576819407008, |
| "loss": 1.2007, |
| "step": 1785 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 0.0001635579514824798, |
| "loss": 1.1963, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 0.00016345013477088948, |
| "loss": 1.1817, |
| "step": 1795 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 0.00016334231805929922, |
| "loss": 1.2017, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.58, |
| "eval_loss": 1.157345175743103, |
| "eval_runtime": 708.5342, |
| "eval_samples_per_second": 7.057, |
| "eval_steps_per_second": 0.882, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 0.0001632345013477089, |
| "loss": 1.1706, |
| "step": 1805 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 0.0001631266846361186, |
| "loss": 1.2104, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 0.00016301886792452833, |
| "loss": 1.1855, |
| "step": 1815 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 0.000162911051212938, |
| "loss": 1.1485, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 0.00016280323450134772, |
| "loss": 1.1951, |
| "step": 1825 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 0.0001626954177897574, |
| "loss": 1.1655, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 0.00016258760107816712, |
| "loss": 1.153, |
| "step": 1835 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 0.00016247978436657683, |
| "loss": 1.1635, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 0.00016237196765498652, |
| "loss": 1.2176, |
| "step": 1845 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 0.00016226415094339625, |
| "loss": 1.1589, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 0.00016215633423180594, |
| "loss": 1.2518, |
| "step": 1855 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 0.00016204851752021562, |
| "loss": 1.1931, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 0.00016194070080862534, |
| "loss": 1.1886, |
| "step": 1865 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 0.00016183288409703505, |
| "loss": 1.1653, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 0.00016172506738544476, |
| "loss": 1.1975, |
| "step": 1875 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 0.00016161725067385444, |
| "loss": 1.2009, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 0.00016150943396226416, |
| "loss": 1.2339, |
| "step": 1885 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 0.00016140161725067387, |
| "loss": 1.1958, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 0.00016129380053908355, |
| "loss": 1.1811, |
| "step": 1895 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 0.00016118598382749326, |
| "loss": 1.1835, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 0.00016107816711590298, |
| "loss": 1.1767, |
| "step": 1905 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 0.0001609703504043127, |
| "loss": 1.1796, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 0.00016086253369272237, |
| "loss": 1.2022, |
| "step": 1915 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 0.00016075471698113208, |
| "loss": 1.1926, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 0.0001606469002695418, |
| "loss": 1.1882, |
| "step": 1925 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 0.00016053908355795148, |
| "loss": 1.1504, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 0.0001604312668463612, |
| "loss": 1.1927, |
| "step": 1935 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 0.0001603234501347709, |
| "loss": 1.1601, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 0.0001602156334231806, |
| "loss": 1.1726, |
| "step": 1945 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 0.0001601078167115903, |
| "loss": 1.2053, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 0.00016, |
| "loss": 1.1919, |
| "step": 1955 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 0.00015989218328840972, |
| "loss": 1.1814, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 0.0001597843665768194, |
| "loss": 1.1838, |
| "step": 1965 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 0.00015967654986522912, |
| "loss": 1.1954, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 0.00015956873315363883, |
| "loss": 1.196, |
| "step": 1975 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 0.00015946091644204852, |
| "loss": 1.1877, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 0.00015935309973045823, |
| "loss": 1.1853, |
| "step": 1985 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 0.00015924528301886794, |
| "loss": 1.197, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 0.00015913746630727763, |
| "loss": 1.1484, |
| "step": 1995 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 0.00015902964959568734, |
| "loss": 1.2139, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.64, |
| "eval_loss": 1.153731107711792, |
| "eval_runtime": 709.6014, |
| "eval_samples_per_second": 7.046, |
| "eval_steps_per_second": 0.881, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 0.00015892183288409702, |
| "loss": 1.219, |
| "step": 2005 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 0.00015881401617250676, |
| "loss": 1.209, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 0.00015870619946091645, |
| "loss": 1.1925, |
| "step": 2015 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 0.00015859838274932616, |
| "loss": 1.168, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 0.00015849056603773587, |
| "loss": 1.1759, |
| "step": 2025 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 0.00015838274932614556, |
| "loss": 1.1815, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 0.00015827493261455527, |
| "loss": 1.17, |
| "step": 2035 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 0.00015816711590296495, |
| "loss": 1.1778, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 0.00015805929919137466, |
| "loss": 1.1832, |
| "step": 2045 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 0.00015795148247978438, |
| "loss": 1.2256, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 0.00015784366576819406, |
| "loss": 1.1694, |
| "step": 2055 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 0.0001577358490566038, |
| "loss": 1.1549, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 0.00015762803234501348, |
| "loss": 1.1811, |
| "step": 2065 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 0.0001575202156334232, |
| "loss": 1.1529, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 0.00015741239892183288, |
| "loss": 1.1677, |
| "step": 2075 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 0.0001573045822102426, |
| "loss": 1.1657, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 0.0001571967654986523, |
| "loss": 1.1673, |
| "step": 2085 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 0.000157088948787062, |
| "loss": 1.1806, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 0.00015698113207547173, |
| "loss": 1.209, |
| "step": 2095 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 0.0001568733153638814, |
| "loss": 1.2106, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 0.0001567654986522911, |
| "loss": 1.1588, |
| "step": 2105 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 0.0001566576819407008, |
| "loss": 1.178, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 0.00015654986522911052, |
| "loss": 1.1232, |
| "step": 2115 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 0.00015644204851752023, |
| "loss": 1.2007, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 0.00015633423180592992, |
| "loss": 1.1999, |
| "step": 2125 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 0.00015622641509433963, |
| "loss": 1.1888, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 0.00015611859838274934, |
| "loss": 1.2105, |
| "step": 2135 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 0.00015601078167115903, |
| "loss": 1.1689, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 0.00015590296495956874, |
| "loss": 1.1656, |
| "step": 2145 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 0.00015579514824797845, |
| "loss": 1.1385, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 0.00015568733153638813, |
| "loss": 1.1773, |
| "step": 2155 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 0.00015557951482479785, |
| "loss": 1.1612, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 0.00015547169811320756, |
| "loss": 1.1687, |
| "step": 2165 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 0.00015536388140161727, |
| "loss": 1.1593, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 0.00015525606469002695, |
| "loss": 1.1624, |
| "step": 2175 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 0.00015514824797843667, |
| "loss": 1.1935, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 0.00015504043126684638, |
| "loss": 1.1696, |
| "step": 2185 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 0.00015493261455525606, |
| "loss": 1.2212, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 0.00015482479784366577, |
| "loss": 1.1608, |
| "step": 2195 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 0.0001547169811320755, |
| "loss": 1.1302, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.7, |
| "eval_loss": 1.1510419845581055, |
| "eval_runtime": 708.6685, |
| "eval_samples_per_second": 7.055, |
| "eval_steps_per_second": 0.882, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 0.0001546091644204852, |
| "loss": 1.2165, |
| "step": 2205 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 0.00015450134770889488, |
| "loss": 1.1766, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 0.00015439353099730457, |
| "loss": 1.1762, |
| "step": 2215 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 0.0001542857142857143, |
| "loss": 1.1523, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 0.000154177897574124, |
| "loss": 1.1889, |
| "step": 2225 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 0.0001540700808625337, |
| "loss": 1.1314, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 0.00015396226415094342, |
| "loss": 1.2006, |
| "step": 2235 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 0.0001538544474393531, |
| "loss": 1.2, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 0.0001537466307277628, |
| "loss": 1.1558, |
| "step": 2245 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 0.0001536388140161725, |
| "loss": 1.1979, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 0.00015353099730458224, |
| "loss": 1.1782, |
| "step": 2255 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 0.00015342318059299192, |
| "loss": 1.192, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 0.00015331536388140163, |
| "loss": 1.1937, |
| "step": 2265 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 0.00015320754716981134, |
| "loss": 1.1627, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 0.00015309973045822103, |
| "loss": 1.1617, |
| "step": 2275 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 0.00015299191374663074, |
| "loss": 1.2053, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 0.00015288409703504043, |
| "loss": 1.1716, |
| "step": 2285 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 0.00015277628032345014, |
| "loss": 1.214, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 0.00015266846361185985, |
| "loss": 1.1908, |
| "step": 2295 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 0.00015256064690026953, |
| "loss": 1.1819, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 0.00015245283018867927, |
| "loss": 1.1544, |
| "step": 2305 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 0.00015234501347708896, |
| "loss": 1.1634, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 0.00015223719676549867, |
| "loss": 1.1831, |
| "step": 2315 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 0.00015212938005390835, |
| "loss": 1.2191, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 0.00015202156334231807, |
| "loss": 1.1716, |
| "step": 2325 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 0.00015191374663072778, |
| "loss": 1.1385, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 0.00015180592991913746, |
| "loss": 1.2081, |
| "step": 2335 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 0.00015169811320754717, |
| "loss": 1.1674, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 0.00015159029649595689, |
| "loss": 1.1078, |
| "step": 2345 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 0.00015148247978436657, |
| "loss": 1.1987, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 0.00015137466307277628, |
| "loss": 1.1827, |
| "step": 2355 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 0.000151266846361186, |
| "loss": 1.1991, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 0.0001511590296495957, |
| "loss": 1.1718, |
| "step": 2365 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 0.0001510512129380054, |
| "loss": 1.187, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 0.0001509433962264151, |
| "loss": 1.1596, |
| "step": 2375 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 0.00015083557951482481, |
| "loss": 1.2077, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 0.0001507277628032345, |
| "loss": 1.1955, |
| "step": 2385 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 0.0001506199460916442, |
| "loss": 1.1653, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 0.00015051212938005392, |
| "loss": 1.2213, |
| "step": 2395 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 0.0001504043126684636, |
| "loss": 1.1799, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.77, |
| "eval_loss": 1.1479781866073608, |
| "eval_runtime": 709.5516, |
| "eval_samples_per_second": 7.047, |
| "eval_steps_per_second": 0.881, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 0.00015029649595687332, |
| "loss": 1.1505, |
| "step": 2405 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 0.00015018867924528303, |
| "loss": 1.1371, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 0.00015008086253369274, |
| "loss": 1.1523, |
| "step": 2415 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 0.00014997304582210243, |
| "loss": 1.1613, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 0.00014986522911051214, |
| "loss": 1.1648, |
| "step": 2425 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 0.00014975741239892185, |
| "loss": 1.1729, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 0.00014964959568733154, |
| "loss": 1.1626, |
| "step": 2435 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 0.00014954177897574125, |
| "loss": 1.1489, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 0.00014943396226415093, |
| "loss": 1.1673, |
| "step": 2445 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 0.00014932614555256067, |
| "loss": 1.1718, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 0.00014921832884097036, |
| "loss": 1.1807, |
| "step": 2455 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 0.00014911051212938004, |
| "loss": 1.2018, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 0.00014900269541778978, |
| "loss": 1.2019, |
| "step": 2465 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 0.00014889487870619947, |
| "loss": 1.2028, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 0.00014878706199460918, |
| "loss": 1.1865, |
| "step": 2475 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 0.00014867924528301886, |
| "loss": 1.1472, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 0.00014857142857142857, |
| "loss": 1.1894, |
| "step": 2485 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 0.00014846361185983829, |
| "loss": 1.165, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 0.00014835579514824797, |
| "loss": 1.2207, |
| "step": 2495 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 0.0001482479784366577, |
| "loss": 1.1673, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 0.0001481401617250674, |
| "loss": 1.145, |
| "step": 2505 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 0.00014803234501347708, |
| "loss": 1.1389, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 0.0001479245283018868, |
| "loss": 1.1875, |
| "step": 2515 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 0.0001478167115902965, |
| "loss": 1.1756, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 0.00014770889487870621, |
| "loss": 1.1974, |
| "step": 2525 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 0.0001476010781671159, |
| "loss": 1.1955, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 0.0001474932614555256, |
| "loss": 1.1493, |
| "step": 2535 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 0.00014738544474393532, |
| "loss": 1.1846, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 0.000147277628032345, |
| "loss": 1.1558, |
| "step": 2545 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 0.00014716981132075472, |
| "loss": 1.1677, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 0.00014706199460916443, |
| "loss": 1.1772, |
| "step": 2555 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 0.00014695417789757414, |
| "loss": 1.2155, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 0.00014684636118598383, |
| "loss": 1.2052, |
| "step": 2565 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 0.00014673854447439354, |
| "loss": 1.1814, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 0.00014663072776280325, |
| "loss": 1.2011, |
| "step": 2575 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 0.00014652291105121294, |
| "loss": 1.1852, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 0.00014641509433962265, |
| "loss": 1.1847, |
| "step": 2585 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 0.00014630727762803236, |
| "loss": 1.1983, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 0.00014619946091644204, |
| "loss": 1.1576, |
| "step": 2595 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 0.00014609164420485176, |
| "loss": 1.1518, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.83, |
| "eval_loss": 1.146146535873413, |
| "eval_runtime": 709.2392, |
| "eval_samples_per_second": 7.05, |
| "eval_steps_per_second": 0.881, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 0.00014598382749326147, |
| "loss": 1.1805, |
| "step": 2605 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 0.00014587601078167118, |
| "loss": 1.2067, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 0.00014576819407008086, |
| "loss": 1.1257, |
| "step": 2615 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 0.00014566037735849055, |
| "loss": 1.1683, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 0.0001455525606469003, |
| "loss": 1.1763, |
| "step": 2625 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 0.00014544474393530997, |
| "loss": 1.1747, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 0.00014533692722371969, |
| "loss": 1.1658, |
| "step": 2635 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 0.0001452291105121294, |
| "loss": 1.2061, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 0.00014512129380053908, |
| "loss": 1.1888, |
| "step": 2645 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 0.0001450134770889488, |
| "loss": 1.1822, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 0.00014490566037735848, |
| "loss": 1.164, |
| "step": 2655 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 0.00014479784366576822, |
| "loss": 1.2047, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 0.0001446900269541779, |
| "loss": 1.1296, |
| "step": 2665 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 0.00014458221024258761, |
| "loss": 1.1748, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 0.00014447439353099733, |
| "loss": 1.1199, |
| "step": 2675 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 0.000144366576819407, |
| "loss": 1.1819, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 0.00014425876010781672, |
| "loss": 1.1801, |
| "step": 2685 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 0.0001441509433962264, |
| "loss": 1.1667, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 0.00014404312668463612, |
| "loss": 1.1823, |
| "step": 2695 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 0.00014393530997304583, |
| "loss": 1.2095, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 0.00014382749326145552, |
| "loss": 1.1903, |
| "step": 2705 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 0.00014371967654986525, |
| "loss": 1.1857, |
| "step": 2710 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 0.00014361185983827494, |
| "loss": 1.2016, |
| "step": 2715 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 0.00014350404312668465, |
| "loss": 1.2046, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 0.00014339622641509434, |
| "loss": 1.2048, |
| "step": 2725 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 0.00014328840970350405, |
| "loss": 1.1859, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 0.00014318059299191376, |
| "loss": 1.2309, |
| "step": 2735 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 0.00014307277628032344, |
| "loss": 1.1893, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 0.00014296495956873318, |
| "loss": 1.1932, |
| "step": 2745 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 0.00014285714285714287, |
| "loss": 1.192, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 0.00014274932614555255, |
| "loss": 1.188, |
| "step": 2755 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 0.00014264150943396226, |
| "loss": 1.1345, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 0.00014253369272237198, |
| "loss": 1.1902, |
| "step": 2765 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 0.0001424258760107817, |
| "loss": 1.1644, |
| "step": 2770 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 0.00014231805929919137, |
| "loss": 1.1259, |
| "step": 2775 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 0.00014221024258760108, |
| "loss": 1.1694, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 0.0001421024258760108, |
| "loss": 1.1837, |
| "step": 2785 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 0.00014199460916442048, |
| "loss": 1.153, |
| "step": 2790 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 0.0001418867924528302, |
| "loss": 1.1925, |
| "step": 2795 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 0.0001417789757412399, |
| "loss": 1.1752, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.9, |
| "eval_loss": 1.143415093421936, |
| "eval_runtime": 708.8805, |
| "eval_samples_per_second": 7.053, |
| "eval_steps_per_second": 0.882, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 0.0001416711590296496, |
| "loss": 1.154, |
| "step": 2805 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 0.0001415633423180593, |
| "loss": 1.1811, |
| "step": 2810 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 0.000141455525606469, |
| "loss": 1.1962, |
| "step": 2815 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 0.00014134770889487873, |
| "loss": 1.1968, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 0.0001412398921832884, |
| "loss": 1.1964, |
| "step": 2825 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 0.00014113207547169812, |
| "loss": 1.1485, |
| "step": 2830 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 0.00014102425876010783, |
| "loss": 1.1518, |
| "step": 2835 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 0.00014091644204851752, |
| "loss": 1.1405, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 0.00014080862533692723, |
| "loss": 1.1588, |
| "step": 2845 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 0.00014070080862533694, |
| "loss": 1.1797, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 0.00014059299191374665, |
| "loss": 1.1405, |
| "step": 2855 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 0.00014048517520215634, |
| "loss": 1.1603, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 0.00014037735849056602, |
| "loss": 1.2069, |
| "step": 2865 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 0.00014026954177897576, |
| "loss": 1.1864, |
| "step": 2870 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 0.00014016172506738545, |
| "loss": 1.1567, |
| "step": 2875 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 0.00014005390835579516, |
| "loss": 1.1836, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 0.00013994609164420487, |
| "loss": 1.2095, |
| "step": 2885 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 0.00013983827493261456, |
| "loss": 1.1931, |
| "step": 2890 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 0.00013973045822102427, |
| "loss": 1.2025, |
| "step": 2895 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 0.00013962264150943395, |
| "loss": 1.192, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 0.0001395148247978437, |
| "loss": 1.2305, |
| "step": 2905 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 0.00013940700808625338, |
| "loss": 1.1856, |
| "step": 2910 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 0.00013929919137466306, |
| "loss": 1.208, |
| "step": 2915 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 0.0001391913746630728, |
| "loss": 1.1751, |
| "step": 2920 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 0.00013908355795148248, |
| "loss": 1.119, |
| "step": 2925 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 0.0001389757412398922, |
| "loss": 1.1694, |
| "step": 2930 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 0.00013886792452830188, |
| "loss": 1.1712, |
| "step": 2935 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 0.0001387601078167116, |
| "loss": 1.1328, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 0.0001386522911051213, |
| "loss": 1.1756, |
| "step": 2945 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 0.000138544474393531, |
| "loss": 1.2139, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 0.00013843665768194073, |
| "loss": 1.1559, |
| "step": 2955 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 0.0001383288409703504, |
| "loss": 1.1384, |
| "step": 2960 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 0.00013822102425876012, |
| "loss": 1.1647, |
| "step": 2965 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 0.0001381132075471698, |
| "loss": 1.183, |
| "step": 2970 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 0.00013800539083557952, |
| "loss": 1.1357, |
| "step": 2975 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 0.00013789757412398923, |
| "loss": 1.1835, |
| "step": 2980 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 0.00013778975741239892, |
| "loss": 1.1927, |
| "step": 2985 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 0.00013768194070080863, |
| "loss": 1.1908, |
| "step": 2990 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 0.00013757412398921834, |
| "loss": 1.1586, |
| "step": 2995 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 0.00013746630727762803, |
| "loss": 1.1584, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.96, |
| "eval_loss": 1.1414791345596313, |
| "eval_runtime": 705.1717, |
| "eval_samples_per_second": 7.09, |
| "eval_steps_per_second": 0.886, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 0.00013735849056603774, |
| "loss": 1.2006, |
| "step": 3005 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 0.00013725067385444745, |
| "loss": 1.1835, |
| "step": 3010 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 0.00013714285714285716, |
| "loss": 1.1773, |
| "step": 3015 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 0.00013703504043126685, |
| "loss": 1.1719, |
| "step": 3020 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 0.00013692722371967656, |
| "loss": 1.1736, |
| "step": 3025 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 0.00013681940700808627, |
| "loss": 1.1536, |
| "step": 3030 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 0.00013671159029649595, |
| "loss": 1.1845, |
| "step": 3035 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 0.00013660377358490567, |
| "loss": 1.2087, |
| "step": 3040 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 0.00013649595687331538, |
| "loss": 1.1516, |
| "step": 3045 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 0.00013638814016172506, |
| "loss": 1.1443, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 0.00013628032345013478, |
| "loss": 1.1609, |
| "step": 3055 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 0.0001361725067385445, |
| "loss": 1.1798, |
| "step": 3060 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 0.0001360646900269542, |
| "loss": 1.215, |
| "step": 3065 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 0.00013595687331536388, |
| "loss": 1.1845, |
| "step": 3070 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 0.0001358490566037736, |
| "loss": 1.1515, |
| "step": 3075 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 0.0001357412398921833, |
| "loss": 1.18, |
| "step": 3080 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 0.000135633423180593, |
| "loss": 1.1741, |
| "step": 3085 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 0.0001355256064690027, |
| "loss": 1.1609, |
| "step": 3090 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 0.0001354177897574124, |
| "loss": 1.1581, |
| "step": 3095 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 0.0001353099730458221, |
| "loss": 1.2023, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 0.0001352021563342318, |
| "loss": 1.1553, |
| "step": 3105 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 0.0001350943396226415, |
| "loss": 1.1721, |
| "step": 3110 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 0.00013498652291105124, |
| "loss": 1.2398, |
| "step": 3115 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 0.00013487870619946092, |
| "loss": 1.1656, |
| "step": 3120 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 0.00013477088948787063, |
| "loss": 1.1591, |
| "step": 3125 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 0.00013466307277628032, |
| "loss": 1.1791, |
| "step": 3130 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 0.00013455525606469003, |
| "loss": 1.1315, |
| "step": 3135 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 0.00013444743935309974, |
| "loss": 1.1538, |
| "step": 3140 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 0.00013433962264150943, |
| "loss": 1.1189, |
| "step": 3145 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 0.00013423180592991916, |
| "loss": 1.1305, |
| "step": 3150 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 0.00013412398921832885, |
| "loss": 1.1633, |
| "step": 3155 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 0.00013401617250673853, |
| "loss": 1.1274, |
| "step": 3160 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 0.00013390835579514825, |
| "loss": 1.145, |
| "step": 3165 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 0.00013380053908355796, |
| "loss": 1.183, |
| "step": 3170 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 0.00013369272237196767, |
| "loss": 1.1849, |
| "step": 3175 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 0.00013358490566037735, |
| "loss": 1.1461, |
| "step": 3180 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 0.00013347708894878707, |
| "loss": 1.2046, |
| "step": 3185 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 0.00013336927223719678, |
| "loss": 1.1395, |
| "step": 3190 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 0.00013326145552560646, |
| "loss": 1.1611, |
| "step": 3195 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 0.00013315363881401617, |
| "loss": 1.1311, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.02, |
| "eval_loss": 1.140012264251709, |
| "eval_runtime": 701.8342, |
| "eval_samples_per_second": 7.124, |
| "eval_steps_per_second": 0.891, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 0.0001330458221024259, |
| "loss": 1.1558, |
| "step": 3205 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 0.0001329380053908356, |
| "loss": 1.1512, |
| "step": 3210 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 0.00013283018867924528, |
| "loss": 1.1567, |
| "step": 3215 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 0.000132722371967655, |
| "loss": 1.1612, |
| "step": 3220 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 0.0001326145552560647, |
| "loss": 1.1874, |
| "step": 3225 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 0.0001325067385444744, |
| "loss": 1.1561, |
| "step": 3230 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 0.0001323989218328841, |
| "loss": 1.1272, |
| "step": 3235 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 0.00013229110512129382, |
| "loss": 1.1693, |
| "step": 3240 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 0.0001321832884097035, |
| "loss": 1.1922, |
| "step": 3245 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 0.0001320754716981132, |
| "loss": 1.1438, |
| "step": 3250 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 0.00013196765498652292, |
| "loss": 1.173, |
| "step": 3255 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 0.00013185983827493264, |
| "loss": 1.216, |
| "step": 3260 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 0.00013175202156334232, |
| "loss": 1.1613, |
| "step": 3265 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 0.000131644204851752, |
| "loss": 1.183, |
| "step": 3270 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 0.00013153638814016174, |
| "loss": 1.1592, |
| "step": 3275 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 0.00013142857142857143, |
| "loss": 1.1487, |
| "step": 3280 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 0.00013132075471698114, |
| "loss": 1.1542, |
| "step": 3285 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 0.00013121293800539085, |
| "loss": 1.1404, |
| "step": 3290 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 0.00013110512129380054, |
| "loss": 1.192, |
| "step": 3295 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 0.00013099730458221025, |
| "loss": 1.1616, |
| "step": 3300 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 0.00013088948787061993, |
| "loss": 1.1554, |
| "step": 3305 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 0.00013078167115902967, |
| "loss": 1.1501, |
| "step": 3310 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 0.00013067385444743936, |
| "loss": 1.1444, |
| "step": 3315 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 0.00013056603773584907, |
| "loss": 1.1486, |
| "step": 3320 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 0.00013045822102425878, |
| "loss": 1.1683, |
| "step": 3325 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 0.00013035040431266847, |
| "loss": 1.1676, |
| "step": 3330 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 0.00013024258760107818, |
| "loss": 1.1348, |
| "step": 3335 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 0.00013013477088948786, |
| "loss": 1.196, |
| "step": 3340 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 0.00013002695417789757, |
| "loss": 1.1632, |
| "step": 3345 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 0.00012991913746630729, |
| "loss": 1.1347, |
| "step": 3350 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 0.00012981132075471697, |
| "loss": 1.1729, |
| "step": 3355 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 0.0001297035040431267, |
| "loss": 1.1397, |
| "step": 3360 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 0.0001295956873315364, |
| "loss": 1.1299, |
| "step": 3365 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 0.0001294878706199461, |
| "loss": 1.1473, |
| "step": 3370 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 0.0001293800539083558, |
| "loss": 1.182, |
| "step": 3375 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 0.0001292722371967655, |
| "loss": 1.1584, |
| "step": 3380 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 0.00012916442048517521, |
| "loss": 1.1567, |
| "step": 3385 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 0.0001290566037735849, |
| "loss": 1.1754, |
| "step": 3390 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 0.0001289487870619946, |
| "loss": 1.1705, |
| "step": 3395 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 0.00012884097035040432, |
| "loss": 1.1595, |
| "step": 3400 |
| }, |
| { |
| "epoch": 1.09, |
| "eval_loss": 1.1373599767684937, |
| "eval_runtime": 707.276, |
| "eval_samples_per_second": 7.069, |
| "eval_steps_per_second": 0.884, |
| "step": 3400 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 0.000128733153638814, |
| "loss": 1.1678, |
| "step": 3405 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 0.00012862533692722372, |
| "loss": 1.1624, |
| "step": 3410 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 0.00012851752021563343, |
| "loss": 1.1587, |
| "step": 3415 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 0.00012840970350404314, |
| "loss": 1.1972, |
| "step": 3420 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 0.00012830188679245283, |
| "loss": 1.1342, |
| "step": 3425 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 0.00012819407008086254, |
| "loss": 1.1575, |
| "step": 3430 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 0.00012808625336927225, |
| "loss": 1.1345, |
| "step": 3435 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 0.00012797843665768194, |
| "loss": 1.1628, |
| "step": 3440 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 0.00012787061994609165, |
| "loss": 1.1448, |
| "step": 3445 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 0.00012776280323450136, |
| "loss": 1.1618, |
| "step": 3450 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 0.00012765498652291104, |
| "loss": 1.1748, |
| "step": 3455 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 0.00012754716981132076, |
| "loss": 1.1441, |
| "step": 3460 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 0.00012743935309973047, |
| "loss": 1.1295, |
| "step": 3465 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 0.00012733153638814018, |
| "loss": 1.1516, |
| "step": 3470 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 0.00012722371967654987, |
| "loss": 1.1544, |
| "step": 3475 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 0.00012711590296495958, |
| "loss": 1.1614, |
| "step": 3480 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 0.0001270080862533693, |
| "loss": 1.1111, |
| "step": 3485 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 0.00012690026954177897, |
| "loss": 1.1604, |
| "step": 3490 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 0.00012679245283018869, |
| "loss": 1.1578, |
| "step": 3495 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 0.0001266846361185984, |
| "loss": 1.1796, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 0.0001265768194070081, |
| "loss": 1.1302, |
| "step": 3505 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 0.0001264690026954178, |
| "loss": 1.1551, |
| "step": 3510 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 0.00012636118598382748, |
| "loss": 1.1396, |
| "step": 3515 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 0.00012625336927223722, |
| "loss": 1.1993, |
| "step": 3520 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 0.0001261455525606469, |
| "loss": 1.153, |
| "step": 3525 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 0.00012603773584905661, |
| "loss": 1.1839, |
| "step": 3530 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 0.00012592991913746633, |
| "loss": 1.1693, |
| "step": 3535 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 0.000125822102425876, |
| "loss": 1.1559, |
| "step": 3540 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 0.00012571428571428572, |
| "loss": 1.1438, |
| "step": 3545 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 0.0001256064690026954, |
| "loss": 1.1634, |
| "step": 3550 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 0.00012549865229110515, |
| "loss": 1.1733, |
| "step": 3555 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 0.00012539083557951483, |
| "loss": 1.1699, |
| "step": 3560 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 0.00012528301886792452, |
| "loss": 1.1223, |
| "step": 3565 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 0.00012517520215633425, |
| "loss": 1.1479, |
| "step": 3570 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 0.00012506738544474394, |
| "loss": 1.1594, |
| "step": 3575 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 0.00012495956873315365, |
| "loss": 1.188, |
| "step": 3580 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 0.00012485175202156334, |
| "loss": 1.1451, |
| "step": 3585 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 0.00012474393530997305, |
| "loss": 1.1168, |
| "step": 3590 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 0.00012463611859838276, |
| "loss": 1.1582, |
| "step": 3595 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 0.00012452830188679244, |
| "loss": 1.1721, |
| "step": 3600 |
| }, |
| { |
| "epoch": 1.15, |
| "eval_loss": 1.1364716291427612, |
| "eval_runtime": 707.3467, |
| "eval_samples_per_second": 7.069, |
| "eval_steps_per_second": 0.884, |
| "step": 3600 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 0.00012442048517520218, |
| "loss": 1.1667, |
| "step": 3605 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 0.00012431266846361187, |
| "loss": 1.1731, |
| "step": 3610 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 0.00012420485175202158, |
| "loss": 1.1318, |
| "step": 3615 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 0.00012409703504043126, |
| "loss": 1.1717, |
| "step": 3620 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 0.00012398921832884098, |
| "loss": 1.1834, |
| "step": 3625 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 0.0001238814016172507, |
| "loss": 1.1426, |
| "step": 3630 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 0.00012377358490566037, |
| "loss": 1.1699, |
| "step": 3635 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 0.00012366576819407009, |
| "loss": 1.1116, |
| "step": 3640 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 0.0001235579514824798, |
| "loss": 1.1386, |
| "step": 3645 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 0.00012345013477088948, |
| "loss": 1.1674, |
| "step": 3650 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 0.0001233423180592992, |
| "loss": 1.1717, |
| "step": 3655 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 0.0001232345013477089, |
| "loss": 1.1575, |
| "step": 3660 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 0.00012312668463611862, |
| "loss": 1.154, |
| "step": 3665 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 0.0001230188679245283, |
| "loss": 1.1679, |
| "step": 3670 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 0.00012291105121293801, |
| "loss": 1.1936, |
| "step": 3675 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 0.00012280323450134773, |
| "loss": 1.1326, |
| "step": 3680 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 0.0001226954177897574, |
| "loss": 1.1841, |
| "step": 3685 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 0.00012258760107816712, |
| "loss": 1.1508, |
| "step": 3690 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 0.00012247978436657683, |
| "loss": 1.1581, |
| "step": 3695 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 0.00012237196765498652, |
| "loss": 1.1439, |
| "step": 3700 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 0.00012226415094339623, |
| "loss": 1.1153, |
| "step": 3705 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 0.00012215633423180594, |
| "loss": 1.1526, |
| "step": 3710 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 0.00012204851752021564, |
| "loss": 1.1678, |
| "step": 3715 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 0.00012194070080862534, |
| "loss": 1.1265, |
| "step": 3720 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 0.00012183288409703504, |
| "loss": 1.1766, |
| "step": 3725 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 0.00012172506738544476, |
| "loss": 1.1777, |
| "step": 3730 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 0.00012161725067385446, |
| "loss": 1.1883, |
| "step": 3735 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 0.00012150943396226415, |
| "loss": 1.2069, |
| "step": 3740 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 0.00012140161725067384, |
| "loss": 1.1264, |
| "step": 3745 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 0.00012129380053908357, |
| "loss": 1.1968, |
| "step": 3750 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 0.00012118598382749327, |
| "loss": 1.1553, |
| "step": 3755 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 0.00012107816711590297, |
| "loss": 1.1365, |
| "step": 3760 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 0.00012097035040431268, |
| "loss": 1.1957, |
| "step": 3765 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 0.00012086253369272238, |
| "loss": 1.165, |
| "step": 3770 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 0.00012075471698113207, |
| "loss": 1.1731, |
| "step": 3775 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 0.00012064690026954177, |
| "loss": 1.1722, |
| "step": 3780 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 0.0001205390835579515, |
| "loss": 1.1643, |
| "step": 3785 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 0.0001204312668463612, |
| "loss": 1.1166, |
| "step": 3790 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 0.00012032345013477088, |
| "loss": 1.1508, |
| "step": 3795 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 0.0001202156334231806, |
| "loss": 1.1597, |
| "step": 3800 |
| }, |
| { |
| "epoch": 1.22, |
| "eval_loss": 1.1354634761810303, |
| "eval_runtime": 707.7384, |
| "eval_samples_per_second": 7.065, |
| "eval_steps_per_second": 0.883, |
| "step": 3800 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 0.0001201078167115903, |
| "loss": 1.1863, |
| "step": 3805 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 0.00012, |
| "loss": 1.1569, |
| "step": 3810 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 0.0001198921832884097, |
| "loss": 1.1685, |
| "step": 3815 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 0.00011978436657681941, |
| "loss": 1.1185, |
| "step": 3820 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 0.00011967654986522911, |
| "loss": 1.1784, |
| "step": 3825 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 0.00011956873315363881, |
| "loss": 1.1922, |
| "step": 3830 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 0.00011946091644204854, |
| "loss": 1.152, |
| "step": 3835 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 0.00011935309973045823, |
| "loss": 1.1643, |
| "step": 3840 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 0.00011924528301886793, |
| "loss": 1.1586, |
| "step": 3845 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 0.00011913746630727762, |
| "loss": 1.1564, |
| "step": 3850 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 0.00011902964959568734, |
| "loss": 1.0981, |
| "step": 3855 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 0.00011892183288409704, |
| "loss": 1.1387, |
| "step": 3860 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 0.00011881401617250674, |
| "loss": 1.1637, |
| "step": 3865 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 0.00011870619946091645, |
| "loss": 1.1779, |
| "step": 3870 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 0.00011859838274932615, |
| "loss": 1.1363, |
| "step": 3875 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 0.00011849056603773585, |
| "loss": 1.1414, |
| "step": 3880 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 0.00011838274932614555, |
| "loss": 1.1384, |
| "step": 3885 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 0.00011827493261455527, |
| "loss": 1.1393, |
| "step": 3890 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 0.00011816711590296497, |
| "loss": 1.1296, |
| "step": 3895 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 0.00011805929919137467, |
| "loss": 1.1504, |
| "step": 3900 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 0.00011795148247978438, |
| "loss": 1.18, |
| "step": 3905 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 0.00011784366576819408, |
| "loss": 1.1383, |
| "step": 3910 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 0.00011773584905660378, |
| "loss": 1.166, |
| "step": 3915 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 0.00011762803234501347, |
| "loss": 1.1275, |
| "step": 3920 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 0.00011752021563342319, |
| "loss": 1.1321, |
| "step": 3925 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 0.00011741239892183288, |
| "loss": 1.1589, |
| "step": 3930 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 0.00011730458221024258, |
| "loss": 1.1128, |
| "step": 3935 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 0.00011719676549865231, |
| "loss": 1.1594, |
| "step": 3940 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 0.000117088948787062, |
| "loss": 1.1498, |
| "step": 3945 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 0.0001169811320754717, |
| "loss": 1.1294, |
| "step": 3950 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 0.0001168733153638814, |
| "loss": 1.1314, |
| "step": 3955 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 0.00011676549865229111, |
| "loss": 1.1602, |
| "step": 3960 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 0.00011665768194070081, |
| "loss": 1.1773, |
| "step": 3965 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 0.00011654986522911051, |
| "loss": 1.169, |
| "step": 3970 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 0.00011644204851752024, |
| "loss": 1.1976, |
| "step": 3975 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 0.00011633423180592992, |
| "loss": 1.1487, |
| "step": 3980 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 0.00011622641509433962, |
| "loss": 1.1847, |
| "step": 3985 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 0.00011611859838274932, |
| "loss": 1.2022, |
| "step": 3990 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 0.00011601078167115904, |
| "loss": 1.1604, |
| "step": 3995 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 0.00011590296495956874, |
| "loss": 1.1718, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.28, |
| "eval_loss": 1.1334933042526245, |
| "eval_runtime": 707.5579, |
| "eval_samples_per_second": 7.067, |
| "eval_steps_per_second": 0.883, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 0.00011579514824797844, |
| "loss": 1.1294, |
| "step": 4005 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 0.00011568733153638815, |
| "loss": 1.1297, |
| "step": 4010 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 0.00011557951482479785, |
| "loss": 1.1582, |
| "step": 4015 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 0.00011547169811320755, |
| "loss": 1.1615, |
| "step": 4020 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 0.00011536388140161725, |
| "loss": 1.1682, |
| "step": 4025 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 0.00011525606469002697, |
| "loss": 1.1441, |
| "step": 4030 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 0.00011514824797843666, |
| "loss": 1.1028, |
| "step": 4035 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 0.00011504043126684635, |
| "loss": 1.1539, |
| "step": 4040 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 0.00011493261455525608, |
| "loss": 1.1719, |
| "step": 4045 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 0.00011482479784366578, |
| "loss": 1.2023, |
| "step": 4050 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 0.00011471698113207548, |
| "loss": 1.128, |
| "step": 4055 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 0.00011460916442048518, |
| "loss": 1.1649, |
| "step": 4060 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 0.00011450134770889489, |
| "loss": 1.1482, |
| "step": 4065 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 0.00011439353099730459, |
| "loss": 1.1421, |
| "step": 4070 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 0.00011428571428571428, |
| "loss": 1.1656, |
| "step": 4075 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 0.00011417789757412401, |
| "loss": 1.136, |
| "step": 4080 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 0.00011407008086253371, |
| "loss": 1.1679, |
| "step": 4085 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 0.00011396226415094339, |
| "loss": 1.1591, |
| "step": 4090 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 0.00011385444743935309, |
| "loss": 1.1228, |
| "step": 4095 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 0.00011374663072776282, |
| "loss": 1.1393, |
| "step": 4100 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 0.00011363881401617251, |
| "loss": 1.1544, |
| "step": 4105 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 0.00011353099730458221, |
| "loss": 1.1307, |
| "step": 4110 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 0.00011342318059299192, |
| "loss": 1.1437, |
| "step": 4115 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 0.00011331536388140162, |
| "loss": 1.1607, |
| "step": 4120 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 0.00011320754716981132, |
| "loss": 1.1565, |
| "step": 4125 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 0.00011309973045822102, |
| "loss": 1.1418, |
| "step": 4130 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 0.00011299191374663074, |
| "loss": 1.151, |
| "step": 4135 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 0.00011288409703504044, |
| "loss": 1.1593, |
| "step": 4140 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 0.00011277628032345014, |
| "loss": 1.1928, |
| "step": 4145 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 0.00011266846361185985, |
| "loss": 1.1668, |
| "step": 4150 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 0.00011256064690026955, |
| "loss": 1.1321, |
| "step": 4155 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 0.00011245283018867925, |
| "loss": 1.1389, |
| "step": 4160 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 0.00011234501347708895, |
| "loss": 1.1438, |
| "step": 4165 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 0.00011223719676549866, |
| "loss": 1.136, |
| "step": 4170 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 0.00011212938005390836, |
| "loss": 1.1436, |
| "step": 4175 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 0.00011202156334231806, |
| "loss": 1.2055, |
| "step": 4180 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 0.00011191374663072778, |
| "loss": 1.1936, |
| "step": 4185 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 0.00011180592991913748, |
| "loss": 1.1591, |
| "step": 4190 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 0.00011169811320754718, |
| "loss": 1.144, |
| "step": 4195 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 0.00011159029649595688, |
| "loss": 1.1639, |
| "step": 4200 |
| }, |
| { |
| "epoch": 1.34, |
| "eval_loss": 1.1315429210662842, |
| "eval_runtime": 707.0979, |
| "eval_samples_per_second": 7.071, |
| "eval_steps_per_second": 0.884, |
| "step": 4200 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 0.00011148247978436659, |
| "loss": 1.1696, |
| "step": 4205 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 0.00011137466307277629, |
| "loss": 1.1316, |
| "step": 4210 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 0.00011126684636118598, |
| "loss": 1.1535, |
| "step": 4215 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 0.0001111590296495957, |
| "loss": 1.1378, |
| "step": 4220 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 0.0001110512129380054, |
| "loss": 1.159, |
| "step": 4225 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 0.00011094339622641509, |
| "loss": 1.171, |
| "step": 4230 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 0.00011083557951482479, |
| "loss": 1.1284, |
| "step": 4235 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 0.00011072776280323452, |
| "loss": 1.1462, |
| "step": 4240 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 0.00011061994609164422, |
| "loss": 1.1774, |
| "step": 4245 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 0.00011051212938005391, |
| "loss": 1.1677, |
| "step": 4250 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 0.00011040431266846363, |
| "loss": 1.1548, |
| "step": 4255 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 0.00011029649595687332, |
| "loss": 1.1967, |
| "step": 4260 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 0.00011018867924528302, |
| "loss": 1.1485, |
| "step": 4265 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 0.00011008086253369272, |
| "loss": 1.1481, |
| "step": 4270 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 0.00010997304582210243, |
| "loss": 1.1543, |
| "step": 4275 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 0.00010986522911051213, |
| "loss": 1.1812, |
| "step": 4280 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 0.00010975741239892183, |
| "loss": 1.0901, |
| "step": 4285 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 0.00010964959568733155, |
| "loss": 1.1405, |
| "step": 4290 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 0.00010954177897574125, |
| "loss": 1.17, |
| "step": 4295 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 0.00010943396226415095, |
| "loss": 1.1344, |
| "step": 4300 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 0.00010932614555256065, |
| "loss": 1.1109, |
| "step": 4305 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 0.00010921832884097036, |
| "loss": 1.1368, |
| "step": 4310 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 0.00010911051212938006, |
| "loss": 1.1534, |
| "step": 4315 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 0.00010900269541778976, |
| "loss": 1.1668, |
| "step": 4320 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 0.00010889487870619948, |
| "loss": 1.1145, |
| "step": 4325 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 0.00010878706199460917, |
| "loss": 1.1337, |
| "step": 4330 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 0.00010867924528301887, |
| "loss": 1.1432, |
| "step": 4335 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 0.00010857142857142856, |
| "loss": 1.1403, |
| "step": 4340 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 0.00010846361185983829, |
| "loss": 1.1239, |
| "step": 4345 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 0.00010835579514824799, |
| "loss": 1.1274, |
| "step": 4350 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 0.00010824797843665769, |
| "loss": 1.1574, |
| "step": 4355 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 0.0001081401617250674, |
| "loss": 1.1353, |
| "step": 4360 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 0.0001080323450134771, |
| "loss": 1.1426, |
| "step": 4365 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 0.0001079245283018868, |
| "loss": 1.1307, |
| "step": 4370 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 0.00010781671159029649, |
| "loss": 1.1896, |
| "step": 4375 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 0.00010770889487870622, |
| "loss": 1.1772, |
| "step": 4380 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 0.00010760107816711592, |
| "loss": 1.1649, |
| "step": 4385 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 0.0001074932614555256, |
| "loss": 1.1832, |
| "step": 4390 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 0.00010738544474393533, |
| "loss": 1.1893, |
| "step": 4395 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 0.00010727762803234502, |
| "loss": 1.1448, |
| "step": 4400 |
| }, |
| { |
| "epoch": 1.41, |
| "eval_loss": 1.1311901807785034, |
| "eval_runtime": 707.1464, |
| "eval_samples_per_second": 7.071, |
| "eval_steps_per_second": 0.884, |
| "step": 4400 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 0.00010716981132075472, |
| "loss": 1.1392, |
| "step": 4405 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 0.00010706199460916442, |
| "loss": 1.1417, |
| "step": 4410 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 0.00010695417789757413, |
| "loss": 1.1543, |
| "step": 4415 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 0.00010684636118598383, |
| "loss": 1.1421, |
| "step": 4420 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 0.00010673854447439353, |
| "loss": 1.1545, |
| "step": 4425 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 0.00010663072776280323, |
| "loss": 1.1377, |
| "step": 4430 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 0.00010652291105121295, |
| "loss": 1.1899, |
| "step": 4435 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 0.00010641509433962265, |
| "loss": 1.1412, |
| "step": 4440 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 0.00010630727762803234, |
| "loss": 1.1707, |
| "step": 4445 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 0.00010619946091644206, |
| "loss": 1.1455, |
| "step": 4450 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 0.00010609164420485176, |
| "loss": 1.1832, |
| "step": 4455 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 0.00010598382749326146, |
| "loss": 1.1532, |
| "step": 4460 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 0.00010587601078167116, |
| "loss": 1.1417, |
| "step": 4465 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 0.00010576819407008087, |
| "loss": 1.1404, |
| "step": 4470 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 0.00010566037735849057, |
| "loss": 1.1674, |
| "step": 4475 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 0.00010555256064690027, |
| "loss": 1.1477, |
| "step": 4480 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 0.00010544474393530999, |
| "loss": 1.1428, |
| "step": 4485 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 0.00010533692722371969, |
| "loss": 1.1282, |
| "step": 4490 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 0.00010522911051212939, |
| "loss": 1.1439, |
| "step": 4495 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 0.00010512129380053907, |
| "loss": 1.1323, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 0.0001050134770889488, |
| "loss": 1.134, |
| "step": 4505 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 0.0001049056603773585, |
| "loss": 1.1319, |
| "step": 4510 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 0.0001047978436657682, |
| "loss": 1.1644, |
| "step": 4515 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 0.0001046900269541779, |
| "loss": 1.1127, |
| "step": 4520 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 0.0001045822102425876, |
| "loss": 1.1419, |
| "step": 4525 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 0.0001044743935309973, |
| "loss": 1.1282, |
| "step": 4530 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 0.000104366576819407, |
| "loss": 1.1282, |
| "step": 4535 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 0.00010425876010781673, |
| "loss": 1.1401, |
| "step": 4540 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 0.00010415094339622642, |
| "loss": 1.1314, |
| "step": 4545 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 0.00010404312668463612, |
| "loss": 1.1251, |
| "step": 4550 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 0.00010393530997304583, |
| "loss": 1.1312, |
| "step": 4555 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 0.00010382749326145553, |
| "loss": 1.1063, |
| "step": 4560 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 0.00010371967654986523, |
| "loss": 1.1342, |
| "step": 4565 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 0.00010361185983827493, |
| "loss": 1.174, |
| "step": 4570 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 0.00010350404312668464, |
| "loss": 1.1457, |
| "step": 4575 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 0.00010339622641509434, |
| "loss": 1.1475, |
| "step": 4580 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 0.00010328840970350404, |
| "loss": 1.1827, |
| "step": 4585 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 0.00010318059299191376, |
| "loss": 1.1191, |
| "step": 4590 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 0.00010307277628032346, |
| "loss": 1.2093, |
| "step": 4595 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 0.00010296495956873316, |
| "loss": 1.1645, |
| "step": 4600 |
| }, |
| { |
| "epoch": 1.47, |
| "eval_loss": 1.1296443939208984, |
| "eval_runtime": 706.9133, |
| "eval_samples_per_second": 7.073, |
| "eval_steps_per_second": 0.884, |
| "step": 4600 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 0.00010285714285714286, |
| "loss": 1.1572, |
| "step": 4605 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 0.00010274932614555257, |
| "loss": 1.1238, |
| "step": 4610 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 0.00010264150943396227, |
| "loss": 1.1377, |
| "step": 4615 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 0.00010253369272237197, |
| "loss": 1.1914, |
| "step": 4620 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 0.00010242587601078169, |
| "loss": 1.133, |
| "step": 4625 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 0.00010231805929919138, |
| "loss": 1.1717, |
| "step": 4630 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 0.00010221024258760107, |
| "loss": 1.1676, |
| "step": 4635 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 0.00010210242587601077, |
| "loss": 1.1174, |
| "step": 4640 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 0.0001019946091644205, |
| "loss": 1.1082, |
| "step": 4645 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 0.0001018867924528302, |
| "loss": 1.1938, |
| "step": 4650 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 0.0001017789757412399, |
| "loss": 1.1183, |
| "step": 4655 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 0.0001016711590296496, |
| "loss": 1.175, |
| "step": 4660 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 0.0001015633423180593, |
| "loss": 1.1243, |
| "step": 4665 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 0.000101455525606469, |
| "loss": 1.125, |
| "step": 4670 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 0.0001013477088948787, |
| "loss": 1.1506, |
| "step": 4675 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 0.00010123989218328843, |
| "loss": 1.1263, |
| "step": 4680 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 0.00010113207547169811, |
| "loss": 1.1393, |
| "step": 4685 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 0.00010102425876010781, |
| "loss": 1.1207, |
| "step": 4690 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 0.00010091644204851754, |
| "loss": 1.1945, |
| "step": 4695 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 0.00010080862533692723, |
| "loss": 1.1823, |
| "step": 4700 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 0.00010070080862533693, |
| "loss": 1.1524, |
| "step": 4705 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 0.00010059299191374663, |
| "loss": 1.2009, |
| "step": 4710 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 0.00010048517520215634, |
| "loss": 1.1033, |
| "step": 4715 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 0.00010037735849056604, |
| "loss": 1.1254, |
| "step": 4720 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 0.00010026954177897574, |
| "loss": 1.2103, |
| "step": 4725 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 0.00010016172506738546, |
| "loss": 1.2174, |
| "step": 4730 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 0.00010005390835579516, |
| "loss": 1.1901, |
| "step": 4735 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 9.994609164420485e-05, |
| "loss": 1.1458, |
| "step": 4740 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 9.983827493261456e-05, |
| "loss": 1.1377, |
| "step": 4745 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 9.973045822102426e-05, |
| "loss": 1.1625, |
| "step": 4750 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 9.962264150943397e-05, |
| "loss": 1.1597, |
| "step": 4755 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 9.951482479784368e-05, |
| "loss": 1.1348, |
| "step": 4760 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 9.940700808625338e-05, |
| "loss": 1.1484, |
| "step": 4765 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 9.929919137466308e-05, |
| "loss": 1.1799, |
| "step": 4770 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 9.919137466307278e-05, |
| "loss": 1.1738, |
| "step": 4775 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 9.908355795148249e-05, |
| "loss": 1.1515, |
| "step": 4780 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 9.897574123989219e-05, |
| "loss": 1.1486, |
| "step": 4785 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 9.88679245283019e-05, |
| "loss": 1.1394, |
| "step": 4790 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 9.87601078167116e-05, |
| "loss": 1.1433, |
| "step": 4795 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 9.86522911051213e-05, |
| "loss": 1.1562, |
| "step": 4800 |
| }, |
| { |
| "epoch": 1.54, |
| "eval_loss": 1.128167986869812, |
| "eval_runtime": 707.3804, |
| "eval_samples_per_second": 7.068, |
| "eval_steps_per_second": 0.884, |
| "step": 4800 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 9.8544474393531e-05, |
| "loss": 1.12, |
| "step": 4805 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 9.84366576819407e-05, |
| "loss": 1.1696, |
| "step": 4810 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 9.832884097035042e-05, |
| "loss": 1.1363, |
| "step": 4815 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 9.822102425876011e-05, |
| "loss": 1.1891, |
| "step": 4820 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 9.811320754716981e-05, |
| "loss": 1.1303, |
| "step": 4825 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 9.800539083557952e-05, |
| "loss": 1.1488, |
| "step": 4830 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 9.789757412398922e-05, |
| "loss": 1.1169, |
| "step": 4835 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 9.778975741239893e-05, |
| "loss": 1.0968, |
| "step": 4840 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 9.768194070080863e-05, |
| "loss": 1.1275, |
| "step": 4845 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 9.757412398921833e-05, |
| "loss": 1.1252, |
| "step": 4850 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 9.746630727762803e-05, |
| "loss": 1.1488, |
| "step": 4855 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 9.735849056603774e-05, |
| "loss": 1.1267, |
| "step": 4860 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 9.725067385444745e-05, |
| "loss": 1.1264, |
| "step": 4865 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 9.714285714285715e-05, |
| "loss": 1.1256, |
| "step": 4870 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 9.703504043126685e-05, |
| "loss": 1.1534, |
| "step": 4875 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 9.692722371967655e-05, |
| "loss": 1.1283, |
| "step": 4880 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 9.681940700808626e-05, |
| "loss": 1.1439, |
| "step": 4885 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 9.671159029649596e-05, |
| "loss": 1.1406, |
| "step": 4890 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 9.660377358490567e-05, |
| "loss": 1.1333, |
| "step": 4895 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 9.649595687331537e-05, |
| "loss": 1.1718, |
| "step": 4900 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 9.638814016172507e-05, |
| "loss": 1.1617, |
| "step": 4905 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 9.628032345013478e-05, |
| "loss": 1.1754, |
| "step": 4910 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 9.617250673854448e-05, |
| "loss": 1.1204, |
| "step": 4915 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 9.606469002695419e-05, |
| "loss": 1.1385, |
| "step": 4920 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 9.595687331536389e-05, |
| "loss": 1.1249, |
| "step": 4925 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 9.584905660377359e-05, |
| "loss": 1.1681, |
| "step": 4930 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 9.574123989218328e-05, |
| "loss": 1.1449, |
| "step": 4935 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 9.5633423180593e-05, |
| "loss": 1.117, |
| "step": 4940 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 9.552560646900271e-05, |
| "loss": 1.1392, |
| "step": 4945 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 9.54177897574124e-05, |
| "loss": 1.1271, |
| "step": 4950 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 9.53099730458221e-05, |
| "loss": 1.157, |
| "step": 4955 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 9.52021563342318e-05, |
| "loss": 1.1708, |
| "step": 4960 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 9.509433962264151e-05, |
| "loss": 1.1373, |
| "step": 4965 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 9.498652291105121e-05, |
| "loss": 1.1687, |
| "step": 4970 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 9.487870619946092e-05, |
| "loss": 1.1701, |
| "step": 4975 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 9.477088948787062e-05, |
| "loss": 1.1579, |
| "step": 4980 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 9.466307277628032e-05, |
| "loss": 1.1384, |
| "step": 4985 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 9.455525606469003e-05, |
| "loss": 1.1591, |
| "step": 4990 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 9.444743935309973e-05, |
| "loss": 1.1136, |
| "step": 4995 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 9.433962264150944e-05, |
| "loss": 1.1619, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.6, |
| "eval_loss": 1.1272780895233154, |
| "eval_runtime": 707.2365, |
| "eval_samples_per_second": 7.07, |
| "eval_steps_per_second": 0.884, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 9.423180592991914e-05, |
| "loss": 1.153, |
| "step": 5005 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 9.412398921832884e-05, |
| "loss": 1.1507, |
| "step": 5010 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 9.401617250673855e-05, |
| "loss": 1.1421, |
| "step": 5015 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 9.390835579514825e-05, |
| "loss": 1.1353, |
| "step": 5020 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 9.380053908355796e-05, |
| "loss": 1.1522, |
| "step": 5025 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 9.369272237196766e-05, |
| "loss": 1.1577, |
| "step": 5030 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 9.358490566037736e-05, |
| "loss": 1.1632, |
| "step": 5035 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 9.347708894878706e-05, |
| "loss": 1.1245, |
| "step": 5040 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 9.336927223719677e-05, |
| "loss": 1.1527, |
| "step": 5045 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 9.326145552560648e-05, |
| "loss": 1.1252, |
| "step": 5050 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 9.315363881401618e-05, |
| "loss": 1.1444, |
| "step": 5055 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 9.304582210242589e-05, |
| "loss": 1.133, |
| "step": 5060 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 9.293800539083557e-05, |
| "loss": 1.1292, |
| "step": 5065 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 9.283018867924529e-05, |
| "loss": 1.1702, |
| "step": 5070 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 9.272237196765498e-05, |
| "loss": 1.1355, |
| "step": 5075 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 9.26145552560647e-05, |
| "loss": 1.1623, |
| "step": 5080 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 9.250673854447441e-05, |
| "loss": 1.1476, |
| "step": 5085 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 9.23989218328841e-05, |
| "loss": 1.1391, |
| "step": 5090 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 9.22911051212938e-05, |
| "loss": 1.1771, |
| "step": 5095 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 9.21832884097035e-05, |
| "loss": 1.1592, |
| "step": 5100 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 9.207547169811322e-05, |
| "loss": 1.1665, |
| "step": 5105 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 9.196765498652291e-05, |
| "loss": 1.1477, |
| "step": 5110 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 9.185983827493263e-05, |
| "loss": 1.2003, |
| "step": 5115 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 9.175202156334232e-05, |
| "loss": 1.1141, |
| "step": 5120 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 9.164420485175202e-05, |
| "loss": 1.1356, |
| "step": 5125 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 9.153638814016173e-05, |
| "loss": 1.1667, |
| "step": 5130 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 9.142857142857143e-05, |
| "loss": 1.1361, |
| "step": 5135 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 9.132075471698114e-05, |
| "loss": 1.1095, |
| "step": 5140 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 9.121293800539084e-05, |
| "loss": 1.1414, |
| "step": 5145 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 9.110512129380054e-05, |
| "loss": 1.1622, |
| "step": 5150 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 9.099730458221025e-05, |
| "loss": 1.162, |
| "step": 5155 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 9.088948787061995e-05, |
| "loss": 1.1777, |
| "step": 5160 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 9.078167115902966e-05, |
| "loss": 1.1357, |
| "step": 5165 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 9.067385444743936e-05, |
| "loss": 1.1572, |
| "step": 5170 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 9.056603773584906e-05, |
| "loss": 1.1978, |
| "step": 5175 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 9.045822102425876e-05, |
| "loss": 1.1218, |
| "step": 5180 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 9.035040431266847e-05, |
| "loss": 1.1853, |
| "step": 5185 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 9.024258760107818e-05, |
| "loss": 1.1537, |
| "step": 5190 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 9.013477088948788e-05, |
| "loss": 1.0893, |
| "step": 5195 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 9.002695417789758e-05, |
| "loss": 1.1794, |
| "step": 5200 |
| }, |
| { |
| "epoch": 1.66, |
| "eval_loss": 1.1269601583480835, |
| "eval_runtime": 708.5983, |
| "eval_samples_per_second": 7.056, |
| "eval_steps_per_second": 0.882, |
| "step": 5200 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 8.991913746630728e-05, |
| "loss": 1.1298, |
| "step": 5205 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 8.981132075471699e-05, |
| "loss": 1.1408, |
| "step": 5210 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 8.970350404312669e-05, |
| "loss": 1.1277, |
| "step": 5215 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 8.95956873315364e-05, |
| "loss": 1.1564, |
| "step": 5220 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 8.94878706199461e-05, |
| "loss": 1.1639, |
| "step": 5225 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 8.93800539083558e-05, |
| "loss": 1.149, |
| "step": 5230 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 8.92722371967655e-05, |
| "loss": 1.1306, |
| "step": 5235 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 8.91644204851752e-05, |
| "loss": 1.126, |
| "step": 5240 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 8.905660377358492e-05, |
| "loss": 1.1414, |
| "step": 5245 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 8.894878706199461e-05, |
| "loss": 1.1661, |
| "step": 5250 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 8.884097035040431e-05, |
| "loss": 1.1618, |
| "step": 5255 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 8.873315363881401e-05, |
| "loss": 1.1907, |
| "step": 5260 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 8.862533692722372e-05, |
| "loss": 1.176, |
| "step": 5265 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 8.851752021563344e-05, |
| "loss": 1.1137, |
| "step": 5270 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 8.840970350404313e-05, |
| "loss": 1.1383, |
| "step": 5275 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 8.830188679245283e-05, |
| "loss": 1.1332, |
| "step": 5280 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 8.819407008086253e-05, |
| "loss": 1.122, |
| "step": 5285 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 8.808625336927224e-05, |
| "loss": 1.1449, |
| "step": 5290 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 8.797843665768194e-05, |
| "loss": 1.1524, |
| "step": 5295 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 8.787061994609165e-05, |
| "loss": 1.1562, |
| "step": 5300 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 8.776280323450135e-05, |
| "loss": 1.1209, |
| "step": 5305 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 8.765498652291105e-05, |
| "loss": 1.1726, |
| "step": 5310 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 8.754716981132076e-05, |
| "loss": 1.1234, |
| "step": 5315 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 8.743935309973046e-05, |
| "loss": 1.1384, |
| "step": 5320 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 8.733153638814017e-05, |
| "loss": 1.1346, |
| "step": 5325 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 8.722371967654987e-05, |
| "loss": 1.1506, |
| "step": 5330 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 8.711590296495957e-05, |
| "loss": 1.1599, |
| "step": 5335 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 8.700808625336928e-05, |
| "loss": 1.1481, |
| "step": 5340 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 8.690026954177898e-05, |
| "loss": 1.1296, |
| "step": 5345 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 8.679245283018869e-05, |
| "loss": 1.1611, |
| "step": 5350 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 8.668463611859839e-05, |
| "loss": 1.1754, |
| "step": 5355 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 8.657681940700809e-05, |
| "loss": 1.1376, |
| "step": 5360 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 8.646900269541778e-05, |
| "loss": 1.127, |
| "step": 5365 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 8.63611859838275e-05, |
| "loss": 1.1552, |
| "step": 5370 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 8.625336927223721e-05, |
| "loss": 1.1317, |
| "step": 5375 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 8.61455525606469e-05, |
| "loss": 1.1213, |
| "step": 5380 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 8.603773584905662e-05, |
| "loss": 1.1766, |
| "step": 5385 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 8.59299191374663e-05, |
| "loss": 1.11, |
| "step": 5390 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 8.582210242587601e-05, |
| "loss": 1.149, |
| "step": 5395 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 8.571428571428571e-05, |
| "loss": 1.178, |
| "step": 5400 |
| }, |
| { |
| "epoch": 1.73, |
| "eval_loss": 1.1253609657287598, |
| "eval_runtime": 707.9371, |
| "eval_samples_per_second": 7.063, |
| "eval_steps_per_second": 0.883, |
| "step": 5400 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 8.560646900269542e-05, |
| "loss": 1.1173, |
| "step": 5405 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 8.549865229110514e-05, |
| "loss": 1.1671, |
| "step": 5410 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 8.539083557951482e-05, |
| "loss": 1.1929, |
| "step": 5415 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 8.528301886792453e-05, |
| "loss": 1.1562, |
| "step": 5420 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 8.517520215633423e-05, |
| "loss": 1.1141, |
| "step": 5425 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 8.506738544474394e-05, |
| "loss": 1.1815, |
| "step": 5430 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 8.495956873315364e-05, |
| "loss": 1.1629, |
| "step": 5435 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 8.485175202156335e-05, |
| "loss": 1.1369, |
| "step": 5440 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 8.474393530997305e-05, |
| "loss": 1.1209, |
| "step": 5445 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 8.463611859838275e-05, |
| "loss": 1.1323, |
| "step": 5450 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 8.452830188679246e-05, |
| "loss": 1.1619, |
| "step": 5455 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 8.442048517520216e-05, |
| "loss": 1.1546, |
| "step": 5460 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 8.431266846361187e-05, |
| "loss": 1.1305, |
| "step": 5465 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 8.420485175202156e-05, |
| "loss": 1.1441, |
| "step": 5470 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 8.409703504043127e-05, |
| "loss": 1.1359, |
| "step": 5475 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 8.398921832884098e-05, |
| "loss": 1.1962, |
| "step": 5480 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 8.388140161725068e-05, |
| "loss": 1.1114, |
| "step": 5485 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 8.377358490566039e-05, |
| "loss": 1.1593, |
| "step": 5490 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 8.366576819407009e-05, |
| "loss": 1.1577, |
| "step": 5495 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 8.355795148247979e-05, |
| "loss": 1.1396, |
| "step": 5500 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 8.345013477088949e-05, |
| "loss": 1.1991, |
| "step": 5505 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 8.33423180592992e-05, |
| "loss": 1.1574, |
| "step": 5510 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 8.323450134770891e-05, |
| "loss": 1.1467, |
| "step": 5515 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 8.312668463611861e-05, |
| "loss": 1.1064, |
| "step": 5520 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 8.30188679245283e-05, |
| "loss": 1.1502, |
| "step": 5525 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 8.2911051212938e-05, |
| "loss": 1.1659, |
| "step": 5530 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 8.280323450134772e-05, |
| "loss": 1.1119, |
| "step": 5535 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 8.269541778975741e-05, |
| "loss": 1.1043, |
| "step": 5540 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 8.258760107816713e-05, |
| "loss": 1.1421, |
| "step": 5545 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 8.247978436657682e-05, |
| "loss": 1.1593, |
| "step": 5550 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 8.237196765498652e-05, |
| "loss": 1.1251, |
| "step": 5555 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 8.226415094339623e-05, |
| "loss": 1.1282, |
| "step": 5560 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 8.215633423180593e-05, |
| "loss": 1.1778, |
| "step": 5565 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 8.204851752021564e-05, |
| "loss": 1.1589, |
| "step": 5570 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 8.194070080862534e-05, |
| "loss": 1.1671, |
| "step": 5575 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 8.183288409703504e-05, |
| "loss": 1.1406, |
| "step": 5580 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 8.172506738544474e-05, |
| "loss": 1.1273, |
| "step": 5585 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 8.161725067385445e-05, |
| "loss": 1.154, |
| "step": 5590 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 8.150943396226416e-05, |
| "loss": 1.1378, |
| "step": 5595 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 8.140161725067386e-05, |
| "loss": 1.1291, |
| "step": 5600 |
| }, |
| { |
| "epoch": 1.79, |
| "eval_loss": 1.1246120929718018, |
| "eval_runtime": 708.847, |
| "eval_samples_per_second": 7.054, |
| "eval_steps_per_second": 0.882, |
| "step": 5600 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 8.129380053908356e-05, |
| "loss": 1.1714, |
| "step": 5605 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 8.118598382749326e-05, |
| "loss": 1.1276, |
| "step": 5610 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 8.107816711590297e-05, |
| "loss": 1.1828, |
| "step": 5615 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 8.097035040431267e-05, |
| "loss": 1.1738, |
| "step": 5620 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 8.086253369272238e-05, |
| "loss": 1.1562, |
| "step": 5625 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 8.075471698113208e-05, |
| "loss": 1.1343, |
| "step": 5630 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 8.064690026954178e-05, |
| "loss": 1.1061, |
| "step": 5635 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 8.053908355795149e-05, |
| "loss": 1.1554, |
| "step": 5640 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 8.043126684636119e-05, |
| "loss": 1.1211, |
| "step": 5645 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 8.03234501347709e-05, |
| "loss": 1.1452, |
| "step": 5650 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 8.02156334231806e-05, |
| "loss": 1.1848, |
| "step": 5655 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 8.01078167115903e-05, |
| "loss": 1.1437, |
| "step": 5660 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 8e-05, |
| "loss": 1.1274, |
| "step": 5665 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 7.98921832884097e-05, |
| "loss": 1.1448, |
| "step": 5670 |
| }, |
| { |
| "epoch": 1.82, |
| "learning_rate": 7.978436657681942e-05, |
| "loss": 1.1465, |
| "step": 5675 |
| }, |
| { |
| "epoch": 1.82, |
| "learning_rate": 7.967654986522911e-05, |
| "loss": 1.1623, |
| "step": 5680 |
| }, |
| { |
| "epoch": 1.82, |
| "learning_rate": 7.956873315363881e-05, |
| "loss": 1.1076, |
| "step": 5685 |
| }, |
| { |
| "epoch": 1.82, |
| "learning_rate": 7.946091644204851e-05, |
| "loss": 1.0992, |
| "step": 5690 |
| }, |
| { |
| "epoch": 1.82, |
| "learning_rate": 7.935309973045822e-05, |
| "loss": 1.1708, |
| "step": 5695 |
| }, |
| { |
| "epoch": 1.82, |
| "learning_rate": 7.924528301886794e-05, |
| "loss": 1.1544, |
| "step": 5700 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 7.913746630727763e-05, |
| "loss": 1.1789, |
| "step": 5705 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 7.902964959568733e-05, |
| "loss": 1.0851, |
| "step": 5710 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 7.892183288409703e-05, |
| "loss": 1.1349, |
| "step": 5715 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 7.881401617250674e-05, |
| "loss": 1.185, |
| "step": 5720 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 7.870619946091644e-05, |
| "loss": 1.1079, |
| "step": 5725 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 7.859838274932615e-05, |
| "loss": 1.1531, |
| "step": 5730 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 7.849056603773586e-05, |
| "loss": 1.1264, |
| "step": 5735 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 7.838274932614555e-05, |
| "loss": 1.1382, |
| "step": 5740 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 7.827493261455526e-05, |
| "loss": 1.156, |
| "step": 5745 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 7.816711590296496e-05, |
| "loss": 1.1298, |
| "step": 5750 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 7.805929919137467e-05, |
| "loss": 1.1912, |
| "step": 5755 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 7.795148247978437e-05, |
| "loss": 1.1744, |
| "step": 5760 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 7.784366576819407e-05, |
| "loss": 1.1279, |
| "step": 5765 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 7.773584905660378e-05, |
| "loss": 1.1514, |
| "step": 5770 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 7.762803234501348e-05, |
| "loss": 1.1473, |
| "step": 5775 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 7.752021563342319e-05, |
| "loss": 1.1433, |
| "step": 5780 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 7.741239892183289e-05, |
| "loss": 1.1108, |
| "step": 5785 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 7.73045822102426e-05, |
| "loss": 1.1355, |
| "step": 5790 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 7.719676549865228e-05, |
| "loss": 1.1299, |
| "step": 5795 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 7.7088948787062e-05, |
| "loss": 1.1235, |
| "step": 5800 |
| }, |
| { |
| "epoch": 1.86, |
| "eval_loss": 1.1234204769134521, |
| "eval_runtime": 708.361, |
| "eval_samples_per_second": 7.059, |
| "eval_steps_per_second": 0.882, |
| "step": 5800 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 7.698113207547171e-05, |
| "loss": 1.1556, |
| "step": 5805 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 7.68733153638814e-05, |
| "loss": 1.1396, |
| "step": 5810 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 7.676549865229112e-05, |
| "loss": 1.1485, |
| "step": 5815 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 7.665768194070082e-05, |
| "loss": 1.1156, |
| "step": 5820 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 7.654986522911051e-05, |
| "loss": 1.1641, |
| "step": 5825 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 7.644204851752021e-05, |
| "loss": 1.1273, |
| "step": 5830 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 7.633423180592992e-05, |
| "loss": 1.1479, |
| "step": 5835 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 7.622641509433964e-05, |
| "loss": 1.1303, |
| "step": 5840 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 7.611859838274933e-05, |
| "loss": 1.1477, |
| "step": 5845 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 7.601078167115903e-05, |
| "loss": 1.1717, |
| "step": 5850 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 7.590296495956873e-05, |
| "loss": 1.1657, |
| "step": 5855 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 7.579514824797844e-05, |
| "loss": 1.152, |
| "step": 5860 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 7.568733153638814e-05, |
| "loss": 1.1412, |
| "step": 5865 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 7.557951482479785e-05, |
| "loss": 1.1498, |
| "step": 5870 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 7.547169811320755e-05, |
| "loss": 1.1489, |
| "step": 5875 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 7.536388140161725e-05, |
| "loss": 1.0998, |
| "step": 5880 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 7.525606469002696e-05, |
| "loss": 1.1705, |
| "step": 5885 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 7.514824797843666e-05, |
| "loss": 1.1654, |
| "step": 5890 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 7.504043126684637e-05, |
| "loss": 1.1193, |
| "step": 5895 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 7.493261455525607e-05, |
| "loss": 1.1159, |
| "step": 5900 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 7.482479784366577e-05, |
| "loss": 1.1513, |
| "step": 5905 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 7.471698113207547e-05, |
| "loss": 1.1356, |
| "step": 5910 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 7.460916442048518e-05, |
| "loss": 1.1395, |
| "step": 5915 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 7.450134770889489e-05, |
| "loss": 1.1645, |
| "step": 5920 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 7.439353099730459e-05, |
| "loss": 1.1229, |
| "step": 5925 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 7.428571428571429e-05, |
| "loss": 1.1309, |
| "step": 5930 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 7.417789757412399e-05, |
| "loss": 1.1611, |
| "step": 5935 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 7.40700808625337e-05, |
| "loss": 1.1397, |
| "step": 5940 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 7.39622641509434e-05, |
| "loss": 1.1251, |
| "step": 5945 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 7.385444743935311e-05, |
| "loss": 1.1873, |
| "step": 5950 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 7.37466307277628e-05, |
| "loss": 1.1578, |
| "step": 5955 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 7.36388140161725e-05, |
| "loss": 1.1668, |
| "step": 5960 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 7.353099730458222e-05, |
| "loss": 1.1618, |
| "step": 5965 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 7.342318059299191e-05, |
| "loss": 1.1863, |
| "step": 5970 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 7.331536388140163e-05, |
| "loss": 1.1452, |
| "step": 5975 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 7.320754716981132e-05, |
| "loss": 1.1126, |
| "step": 5980 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 7.309973045822102e-05, |
| "loss": 1.1471, |
| "step": 5985 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 7.299191374663073e-05, |
| "loss": 1.161, |
| "step": 5990 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 7.288409703504043e-05, |
| "loss": 1.1914, |
| "step": 5995 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 7.277628032345014e-05, |
| "loss": 1.1169, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.92, |
| "eval_loss": 1.1221721172332764, |
| "eval_runtime": 708.7447, |
| "eval_samples_per_second": 7.055, |
| "eval_steps_per_second": 0.882, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 7.266846361185984e-05, |
| "loss": 1.1571, |
| "step": 6005 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 7.256064690026954e-05, |
| "loss": 1.1785, |
| "step": 6010 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 7.245283018867924e-05, |
| "loss": 1.1026, |
| "step": 6015 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 7.234501347708895e-05, |
| "loss": 1.1112, |
| "step": 6020 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 7.223719676549866e-05, |
| "loss": 1.1555, |
| "step": 6025 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 7.212938005390836e-05, |
| "loss": 1.2065, |
| "step": 6030 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 7.202156334231806e-05, |
| "loss": 1.127, |
| "step": 6035 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 7.191374663072776e-05, |
| "loss": 1.1024, |
| "step": 6040 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 7.180592991913747e-05, |
| "loss": 1.1093, |
| "step": 6045 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 7.169811320754717e-05, |
| "loss": 1.1564, |
| "step": 6050 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 7.159029649595688e-05, |
| "loss": 1.1111, |
| "step": 6055 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 7.148247978436659e-05, |
| "loss": 1.1451, |
| "step": 6060 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 7.137466307277628e-05, |
| "loss": 1.1357, |
| "step": 6065 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 7.126684636118599e-05, |
| "loss": 1.1229, |
| "step": 6070 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 7.115902964959569e-05, |
| "loss": 1.1311, |
| "step": 6075 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 7.10512129380054e-05, |
| "loss": 1.0995, |
| "step": 6080 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 7.09433962264151e-05, |
| "loss": 1.1511, |
| "step": 6085 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 7.08355795148248e-05, |
| "loss": 1.169, |
| "step": 6090 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 7.07277628032345e-05, |
| "loss": 1.1516, |
| "step": 6095 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 7.06199460916442e-05, |
| "loss": 1.1379, |
| "step": 6100 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 7.051212938005392e-05, |
| "loss": 1.1285, |
| "step": 6105 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 7.040431266846362e-05, |
| "loss": 1.1431, |
| "step": 6110 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 7.029649595687333e-05, |
| "loss": 1.104, |
| "step": 6115 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 7.018867924528301e-05, |
| "loss": 1.1092, |
| "step": 6120 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 7.008086253369272e-05, |
| "loss": 1.1518, |
| "step": 6125 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 6.997304582210244e-05, |
| "loss": 1.1509, |
| "step": 6130 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 6.986522911051213e-05, |
| "loss": 1.155, |
| "step": 6135 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 6.975741239892185e-05, |
| "loss": 1.1457, |
| "step": 6140 |
| }, |
| { |
| "epoch": 1.97, |
| "learning_rate": 6.964959568733153e-05, |
| "loss": 1.1325, |
| "step": 6145 |
| }, |
| { |
| "epoch": 1.97, |
| "learning_rate": 6.954177897574124e-05, |
| "loss": 1.1372, |
| "step": 6150 |
| }, |
| { |
| "epoch": 1.97, |
| "learning_rate": 6.943396226415094e-05, |
| "loss": 1.1459, |
| "step": 6155 |
| }, |
| { |
| "epoch": 1.97, |
| "learning_rate": 6.932614555256065e-05, |
| "loss": 1.1753, |
| "step": 6160 |
| }, |
| { |
| "epoch": 1.97, |
| "learning_rate": 6.921832884097036e-05, |
| "loss": 1.1231, |
| "step": 6165 |
| }, |
| { |
| "epoch": 1.97, |
| "learning_rate": 6.911051212938006e-05, |
| "loss": 1.139, |
| "step": 6170 |
| }, |
| { |
| "epoch": 1.98, |
| "learning_rate": 6.900269541778976e-05, |
| "loss": 1.1578, |
| "step": 6175 |
| }, |
| { |
| "epoch": 1.98, |
| "learning_rate": 6.889487870619946e-05, |
| "loss": 1.1372, |
| "step": 6180 |
| }, |
| { |
| "epoch": 1.98, |
| "learning_rate": 6.878706199460917e-05, |
| "loss": 1.1771, |
| "step": 6185 |
| }, |
| { |
| "epoch": 1.98, |
| "learning_rate": 6.867924528301887e-05, |
| "loss": 1.127, |
| "step": 6190 |
| }, |
| { |
| "epoch": 1.98, |
| "learning_rate": 6.857142857142858e-05, |
| "loss": 1.136, |
| "step": 6195 |
| }, |
| { |
| "epoch": 1.98, |
| "learning_rate": 6.846361185983828e-05, |
| "loss": 1.1494, |
| "step": 6200 |
| }, |
| { |
| "epoch": 1.98, |
| "eval_loss": 1.1213899850845337, |
| "eval_runtime": 708.4845, |
| "eval_samples_per_second": 7.057, |
| "eval_steps_per_second": 0.882, |
| "step": 6200 |
| }, |
| { |
| "epoch": 1.99, |
| "learning_rate": 6.835579514824798e-05, |
| "loss": 1.1189, |
| "step": 6205 |
| }, |
| { |
| "epoch": 1.99, |
| "learning_rate": 6.824797843665769e-05, |
| "loss": 1.1756, |
| "step": 6210 |
| }, |
| { |
| "epoch": 1.99, |
| "learning_rate": 6.814016172506739e-05, |
| "loss": 1.1679, |
| "step": 6215 |
| }, |
| { |
| "epoch": 1.99, |
| "learning_rate": 6.80323450134771e-05, |
| "loss": 1.1325, |
| "step": 6220 |
| }, |
| { |
| "epoch": 1.99, |
| "learning_rate": 6.79245283018868e-05, |
| "loss": 1.1555, |
| "step": 6225 |
| }, |
| { |
| "epoch": 1.99, |
| "learning_rate": 6.78167115902965e-05, |
| "loss": 1.1639, |
| "step": 6230 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 6.77088948787062e-05, |
| "loss": 1.1832, |
| "step": 6235 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 6.76010781671159e-05, |
| "loss": 1.1124, |
| "step": 6240 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 6.749326145552562e-05, |
| "loss": 1.1919, |
| "step": 6245 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 6.738544474393532e-05, |
| "loss": 1.144, |
| "step": 6250 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 6.727762803234501e-05, |
| "loss": 1.1215, |
| "step": 6255 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 6.716981132075471e-05, |
| "loss": 1.1617, |
| "step": 6260 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 6.706199460916442e-05, |
| "loss": 1.1014, |
| "step": 6265 |
| }, |
| { |
| "epoch": 2.01, |
| "learning_rate": 6.695417789757412e-05, |
| "loss": 1.1435, |
| "step": 6270 |
| }, |
| { |
| "epoch": 2.01, |
| "learning_rate": 6.684636118598383e-05, |
| "loss": 1.0861, |
| "step": 6275 |
| }, |
| { |
| "epoch": 2.01, |
| "learning_rate": 6.673854447439353e-05, |
| "loss": 1.1146, |
| "step": 6280 |
| }, |
| { |
| "epoch": 2.01, |
| "learning_rate": 6.663072776280323e-05, |
| "loss": 1.1454, |
| "step": 6285 |
| }, |
| { |
| "epoch": 2.01, |
| "learning_rate": 6.652291105121294e-05, |
| "loss": 1.1066, |
| "step": 6290 |
| }, |
| { |
| "epoch": 2.01, |
| "learning_rate": 6.641509433962264e-05, |
| "loss": 1.0868, |
| "step": 6295 |
| }, |
| { |
| "epoch": 2.02, |
| "learning_rate": 6.630727762803235e-05, |
| "loss": 1.0816, |
| "step": 6300 |
| }, |
| { |
| "epoch": 2.02, |
| "learning_rate": 6.619946091644205e-05, |
| "loss": 1.1261, |
| "step": 6305 |
| }, |
| { |
| "epoch": 2.02, |
| "learning_rate": 6.609164420485175e-05, |
| "loss": 1.1004, |
| "step": 6310 |
| }, |
| { |
| "epoch": 2.02, |
| "learning_rate": 6.598382749326146e-05, |
| "loss": 1.1468, |
| "step": 6315 |
| }, |
| { |
| "epoch": 2.02, |
| "learning_rate": 6.587601078167116e-05, |
| "loss": 1.1015, |
| "step": 6320 |
| }, |
| { |
| "epoch": 2.02, |
| "learning_rate": 6.576819407008087e-05, |
| "loss": 1.1251, |
| "step": 6325 |
| }, |
| { |
| "epoch": 2.03, |
| "learning_rate": 6.566037735849057e-05, |
| "loss": 1.1474, |
| "step": 6330 |
| }, |
| { |
| "epoch": 2.03, |
| "learning_rate": 6.555256064690027e-05, |
| "loss": 1.0953, |
| "step": 6335 |
| }, |
| { |
| "epoch": 2.03, |
| "learning_rate": 6.544474393530997e-05, |
| "loss": 1.1378, |
| "step": 6340 |
| }, |
| { |
| "epoch": 2.03, |
| "learning_rate": 6.533692722371968e-05, |
| "loss": 1.1533, |
| "step": 6345 |
| }, |
| { |
| "epoch": 2.03, |
| "learning_rate": 6.522911051212939e-05, |
| "loss": 1.1223, |
| "step": 6350 |
| }, |
| { |
| "epoch": 2.03, |
| "learning_rate": 6.512129380053909e-05, |
| "loss": 1.1522, |
| "step": 6355 |
| }, |
| { |
| "epoch": 2.04, |
| "learning_rate": 6.501347708894879e-05, |
| "loss": 1.1321, |
| "step": 6360 |
| }, |
| { |
| "epoch": 2.04, |
| "learning_rate": 6.490566037735849e-05, |
| "loss": 1.1377, |
| "step": 6365 |
| }, |
| { |
| "epoch": 2.04, |
| "learning_rate": 6.47978436657682e-05, |
| "loss": 1.13, |
| "step": 6370 |
| }, |
| { |
| "epoch": 2.04, |
| "learning_rate": 6.46900269541779e-05, |
| "loss": 1.1452, |
| "step": 6375 |
| }, |
| { |
| "epoch": 2.04, |
| "learning_rate": 6.458221024258761e-05, |
| "loss": 1.1247, |
| "step": 6380 |
| }, |
| { |
| "epoch": 2.04, |
| "learning_rate": 6.44743935309973e-05, |
| "loss": 1.1229, |
| "step": 6385 |
| }, |
| { |
| "epoch": 2.04, |
| "learning_rate": 6.4366576819407e-05, |
| "loss": 1.1578, |
| "step": 6390 |
| }, |
| { |
| "epoch": 2.05, |
| "learning_rate": 6.425876010781672e-05, |
| "loss": 1.0794, |
| "step": 6395 |
| }, |
| { |
| "epoch": 2.05, |
| "learning_rate": 6.415094339622641e-05, |
| "loss": 1.1032, |
| "step": 6400 |
| }, |
| { |
| "epoch": 2.05, |
| "eval_loss": 1.121792197227478, |
| "eval_runtime": 716.6981, |
| "eval_samples_per_second": 6.976, |
| "eval_steps_per_second": 0.872, |
| "step": 6400 |
| }, |
| { |
| "epoch": 2.05, |
| "learning_rate": 6.404312668463613e-05, |
| "loss": 1.1125, |
| "step": 6405 |
| }, |
| { |
| "epoch": 2.05, |
| "learning_rate": 6.393530997304582e-05, |
| "loss": 1.134, |
| "step": 6410 |
| }, |
| { |
| "epoch": 2.05, |
| "learning_rate": 6.382749326145552e-05, |
| "loss": 1.0889, |
| "step": 6415 |
| }, |
| { |
| "epoch": 2.05, |
| "learning_rate": 6.371967654986523e-05, |
| "loss": 1.1131, |
| "step": 6420 |
| }, |
| { |
| "epoch": 2.06, |
| "learning_rate": 6.361185983827493e-05, |
| "loss": 1.116, |
| "step": 6425 |
| }, |
| { |
| "epoch": 2.06, |
| "learning_rate": 6.350404312668464e-05, |
| "loss": 1.1867, |
| "step": 6430 |
| }, |
| { |
| "epoch": 2.06, |
| "learning_rate": 6.339622641509434e-05, |
| "loss": 1.1806, |
| "step": 6435 |
| }, |
| { |
| "epoch": 2.06, |
| "learning_rate": 6.328840970350405e-05, |
| "loss": 1.0977, |
| "step": 6440 |
| }, |
| { |
| "epoch": 2.06, |
| "learning_rate": 6.318059299191374e-05, |
| "loss": 1.139, |
| "step": 6445 |
| }, |
| { |
| "epoch": 2.06, |
| "learning_rate": 6.307277628032345e-05, |
| "loss": 1.1024, |
| "step": 6450 |
| }, |
| { |
| "epoch": 2.07, |
| "learning_rate": 6.296495956873316e-05, |
| "loss": 1.098, |
| "step": 6455 |
| }, |
| { |
| "epoch": 2.07, |
| "learning_rate": 6.285714285714286e-05, |
| "loss": 1.1284, |
| "step": 6460 |
| }, |
| { |
| "epoch": 2.07, |
| "learning_rate": 6.274932614555257e-05, |
| "loss": 1.1443, |
| "step": 6465 |
| }, |
| { |
| "epoch": 2.07, |
| "learning_rate": 6.264150943396226e-05, |
| "loss": 1.1181, |
| "step": 6470 |
| }, |
| { |
| "epoch": 2.07, |
| "learning_rate": 6.253369272237197e-05, |
| "loss": 1.1608, |
| "step": 6475 |
| }, |
| { |
| "epoch": 2.07, |
| "learning_rate": 6.242587601078167e-05, |
| "loss": 1.118, |
| "step": 6480 |
| }, |
| { |
| "epoch": 2.08, |
| "learning_rate": 6.231805929919138e-05, |
| "loss": 1.1304, |
| "step": 6485 |
| }, |
| { |
| "epoch": 2.08, |
| "learning_rate": 6.221024258760109e-05, |
| "loss": 1.1141, |
| "step": 6490 |
| }, |
| { |
| "epoch": 2.08, |
| "learning_rate": 6.210242587601079e-05, |
| "loss": 1.1557, |
| "step": 6495 |
| }, |
| { |
| "epoch": 2.08, |
| "learning_rate": 6.199460916442049e-05, |
| "loss": 1.1305, |
| "step": 6500 |
| }, |
| { |
| "epoch": 2.08, |
| "learning_rate": 6.188679245283019e-05, |
| "loss": 1.1406, |
| "step": 6505 |
| }, |
| { |
| "epoch": 2.08, |
| "learning_rate": 6.17789757412399e-05, |
| "loss": 1.165, |
| "step": 6510 |
| }, |
| { |
| "epoch": 2.08, |
| "learning_rate": 6.16711590296496e-05, |
| "loss": 1.1582, |
| "step": 6515 |
| }, |
| { |
| "epoch": 2.09, |
| "learning_rate": 6.156334231805931e-05, |
| "loss": 1.0994, |
| "step": 6520 |
| }, |
| { |
| "epoch": 2.09, |
| "learning_rate": 6.145552560646901e-05, |
| "loss": 1.1519, |
| "step": 6525 |
| }, |
| { |
| "epoch": 2.09, |
| "learning_rate": 6.13477088948787e-05, |
| "loss": 1.1391, |
| "step": 6530 |
| }, |
| { |
| "epoch": 2.09, |
| "learning_rate": 6.123989218328842e-05, |
| "loss": 1.1137, |
| "step": 6535 |
| }, |
| { |
| "epoch": 2.09, |
| "learning_rate": 6.113207547169812e-05, |
| "loss": 1.1192, |
| "step": 6540 |
| }, |
| { |
| "epoch": 2.09, |
| "learning_rate": 6.102425876010782e-05, |
| "loss": 1.1092, |
| "step": 6545 |
| }, |
| { |
| "epoch": 2.1, |
| "learning_rate": 6.091644204851752e-05, |
| "loss": 1.1046, |
| "step": 6550 |
| }, |
| { |
| "epoch": 2.1, |
| "learning_rate": 6.080862533692723e-05, |
| "loss": 1.0976, |
| "step": 6555 |
| }, |
| { |
| "epoch": 2.1, |
| "learning_rate": 6.070080862533692e-05, |
| "loss": 1.1552, |
| "step": 6560 |
| }, |
| { |
| "epoch": 2.1, |
| "learning_rate": 6.0592991913746634e-05, |
| "loss": 1.1162, |
| "step": 6565 |
| }, |
| { |
| "epoch": 2.1, |
| "learning_rate": 6.048517520215634e-05, |
| "loss": 1.1421, |
| "step": 6570 |
| }, |
| { |
| "epoch": 2.1, |
| "learning_rate": 6.037735849056604e-05, |
| "loss": 1.1436, |
| "step": 6575 |
| }, |
| { |
| "epoch": 2.11, |
| "learning_rate": 6.026954177897575e-05, |
| "loss": 1.1187, |
| "step": 6580 |
| }, |
| { |
| "epoch": 2.11, |
| "learning_rate": 6.016172506738544e-05, |
| "loss": 1.1287, |
| "step": 6585 |
| }, |
| { |
| "epoch": 2.11, |
| "learning_rate": 6.005390835579515e-05, |
| "loss": 1.1822, |
| "step": 6590 |
| }, |
| { |
| "epoch": 2.11, |
| "learning_rate": 5.994609164420485e-05, |
| "loss": 1.1511, |
| "step": 6595 |
| }, |
| { |
| "epoch": 2.11, |
| "learning_rate": 5.9838274932614556e-05, |
| "loss": 1.1269, |
| "step": 6600 |
| }, |
| { |
| "epoch": 2.11, |
| "eval_loss": 1.120741367340088, |
| "eval_runtime": 716.6947, |
| "eval_samples_per_second": 6.976, |
| "eval_steps_per_second": 0.872, |
| "step": 6600 |
| }, |
| { |
| "epoch": 2.11, |
| "learning_rate": 5.973045822102427e-05, |
| "loss": 1.1346, |
| "step": 6605 |
| }, |
| { |
| "epoch": 2.12, |
| "learning_rate": 5.9622641509433966e-05, |
| "loss": 1.1152, |
| "step": 6610 |
| }, |
| { |
| "epoch": 2.12, |
| "learning_rate": 5.951482479784367e-05, |
| "loss": 1.1415, |
| "step": 6615 |
| }, |
| { |
| "epoch": 2.12, |
| "learning_rate": 5.940700808625337e-05, |
| "loss": 1.1027, |
| "step": 6620 |
| }, |
| { |
| "epoch": 2.12, |
| "learning_rate": 5.9299191374663074e-05, |
| "loss": 1.0834, |
| "step": 6625 |
| }, |
| { |
| "epoch": 2.12, |
| "learning_rate": 5.919137466307277e-05, |
| "loss": 1.1248, |
| "step": 6630 |
| }, |
| { |
| "epoch": 2.12, |
| "learning_rate": 5.9083557951482484e-05, |
| "loss": 1.139, |
| "step": 6635 |
| }, |
| { |
| "epoch": 2.12, |
| "learning_rate": 5.897574123989219e-05, |
| "loss": 1.1356, |
| "step": 6640 |
| }, |
| { |
| "epoch": 2.13, |
| "learning_rate": 5.886792452830189e-05, |
| "loss": 1.1461, |
| "step": 6645 |
| }, |
| { |
| "epoch": 2.13, |
| "learning_rate": 5.876010781671159e-05, |
| "loss": 1.1083, |
| "step": 6650 |
| }, |
| { |
| "epoch": 2.13, |
| "learning_rate": 5.865229110512129e-05, |
| "loss": 1.1003, |
| "step": 6655 |
| }, |
| { |
| "epoch": 2.13, |
| "learning_rate": 5.8544474393531e-05, |
| "loss": 1.123, |
| "step": 6660 |
| }, |
| { |
| "epoch": 2.13, |
| "learning_rate": 5.84366576819407e-05, |
| "loss": 1.1715, |
| "step": 6665 |
| }, |
| { |
| "epoch": 2.13, |
| "learning_rate": 5.8328840970350406e-05, |
| "loss": 1.1531, |
| "step": 6670 |
| }, |
| { |
| "epoch": 2.14, |
| "learning_rate": 5.822102425876012e-05, |
| "loss": 1.1347, |
| "step": 6675 |
| }, |
| { |
| "epoch": 2.14, |
| "learning_rate": 5.811320754716981e-05, |
| "loss": 1.1178, |
| "step": 6680 |
| }, |
| { |
| "epoch": 2.14, |
| "learning_rate": 5.800539083557952e-05, |
| "loss": 1.1399, |
| "step": 6685 |
| }, |
| { |
| "epoch": 2.14, |
| "learning_rate": 5.789757412398922e-05, |
| "loss": 1.1273, |
| "step": 6690 |
| }, |
| { |
| "epoch": 2.14, |
| "learning_rate": 5.7789757412398925e-05, |
| "loss": 1.1454, |
| "step": 6695 |
| }, |
| { |
| "epoch": 2.14, |
| "learning_rate": 5.768194070080862e-05, |
| "loss": 1.0934, |
| "step": 6700 |
| }, |
| { |
| "epoch": 2.15, |
| "learning_rate": 5.757412398921833e-05, |
| "loss": 1.1846, |
| "step": 6705 |
| }, |
| { |
| "epoch": 2.15, |
| "learning_rate": 5.746630727762804e-05, |
| "loss": 1.1376, |
| "step": 6710 |
| }, |
| { |
| "epoch": 2.15, |
| "learning_rate": 5.735849056603774e-05, |
| "loss": 1.1198, |
| "step": 6715 |
| }, |
| { |
| "epoch": 2.15, |
| "learning_rate": 5.7250673854447443e-05, |
| "loss": 1.1395, |
| "step": 6720 |
| }, |
| { |
| "epoch": 2.15, |
| "learning_rate": 5.714285714285714e-05, |
| "loss": 1.1126, |
| "step": 6725 |
| }, |
| { |
| "epoch": 2.15, |
| "learning_rate": 5.7035040431266854e-05, |
| "loss": 1.1555, |
| "step": 6730 |
| }, |
| { |
| "epoch": 2.16, |
| "learning_rate": 5.6927223719676545e-05, |
| "loss": 1.1061, |
| "step": 6735 |
| }, |
| { |
| "epoch": 2.16, |
| "learning_rate": 5.681940700808626e-05, |
| "loss": 1.1171, |
| "step": 6740 |
| }, |
| { |
| "epoch": 2.16, |
| "learning_rate": 5.671159029649596e-05, |
| "loss": 1.1551, |
| "step": 6745 |
| }, |
| { |
| "epoch": 2.16, |
| "learning_rate": 5.660377358490566e-05, |
| "loss": 1.1333, |
| "step": 6750 |
| }, |
| { |
| "epoch": 2.16, |
| "learning_rate": 5.649595687331537e-05, |
| "loss": 1.1478, |
| "step": 6755 |
| }, |
| { |
| "epoch": 2.16, |
| "learning_rate": 5.638814016172507e-05, |
| "loss": 1.1385, |
| "step": 6760 |
| }, |
| { |
| "epoch": 2.16, |
| "learning_rate": 5.6280323450134775e-05, |
| "loss": 1.0975, |
| "step": 6765 |
| }, |
| { |
| "epoch": 2.17, |
| "learning_rate": 5.6172506738544474e-05, |
| "loss": 1.145, |
| "step": 6770 |
| }, |
| { |
| "epoch": 2.17, |
| "learning_rate": 5.606469002695418e-05, |
| "loss": 1.1109, |
| "step": 6775 |
| }, |
| { |
| "epoch": 2.17, |
| "learning_rate": 5.595687331536389e-05, |
| "loss": 1.1417, |
| "step": 6780 |
| }, |
| { |
| "epoch": 2.17, |
| "learning_rate": 5.584905660377359e-05, |
| "loss": 1.1496, |
| "step": 6785 |
| }, |
| { |
| "epoch": 2.17, |
| "learning_rate": 5.5741239892183294e-05, |
| "loss": 1.1372, |
| "step": 6790 |
| }, |
| { |
| "epoch": 2.17, |
| "learning_rate": 5.563342318059299e-05, |
| "loss": 1.1561, |
| "step": 6795 |
| }, |
| { |
| "epoch": 2.18, |
| "learning_rate": 5.55256064690027e-05, |
| "loss": 1.1561, |
| "step": 6800 |
| }, |
| { |
| "epoch": 2.18, |
| "eval_loss": 1.1202932596206665, |
| "eval_runtime": 717.4224, |
| "eval_samples_per_second": 6.969, |
| "eval_steps_per_second": 0.871, |
| "step": 6800 |
| }, |
| { |
| "epoch": 2.18, |
| "learning_rate": 5.5417789757412396e-05, |
| "loss": 1.0799, |
| "step": 6805 |
| }, |
| { |
| "epoch": 2.18, |
| "learning_rate": 5.530997304582211e-05, |
| "loss": 1.1074, |
| "step": 6810 |
| }, |
| { |
| "epoch": 2.18, |
| "learning_rate": 5.520215633423181e-05, |
| "loss": 1.1076, |
| "step": 6815 |
| }, |
| { |
| "epoch": 2.18, |
| "learning_rate": 5.509433962264151e-05, |
| "loss": 1.0994, |
| "step": 6820 |
| }, |
| { |
| "epoch": 2.18, |
| "learning_rate": 5.4986522911051216e-05, |
| "loss": 1.1337, |
| "step": 6825 |
| }, |
| { |
| "epoch": 2.19, |
| "learning_rate": 5.4878706199460914e-05, |
| "loss": 1.1384, |
| "step": 6830 |
| }, |
| { |
| "epoch": 2.19, |
| "learning_rate": 5.4770889487870626e-05, |
| "loss": 1.1165, |
| "step": 6835 |
| }, |
| { |
| "epoch": 2.19, |
| "learning_rate": 5.4663072776280324e-05, |
| "loss": 1.1305, |
| "step": 6840 |
| }, |
| { |
| "epoch": 2.19, |
| "learning_rate": 5.455525606469003e-05, |
| "loss": 1.1546, |
| "step": 6845 |
| }, |
| { |
| "epoch": 2.19, |
| "learning_rate": 5.444743935309974e-05, |
| "loss": 1.1509, |
| "step": 6850 |
| }, |
| { |
| "epoch": 2.19, |
| "learning_rate": 5.433962264150943e-05, |
| "loss": 1.1228, |
| "step": 6855 |
| }, |
| { |
| "epoch": 2.2, |
| "learning_rate": 5.4231805929919145e-05, |
| "loss": 1.1319, |
| "step": 6860 |
| }, |
| { |
| "epoch": 2.2, |
| "learning_rate": 5.412398921832884e-05, |
| "loss": 1.1475, |
| "step": 6865 |
| }, |
| { |
| "epoch": 2.2, |
| "learning_rate": 5.401617250673855e-05, |
| "loss": 1.162, |
| "step": 6870 |
| }, |
| { |
| "epoch": 2.2, |
| "learning_rate": 5.3908355795148246e-05, |
| "loss": 1.1662, |
| "step": 6875 |
| }, |
| { |
| "epoch": 2.2, |
| "learning_rate": 5.380053908355796e-05, |
| "loss": 1.1238, |
| "step": 6880 |
| }, |
| { |
| "epoch": 2.2, |
| "learning_rate": 5.369272237196766e-05, |
| "loss": 1.1314, |
| "step": 6885 |
| }, |
| { |
| "epoch": 2.2, |
| "learning_rate": 5.358490566037736e-05, |
| "loss": 1.1442, |
| "step": 6890 |
| }, |
| { |
| "epoch": 2.21, |
| "learning_rate": 5.3477088948787067e-05, |
| "loss": 1.1729, |
| "step": 6895 |
| }, |
| { |
| "epoch": 2.21, |
| "learning_rate": 5.3369272237196765e-05, |
| "loss": 1.1426, |
| "step": 6900 |
| }, |
| { |
| "epoch": 2.21, |
| "learning_rate": 5.326145552560648e-05, |
| "loss": 1.138, |
| "step": 6905 |
| }, |
| { |
| "epoch": 2.21, |
| "learning_rate": 5.315363881401617e-05, |
| "loss": 1.0854, |
| "step": 6910 |
| }, |
| { |
| "epoch": 2.21, |
| "learning_rate": 5.304582210242588e-05, |
| "loss": 1.1255, |
| "step": 6915 |
| }, |
| { |
| "epoch": 2.21, |
| "learning_rate": 5.293800539083558e-05, |
| "loss": 1.1752, |
| "step": 6920 |
| }, |
| { |
| "epoch": 2.22, |
| "learning_rate": 5.283018867924528e-05, |
| "loss": 1.1557, |
| "step": 6925 |
| }, |
| { |
| "epoch": 2.22, |
| "learning_rate": 5.2722371967654995e-05, |
| "loss": 1.1245, |
| "step": 6930 |
| }, |
| { |
| "epoch": 2.22, |
| "learning_rate": 5.2614555256064694e-05, |
| "loss": 1.1235, |
| "step": 6935 |
| }, |
| { |
| "epoch": 2.22, |
| "learning_rate": 5.25067385444744e-05, |
| "loss": 1.1595, |
| "step": 6940 |
| }, |
| { |
| "epoch": 2.22, |
| "learning_rate": 5.23989218328841e-05, |
| "loss": 1.1765, |
| "step": 6945 |
| }, |
| { |
| "epoch": 2.22, |
| "learning_rate": 5.22911051212938e-05, |
| "loss": 1.1796, |
| "step": 6950 |
| }, |
| { |
| "epoch": 2.23, |
| "learning_rate": 5.21832884097035e-05, |
| "loss": 1.0909, |
| "step": 6955 |
| }, |
| { |
| "epoch": 2.23, |
| "learning_rate": 5.207547169811321e-05, |
| "loss": 1.1388, |
| "step": 6960 |
| }, |
| { |
| "epoch": 2.23, |
| "learning_rate": 5.196765498652292e-05, |
| "loss": 1.1379, |
| "step": 6965 |
| }, |
| { |
| "epoch": 2.23, |
| "learning_rate": 5.1859838274932615e-05, |
| "loss": 1.1465, |
| "step": 6970 |
| }, |
| { |
| "epoch": 2.23, |
| "learning_rate": 5.175202156334232e-05, |
| "loss": 1.1286, |
| "step": 6975 |
| }, |
| { |
| "epoch": 2.23, |
| "learning_rate": 5.164420485175202e-05, |
| "loss": 1.1646, |
| "step": 6980 |
| }, |
| { |
| "epoch": 2.24, |
| "learning_rate": 5.153638814016173e-05, |
| "loss": 1.1102, |
| "step": 6985 |
| }, |
| { |
| "epoch": 2.24, |
| "learning_rate": 5.142857142857143e-05, |
| "loss": 1.1754, |
| "step": 6990 |
| }, |
| { |
| "epoch": 2.24, |
| "learning_rate": 5.1320754716981134e-05, |
| "loss": 1.1503, |
| "step": 6995 |
| }, |
| { |
| "epoch": 2.24, |
| "learning_rate": 5.1212938005390846e-05, |
| "loss": 1.152, |
| "step": 7000 |
| }, |
| { |
| "epoch": 2.24, |
| "eval_loss": 1.1198647022247314, |
| "eval_runtime": 717.1594, |
| "eval_samples_per_second": 6.972, |
| "eval_steps_per_second": 0.871, |
| "step": 7000 |
| }, |
| { |
| "epoch": 2.24, |
| "learning_rate": 5.110512129380054e-05, |
| "loss": 1.1532, |
| "step": 7005 |
| }, |
| { |
| "epoch": 2.24, |
| "learning_rate": 5.099730458221025e-05, |
| "loss": 1.1111, |
| "step": 7010 |
| }, |
| { |
| "epoch": 2.24, |
| "learning_rate": 5.088948787061995e-05, |
| "loss": 1.1262, |
| "step": 7015 |
| }, |
| { |
| "epoch": 2.25, |
| "learning_rate": 5.078167115902965e-05, |
| "loss": 1.1293, |
| "step": 7020 |
| }, |
| { |
| "epoch": 2.25, |
| "learning_rate": 5.067385444743935e-05, |
| "loss": 1.1026, |
| "step": 7025 |
| }, |
| { |
| "epoch": 2.25, |
| "learning_rate": 5.0566037735849056e-05, |
| "loss": 1.1437, |
| "step": 7030 |
| }, |
| { |
| "epoch": 2.25, |
| "learning_rate": 5.045822102425877e-05, |
| "loss": 1.1341, |
| "step": 7035 |
| }, |
| { |
| "epoch": 2.25, |
| "learning_rate": 5.0350404312668466e-05, |
| "loss": 1.1646, |
| "step": 7040 |
| }, |
| { |
| "epoch": 2.25, |
| "learning_rate": 5.024258760107817e-05, |
| "loss": 1.1285, |
| "step": 7045 |
| }, |
| { |
| "epoch": 2.26, |
| "learning_rate": 5.013477088948787e-05, |
| "loss": 1.1252, |
| "step": 7050 |
| }, |
| { |
| "epoch": 2.26, |
| "learning_rate": 5.002695417789758e-05, |
| "loss": 1.1496, |
| "step": 7055 |
| }, |
| { |
| "epoch": 2.26, |
| "learning_rate": 4.991913746630728e-05, |
| "loss": 1.1383, |
| "step": 7060 |
| }, |
| { |
| "epoch": 2.26, |
| "learning_rate": 4.9811320754716985e-05, |
| "loss": 1.1362, |
| "step": 7065 |
| }, |
| { |
| "epoch": 2.26, |
| "learning_rate": 4.970350404312669e-05, |
| "loss": 1.1089, |
| "step": 7070 |
| }, |
| { |
| "epoch": 2.26, |
| "learning_rate": 4.959568733153639e-05, |
| "loss": 1.097, |
| "step": 7075 |
| }, |
| { |
| "epoch": 2.27, |
| "learning_rate": 4.948787061994609e-05, |
| "loss": 1.1597, |
| "step": 7080 |
| }, |
| { |
| "epoch": 2.27, |
| "learning_rate": 4.93800539083558e-05, |
| "loss": 1.1252, |
| "step": 7085 |
| }, |
| { |
| "epoch": 2.27, |
| "learning_rate": 4.92722371967655e-05, |
| "loss": 1.1322, |
| "step": 7090 |
| }, |
| { |
| "epoch": 2.27, |
| "learning_rate": 4.916442048517521e-05, |
| "loss": 1.1595, |
| "step": 7095 |
| }, |
| { |
| "epoch": 2.27, |
| "learning_rate": 4.9056603773584906e-05, |
| "loss": 1.1265, |
| "step": 7100 |
| }, |
| { |
| "epoch": 2.27, |
| "learning_rate": 4.894878706199461e-05, |
| "loss": 1.1304, |
| "step": 7105 |
| }, |
| { |
| "epoch": 2.28, |
| "learning_rate": 4.8840970350404317e-05, |
| "loss": 1.1174, |
| "step": 7110 |
| }, |
| { |
| "epoch": 2.28, |
| "learning_rate": 4.8733153638814015e-05, |
| "loss": 1.137, |
| "step": 7115 |
| }, |
| { |
| "epoch": 2.28, |
| "learning_rate": 4.862533692722373e-05, |
| "loss": 1.1274, |
| "step": 7120 |
| }, |
| { |
| "epoch": 2.28, |
| "learning_rate": 4.8517520215633425e-05, |
| "loss": 1.1584, |
| "step": 7125 |
| }, |
| { |
| "epoch": 2.28, |
| "learning_rate": 4.840970350404313e-05, |
| "loss": 1.1802, |
| "step": 7130 |
| }, |
| { |
| "epoch": 2.28, |
| "learning_rate": 4.8301886792452835e-05, |
| "loss": 1.1159, |
| "step": 7135 |
| }, |
| { |
| "epoch": 2.28, |
| "learning_rate": 4.8194070080862533e-05, |
| "loss": 1.1318, |
| "step": 7140 |
| }, |
| { |
| "epoch": 2.29, |
| "learning_rate": 4.808625336927224e-05, |
| "loss": 1.1352, |
| "step": 7145 |
| }, |
| { |
| "epoch": 2.29, |
| "learning_rate": 4.7978436657681944e-05, |
| "loss": 1.1037, |
| "step": 7150 |
| }, |
| { |
| "epoch": 2.29, |
| "learning_rate": 4.787061994609164e-05, |
| "loss": 1.1554, |
| "step": 7155 |
| }, |
| { |
| "epoch": 2.29, |
| "learning_rate": 4.7762803234501354e-05, |
| "loss": 1.1338, |
| "step": 7160 |
| }, |
| { |
| "epoch": 2.29, |
| "learning_rate": 4.765498652291105e-05, |
| "loss": 1.1391, |
| "step": 7165 |
| }, |
| { |
| "epoch": 2.29, |
| "learning_rate": 4.754716981132076e-05, |
| "loss": 1.1573, |
| "step": 7170 |
| }, |
| { |
| "epoch": 2.3, |
| "learning_rate": 4.743935309973046e-05, |
| "loss": 1.125, |
| "step": 7175 |
| }, |
| { |
| "epoch": 2.3, |
| "learning_rate": 4.733153638814016e-05, |
| "loss": 1.124, |
| "step": 7180 |
| }, |
| { |
| "epoch": 2.3, |
| "learning_rate": 4.7223719676549865e-05, |
| "loss": 1.1424, |
| "step": 7185 |
| }, |
| { |
| "epoch": 2.3, |
| "learning_rate": 4.711590296495957e-05, |
| "loss": 1.1412, |
| "step": 7190 |
| }, |
| { |
| "epoch": 2.3, |
| "learning_rate": 4.7008086253369276e-05, |
| "loss": 1.0773, |
| "step": 7195 |
| }, |
| { |
| "epoch": 2.3, |
| "learning_rate": 4.690026954177898e-05, |
| "loss": 1.1335, |
| "step": 7200 |
| }, |
| { |
| "epoch": 2.3, |
| "eval_loss": 1.1191344261169434, |
| "eval_runtime": 716.7443, |
| "eval_samples_per_second": 6.976, |
| "eval_steps_per_second": 0.872, |
| "step": 7200 |
| }, |
| { |
| "epoch": 2.31, |
| "learning_rate": 4.679245283018868e-05, |
| "loss": 1.1325, |
| "step": 7205 |
| }, |
| { |
| "epoch": 2.31, |
| "learning_rate": 4.6684636118598384e-05, |
| "loss": 1.1582, |
| "step": 7210 |
| }, |
| { |
| "epoch": 2.31, |
| "learning_rate": 4.657681940700809e-05, |
| "loss": 1.1142, |
| "step": 7215 |
| }, |
| { |
| "epoch": 2.31, |
| "learning_rate": 4.646900269541779e-05, |
| "loss": 1.1219, |
| "step": 7220 |
| }, |
| { |
| "epoch": 2.31, |
| "learning_rate": 4.636118598382749e-05, |
| "loss": 1.1456, |
| "step": 7225 |
| }, |
| { |
| "epoch": 2.31, |
| "learning_rate": 4.6253369272237204e-05, |
| "loss": 1.1222, |
| "step": 7230 |
| }, |
| { |
| "epoch": 2.32, |
| "learning_rate": 4.61455525606469e-05, |
| "loss": 1.1322, |
| "step": 7235 |
| }, |
| { |
| "epoch": 2.32, |
| "learning_rate": 4.603773584905661e-05, |
| "loss": 1.1216, |
| "step": 7240 |
| }, |
| { |
| "epoch": 2.32, |
| "learning_rate": 4.592991913746631e-05, |
| "loss": 1.1269, |
| "step": 7245 |
| }, |
| { |
| "epoch": 2.32, |
| "learning_rate": 4.582210242587601e-05, |
| "loss": 1.1032, |
| "step": 7250 |
| }, |
| { |
| "epoch": 2.32, |
| "learning_rate": 4.5714285714285716e-05, |
| "loss": 1.1208, |
| "step": 7255 |
| }, |
| { |
| "epoch": 2.32, |
| "learning_rate": 4.560646900269542e-05, |
| "loss": 1.1113, |
| "step": 7260 |
| }, |
| { |
| "epoch": 2.32, |
| "learning_rate": 4.5498652291105126e-05, |
| "loss": 1.1572, |
| "step": 7265 |
| }, |
| { |
| "epoch": 2.33, |
| "learning_rate": 4.539083557951483e-05, |
| "loss": 1.1526, |
| "step": 7270 |
| }, |
| { |
| "epoch": 2.33, |
| "learning_rate": 4.528301886792453e-05, |
| "loss": 1.1145, |
| "step": 7275 |
| }, |
| { |
| "epoch": 2.33, |
| "learning_rate": 4.5175202156334235e-05, |
| "loss": 1.1173, |
| "step": 7280 |
| }, |
| { |
| "epoch": 2.33, |
| "learning_rate": 4.506738544474394e-05, |
| "loss": 1.1657, |
| "step": 7285 |
| }, |
| { |
| "epoch": 2.33, |
| "learning_rate": 4.495956873315364e-05, |
| "loss": 1.1353, |
| "step": 7290 |
| }, |
| { |
| "epoch": 2.33, |
| "learning_rate": 4.485175202156334e-05, |
| "loss": 1.136, |
| "step": 7295 |
| }, |
| { |
| "epoch": 2.34, |
| "learning_rate": 4.474393530997305e-05, |
| "loss": 1.0772, |
| "step": 7300 |
| }, |
| { |
| "epoch": 2.34, |
| "learning_rate": 4.463611859838275e-05, |
| "loss": 1.1747, |
| "step": 7305 |
| }, |
| { |
| "epoch": 2.34, |
| "learning_rate": 4.452830188679246e-05, |
| "loss": 1.1233, |
| "step": 7310 |
| }, |
| { |
| "epoch": 2.34, |
| "learning_rate": 4.4420485175202157e-05, |
| "loss": 1.1423, |
| "step": 7315 |
| }, |
| { |
| "epoch": 2.34, |
| "learning_rate": 4.431266846361186e-05, |
| "loss": 1.1383, |
| "step": 7320 |
| }, |
| { |
| "epoch": 2.34, |
| "learning_rate": 4.420485175202157e-05, |
| "loss": 1.1057, |
| "step": 7325 |
| }, |
| { |
| "epoch": 2.35, |
| "learning_rate": 4.4097035040431265e-05, |
| "loss": 1.1164, |
| "step": 7330 |
| }, |
| { |
| "epoch": 2.35, |
| "learning_rate": 4.398921832884097e-05, |
| "loss": 1.1548, |
| "step": 7335 |
| }, |
| { |
| "epoch": 2.35, |
| "learning_rate": 4.3881401617250675e-05, |
| "loss": 1.0605, |
| "step": 7340 |
| }, |
| { |
| "epoch": 2.35, |
| "learning_rate": 4.377358490566038e-05, |
| "loss": 1.103, |
| "step": 7345 |
| }, |
| { |
| "epoch": 2.35, |
| "learning_rate": 4.3665768194070085e-05, |
| "loss": 1.1284, |
| "step": 7350 |
| }, |
| { |
| "epoch": 2.35, |
| "learning_rate": 4.3557951482479784e-05, |
| "loss": 1.1254, |
| "step": 7355 |
| }, |
| { |
| "epoch": 2.36, |
| "learning_rate": 4.345013477088949e-05, |
| "loss": 1.1247, |
| "step": 7360 |
| }, |
| { |
| "epoch": 2.36, |
| "learning_rate": 4.3342318059299194e-05, |
| "loss": 1.142, |
| "step": 7365 |
| }, |
| { |
| "epoch": 2.36, |
| "learning_rate": 4.323450134770889e-05, |
| "loss": 1.1464, |
| "step": 7370 |
| }, |
| { |
| "epoch": 2.36, |
| "learning_rate": 4.3126684636118604e-05, |
| "loss": 1.1506, |
| "step": 7375 |
| }, |
| { |
| "epoch": 2.36, |
| "learning_rate": 4.301886792452831e-05, |
| "loss": 1.099, |
| "step": 7380 |
| }, |
| { |
| "epoch": 2.36, |
| "learning_rate": 4.291105121293801e-05, |
| "loss": 1.1338, |
| "step": 7385 |
| }, |
| { |
| "epoch": 2.36, |
| "learning_rate": 4.280323450134771e-05, |
| "loss": 1.1082, |
| "step": 7390 |
| }, |
| { |
| "epoch": 2.37, |
| "learning_rate": 4.269541778975741e-05, |
| "loss": 1.1218, |
| "step": 7395 |
| }, |
| { |
| "epoch": 2.37, |
| "learning_rate": 4.2587601078167116e-05, |
| "loss": 1.1341, |
| "step": 7400 |
| }, |
| { |
| "epoch": 2.37, |
| "eval_loss": 1.118433952331543, |
| "eval_runtime": 716.9245, |
| "eval_samples_per_second": 6.974, |
| "eval_steps_per_second": 0.872, |
| "step": 7400 |
| }, |
| { |
| "epoch": 2.37, |
| "learning_rate": 4.247978436657682e-05, |
| "loss": 1.1114, |
| "step": 7405 |
| }, |
| { |
| "epoch": 2.37, |
| "learning_rate": 4.2371967654986526e-05, |
| "loss": 1.1285, |
| "step": 7410 |
| }, |
| { |
| "epoch": 2.37, |
| "learning_rate": 4.226415094339623e-05, |
| "loss": 1.1396, |
| "step": 7415 |
| }, |
| { |
| "epoch": 2.37, |
| "learning_rate": 4.2156334231805936e-05, |
| "loss": 1.1344, |
| "step": 7420 |
| }, |
| { |
| "epoch": 2.38, |
| "learning_rate": 4.2048517520215634e-05, |
| "loss": 1.1173, |
| "step": 7425 |
| }, |
| { |
| "epoch": 2.38, |
| "learning_rate": 4.194070080862534e-05, |
| "loss": 1.1263, |
| "step": 7430 |
| }, |
| { |
| "epoch": 2.38, |
| "learning_rate": 4.1832884097035044e-05, |
| "loss": 1.0763, |
| "step": 7435 |
| }, |
| { |
| "epoch": 2.38, |
| "learning_rate": 4.172506738544474e-05, |
| "loss": 1.1176, |
| "step": 7440 |
| }, |
| { |
| "epoch": 2.38, |
| "learning_rate": 4.1617250673854454e-05, |
| "loss": 1.1052, |
| "step": 7445 |
| }, |
| { |
| "epoch": 2.38, |
| "learning_rate": 4.150943396226415e-05, |
| "loss": 1.1396, |
| "step": 7450 |
| }, |
| { |
| "epoch": 2.39, |
| "learning_rate": 4.140161725067386e-05, |
| "loss": 1.1377, |
| "step": 7455 |
| }, |
| { |
| "epoch": 2.39, |
| "learning_rate": 4.129380053908356e-05, |
| "loss": 1.1224, |
| "step": 7460 |
| }, |
| { |
| "epoch": 2.39, |
| "learning_rate": 4.118598382749326e-05, |
| "loss": 1.1033, |
| "step": 7465 |
| }, |
| { |
| "epoch": 2.39, |
| "learning_rate": 4.1078167115902966e-05, |
| "loss": 1.0884, |
| "step": 7470 |
| }, |
| { |
| "epoch": 2.39, |
| "learning_rate": 4.097035040431267e-05, |
| "loss": 1.0988, |
| "step": 7475 |
| }, |
| { |
| "epoch": 2.39, |
| "learning_rate": 4.086253369272237e-05, |
| "loss": 1.1457, |
| "step": 7480 |
| }, |
| { |
| "epoch": 2.4, |
| "learning_rate": 4.075471698113208e-05, |
| "loss": 1.1008, |
| "step": 7485 |
| }, |
| { |
| "epoch": 2.4, |
| "learning_rate": 4.064690026954178e-05, |
| "loss": 1.1183, |
| "step": 7490 |
| }, |
| { |
| "epoch": 2.4, |
| "learning_rate": 4.0539083557951485e-05, |
| "loss": 1.0995, |
| "step": 7495 |
| }, |
| { |
| "epoch": 2.4, |
| "learning_rate": 4.043126684636119e-05, |
| "loss": 1.1238, |
| "step": 7500 |
| }, |
| { |
| "epoch": 2.4, |
| "learning_rate": 4.032345013477089e-05, |
| "loss": 1.1586, |
| "step": 7505 |
| }, |
| { |
| "epoch": 2.4, |
| "learning_rate": 4.021563342318059e-05, |
| "loss": 1.181, |
| "step": 7510 |
| }, |
| { |
| "epoch": 2.4, |
| "learning_rate": 4.01078167115903e-05, |
| "loss": 1.0943, |
| "step": 7515 |
| }, |
| { |
| "epoch": 2.41, |
| "learning_rate": 4e-05, |
| "loss": 1.1529, |
| "step": 7520 |
| }, |
| { |
| "epoch": 2.41, |
| "learning_rate": 3.989218328840971e-05, |
| "loss": 1.1176, |
| "step": 7525 |
| }, |
| { |
| "epoch": 2.41, |
| "learning_rate": 3.978436657681941e-05, |
| "loss": 1.1275, |
| "step": 7530 |
| }, |
| { |
| "epoch": 2.41, |
| "learning_rate": 3.967654986522911e-05, |
| "loss": 1.1479, |
| "step": 7535 |
| }, |
| { |
| "epoch": 2.41, |
| "learning_rate": 3.956873315363882e-05, |
| "loss": 1.097, |
| "step": 7540 |
| }, |
| { |
| "epoch": 2.41, |
| "learning_rate": 3.9460916442048515e-05, |
| "loss": 1.1455, |
| "step": 7545 |
| }, |
| { |
| "epoch": 2.42, |
| "learning_rate": 3.935309973045822e-05, |
| "loss": 1.1122, |
| "step": 7550 |
| }, |
| { |
| "epoch": 2.42, |
| "learning_rate": 3.924528301886793e-05, |
| "loss": 1.1603, |
| "step": 7555 |
| }, |
| { |
| "epoch": 2.42, |
| "learning_rate": 3.913746630727763e-05, |
| "loss": 1.0986, |
| "step": 7560 |
| }, |
| { |
| "epoch": 2.42, |
| "learning_rate": 3.9029649595687335e-05, |
| "loss": 1.1328, |
| "step": 7565 |
| }, |
| { |
| "epoch": 2.42, |
| "learning_rate": 3.8921832884097034e-05, |
| "loss": 1.1158, |
| "step": 7570 |
| }, |
| { |
| "epoch": 2.42, |
| "learning_rate": 3.881401617250674e-05, |
| "loss": 1.1074, |
| "step": 7575 |
| }, |
| { |
| "epoch": 2.43, |
| "learning_rate": 3.8706199460916444e-05, |
| "loss": 1.142, |
| "step": 7580 |
| }, |
| { |
| "epoch": 2.43, |
| "learning_rate": 3.859838274932614e-05, |
| "loss": 1.1426, |
| "step": 7585 |
| }, |
| { |
| "epoch": 2.43, |
| "learning_rate": 3.8490566037735854e-05, |
| "loss": 1.1353, |
| "step": 7590 |
| }, |
| { |
| "epoch": 2.43, |
| "learning_rate": 3.838274932614556e-05, |
| "loss": 1.1017, |
| "step": 7595 |
| }, |
| { |
| "epoch": 2.43, |
| "learning_rate": 3.827493261455526e-05, |
| "loss": 1.0963, |
| "step": 7600 |
| }, |
| { |
| "epoch": 2.43, |
| "eval_loss": 1.117969036102295, |
| "eval_runtime": 716.8821, |
| "eval_samples_per_second": 6.975, |
| "eval_steps_per_second": 0.872, |
| "step": 7600 |
| }, |
| { |
| "epoch": 2.43, |
| "learning_rate": 3.816711590296496e-05, |
| "loss": 1.1373, |
| "step": 7605 |
| }, |
| { |
| "epoch": 2.44, |
| "learning_rate": 3.805929919137467e-05, |
| "loss": 1.1025, |
| "step": 7610 |
| }, |
| { |
| "epoch": 2.44, |
| "learning_rate": 3.7951482479784366e-05, |
| "loss": 1.1404, |
| "step": 7615 |
| }, |
| { |
| "epoch": 2.44, |
| "learning_rate": 3.784366576819407e-05, |
| "loss": 1.1269, |
| "step": 7620 |
| }, |
| { |
| "epoch": 2.44, |
| "learning_rate": 3.7735849056603776e-05, |
| "loss": 1.1264, |
| "step": 7625 |
| }, |
| { |
| "epoch": 2.44, |
| "learning_rate": 3.762803234501348e-05, |
| "loss": 1.1281, |
| "step": 7630 |
| }, |
| { |
| "epoch": 2.44, |
| "learning_rate": 3.7520215633423186e-05, |
| "loss": 1.132, |
| "step": 7635 |
| }, |
| { |
| "epoch": 2.44, |
| "learning_rate": 3.7412398921832884e-05, |
| "loss": 1.079, |
| "step": 7640 |
| }, |
| { |
| "epoch": 2.45, |
| "learning_rate": 3.730458221024259e-05, |
| "loss": 1.1217, |
| "step": 7645 |
| }, |
| { |
| "epoch": 2.45, |
| "learning_rate": 3.7196765498652294e-05, |
| "loss": 1.125, |
| "step": 7650 |
| }, |
| { |
| "epoch": 2.45, |
| "learning_rate": 3.708894878706199e-05, |
| "loss": 1.1258, |
| "step": 7655 |
| }, |
| { |
| "epoch": 2.45, |
| "learning_rate": 3.69811320754717e-05, |
| "loss": 1.0938, |
| "step": 7660 |
| }, |
| { |
| "epoch": 2.45, |
| "learning_rate": 3.68733153638814e-05, |
| "loss": 1.1181, |
| "step": 7665 |
| }, |
| { |
| "epoch": 2.45, |
| "learning_rate": 3.676549865229111e-05, |
| "loss": 1.1001, |
| "step": 7670 |
| }, |
| { |
| "epoch": 2.46, |
| "learning_rate": 3.665768194070081e-05, |
| "loss": 1.1424, |
| "step": 7675 |
| }, |
| { |
| "epoch": 2.46, |
| "learning_rate": 3.654986522911051e-05, |
| "loss": 1.1255, |
| "step": 7680 |
| }, |
| { |
| "epoch": 2.46, |
| "learning_rate": 3.6442048517520216e-05, |
| "loss": 1.1088, |
| "step": 7685 |
| }, |
| { |
| "epoch": 2.46, |
| "learning_rate": 3.633423180592992e-05, |
| "loss": 1.121, |
| "step": 7690 |
| }, |
| { |
| "epoch": 2.46, |
| "learning_rate": 3.622641509433962e-05, |
| "loss": 1.1385, |
| "step": 7695 |
| }, |
| { |
| "epoch": 2.46, |
| "learning_rate": 3.611859838274933e-05, |
| "loss": 1.1282, |
| "step": 7700 |
| }, |
| { |
| "epoch": 2.47, |
| "learning_rate": 3.601078167115903e-05, |
| "loss": 1.1501, |
| "step": 7705 |
| }, |
| { |
| "epoch": 2.47, |
| "learning_rate": 3.5902964959568735e-05, |
| "loss": 1.0923, |
| "step": 7710 |
| }, |
| { |
| "epoch": 2.47, |
| "learning_rate": 3.579514824797844e-05, |
| "loss": 1.153, |
| "step": 7715 |
| }, |
| { |
| "epoch": 2.47, |
| "learning_rate": 3.568733153638814e-05, |
| "loss": 1.0961, |
| "step": 7720 |
| }, |
| { |
| "epoch": 2.47, |
| "learning_rate": 3.557951482479784e-05, |
| "loss": 1.1106, |
| "step": 7725 |
| }, |
| { |
| "epoch": 2.47, |
| "learning_rate": 3.547169811320755e-05, |
| "loss": 1.1629, |
| "step": 7730 |
| }, |
| { |
| "epoch": 2.48, |
| "learning_rate": 3.536388140161725e-05, |
| "loss": 1.1088, |
| "step": 7735 |
| }, |
| { |
| "epoch": 2.48, |
| "learning_rate": 3.525606469002696e-05, |
| "loss": 1.1123, |
| "step": 7740 |
| }, |
| { |
| "epoch": 2.48, |
| "learning_rate": 3.5148247978436663e-05, |
| "loss": 1.1312, |
| "step": 7745 |
| }, |
| { |
| "epoch": 2.48, |
| "learning_rate": 3.504043126684636e-05, |
| "loss": 1.1153, |
| "step": 7750 |
| }, |
| { |
| "epoch": 2.48, |
| "learning_rate": 3.493261455525607e-05, |
| "loss": 1.1378, |
| "step": 7755 |
| }, |
| { |
| "epoch": 2.48, |
| "learning_rate": 3.4824797843665765e-05, |
| "loss": 1.1055, |
| "step": 7760 |
| }, |
| { |
| "epoch": 2.48, |
| "learning_rate": 3.471698113207547e-05, |
| "loss": 1.1149, |
| "step": 7765 |
| }, |
| { |
| "epoch": 2.49, |
| "learning_rate": 3.460916442048518e-05, |
| "loss": 1.1527, |
| "step": 7770 |
| }, |
| { |
| "epoch": 2.49, |
| "learning_rate": 3.450134770889488e-05, |
| "loss": 1.1662, |
| "step": 7775 |
| }, |
| { |
| "epoch": 2.49, |
| "learning_rate": 3.4393530997304585e-05, |
| "loss": 1.1549, |
| "step": 7780 |
| }, |
| { |
| "epoch": 2.49, |
| "learning_rate": 3.428571428571429e-05, |
| "loss": 1.1324, |
| "step": 7785 |
| }, |
| { |
| "epoch": 2.49, |
| "learning_rate": 3.417789757412399e-05, |
| "loss": 1.1516, |
| "step": 7790 |
| }, |
| { |
| "epoch": 2.49, |
| "learning_rate": 3.4070080862533694e-05, |
| "loss": 1.1692, |
| "step": 7795 |
| }, |
| { |
| "epoch": 2.5, |
| "learning_rate": 3.39622641509434e-05, |
| "loss": 1.1179, |
| "step": 7800 |
| }, |
| { |
| "epoch": 2.5, |
| "eval_loss": 1.1172122955322266, |
| "eval_runtime": 716.879, |
| "eval_samples_per_second": 6.975, |
| "eval_steps_per_second": 0.872, |
| "step": 7800 |
| }, |
| { |
| "epoch": 2.5, |
| "learning_rate": 3.38544474393531e-05, |
| "loss": 1.1023, |
| "step": 7805 |
| }, |
| { |
| "epoch": 2.5, |
| "learning_rate": 3.374663072776281e-05, |
| "loss": 1.1447, |
| "step": 7810 |
| }, |
| { |
| "epoch": 2.5, |
| "learning_rate": 3.363881401617251e-05, |
| "loss": 1.1184, |
| "step": 7815 |
| }, |
| { |
| "epoch": 2.5, |
| "learning_rate": 3.353099730458221e-05, |
| "loss": 1.1611, |
| "step": 7820 |
| }, |
| { |
| "epoch": 2.5, |
| "learning_rate": 3.342318059299192e-05, |
| "loss": 1.1307, |
| "step": 7825 |
| }, |
| { |
| "epoch": 2.51, |
| "learning_rate": 3.3315363881401616e-05, |
| "loss": 1.0971, |
| "step": 7830 |
| }, |
| { |
| "epoch": 2.51, |
| "learning_rate": 3.320754716981132e-05, |
| "loss": 1.1353, |
| "step": 7835 |
| }, |
| { |
| "epoch": 2.51, |
| "learning_rate": 3.3099730458221026e-05, |
| "loss": 1.1658, |
| "step": 7840 |
| }, |
| { |
| "epoch": 2.51, |
| "learning_rate": 3.299191374663073e-05, |
| "loss": 1.1222, |
| "step": 7845 |
| }, |
| { |
| "epoch": 2.51, |
| "learning_rate": 3.2884097035040436e-05, |
| "loss": 1.1599, |
| "step": 7850 |
| }, |
| { |
| "epoch": 2.51, |
| "learning_rate": 3.2776280323450134e-05, |
| "loss": 1.1143, |
| "step": 7855 |
| }, |
| { |
| "epoch": 2.52, |
| "learning_rate": 3.266846361185984e-05, |
| "loss": 1.1374, |
| "step": 7860 |
| }, |
| { |
| "epoch": 2.52, |
| "learning_rate": 3.2560646900269544e-05, |
| "loss": 1.1684, |
| "step": 7865 |
| }, |
| { |
| "epoch": 2.52, |
| "learning_rate": 3.245283018867924e-05, |
| "loss": 1.1088, |
| "step": 7870 |
| }, |
| { |
| "epoch": 2.52, |
| "learning_rate": 3.234501347708895e-05, |
| "loss": 1.1227, |
| "step": 7875 |
| }, |
| { |
| "epoch": 2.52, |
| "learning_rate": 3.223719676549865e-05, |
| "loss": 1.1582, |
| "step": 7880 |
| }, |
| { |
| "epoch": 2.52, |
| "learning_rate": 3.212938005390836e-05, |
| "loss": 1.1689, |
| "step": 7885 |
| }, |
| { |
| "epoch": 2.52, |
| "learning_rate": 3.202156334231806e-05, |
| "loss": 1.126, |
| "step": 7890 |
| }, |
| { |
| "epoch": 2.53, |
| "learning_rate": 3.191374663072776e-05, |
| "loss": 1.107, |
| "step": 7895 |
| }, |
| { |
| "epoch": 2.53, |
| "learning_rate": 3.1805929919137466e-05, |
| "loss": 1.1576, |
| "step": 7900 |
| }, |
| { |
| "epoch": 2.53, |
| "learning_rate": 3.169811320754717e-05, |
| "loss": 1.1343, |
| "step": 7905 |
| }, |
| { |
| "epoch": 2.53, |
| "learning_rate": 3.159029649595687e-05, |
| "loss": 1.151, |
| "step": 7910 |
| }, |
| { |
| "epoch": 2.53, |
| "learning_rate": 3.148247978436658e-05, |
| "loss": 1.1448, |
| "step": 7915 |
| }, |
| { |
| "epoch": 2.53, |
| "learning_rate": 3.1374663072776287e-05, |
| "loss": 1.1156, |
| "step": 7920 |
| }, |
| { |
| "epoch": 2.54, |
| "learning_rate": 3.1266846361185985e-05, |
| "loss": 1.0951, |
| "step": 7925 |
| }, |
| { |
| "epoch": 2.54, |
| "learning_rate": 3.115902964959569e-05, |
| "loss": 1.1228, |
| "step": 7930 |
| }, |
| { |
| "epoch": 2.54, |
| "learning_rate": 3.1051212938005395e-05, |
| "loss": 1.141, |
| "step": 7935 |
| }, |
| { |
| "epoch": 2.54, |
| "learning_rate": 3.094339622641509e-05, |
| "loss": 1.1388, |
| "step": 7940 |
| }, |
| { |
| "epoch": 2.54, |
| "learning_rate": 3.08355795148248e-05, |
| "loss": 1.1202, |
| "step": 7945 |
| }, |
| { |
| "epoch": 2.54, |
| "learning_rate": 3.0727762803234503e-05, |
| "loss": 1.1202, |
| "step": 7950 |
| }, |
| { |
| "epoch": 2.55, |
| "learning_rate": 3.061994609164421e-05, |
| "loss": 1.1414, |
| "step": 7955 |
| }, |
| { |
| "epoch": 2.55, |
| "learning_rate": 3.051212938005391e-05, |
| "loss": 1.1243, |
| "step": 7960 |
| }, |
| { |
| "epoch": 2.55, |
| "learning_rate": 3.0404312668463615e-05, |
| "loss": 1.1399, |
| "step": 7965 |
| }, |
| { |
| "epoch": 2.55, |
| "learning_rate": 3.0296495956873317e-05, |
| "loss": 1.1242, |
| "step": 7970 |
| }, |
| { |
| "epoch": 2.55, |
| "learning_rate": 3.018867924528302e-05, |
| "loss": 1.1221, |
| "step": 7975 |
| }, |
| { |
| "epoch": 2.55, |
| "learning_rate": 3.008086253369272e-05, |
| "loss": 1.0985, |
| "step": 7980 |
| }, |
| { |
| "epoch": 2.56, |
| "learning_rate": 2.9973045822102425e-05, |
| "loss": 1.1319, |
| "step": 7985 |
| }, |
| { |
| "epoch": 2.56, |
| "learning_rate": 2.9865229110512134e-05, |
| "loss": 1.124, |
| "step": 7990 |
| }, |
| { |
| "epoch": 2.56, |
| "learning_rate": 2.9757412398921835e-05, |
| "loss": 1.1069, |
| "step": 7995 |
| }, |
| { |
| "epoch": 2.56, |
| "learning_rate": 2.9649595687331537e-05, |
| "loss": 1.1176, |
| "step": 8000 |
| }, |
| { |
| "epoch": 2.56, |
| "eval_loss": 1.1169735193252563, |
| "eval_runtime": 716.7649, |
| "eval_samples_per_second": 6.976, |
| "eval_steps_per_second": 0.872, |
| "step": 8000 |
| } |
| ], |
| "max_steps": 9375, |
| "num_train_epochs": 3, |
| "total_flos": 4.76478839980032e+17, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|