| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 12450, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0024099289070972406, |
| "grad_norm": 3.061742472441796, |
| "learning_rate": 7.228915662650603e-08, |
| "loss": 0.4777, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.004819857814194481, |
| "grad_norm": 3.1319247179565144, |
| "learning_rate": 1.526104417670683e-07, |
| "loss": 0.4705, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.007229786721291722, |
| "grad_norm": 2.859984534144655, |
| "learning_rate": 2.3293172690763053e-07, |
| "loss": 0.466, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.009639715628388962, |
| "grad_norm": 2.234244386802424, |
| "learning_rate": 3.1325301204819284e-07, |
| "loss": 0.4548, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.012049644535486204, |
| "grad_norm": 1.335883737586674, |
| "learning_rate": 3.93574297188755e-07, |
| "loss": 0.444, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.014459573442583444, |
| "grad_norm": 0.9241353014597365, |
| "learning_rate": 4.738955823293173e-07, |
| "loss": 0.4204, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.016869502349680685, |
| "grad_norm": 0.6871403762148126, |
| "learning_rate": 5.542168674698796e-07, |
| "loss": 0.4033, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.019279431256777925, |
| "grad_norm": 0.4449835353465216, |
| "learning_rate": 6.345381526104419e-07, |
| "loss": 0.38, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.021689360163875165, |
| "grad_norm": 0.33573830468668203, |
| "learning_rate": 7.14859437751004e-07, |
| "loss": 0.371, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.024099289070972408, |
| "grad_norm": 0.31411059766897775, |
| "learning_rate": 7.951807228915663e-07, |
| "loss": 0.358, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.026509217978069648, |
| "grad_norm": 0.24806762698542578, |
| "learning_rate": 8.755020080321286e-07, |
| "loss": 0.3611, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.028919146885166887, |
| "grad_norm": 0.2062168171574948, |
| "learning_rate": 9.558232931726909e-07, |
| "loss": 0.3501, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.03132907579226413, |
| "grad_norm": 0.22819708161386978, |
| "learning_rate": 1.0361445783132532e-06, |
| "loss": 0.3467, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.03373900469936137, |
| "grad_norm": 0.19089804750862022, |
| "learning_rate": 1.1164658634538152e-06, |
| "loss": 0.3379, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.03614893360645861, |
| "grad_norm": 0.19371156821756932, |
| "learning_rate": 1.1967871485943775e-06, |
| "loss": 0.3384, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.03855886251355585, |
| "grad_norm": 0.17878783833936646, |
| "learning_rate": 1.2771084337349398e-06, |
| "loss": 0.3366, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.04096879142065309, |
| "grad_norm": 0.18620932367241894, |
| "learning_rate": 1.357429718875502e-06, |
| "loss": 0.3282, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.04337872032775033, |
| "grad_norm": 0.18464480499928992, |
| "learning_rate": 1.4377510040160644e-06, |
| "loss": 0.3288, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.04578864923484757, |
| "grad_norm": 0.1856419400290462, |
| "learning_rate": 1.5180722891566266e-06, |
| "loss": 0.3276, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.048198578141944816, |
| "grad_norm": 0.17651783370755456, |
| "learning_rate": 1.598393574297189e-06, |
| "loss": 0.3237, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.05060850704904205, |
| "grad_norm": 0.1726606356660889, |
| "learning_rate": 1.6787148594377512e-06, |
| "loss": 0.3215, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.053018435956139295, |
| "grad_norm": 0.1969898186994874, |
| "learning_rate": 1.7590361445783133e-06, |
| "loss": 0.3238, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.05542836486323653, |
| "grad_norm": 0.20162437238614309, |
| "learning_rate": 1.8393574297188758e-06, |
| "loss": 0.3149, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.057838293770333775, |
| "grad_norm": 0.1993790121523761, |
| "learning_rate": 1.919678714859438e-06, |
| "loss": 0.3156, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.06024822267743102, |
| "grad_norm": 0.19730108814459651, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 0.3135, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.06265815158452825, |
| "grad_norm": 0.1943527748598857, |
| "learning_rate": 2.0803212851405624e-06, |
| "loss": 0.3137, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.06506808049162549, |
| "grad_norm": 0.20086798832915087, |
| "learning_rate": 2.1606425702811245e-06, |
| "loss": 0.3119, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.06747800939872274, |
| "grad_norm": 0.19049098841650877, |
| "learning_rate": 2.240963855421687e-06, |
| "loss": 0.3078, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.06988793830581998, |
| "grad_norm": 0.2018582398985099, |
| "learning_rate": 2.321285140562249e-06, |
| "loss": 0.3089, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.07229786721291721, |
| "grad_norm": 0.19621506196853225, |
| "learning_rate": 2.4016064257028115e-06, |
| "loss": 0.3076, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.07470779612001446, |
| "grad_norm": 0.19712224295336245, |
| "learning_rate": 2.4819277108433736e-06, |
| "loss": 0.3013, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.0771177250271117, |
| "grad_norm": 0.20569722339954372, |
| "learning_rate": 2.5622489959839357e-06, |
| "loss": 0.3043, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.07952765393420894, |
| "grad_norm": 0.22847044972146283, |
| "learning_rate": 2.642570281124498e-06, |
| "loss": 0.2991, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.08193758284130619, |
| "grad_norm": 0.18513011684761646, |
| "learning_rate": 2.7228915662650607e-06, |
| "loss": 0.2993, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.08434751174840342, |
| "grad_norm": 0.17755985106917166, |
| "learning_rate": 2.8032128514056227e-06, |
| "loss": 0.2992, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.08675744065550066, |
| "grad_norm": 0.1928704075964709, |
| "learning_rate": 2.883534136546185e-06, |
| "loss": 0.2976, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.08916736956259791, |
| "grad_norm": 0.21268570126746547, |
| "learning_rate": 2.9638554216867473e-06, |
| "loss": 0.2965, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.09157729846969515, |
| "grad_norm": 0.22070919267123282, |
| "learning_rate": 3.0441767068273094e-06, |
| "loss": 0.2963, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.09398722737679238, |
| "grad_norm": 0.22897219750975534, |
| "learning_rate": 3.124497991967872e-06, |
| "loss": 0.2957, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.09639715628388963, |
| "grad_norm": 0.22510919638722657, |
| "learning_rate": 3.204819277108434e-06, |
| "loss": 0.2963, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.09880708519098687, |
| "grad_norm": 0.23679493098050808, |
| "learning_rate": 3.2851405622489964e-06, |
| "loss": 0.2888, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.1012170140980841, |
| "grad_norm": 0.22162593174405135, |
| "learning_rate": 3.3654618473895585e-06, |
| "loss": 0.2918, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.10362694300518134, |
| "grad_norm": 0.19267376702827746, |
| "learning_rate": 3.4457831325301206e-06, |
| "loss": 0.2957, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.10603687191227859, |
| "grad_norm": 0.1958695805729227, |
| "learning_rate": 3.526104417670683e-06, |
| "loss": 0.2885, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.10844680081937583, |
| "grad_norm": 0.20467559437897054, |
| "learning_rate": 3.6064257028112455e-06, |
| "loss": 0.2902, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.11085672972647306, |
| "grad_norm": 0.22130857005209242, |
| "learning_rate": 3.6867469879518076e-06, |
| "loss": 0.2923, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.11326665863357031, |
| "grad_norm": 0.19296929058892381, |
| "learning_rate": 3.7670682730923697e-06, |
| "loss": 0.2886, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.11567658754066755, |
| "grad_norm": 0.2544710309958475, |
| "learning_rate": 3.847389558232932e-06, |
| "loss": 0.2885, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.11808651644776479, |
| "grad_norm": 0.20884554616299403, |
| "learning_rate": 3.927710843373494e-06, |
| "loss": 0.2861, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.12049644535486204, |
| "grad_norm": 0.18833668005434673, |
| "learning_rate": 4.008032128514057e-06, |
| "loss": 0.2907, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.12290637426195927, |
| "grad_norm": 0.27808054171677704, |
| "learning_rate": 4.088353413654618e-06, |
| "loss": 0.2855, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.1253163031690565, |
| "grad_norm": 0.21917995568637305, |
| "learning_rate": 4.168674698795181e-06, |
| "loss": 0.2908, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.12772623207615375, |
| "grad_norm": 0.2306361686144042, |
| "learning_rate": 4.248995983935743e-06, |
| "loss": 0.287, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.13013616098325098, |
| "grad_norm": 0.21127215848273082, |
| "learning_rate": 4.329317269076306e-06, |
| "loss": 0.2859, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.13254608989034825, |
| "grad_norm": 0.2736115011698697, |
| "learning_rate": 4.4096385542168675e-06, |
| "loss": 0.2849, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.13495601879744548, |
| "grad_norm": 0.26395897628735243, |
| "learning_rate": 4.48995983935743e-06, |
| "loss": 0.2821, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.13736594770454272, |
| "grad_norm": 0.23503592007454566, |
| "learning_rate": 4.5702811244979925e-06, |
| "loss": 0.2871, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.13977587661163995, |
| "grad_norm": 0.24939199452163055, |
| "learning_rate": 4.650602409638554e-06, |
| "loss": 0.2846, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.1421858055187372, |
| "grad_norm": 0.2012519003076539, |
| "learning_rate": 4.730923694779117e-06, |
| "loss": 0.2854, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.14459573442583443, |
| "grad_norm": 0.21989122710529682, |
| "learning_rate": 4.811244979919679e-06, |
| "loss": 0.28, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.1470056633329317, |
| "grad_norm": 0.23927038060480976, |
| "learning_rate": 4.891566265060242e-06, |
| "loss": 0.28, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.14941559224002893, |
| "grad_norm": 0.24410829601803702, |
| "learning_rate": 4.971887550200803e-06, |
| "loss": 0.2813, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.15182552114712616, |
| "grad_norm": 0.22745972355704097, |
| "learning_rate": 5.052208835341366e-06, |
| "loss": 0.2807, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.1542354500542234, |
| "grad_norm": 0.23537466122987794, |
| "learning_rate": 5.132530120481927e-06, |
| "loss": 0.2795, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.15664537896132064, |
| "grad_norm": 0.2164573922113711, |
| "learning_rate": 5.21285140562249e-06, |
| "loss": 0.28, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.15905530786841787, |
| "grad_norm": 0.22708556657406564, |
| "learning_rate": 5.293172690763053e-06, |
| "loss": 0.2786, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.1614652367755151, |
| "grad_norm": 0.22925652110061312, |
| "learning_rate": 5.373493975903615e-06, |
| "loss": 0.2813, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.16387516568261237, |
| "grad_norm": 0.33120835623384376, |
| "learning_rate": 5.453815261044177e-06, |
| "loss": 0.278, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.1662850945897096, |
| "grad_norm": 0.22771802520007675, |
| "learning_rate": 5.53413654618474e-06, |
| "loss": 0.2799, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.16869502349680685, |
| "grad_norm": 0.22593239995366798, |
| "learning_rate": 5.6144578313253015e-06, |
| "loss": 0.2736, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.17110495240390408, |
| "grad_norm": 0.25050781538921346, |
| "learning_rate": 5.694779116465864e-06, |
| "loss": 0.2742, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.17351488131100132, |
| "grad_norm": 0.20145247640957603, |
| "learning_rate": 5.775100401606426e-06, |
| "loss": 0.2759, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.17592481021809855, |
| "grad_norm": 0.27996259086839936, |
| "learning_rate": 5.855421686746988e-06, |
| "loss": 0.2808, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.17833473912519582, |
| "grad_norm": 0.2244854088378441, |
| "learning_rate": 5.935742971887551e-06, |
| "loss": 0.2802, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.18074466803229305, |
| "grad_norm": 0.2531382738440264, |
| "learning_rate": 6.016064257028112e-06, |
| "loss": 0.2781, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.1831545969393903, |
| "grad_norm": 0.265808716023224, |
| "learning_rate": 6.096385542168676e-06, |
| "loss": 0.2715, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.18556452584648753, |
| "grad_norm": 0.2407841873678655, |
| "learning_rate": 6.176706827309238e-06, |
| "loss": 0.2763, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.18797445475358476, |
| "grad_norm": 0.22445783962351623, |
| "learning_rate": 6.2570281124498e-06, |
| "loss": 0.2727, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.190384383660682, |
| "grad_norm": 0.27702862591879435, |
| "learning_rate": 6.337349397590362e-06, |
| "loss": 0.2779, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.19279431256777926, |
| "grad_norm": 0.27662266231206134, |
| "learning_rate": 6.417670682730924e-06, |
| "loss": 0.275, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.1952042414748765, |
| "grad_norm": 0.24857374737387905, |
| "learning_rate": 6.4979919678714864e-06, |
| "loss": 0.2734, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.19761417038197374, |
| "grad_norm": 0.26560232229665853, |
| "learning_rate": 6.578313253012049e-06, |
| "loss": 0.2752, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.20002409928907097, |
| "grad_norm": 0.26625093052499677, |
| "learning_rate": 6.6586345381526106e-06, |
| "loss": 0.2768, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.2024340281961682, |
| "grad_norm": 0.324801464798607, |
| "learning_rate": 6.738955823293173e-06, |
| "loss": 0.2724, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.20484395710326544, |
| "grad_norm": 0.22943419602266416, |
| "learning_rate": 6.819277108433735e-06, |
| "loss": 0.2747, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.20725388601036268, |
| "grad_norm": 0.24308043167115134, |
| "learning_rate": 6.899598393574298e-06, |
| "loss": 0.2721, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.20966381491745995, |
| "grad_norm": 0.35950510615165837, |
| "learning_rate": 6.9799196787148605e-06, |
| "loss": 0.2739, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.21207374382455718, |
| "grad_norm": 0.24054708786501794, |
| "learning_rate": 7.060240963855422e-06, |
| "loss": 0.2726, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.21448367273165442, |
| "grad_norm": 0.2742769014652827, |
| "learning_rate": 7.140562248995985e-06, |
| "loss": 0.2722, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.21689360163875165, |
| "grad_norm": 0.299445285411188, |
| "learning_rate": 7.220883534136547e-06, |
| "loss": 0.268, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.2193035305458489, |
| "grad_norm": 0.21043357726618872, |
| "learning_rate": 7.301204819277109e-06, |
| "loss": 0.2703, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.22171345945294613, |
| "grad_norm": 0.22851348242239414, |
| "learning_rate": 7.381526104417671e-06, |
| "loss": 0.2731, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.2241233883600434, |
| "grad_norm": 0.253060897115378, |
| "learning_rate": 7.461847389558233e-06, |
| "loss": 0.271, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.22653331726714063, |
| "grad_norm": 0.2268198019889769, |
| "learning_rate": 7.5421686746987955e-06, |
| "loss": 0.2707, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.22894324617423786, |
| "grad_norm": 0.30365639506090863, |
| "learning_rate": 7.622489959839358e-06, |
| "loss": 0.2706, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.2313531750813351, |
| "grad_norm": 0.3080733224657331, |
| "learning_rate": 7.702811244979921e-06, |
| "loss": 0.271, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.23376310398843234, |
| "grad_norm": 0.28888638048424176, |
| "learning_rate": 7.783132530120484e-06, |
| "loss": 0.2704, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.23617303289552957, |
| "grad_norm": 0.22854881889089107, |
| "learning_rate": 7.863453815261045e-06, |
| "loss": 0.2664, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.23858296180262684, |
| "grad_norm": 0.23426238793409673, |
| "learning_rate": 7.943775100401607e-06, |
| "loss": 0.2687, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.24099289070972407, |
| "grad_norm": 0.2186701220253525, |
| "learning_rate": 8.02409638554217e-06, |
| "loss": 0.2712, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.2434028196168213, |
| "grad_norm": 0.2530982669271462, |
| "learning_rate": 8.104417670682732e-06, |
| "loss": 0.2693, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.24581274852391855, |
| "grad_norm": 0.3531544186297275, |
| "learning_rate": 8.184738955823293e-06, |
| "loss": 0.2702, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.24822267743101578, |
| "grad_norm": 0.29217935835451525, |
| "learning_rate": 8.265060240963855e-06, |
| "loss": 0.2681, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.250632606338113, |
| "grad_norm": 0.29815474443639217, |
| "learning_rate": 8.345381526104418e-06, |
| "loss": 0.2687, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.25304253524521025, |
| "grad_norm": 0.24107590302295004, |
| "learning_rate": 8.42570281124498e-06, |
| "loss": 0.2675, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.2554524641523075, |
| "grad_norm": 0.2930542900668613, |
| "learning_rate": 8.506024096385543e-06, |
| "loss": 0.2686, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.2578623930594047, |
| "grad_norm": 0.24015859707456538, |
| "learning_rate": 8.586345381526105e-06, |
| "loss": 0.2658, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.26027232196650196, |
| "grad_norm": 0.3550411988857324, |
| "learning_rate": 8.666666666666668e-06, |
| "loss": 0.2682, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.26268225087359925, |
| "grad_norm": 0.3207909230937528, |
| "learning_rate": 8.74698795180723e-06, |
| "loss": 0.2694, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.2650921797806965, |
| "grad_norm": 0.2115013961725572, |
| "learning_rate": 8.827309236947791e-06, |
| "loss": 0.2671, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.2675021086877937, |
| "grad_norm": 0.262259367038145, |
| "learning_rate": 8.907630522088354e-06, |
| "loss": 0.2721, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.26991203759489096, |
| "grad_norm": 0.27548437600981057, |
| "learning_rate": 8.987951807228916e-06, |
| "loss": 0.2683, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.2723219665019882, |
| "grad_norm": 0.2575897318662765, |
| "learning_rate": 9.068273092369479e-06, |
| "loss": 0.2636, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.27473189540908544, |
| "grad_norm": 0.3080553736593152, |
| "learning_rate": 9.148594377510041e-06, |
| "loss": 0.2684, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.27714182431618267, |
| "grad_norm": 0.34249365935741327, |
| "learning_rate": 9.228915662650602e-06, |
| "loss": 0.2633, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.2795517532232799, |
| "grad_norm": 0.27932791325775014, |
| "learning_rate": 9.309236947791166e-06, |
| "loss": 0.266, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.28196168213037714, |
| "grad_norm": 0.2417799994880218, |
| "learning_rate": 9.389558232931729e-06, |
| "loss": 0.2631, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.2843716110374744, |
| "grad_norm": 0.31884031573816535, |
| "learning_rate": 9.46987951807229e-06, |
| "loss": 0.265, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.2867815399445716, |
| "grad_norm": 0.20968871620333449, |
| "learning_rate": 9.550200803212852e-06, |
| "loss": 0.2619, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.28919146885166885, |
| "grad_norm": 0.3173946884992909, |
| "learning_rate": 9.630522088353414e-06, |
| "loss": 0.2626, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.2916013977587661, |
| "grad_norm": 0.2569598600853588, |
| "learning_rate": 9.710843373493977e-06, |
| "loss": 0.2658, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.2940113266658634, |
| "grad_norm": 0.31728256244538405, |
| "learning_rate": 9.79116465863454e-06, |
| "loss": 0.2591, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.2964212555729606, |
| "grad_norm": 0.2989553189617791, |
| "learning_rate": 9.8714859437751e-06, |
| "loss": 0.2667, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.29883118448005785, |
| "grad_norm": 0.2230569846038911, |
| "learning_rate": 9.951807228915663e-06, |
| "loss": 0.2625, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.3012411133871551, |
| "grad_norm": 0.33874428864380246, |
| "learning_rate": 9.999996855613166e-06, |
| "loss": 0.2631, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.3036510422942523, |
| "grad_norm": 0.272656260094118, |
| "learning_rate": 9.999961481306676e-06, |
| "loss": 0.2626, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.30606097120134956, |
| "grad_norm": 0.23871465341653153, |
| "learning_rate": 9.999886802489159e-06, |
| "loss": 0.2603, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.3084709001084468, |
| "grad_norm": 0.23650817035191204, |
| "learning_rate": 9.999772819747658e-06, |
| "loss": 0.264, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.31088082901554404, |
| "grad_norm": 0.24442333804519362, |
| "learning_rate": 9.99961953397819e-06, |
| "loss": 0.267, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.31329075792264127, |
| "grad_norm": 0.24952567873292492, |
| "learning_rate": 9.999426946385727e-06, |
| "loss": 0.2673, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.3157006868297385, |
| "grad_norm": 0.25096847242020526, |
| "learning_rate": 9.999195058484192e-06, |
| "loss": 0.2639, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.31811061573683574, |
| "grad_norm": 0.2621430290919802, |
| "learning_rate": 9.998923872096449e-06, |
| "loss": 0.2647, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.320520544643933, |
| "grad_norm": 0.3228004089841622, |
| "learning_rate": 9.998613389354283e-06, |
| "loss": 0.2603, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.3229304735510302, |
| "grad_norm": 0.264968941324506, |
| "learning_rate": 9.998263612698386e-06, |
| "loss": 0.2663, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.3253404024581275, |
| "grad_norm": 0.309286015249272, |
| "learning_rate": 9.997874544878343e-06, |
| "loss": 0.2618, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.32775033136522475, |
| "grad_norm": 0.2839373966652251, |
| "learning_rate": 9.997446188952599e-06, |
| "loss": 0.2564, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.330160260272322, |
| "grad_norm": 0.29115675145342884, |
| "learning_rate": 9.996978548288446e-06, |
| "loss": 0.2653, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.3325701891794192, |
| "grad_norm": 0.324856987750109, |
| "learning_rate": 9.996471626561988e-06, |
| "loss": 0.2627, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.33498011808651645, |
| "grad_norm": 0.29239610514838105, |
| "learning_rate": 9.995925427758117e-06, |
| "loss": 0.2625, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.3373900469936137, |
| "grad_norm": 0.2931229070423582, |
| "learning_rate": 9.995339956170482e-06, |
| "loss": 0.2606, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.3397999759007109, |
| "grad_norm": 0.24710624492399202, |
| "learning_rate": 9.994715216401457e-06, |
| "loss": 0.2591, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.34220990480780816, |
| "grad_norm": 0.22685709163092377, |
| "learning_rate": 9.994051213362091e-06, |
| "loss": 0.2599, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.3446198337149054, |
| "grad_norm": 0.2782337126782948, |
| "learning_rate": 9.993347952272095e-06, |
| "loss": 0.2548, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.34702976262200264, |
| "grad_norm": 0.2824044193510219, |
| "learning_rate": 9.992605438659773e-06, |
| "loss": 0.2592, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.34943969152909987, |
| "grad_norm": 0.32671194252531005, |
| "learning_rate": 9.991823678361997e-06, |
| "loss": 0.2605, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.3518496204361971, |
| "grad_norm": 0.2915418808907502, |
| "learning_rate": 9.991002677524158e-06, |
| "loss": 0.2583, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.3542595493432944, |
| "grad_norm": 0.2517434194186832, |
| "learning_rate": 9.990142442600113e-06, |
| "loss": 0.2626, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.35666947825039164, |
| "grad_norm": 0.22802864482216906, |
| "learning_rate": 9.989242980352134e-06, |
| "loss": 0.2621, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.3590794071574889, |
| "grad_norm": 0.2510160887893319, |
| "learning_rate": 9.988304297850864e-06, |
| "loss": 0.2602, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.3614893360645861, |
| "grad_norm": 0.31638803764677614, |
| "learning_rate": 9.987326402475246e-06, |
| "loss": 0.2617, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.36389926497168334, |
| "grad_norm": 0.3247371483532704, |
| "learning_rate": 9.986309301912484e-06, |
| "loss": 0.256, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.3663091938787806, |
| "grad_norm": 0.22051929320874422, |
| "learning_rate": 9.985253004157967e-06, |
| "loss": 0.2576, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.3687191227858778, |
| "grad_norm": 0.2510308559539802, |
| "learning_rate": 9.984157517515209e-06, |
| "loss": 0.2553, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.37112905169297505, |
| "grad_norm": 0.2704669389868318, |
| "learning_rate": 9.983022850595794e-06, |
| "loss": 0.2576, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.3735389806000723, |
| "grad_norm": 0.25917018433066413, |
| "learning_rate": 9.981849012319294e-06, |
| "loss": 0.2571, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.3759489095071695, |
| "grad_norm": 0.255429313228651, |
| "learning_rate": 9.980636011913207e-06, |
| "loss": 0.2574, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.37835883841426676, |
| "grad_norm": 0.2808466488624173, |
| "learning_rate": 9.979383858912886e-06, |
| "loss": 0.2581, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.380768767321364, |
| "grad_norm": 0.27710076879535755, |
| "learning_rate": 9.97809256316146e-06, |
| "loss": 0.2553, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.38317869622846124, |
| "grad_norm": 0.25575994754858516, |
| "learning_rate": 9.976762134809752e-06, |
| "loss": 0.259, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.3855886251355585, |
| "grad_norm": 0.25900999268687813, |
| "learning_rate": 9.975392584316215e-06, |
| "loss": 0.2549, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.38799855404265576, |
| "grad_norm": 0.268086200203761, |
| "learning_rate": 9.973983922446832e-06, |
| "loss": 0.259, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.390408482949753, |
| "grad_norm": 0.26732900461472714, |
| "learning_rate": 9.972536160275042e-06, |
| "loss": 0.2548, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.39281841185685024, |
| "grad_norm": 0.30665669138498935, |
| "learning_rate": 9.971049309181648e-06, |
| "loss": 0.2565, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.39522834076394747, |
| "grad_norm": 0.28937131132395905, |
| "learning_rate": 9.969523380854736e-06, |
| "loss": 0.2586, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.3976382696710447, |
| "grad_norm": 0.3141555384161138, |
| "learning_rate": 9.967958387289564e-06, |
| "loss": 0.2593, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.40004819857814194, |
| "grad_norm": 0.23076311163985025, |
| "learning_rate": 9.966354340788496e-06, |
| "loss": 0.2545, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.4024581274852392, |
| "grad_norm": 0.25203421622592836, |
| "learning_rate": 9.964711253960877e-06, |
| "loss": 0.2559, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.4048680563923364, |
| "grad_norm": 0.2645397042409831, |
| "learning_rate": 9.963029139722952e-06, |
| "loss": 0.2567, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.40727798529943365, |
| "grad_norm": 0.30274969528624446, |
| "learning_rate": 9.96130801129776e-06, |
| "loss": 0.2549, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.4096879142065309, |
| "grad_norm": 0.2771458908420823, |
| "learning_rate": 9.959547882215025e-06, |
| "loss": 0.2562, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.4120978431136281, |
| "grad_norm": 0.27175651166846754, |
| "learning_rate": 9.957748766311059e-06, |
| "loss": 0.259, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.41450777202072536, |
| "grad_norm": 0.2634574217505447, |
| "learning_rate": 9.955910677728643e-06, |
| "loss": 0.2574, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.41691770092782265, |
| "grad_norm": 0.2517866807829483, |
| "learning_rate": 9.954033630916926e-06, |
| "loss": 0.2535, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.4193276298349199, |
| "grad_norm": 0.24198911390492772, |
| "learning_rate": 9.952117640631298e-06, |
| "loss": 0.2531, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.4217375587420171, |
| "grad_norm": 0.25567617243925783, |
| "learning_rate": 9.950162721933292e-06, |
| "loss": 0.2557, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.42414748764911436, |
| "grad_norm": 0.24724127942420843, |
| "learning_rate": 9.948168890190448e-06, |
| "loss": 0.2526, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.4265574165562116, |
| "grad_norm": 0.22259661645254947, |
| "learning_rate": 9.946136161076205e-06, |
| "loss": 0.2545, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.42896734546330884, |
| "grad_norm": 0.2544717021977746, |
| "learning_rate": 9.94406455056977e-06, |
| "loss": 0.2562, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.43137727437040607, |
| "grad_norm": 0.29699746917090547, |
| "learning_rate": 9.941954074955995e-06, |
| "loss": 0.2496, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.4337872032775033, |
| "grad_norm": 0.28028866452513135, |
| "learning_rate": 9.939804750825253e-06, |
| "loss": 0.2522, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.43619713218460054, |
| "grad_norm": 0.23894168010090663, |
| "learning_rate": 9.937616595073299e-06, |
| "loss": 0.2544, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.4386070610916978, |
| "grad_norm": 0.23390200650134418, |
| "learning_rate": 9.935389624901143e-06, |
| "loss": 0.2562, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.441016989998795, |
| "grad_norm": 0.24222324515161398, |
| "learning_rate": 9.933123857814917e-06, |
| "loss": 0.2551, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.44342691890589225, |
| "grad_norm": 0.32911301369164847, |
| "learning_rate": 9.93081931162573e-06, |
| "loss": 0.2543, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.4458368478129895, |
| "grad_norm": 0.26294976961002553, |
| "learning_rate": 9.928476004449534e-06, |
| "loss": 0.2536, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.4482467767200868, |
| "grad_norm": 0.25013849250306375, |
| "learning_rate": 9.926093954706982e-06, |
| "loss": 0.2533, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.450656705627184, |
| "grad_norm": 0.2805429371994657, |
| "learning_rate": 9.923673181123273e-06, |
| "loss": 0.2567, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.45306663453428125, |
| "grad_norm": 0.2196569788959264, |
| "learning_rate": 9.921213702728023e-06, |
| "loss": 0.2519, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.4554765634413785, |
| "grad_norm": 0.2547332130190858, |
| "learning_rate": 9.918715538855098e-06, |
| "loss": 0.2524, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.4578864923484757, |
| "grad_norm": 0.2598591930709734, |
| "learning_rate": 9.916178709142472e-06, |
| "loss": 0.2523, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.46029642125557296, |
| "grad_norm": 0.24383497437844423, |
| "learning_rate": 9.913603233532067e-06, |
| "loss": 0.2538, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.4627063501626702, |
| "grad_norm": 0.2289012736200408, |
| "learning_rate": 9.910989132269604e-06, |
| "loss": 0.253, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.46511627906976744, |
| "grad_norm": 0.26872158304325294, |
| "learning_rate": 9.908336425904432e-06, |
| "loss": 0.2536, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.46752620797686467, |
| "grad_norm": 0.22899065898873444, |
| "learning_rate": 9.905645135289378e-06, |
| "loss": 0.2536, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.4699361368839619, |
| "grad_norm": 0.2339437193920751, |
| "learning_rate": 9.902915281580581e-06, |
| "loss": 0.2548, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.47234606579105914, |
| "grad_norm": 0.281501527050063, |
| "learning_rate": 9.900146886237316e-06, |
| "loss": 0.2532, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.4747559946981564, |
| "grad_norm": 0.2930560528551073, |
| "learning_rate": 9.897339971021836e-06, |
| "loss": 0.2523, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.4771659236052537, |
| "grad_norm": 0.2962587159259821, |
| "learning_rate": 9.894494557999195e-06, |
| "loss": 0.2512, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.4795758525123509, |
| "grad_norm": 0.2777269071036824, |
| "learning_rate": 9.891610669537084e-06, |
| "loss": 0.2482, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.48198578141944814, |
| "grad_norm": 0.2592070661747237, |
| "learning_rate": 9.888688328305638e-06, |
| "loss": 0.2515, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.4843957103265454, |
| "grad_norm": 0.2323305440409319, |
| "learning_rate": 9.885727557277275e-06, |
| "loss": 0.2511, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.4868056392336426, |
| "grad_norm": 0.2639926538094649, |
| "learning_rate": 9.882728379726506e-06, |
| "loss": 0.2526, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.48921556814073985, |
| "grad_norm": 0.2369644299043803, |
| "learning_rate": 9.879690819229752e-06, |
| "loss": 0.2457, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.4916254970478371, |
| "grad_norm": 0.24161366225089992, |
| "learning_rate": 9.876614899665167e-06, |
| "loss": 0.2519, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.4940354259549343, |
| "grad_norm": 0.30395052527650174, |
| "learning_rate": 9.873500645212434e-06, |
| "loss": 0.2519, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.49644535486203156, |
| "grad_norm": 0.21943203874143705, |
| "learning_rate": 9.870348080352597e-06, |
| "loss": 0.2515, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.4988552837691288, |
| "grad_norm": 0.23272359587212466, |
| "learning_rate": 9.867157229867847e-06, |
| "loss": 0.2488, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.501265212676226, |
| "grad_norm": 0.26362099006055606, |
| "learning_rate": 9.863928118841344e-06, |
| "loss": 0.2533, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.5036751415833233, |
| "grad_norm": 0.2641813401826469, |
| "learning_rate": 9.860660772657008e-06, |
| "loss": 0.2498, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.5060850704904205, |
| "grad_norm": 0.2509299238141831, |
| "learning_rate": 9.857355216999324e-06, |
| "loss": 0.2476, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.5084949993975177, |
| "grad_norm": 0.2287556133058195, |
| "learning_rate": 9.854011477853147e-06, |
| "loss": 0.2512, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.510904928304615, |
| "grad_norm": 0.2309736644440354, |
| "learning_rate": 9.850629581503481e-06, |
| "loss": 0.2504, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.5133148572117122, |
| "grad_norm": 0.22734652525944837, |
| "learning_rate": 9.847209554535288e-06, |
| "loss": 0.2503, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.5157247861188095, |
| "grad_norm": 0.2680129551109617, |
| "learning_rate": 9.843751423833274e-06, |
| "loss": 0.2483, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.5181347150259067, |
| "grad_norm": 0.24577001348408803, |
| "learning_rate": 9.840255216581676e-06, |
| "loss": 0.247, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.5205446439330039, |
| "grad_norm": 0.22672067617484173, |
| "learning_rate": 9.836720960264049e-06, |
| "loss": 0.2533, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.5229545728401012, |
| "grad_norm": 0.20879946688247455, |
| "learning_rate": 9.833148682663048e-06, |
| "loss": 0.2519, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.5253645017471985, |
| "grad_norm": 0.27256348962492954, |
| "learning_rate": 9.829538411860218e-06, |
| "loss": 0.2501, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.5277744306542957, |
| "grad_norm": 1.3586399738190187, |
| "learning_rate": 9.82589017623576e-06, |
| "loss": 0.2517, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.530184359561393, |
| "grad_norm": 0.243485851387389, |
| "learning_rate": 9.822204004468319e-06, |
| "loss": 0.2523, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.5325942884684902, |
| "grad_norm": 0.23042125839781646, |
| "learning_rate": 9.818479925534755e-06, |
| "loss": 0.2462, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.5350042173755875, |
| "grad_norm": 0.24445277638786383, |
| "learning_rate": 9.814717968709912e-06, |
| "loss": 0.2534, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.5374141462826847, |
| "grad_norm": 0.28216268342519074, |
| "learning_rate": 9.810918163566396e-06, |
| "loss": 0.2494, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.5398240751897819, |
| "grad_norm": 0.21727871013389716, |
| "learning_rate": 9.80708053997433e-06, |
| "loss": 0.2493, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.5422340040968792, |
| "grad_norm": 0.24147885785957263, |
| "learning_rate": 9.803205128101134e-06, |
| "loss": 0.252, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.5446439330039764, |
| "grad_norm": 0.2063861668840542, |
| "learning_rate": 9.799291958411273e-06, |
| "loss": 0.2503, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.5470538619110736, |
| "grad_norm": 0.2133593464613344, |
| "learning_rate": 9.795341061666031e-06, |
| "loss": 0.2493, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.5494637908181709, |
| "grad_norm": 0.26539292934413866, |
| "learning_rate": 9.791352468923257e-06, |
| "loss": 0.2518, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.5518737197252681, |
| "grad_norm": 0.25707985610511475, |
| "learning_rate": 9.787326211537132e-06, |
| "loss": 0.2458, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.5542836486323653, |
| "grad_norm": 0.24587473091786494, |
| "learning_rate": 9.783262321157915e-06, |
| "loss": 0.2508, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.5566935775394626, |
| "grad_norm": 0.2229806857389122, |
| "learning_rate": 9.779160829731698e-06, |
| "loss": 0.2502, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.5591035064465598, |
| "grad_norm": 0.23206569570129085, |
| "learning_rate": 9.77502176950015e-06, |
| "loss": 0.2484, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.561513435353657, |
| "grad_norm": 0.22458750897161675, |
| "learning_rate": 9.770845173000272e-06, |
| "loss": 0.2487, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.5639233642607543, |
| "grad_norm": 0.23194601750011765, |
| "learning_rate": 9.766631073064132e-06, |
| "loss": 0.2468, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.5663332931678515, |
| "grad_norm": 0.25190985385722175, |
| "learning_rate": 9.762379502818613e-06, |
| "loss": 0.2485, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.5687432220749488, |
| "grad_norm": 0.2553366523883217, |
| "learning_rate": 9.758090495685151e-06, |
| "loss": 0.2481, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.571153150982046, |
| "grad_norm": 0.2515668619417224, |
| "learning_rate": 9.75376408537947e-06, |
| "loss": 0.2497, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.5735630798891432, |
| "grad_norm": 0.25601950123499484, |
| "learning_rate": 9.749400305911323e-06, |
| "loss": 0.2488, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.5759730087962405, |
| "grad_norm": 0.2331291552504394, |
| "learning_rate": 9.744999191584214e-06, |
| "loss": 0.2474, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.5783829377033377, |
| "grad_norm": 0.2355737529576478, |
| "learning_rate": 9.740560776995142e-06, |
| "loss": 0.2512, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.5807928666104349, |
| "grad_norm": 0.21848904000264835, |
| "learning_rate": 9.736085097034318e-06, |
| "loss": 0.2478, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.5832027955175322, |
| "grad_norm": 0.2482349804436847, |
| "learning_rate": 9.731572186884894e-06, |
| "loss": 0.2466, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.5856127244246295, |
| "grad_norm": 0.24162183009865262, |
| "learning_rate": 9.727022082022692e-06, |
| "loss": 0.2433, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.5880226533317268, |
| "grad_norm": 0.20748843660065733, |
| "learning_rate": 9.722434818215914e-06, |
| "loss": 0.2505, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.590432582238824, |
| "grad_norm": 0.2460720452800874, |
| "learning_rate": 9.71781043152487e-06, |
| "loss": 0.2485, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.5928425111459212, |
| "grad_norm": 0.22678432897781045, |
| "learning_rate": 9.713148958301692e-06, |
| "loss": 0.247, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.5952524400530185, |
| "grad_norm": 0.22757995456174757, |
| "learning_rate": 9.708450435190048e-06, |
| "loss": 0.2501, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.5976623689601157, |
| "grad_norm": 0.22394638428804461, |
| "learning_rate": 9.703714899124853e-06, |
| "loss": 0.2451, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.600072297867213, |
| "grad_norm": 0.2252056194114915, |
| "learning_rate": 9.698942387331983e-06, |
| "loss": 0.2472, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.6024822267743102, |
| "grad_norm": 0.23661161159464128, |
| "learning_rate": 9.694132937327969e-06, |
| "loss": 0.248, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.6048921556814074, |
| "grad_norm": 0.24027883229892857, |
| "learning_rate": 9.689286586919721e-06, |
| "loss": 0.2498, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.6073020845885047, |
| "grad_norm": 0.2243299626272604, |
| "learning_rate": 9.684403374204223e-06, |
| "loss": 0.251, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.6097120134956019, |
| "grad_norm": 0.2316376178427852, |
| "learning_rate": 9.679483337568223e-06, |
| "loss": 0.2489, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.6121219424026991, |
| "grad_norm": 0.21188098705704417, |
| "learning_rate": 9.674526515687947e-06, |
| "loss": 0.2498, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.6145318713097964, |
| "grad_norm": 0.23835194355460387, |
| "learning_rate": 9.669532947528789e-06, |
| "loss": 0.2438, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.6169418002168936, |
| "grad_norm": 0.27203353811670977, |
| "learning_rate": 9.664502672345002e-06, |
| "loss": 0.2496, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.6193517291239908, |
| "grad_norm": 0.2596580314905258, |
| "learning_rate": 9.65943572967939e-06, |
| "loss": 0.2465, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.6217616580310881, |
| "grad_norm": 0.2527876679791275, |
| "learning_rate": 9.654332159363004e-06, |
| "loss": 0.2508, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.6241715869381853, |
| "grad_norm": 0.22550245644158665, |
| "learning_rate": 9.649192001514817e-06, |
| "loss": 0.2442, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.6265815158452825, |
| "grad_norm": 0.21863943460147847, |
| "learning_rate": 9.64401529654142e-06, |
| "loss": 0.2448, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.6289914447523798, |
| "grad_norm": 0.30256431453237753, |
| "learning_rate": 9.638802085136698e-06, |
| "loss": 0.2448, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.631401373659477, |
| "grad_norm": 0.24072486871882548, |
| "learning_rate": 9.63355240828151e-06, |
| "loss": 0.2459, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.6338113025665743, |
| "grad_norm": 0.2188135061985191, |
| "learning_rate": 9.628266307243373e-06, |
| "loss": 0.2446, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.6362212314736715, |
| "grad_norm": 0.2773809052151068, |
| "learning_rate": 9.62294382357613e-06, |
| "loss": 0.2457, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.6386311603807687, |
| "grad_norm": 0.26375943823108927, |
| "learning_rate": 9.617584999119624e-06, |
| "loss": 0.2427, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.641041089287866, |
| "grad_norm": 0.23240691161743587, |
| "learning_rate": 9.612189875999378e-06, |
| "loss": 0.2426, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.6434510181949632, |
| "grad_norm": 0.23630077680221265, |
| "learning_rate": 9.606758496626252e-06, |
| "loss": 0.247, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.6458609471020604, |
| "grad_norm": 0.2509016999348782, |
| "learning_rate": 9.60129090369612e-06, |
| "loss": 0.2468, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.6482708760091578, |
| "grad_norm": 0.24870182504529034, |
| "learning_rate": 9.59578714018952e-06, |
| "loss": 0.2466, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.650680804916255, |
| "grad_norm": 0.2671086569185108, |
| "learning_rate": 9.590247249371338e-06, |
| "loss": 0.2491, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.6530907338233523, |
| "grad_norm": 0.26864211130483057, |
| "learning_rate": 9.584671274790447e-06, |
| "loss": 0.2412, |
| "step": 2710 |
| }, |
| { |
| "epoch": 0.6555006627304495, |
| "grad_norm": 0.2449067677760779, |
| "learning_rate": 9.579059260279376e-06, |
| "loss": 0.2471, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.6579105916375467, |
| "grad_norm": 0.22591514758628464, |
| "learning_rate": 9.573411249953963e-06, |
| "loss": 0.2464, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.660320520544644, |
| "grad_norm": 0.24171251882805125, |
| "learning_rate": 9.567727288213005e-06, |
| "loss": 0.247, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.6627304494517412, |
| "grad_norm": 0.24987162681467964, |
| "learning_rate": 9.562007419737916e-06, |
| "loss": 0.247, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.6651403783588384, |
| "grad_norm": 0.38858502012585033, |
| "learning_rate": 9.556251689492366e-06, |
| "loss": 0.2487, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.6675503072659357, |
| "grad_norm": 0.30447726327068786, |
| "learning_rate": 9.550460142721938e-06, |
| "loss": 0.2442, |
| "step": 2770 |
| }, |
| { |
| "epoch": 0.6699602361730329, |
| "grad_norm": 0.2508452315448291, |
| "learning_rate": 9.544632824953767e-06, |
| "loss": 0.2415, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.6723701650801301, |
| "grad_norm": 0.25979236953572127, |
| "learning_rate": 9.538769781996178e-06, |
| "loss": 0.2441, |
| "step": 2790 |
| }, |
| { |
| "epoch": 0.6747800939872274, |
| "grad_norm": 0.2645435314981733, |
| "learning_rate": 9.532871059938335e-06, |
| "loss": 0.2458, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.6771900228943246, |
| "grad_norm": 0.23441896195422374, |
| "learning_rate": 9.526936705149872e-06, |
| "loss": 0.2461, |
| "step": 2810 |
| }, |
| { |
| "epoch": 0.6795999518014219, |
| "grad_norm": 0.22615424165523812, |
| "learning_rate": 9.520966764280532e-06, |
| "loss": 0.2444, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.6820098807085191, |
| "grad_norm": 0.22477062313643148, |
| "learning_rate": 9.514961284259796e-06, |
| "loss": 0.2478, |
| "step": 2830 |
| }, |
| { |
| "epoch": 0.6844198096156163, |
| "grad_norm": 0.24163024792894153, |
| "learning_rate": 9.50892031229652e-06, |
| "loss": 0.2435, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.6868297385227136, |
| "grad_norm": 0.29876826315076066, |
| "learning_rate": 9.50284389587856e-06, |
| "loss": 0.2471, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.6892396674298108, |
| "grad_norm": 0.2590904612965771, |
| "learning_rate": 9.4967320827724e-06, |
| "loss": 0.2434, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.691649596336908, |
| "grad_norm": 0.2751396270681613, |
| "learning_rate": 9.490584921022773e-06, |
| "loss": 0.2447, |
| "step": 2870 |
| }, |
| { |
| "epoch": 0.6940595252440053, |
| "grad_norm": 0.21928936619489664, |
| "learning_rate": 9.484402458952289e-06, |
| "loss": 0.2438, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.6964694541511025, |
| "grad_norm": 0.20604736521846012, |
| "learning_rate": 9.478184745161052e-06, |
| "loss": 0.2447, |
| "step": 2890 |
| }, |
| { |
| "epoch": 0.6988793830581997, |
| "grad_norm": 0.20293332685140275, |
| "learning_rate": 9.471931828526282e-06, |
| "loss": 0.2471, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.701289311965297, |
| "grad_norm": 0.20898386618795675, |
| "learning_rate": 9.46564375820192e-06, |
| "loss": 0.2418, |
| "step": 2910 |
| }, |
| { |
| "epoch": 0.7036992408723942, |
| "grad_norm": 0.20265885486415477, |
| "learning_rate": 9.459320583618253e-06, |
| "loss": 0.2444, |
| "step": 2920 |
| }, |
| { |
| "epoch": 0.7061091697794915, |
| "grad_norm": 0.21784081138054445, |
| "learning_rate": 9.452962354481523e-06, |
| "loss": 0.2442, |
| "step": 2930 |
| }, |
| { |
| "epoch": 0.7085190986865888, |
| "grad_norm": 0.2057677350800323, |
| "learning_rate": 9.44656912077353e-06, |
| "loss": 0.2447, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.710929027593686, |
| "grad_norm": 0.2077146527923193, |
| "learning_rate": 9.440140932751249e-06, |
| "loss": 0.2462, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.7133389565007833, |
| "grad_norm": 0.24559268041039045, |
| "learning_rate": 9.433677840946424e-06, |
| "loss": 0.2428, |
| "step": 2960 |
| }, |
| { |
| "epoch": 0.7157488854078805, |
| "grad_norm": 0.2632289330359035, |
| "learning_rate": 9.427179896165182e-06, |
| "loss": 0.2434, |
| "step": 2970 |
| }, |
| { |
| "epoch": 0.7181588143149777, |
| "grad_norm": 0.4728478661757523, |
| "learning_rate": 9.420647149487622e-06, |
| "loss": 0.2458, |
| "step": 2980 |
| }, |
| { |
| "epoch": 0.720568743222075, |
| "grad_norm": 0.20781499356547853, |
| "learning_rate": 9.414079652267422e-06, |
| "loss": 0.2421, |
| "step": 2990 |
| }, |
| { |
| "epoch": 0.7229786721291722, |
| "grad_norm": 0.22440035705857272, |
| "learning_rate": 9.407477456131438e-06, |
| "loss": 0.2449, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.7253886010362695, |
| "grad_norm": 0.22093101419055877, |
| "learning_rate": 9.400840612979283e-06, |
| "loss": 0.2475, |
| "step": 3010 |
| }, |
| { |
| "epoch": 0.7277985299433667, |
| "grad_norm": 0.209019758476293, |
| "learning_rate": 9.394169174982935e-06, |
| "loss": 0.2436, |
| "step": 3020 |
| }, |
| { |
| "epoch": 0.7302084588504639, |
| "grad_norm": 0.21446014178337364, |
| "learning_rate": 9.387463194586321e-06, |
| "loss": 0.242, |
| "step": 3030 |
| }, |
| { |
| "epoch": 0.7326183877575612, |
| "grad_norm": 0.2741566137088202, |
| "learning_rate": 9.380722724504902e-06, |
| "loss": 0.2429, |
| "step": 3040 |
| }, |
| { |
| "epoch": 0.7350283166646584, |
| "grad_norm": 0.26552515232851714, |
| "learning_rate": 9.373947817725262e-06, |
| "loss": 0.2453, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.7374382455717556, |
| "grad_norm": 0.23218798648926703, |
| "learning_rate": 9.367138527504694e-06, |
| "loss": 0.2431, |
| "step": 3060 |
| }, |
| { |
| "epoch": 0.7398481744788529, |
| "grad_norm": 0.2737399903169623, |
| "learning_rate": 9.36029490737077e-06, |
| "loss": 0.2433, |
| "step": 3070 |
| }, |
| { |
| "epoch": 0.7422581033859501, |
| "grad_norm": 0.2833606536832588, |
| "learning_rate": 9.353417011120937e-06, |
| "loss": 0.2417, |
| "step": 3080 |
| }, |
| { |
| "epoch": 0.7446680322930473, |
| "grad_norm": 0.20297079664835432, |
| "learning_rate": 9.34650489282208e-06, |
| "loss": 0.2454, |
| "step": 3090 |
| }, |
| { |
| "epoch": 0.7470779612001446, |
| "grad_norm": 0.19661403837278102, |
| "learning_rate": 9.339558606810102e-06, |
| "loss": 0.2398, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.7494878901072418, |
| "grad_norm": 0.23698258341720282, |
| "learning_rate": 9.332578207689501e-06, |
| "loss": 0.2412, |
| "step": 3110 |
| }, |
| { |
| "epoch": 0.751897819014339, |
| "grad_norm": 0.27440813714781526, |
| "learning_rate": 9.325563750332935e-06, |
| "loss": 0.2441, |
| "step": 3120 |
| }, |
| { |
| "epoch": 0.7543077479214363, |
| "grad_norm": 0.21576707599165224, |
| "learning_rate": 9.31851528988079e-06, |
| "loss": 0.2416, |
| "step": 3130 |
| }, |
| { |
| "epoch": 0.7567176768285335, |
| "grad_norm": 0.21870244436654282, |
| "learning_rate": 9.311432881740752e-06, |
| "loss": 0.2419, |
| "step": 3140 |
| }, |
| { |
| "epoch": 0.7591276057356308, |
| "grad_norm": 0.2519064274406455, |
| "learning_rate": 9.304316581587367e-06, |
| "loss": 0.2396, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.761537534642728, |
| "grad_norm": 0.3391396856841686, |
| "learning_rate": 9.297166445361608e-06, |
| "loss": 0.2387, |
| "step": 3160 |
| }, |
| { |
| "epoch": 0.7639474635498252, |
| "grad_norm": 0.2795773536176513, |
| "learning_rate": 9.289982529270424e-06, |
| "loss": 0.2462, |
| "step": 3170 |
| }, |
| { |
| "epoch": 0.7663573924569225, |
| "grad_norm": 0.20844625168835318, |
| "learning_rate": 9.28276488978632e-06, |
| "loss": 0.239, |
| "step": 3180 |
| }, |
| { |
| "epoch": 0.7687673213640197, |
| "grad_norm": 0.2872676811457329, |
| "learning_rate": 9.275513583646885e-06, |
| "loss": 0.2426, |
| "step": 3190 |
| }, |
| { |
| "epoch": 0.771177250271117, |
| "grad_norm": 0.2695671149655012, |
| "learning_rate": 9.26822866785437e-06, |
| "loss": 0.2437, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.7735871791782143, |
| "grad_norm": 0.23733636792731364, |
| "learning_rate": 9.260910199675224e-06, |
| "loss": 0.2405, |
| "step": 3210 |
| }, |
| { |
| "epoch": 0.7759971080853115, |
| "grad_norm": 0.2519834401705272, |
| "learning_rate": 9.253558236639654e-06, |
| "loss": 0.2447, |
| "step": 3220 |
| }, |
| { |
| "epoch": 0.7784070369924088, |
| "grad_norm": 0.2994461867359508, |
| "learning_rate": 9.246172836541167e-06, |
| "loss": 0.2424, |
| "step": 3230 |
| }, |
| { |
| "epoch": 0.780816965899506, |
| "grad_norm": 0.285434401778984, |
| "learning_rate": 9.238754057436121e-06, |
| "loss": 0.2459, |
| "step": 3240 |
| }, |
| { |
| "epoch": 0.7832268948066032, |
| "grad_norm": 0.2478713737633632, |
| "learning_rate": 9.23130195764326e-06, |
| "loss": 0.241, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.7856368237137005, |
| "grad_norm": 0.23085486709682762, |
| "learning_rate": 9.223816595743263e-06, |
| "loss": 0.2382, |
| "step": 3260 |
| }, |
| { |
| "epoch": 0.7880467526207977, |
| "grad_norm": 0.2102415877652219, |
| "learning_rate": 9.21629803057828e-06, |
| "loss": 0.2467, |
| "step": 3270 |
| }, |
| { |
| "epoch": 0.7904566815278949, |
| "grad_norm": 0.20235721757640468, |
| "learning_rate": 9.208746321251477e-06, |
| "loss": 0.2439, |
| "step": 3280 |
| }, |
| { |
| "epoch": 0.7928666104349922, |
| "grad_norm": 0.24134151252384362, |
| "learning_rate": 9.201161527126554e-06, |
| "loss": 0.2387, |
| "step": 3290 |
| }, |
| { |
| "epoch": 0.7952765393420894, |
| "grad_norm": 0.2429745767975388, |
| "learning_rate": 9.193543707827297e-06, |
| "loss": 0.2421, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.7976864682491867, |
| "grad_norm": 0.24545430876644317, |
| "learning_rate": 9.185892923237101e-06, |
| "loss": 0.2384, |
| "step": 3310 |
| }, |
| { |
| "epoch": 0.8000963971562839, |
| "grad_norm": 0.23413187282452325, |
| "learning_rate": 9.178209233498497e-06, |
| "loss": 0.2413, |
| "step": 3320 |
| }, |
| { |
| "epoch": 0.8025063260633811, |
| "grad_norm": 0.22264754893987343, |
| "learning_rate": 9.170492699012686e-06, |
| "loss": 0.2441, |
| "step": 3330 |
| }, |
| { |
| "epoch": 0.8049162549704784, |
| "grad_norm": 0.2242518581750092, |
| "learning_rate": 9.162743380439057e-06, |
| "loss": 0.2437, |
| "step": 3340 |
| }, |
| { |
| "epoch": 0.8073261838775756, |
| "grad_norm": 0.2178074552318658, |
| "learning_rate": 9.154961338694714e-06, |
| "loss": 0.2366, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.8097361127846728, |
| "grad_norm": 0.2154415811499556, |
| "learning_rate": 9.147146634954e-06, |
| "loss": 0.2418, |
| "step": 3360 |
| }, |
| { |
| "epoch": 0.8121460416917701, |
| "grad_norm": 0.22353296872616982, |
| "learning_rate": 9.139299330648006e-06, |
| "loss": 0.2423, |
| "step": 3370 |
| }, |
| { |
| "epoch": 0.8145559705988673, |
| "grad_norm": 0.24935251325993996, |
| "learning_rate": 9.131419487464104e-06, |
| "loss": 0.2401, |
| "step": 3380 |
| }, |
| { |
| "epoch": 0.8169658995059645, |
| "grad_norm": 0.2870367439432121, |
| "learning_rate": 9.123507167345444e-06, |
| "loss": 0.2438, |
| "step": 3390 |
| }, |
| { |
| "epoch": 0.8193758284130618, |
| "grad_norm": 0.24533380471693478, |
| "learning_rate": 9.115562432490482e-06, |
| "loss": 0.2435, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.821785757320159, |
| "grad_norm": 0.25027692705581994, |
| "learning_rate": 9.107585345352481e-06, |
| "loss": 0.2387, |
| "step": 3410 |
| }, |
| { |
| "epoch": 0.8241956862272563, |
| "grad_norm": 0.20473070923362255, |
| "learning_rate": 9.099575968639028e-06, |
| "loss": 0.2418, |
| "step": 3420 |
| }, |
| { |
| "epoch": 0.8266056151343535, |
| "grad_norm": 0.2087558514245916, |
| "learning_rate": 9.091534365311531e-06, |
| "loss": 0.2417, |
| "step": 3430 |
| }, |
| { |
| "epoch": 0.8290155440414507, |
| "grad_norm": 0.19243197368268125, |
| "learning_rate": 9.08346059858474e-06, |
| "loss": 0.239, |
| "step": 3440 |
| }, |
| { |
| "epoch": 0.8314254729485481, |
| "grad_norm": 0.2389322016697769, |
| "learning_rate": 9.075354731926232e-06, |
| "loss": 0.2398, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.8338354018556453, |
| "grad_norm": 0.20955906880592914, |
| "learning_rate": 9.067216829055922e-06, |
| "loss": 0.2423, |
| "step": 3460 |
| }, |
| { |
| "epoch": 0.8362453307627425, |
| "grad_norm": 0.21621910041719575, |
| "learning_rate": 9.059046953945563e-06, |
| "loss": 0.2404, |
| "step": 3470 |
| }, |
| { |
| "epoch": 0.8386552596698398, |
| "grad_norm": 0.20962354744285525, |
| "learning_rate": 9.050845170818239e-06, |
| "loss": 0.2397, |
| "step": 3480 |
| }, |
| { |
| "epoch": 0.841065188576937, |
| "grad_norm": 0.2784866402301869, |
| "learning_rate": 9.04261154414786e-06, |
| "loss": 0.2412, |
| "step": 3490 |
| }, |
| { |
| "epoch": 0.8434751174840343, |
| "grad_norm": 0.213421075751132, |
| "learning_rate": 9.03434613865866e-06, |
| "loss": 0.2415, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.8458850463911315, |
| "grad_norm": 0.2432009063623774, |
| "learning_rate": 9.026049019324686e-06, |
| "loss": 0.2447, |
| "step": 3510 |
| }, |
| { |
| "epoch": 0.8482949752982287, |
| "grad_norm": 0.22778860067999604, |
| "learning_rate": 9.01772025136928e-06, |
| "loss": 0.2354, |
| "step": 3520 |
| }, |
| { |
| "epoch": 0.850704904205326, |
| "grad_norm": 0.21714110048244442, |
| "learning_rate": 9.009359900264579e-06, |
| "loss": 0.2404, |
| "step": 3530 |
| }, |
| { |
| "epoch": 0.8531148331124232, |
| "grad_norm": 0.22093004627284601, |
| "learning_rate": 9.00096803173099e-06, |
| "loss": 0.2352, |
| "step": 3540 |
| }, |
| { |
| "epoch": 0.8555247620195204, |
| "grad_norm": 0.2079051465955922, |
| "learning_rate": 8.992544711736682e-06, |
| "loss": 0.2379, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.8579346909266177, |
| "grad_norm": 0.23996175709226994, |
| "learning_rate": 8.984090006497056e-06, |
| "loss": 0.2383, |
| "step": 3560 |
| }, |
| { |
| "epoch": 0.8603446198337149, |
| "grad_norm": 0.27470508136131533, |
| "learning_rate": 8.97560398247424e-06, |
| "loss": 0.2372, |
| "step": 3570 |
| }, |
| { |
| "epoch": 0.8627545487408121, |
| "grad_norm": 0.23847577557581387, |
| "learning_rate": 8.96708670637655e-06, |
| "loss": 0.2372, |
| "step": 3580 |
| }, |
| { |
| "epoch": 0.8651644776479094, |
| "grad_norm": 0.1927026016514168, |
| "learning_rate": 8.958538245157975e-06, |
| "loss": 0.2411, |
| "step": 3590 |
| }, |
| { |
| "epoch": 0.8675744065550066, |
| "grad_norm": 0.20779115915350238, |
| "learning_rate": 8.949958666017652e-06, |
| "loss": 0.2398, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.8699843354621039, |
| "grad_norm": 0.2440155187659645, |
| "learning_rate": 8.941348036399333e-06, |
| "loss": 0.2402, |
| "step": 3610 |
| }, |
| { |
| "epoch": 0.8723942643692011, |
| "grad_norm": 0.2589623848581086, |
| "learning_rate": 8.932706423990856e-06, |
| "loss": 0.242, |
| "step": 3620 |
| }, |
| { |
| "epoch": 0.8748041932762983, |
| "grad_norm": 0.20029907640445951, |
| "learning_rate": 8.924033896723617e-06, |
| "loss": 0.2421, |
| "step": 3630 |
| }, |
| { |
| "epoch": 0.8772141221833956, |
| "grad_norm": 0.22197406646318488, |
| "learning_rate": 8.915330522772028e-06, |
| "loss": 0.2408, |
| "step": 3640 |
| }, |
| { |
| "epoch": 0.8796240510904928, |
| "grad_norm": 0.22624317150949583, |
| "learning_rate": 8.906596370552985e-06, |
| "loss": 0.2382, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.88203397999759, |
| "grad_norm": 0.23656619525765019, |
| "learning_rate": 8.897831508725338e-06, |
| "loss": 0.2379, |
| "step": 3660 |
| }, |
| { |
| "epoch": 0.8844439089046873, |
| "grad_norm": 0.2328517091519601, |
| "learning_rate": 8.889036006189338e-06, |
| "loss": 0.2388, |
| "step": 3670 |
| }, |
| { |
| "epoch": 0.8868538378117845, |
| "grad_norm": 0.22331596836468462, |
| "learning_rate": 8.880209932086102e-06, |
| "loss": 0.2418, |
| "step": 3680 |
| }, |
| { |
| "epoch": 0.8892637667188817, |
| "grad_norm": 0.22171497642798174, |
| "learning_rate": 8.87135335579707e-06, |
| "loss": 0.2355, |
| "step": 3690 |
| }, |
| { |
| "epoch": 0.891673695625979, |
| "grad_norm": 0.22384435255581855, |
| "learning_rate": 8.862466346943457e-06, |
| "loss": 0.2422, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.8940836245330763, |
| "grad_norm": 0.22527164593480525, |
| "learning_rate": 8.853548975385714e-06, |
| "loss": 0.2389, |
| "step": 3710 |
| }, |
| { |
| "epoch": 0.8964935534401736, |
| "grad_norm": 0.23176983646267607, |
| "learning_rate": 8.84460131122296e-06, |
| "loss": 0.2387, |
| "step": 3720 |
| }, |
| { |
| "epoch": 0.8989034823472708, |
| "grad_norm": 0.280292027220555, |
| "learning_rate": 8.835623424792453e-06, |
| "loss": 0.2424, |
| "step": 3730 |
| }, |
| { |
| "epoch": 0.901313411254368, |
| "grad_norm": 0.22622921375840255, |
| "learning_rate": 8.826615386669025e-06, |
| "loss": 0.2386, |
| "step": 3740 |
| }, |
| { |
| "epoch": 0.9037233401614653, |
| "grad_norm": 0.209504843641764, |
| "learning_rate": 8.817577267664528e-06, |
| "loss": 0.2399, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.9061332690685625, |
| "grad_norm": 0.2014356959834231, |
| "learning_rate": 8.808509138827275e-06, |
| "loss": 0.2388, |
| "step": 3760 |
| }, |
| { |
| "epoch": 0.9085431979756597, |
| "grad_norm": 0.22477916464908484, |
| "learning_rate": 8.799411071441496e-06, |
| "loss": 0.238, |
| "step": 3770 |
| }, |
| { |
| "epoch": 0.910953126882757, |
| "grad_norm": 0.24112220047729305, |
| "learning_rate": 8.790283137026754e-06, |
| "loss": 0.2382, |
| "step": 3780 |
| }, |
| { |
| "epoch": 0.9133630557898542, |
| "grad_norm": 0.23524427726283031, |
| "learning_rate": 8.781125407337405e-06, |
| "loss": 0.24, |
| "step": 3790 |
| }, |
| { |
| "epoch": 0.9157729846969515, |
| "grad_norm": 0.18849866981351815, |
| "learning_rate": 8.77193795436202e-06, |
| "loss": 0.2362, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.9181829136040487, |
| "grad_norm": 0.2256694441450033, |
| "learning_rate": 8.762720850322823e-06, |
| "loss": 0.238, |
| "step": 3810 |
| }, |
| { |
| "epoch": 0.9205928425111459, |
| "grad_norm": 0.20964103549862023, |
| "learning_rate": 8.753474167675128e-06, |
| "loss": 0.2403, |
| "step": 3820 |
| }, |
| { |
| "epoch": 0.9230027714182432, |
| "grad_norm": 0.22505474797625605, |
| "learning_rate": 8.744197979106763e-06, |
| "loss": 0.2417, |
| "step": 3830 |
| }, |
| { |
| "epoch": 0.9254127003253404, |
| "grad_norm": 0.21096678717130304, |
| "learning_rate": 8.7348923575375e-06, |
| "loss": 0.2399, |
| "step": 3840 |
| }, |
| { |
| "epoch": 0.9278226292324376, |
| "grad_norm": 0.28829858771631195, |
| "learning_rate": 8.725557376118482e-06, |
| "loss": 0.24, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.9302325581395349, |
| "grad_norm": 0.21605631711409606, |
| "learning_rate": 8.716193108231655e-06, |
| "loss": 0.2382, |
| "step": 3860 |
| }, |
| { |
| "epoch": 0.9326424870466321, |
| "grad_norm": 0.23010907578749432, |
| "learning_rate": 8.706799627489175e-06, |
| "loss": 0.2391, |
| "step": 3870 |
| }, |
| { |
| "epoch": 0.9350524159537293, |
| "grad_norm": 0.1986626914383043, |
| "learning_rate": 8.697377007732848e-06, |
| "loss": 0.2379, |
| "step": 3880 |
| }, |
| { |
| "epoch": 0.9374623448608266, |
| "grad_norm": 0.24457215435863935, |
| "learning_rate": 8.687925323033536e-06, |
| "loss": 0.2384, |
| "step": 3890 |
| }, |
| { |
| "epoch": 0.9398722737679238, |
| "grad_norm": 0.2400460855871951, |
| "learning_rate": 8.67844464769058e-06, |
| "loss": 0.2375, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.942282202675021, |
| "grad_norm": 0.20759698468905713, |
| "learning_rate": 8.668935056231216e-06, |
| "loss": 0.2351, |
| "step": 3910 |
| }, |
| { |
| "epoch": 0.9446921315821183, |
| "grad_norm": 0.2314303079993546, |
| "learning_rate": 8.659396623409987e-06, |
| "loss": 0.2419, |
| "step": 3920 |
| }, |
| { |
| "epoch": 0.9471020604892155, |
| "grad_norm": 0.20578452598163302, |
| "learning_rate": 8.649829424208163e-06, |
| "loss": 0.2402, |
| "step": 3930 |
| }, |
| { |
| "epoch": 0.9495119893963128, |
| "grad_norm": 0.22203294343090166, |
| "learning_rate": 8.640233533833136e-06, |
| "loss": 0.236, |
| "step": 3940 |
| }, |
| { |
| "epoch": 0.95192191830341, |
| "grad_norm": 0.23695728669881866, |
| "learning_rate": 8.630609027717843e-06, |
| "loss": 0.2373, |
| "step": 3950 |
| }, |
| { |
| "epoch": 0.9543318472105073, |
| "grad_norm": 0.22602095173170259, |
| "learning_rate": 8.620955981520171e-06, |
| "loss": 0.2374, |
| "step": 3960 |
| }, |
| { |
| "epoch": 0.9567417761176046, |
| "grad_norm": 0.22002954889841164, |
| "learning_rate": 8.611274471122355e-06, |
| "loss": 0.241, |
| "step": 3970 |
| }, |
| { |
| "epoch": 0.9591517050247018, |
| "grad_norm": 0.20036228785191246, |
| "learning_rate": 8.601564572630387e-06, |
| "loss": 0.2368, |
| "step": 3980 |
| }, |
| { |
| "epoch": 0.961561633931799, |
| "grad_norm": 0.23987199491581082, |
| "learning_rate": 8.591826362373421e-06, |
| "loss": 0.2417, |
| "step": 3990 |
| }, |
| { |
| "epoch": 0.9639715628388963, |
| "grad_norm": 0.2666936971172365, |
| "learning_rate": 8.58205991690316e-06, |
| "loss": 0.2407, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.9663814917459935, |
| "grad_norm": 0.26043640766704723, |
| "learning_rate": 8.572265312993274e-06, |
| "loss": 0.2374, |
| "step": 4010 |
| }, |
| { |
| "epoch": 0.9687914206530908, |
| "grad_norm": 0.20723580075002096, |
| "learning_rate": 8.562442627638774e-06, |
| "loss": 0.2391, |
| "step": 4020 |
| }, |
| { |
| "epoch": 0.971201349560188, |
| "grad_norm": 0.22633246655669187, |
| "learning_rate": 8.552591938055425e-06, |
| "loss": 0.2358, |
| "step": 4030 |
| }, |
| { |
| "epoch": 0.9736112784672852, |
| "grad_norm": 0.19736774547140887, |
| "learning_rate": 8.542713321679137e-06, |
| "loss": 0.2399, |
| "step": 4040 |
| }, |
| { |
| "epoch": 0.9760212073743825, |
| "grad_norm": 0.2264847666207545, |
| "learning_rate": 8.532806856165337e-06, |
| "loss": 0.2363, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.9784311362814797, |
| "grad_norm": 0.2123889512251932, |
| "learning_rate": 8.522872619388387e-06, |
| "loss": 0.2397, |
| "step": 4060 |
| }, |
| { |
| "epoch": 0.9808410651885769, |
| "grad_norm": 0.20895886845966094, |
| "learning_rate": 8.512910689440951e-06, |
| "loss": 0.2379, |
| "step": 4070 |
| }, |
| { |
| "epoch": 0.9832509940956742, |
| "grad_norm": 0.2335328134396977, |
| "learning_rate": 8.50292114463339e-06, |
| "loss": 0.2338, |
| "step": 4080 |
| }, |
| { |
| "epoch": 0.9856609230027714, |
| "grad_norm": 0.23096136487661056, |
| "learning_rate": 8.49290406349314e-06, |
| "loss": 0.2378, |
| "step": 4090 |
| }, |
| { |
| "epoch": 0.9880708519098687, |
| "grad_norm": 0.235592090678458, |
| "learning_rate": 8.482859524764108e-06, |
| "loss": 0.2404, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.9904807808169659, |
| "grad_norm": 0.20944731655700624, |
| "learning_rate": 8.472787607406036e-06, |
| "loss": 0.2363, |
| "step": 4110 |
| }, |
| { |
| "epoch": 0.9928907097240631, |
| "grad_norm": 0.20365146059721848, |
| "learning_rate": 8.462688390593894e-06, |
| "loss": 0.2427, |
| "step": 4120 |
| }, |
| { |
| "epoch": 0.9953006386311604, |
| "grad_norm": 0.2151839265324596, |
| "learning_rate": 8.452561953717246e-06, |
| "loss": 0.2353, |
| "step": 4130 |
| }, |
| { |
| "epoch": 0.9977105675382576, |
| "grad_norm": 0.20685275108614723, |
| "learning_rate": 8.442408376379637e-06, |
| "loss": 0.2397, |
| "step": 4140 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.2586521391195523, |
| "learning_rate": 8.43222773839796e-06, |
| "loss": 0.2343, |
| "step": 4150 |
| }, |
| { |
| "epoch": 1.0024099289070973, |
| "grad_norm": 0.24188143764437817, |
| "learning_rate": 8.422020119801831e-06, |
| "loss": 0.2301, |
| "step": 4160 |
| }, |
| { |
| "epoch": 1.0048198578141945, |
| "grad_norm": 0.2150368185663544, |
| "learning_rate": 8.411785600832959e-06, |
| "loss": 0.2343, |
| "step": 4170 |
| }, |
| { |
| "epoch": 1.0072297867212918, |
| "grad_norm": 0.21763846175940527, |
| "learning_rate": 8.401524261944519e-06, |
| "loss": 0.2329, |
| "step": 4180 |
| }, |
| { |
| "epoch": 1.009639715628389, |
| "grad_norm": 0.2103637253475913, |
| "learning_rate": 8.39123618380051e-06, |
| "loss": 0.2314, |
| "step": 4190 |
| }, |
| { |
| "epoch": 1.0120496445354863, |
| "grad_norm": 0.24851068438913623, |
| "learning_rate": 8.380921447275137e-06, |
| "loss": 0.2344, |
| "step": 4200 |
| }, |
| { |
| "epoch": 1.0144595734425834, |
| "grad_norm": 0.23702433965413158, |
| "learning_rate": 8.370580133452153e-06, |
| "loss": 0.2344, |
| "step": 4210 |
| }, |
| { |
| "epoch": 1.0168695023496808, |
| "grad_norm": 0.20592307353537548, |
| "learning_rate": 8.360212323624246e-06, |
| "loss": 0.2322, |
| "step": 4220 |
| }, |
| { |
| "epoch": 1.019279431256778, |
| "grad_norm": 0.22892056279165002, |
| "learning_rate": 8.349818099292379e-06, |
| "loss": 0.2348, |
| "step": 4230 |
| }, |
| { |
| "epoch": 1.0216893601638752, |
| "grad_norm": 0.1959115919448313, |
| "learning_rate": 8.339397542165166e-06, |
| "loss": 0.2312, |
| "step": 4240 |
| }, |
| { |
| "epoch": 1.0240992890709724, |
| "grad_norm": 0.22592046247928438, |
| "learning_rate": 8.328950734158219e-06, |
| "loss": 0.2322, |
| "step": 4250 |
| }, |
| { |
| "epoch": 1.0265092179780697, |
| "grad_norm": 0.23157099144064822, |
| "learning_rate": 8.318477757393502e-06, |
| "loss": 0.2297, |
| "step": 4260 |
| }, |
| { |
| "epoch": 1.0289191468851668, |
| "grad_norm": 0.2466643779765108, |
| "learning_rate": 8.3079786941987e-06, |
| "loss": 0.2312, |
| "step": 4270 |
| }, |
| { |
| "epoch": 1.0313290757922642, |
| "grad_norm": 0.22269882927373885, |
| "learning_rate": 8.297453627106556e-06, |
| "loss": 0.2292, |
| "step": 4280 |
| }, |
| { |
| "epoch": 1.0337390046993613, |
| "grad_norm": 0.22564018444770104, |
| "learning_rate": 8.28690263885423e-06, |
| "loss": 0.2304, |
| "step": 4290 |
| }, |
| { |
| "epoch": 1.0361489336064587, |
| "grad_norm": 0.20380329709098385, |
| "learning_rate": 8.276325812382648e-06, |
| "loss": 0.2309, |
| "step": 4300 |
| }, |
| { |
| "epoch": 1.0385588625135558, |
| "grad_norm": 0.22594177420899356, |
| "learning_rate": 8.265723230835852e-06, |
| "loss": 0.2286, |
| "step": 4310 |
| }, |
| { |
| "epoch": 1.0409687914206531, |
| "grad_norm": 0.2211795887430096, |
| "learning_rate": 8.255094977560335e-06, |
| "loss": 0.2306, |
| "step": 4320 |
| }, |
| { |
| "epoch": 1.0433787203277503, |
| "grad_norm": 0.2206250811806614, |
| "learning_rate": 8.244441136104406e-06, |
| "loss": 0.2298, |
| "step": 4330 |
| }, |
| { |
| "epoch": 1.0457886492348476, |
| "grad_norm": 0.2089423604875909, |
| "learning_rate": 8.233761790217512e-06, |
| "loss": 0.2317, |
| "step": 4340 |
| }, |
| { |
| "epoch": 1.0481985781419447, |
| "grad_norm": 0.20682119942283125, |
| "learning_rate": 8.223057023849595e-06, |
| "loss": 0.2305, |
| "step": 4350 |
| }, |
| { |
| "epoch": 1.050608507049042, |
| "grad_norm": 0.20643946883650358, |
| "learning_rate": 8.212326921150426e-06, |
| "loss": 0.2341, |
| "step": 4360 |
| }, |
| { |
| "epoch": 1.0530184359561392, |
| "grad_norm": 0.20808166658774438, |
| "learning_rate": 8.20157156646894e-06, |
| "loss": 0.2342, |
| "step": 4370 |
| }, |
| { |
| "epoch": 1.0554283648632365, |
| "grad_norm": 0.2142171649274259, |
| "learning_rate": 8.190791044352581e-06, |
| "loss": 0.2289, |
| "step": 4380 |
| }, |
| { |
| "epoch": 1.0578382937703337, |
| "grad_norm": 0.21675585403201775, |
| "learning_rate": 8.179985439546633e-06, |
| "loss": 0.2293, |
| "step": 4390 |
| }, |
| { |
| "epoch": 1.060248222677431, |
| "grad_norm": 0.22031312657094632, |
| "learning_rate": 8.16915483699355e-06, |
| "loss": 0.2297, |
| "step": 4400 |
| }, |
| { |
| "epoch": 1.0626581515845284, |
| "grad_norm": 0.23416365396054778, |
| "learning_rate": 8.158299321832301e-06, |
| "loss": 0.2299, |
| "step": 4410 |
| }, |
| { |
| "epoch": 1.0650680804916255, |
| "grad_norm": 0.22560680707030648, |
| "learning_rate": 8.147418979397682e-06, |
| "loss": 0.2372, |
| "step": 4420 |
| }, |
| { |
| "epoch": 1.0674780093987228, |
| "grad_norm": 0.22554131254570797, |
| "learning_rate": 8.13651389521966e-06, |
| "loss": 0.2336, |
| "step": 4430 |
| }, |
| { |
| "epoch": 1.06988793830582, |
| "grad_norm": 0.21745513943006545, |
| "learning_rate": 8.125584155022696e-06, |
| "loss": 0.2315, |
| "step": 4440 |
| }, |
| { |
| "epoch": 1.0722978672129173, |
| "grad_norm": 0.21899082395931707, |
| "learning_rate": 8.114629844725073e-06, |
| "loss": 0.2299, |
| "step": 4450 |
| }, |
| { |
| "epoch": 1.0747077961200144, |
| "grad_norm": 0.19395411691567005, |
| "learning_rate": 8.103651050438213e-06, |
| "loss": 0.2303, |
| "step": 4460 |
| }, |
| { |
| "epoch": 1.0771177250271118, |
| "grad_norm": 0.20453167778614773, |
| "learning_rate": 8.09264785846601e-06, |
| "loss": 0.233, |
| "step": 4470 |
| }, |
| { |
| "epoch": 1.079527653934209, |
| "grad_norm": 0.22971826595899175, |
| "learning_rate": 8.081620355304147e-06, |
| "loss": 0.2285, |
| "step": 4480 |
| }, |
| { |
| "epoch": 1.0819375828413063, |
| "grad_norm": 0.2198565653788356, |
| "learning_rate": 8.070568627639418e-06, |
| "loss": 0.2325, |
| "step": 4490 |
| }, |
| { |
| "epoch": 1.0843475117484034, |
| "grad_norm": 0.19813813821821347, |
| "learning_rate": 8.059492762349037e-06, |
| "loss": 0.2305, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.0867574406555007, |
| "grad_norm": 0.24928525614074773, |
| "learning_rate": 8.048392846499974e-06, |
| "loss": 0.2313, |
| "step": 4510 |
| }, |
| { |
| "epoch": 1.0891673695625979, |
| "grad_norm": 0.2222700266271006, |
| "learning_rate": 8.037268967348252e-06, |
| "loss": 0.2333, |
| "step": 4520 |
| }, |
| { |
| "epoch": 1.0915772984696952, |
| "grad_norm": 0.26603597575633053, |
| "learning_rate": 8.026121212338271e-06, |
| "loss": 0.2302, |
| "step": 4530 |
| }, |
| { |
| "epoch": 1.0939872273767923, |
| "grad_norm": 0.24862585253580038, |
| "learning_rate": 8.014949669102117e-06, |
| "loss": 0.2277, |
| "step": 4540 |
| }, |
| { |
| "epoch": 1.0963971562838897, |
| "grad_norm": 0.2193998181971378, |
| "learning_rate": 8.003754425458878e-06, |
| "loss": 0.2318, |
| "step": 4550 |
| }, |
| { |
| "epoch": 1.0988070851909868, |
| "grad_norm": 0.21015258570287143, |
| "learning_rate": 7.992535569413944e-06, |
| "loss": 0.2271, |
| "step": 4560 |
| }, |
| { |
| "epoch": 1.1012170140980841, |
| "grad_norm": 0.2223685303111012, |
| "learning_rate": 7.981293189158327e-06, |
| "loss": 0.2275, |
| "step": 4570 |
| }, |
| { |
| "epoch": 1.1036269430051813, |
| "grad_norm": 0.19907027072000166, |
| "learning_rate": 7.970027373067961e-06, |
| "loss": 0.2307, |
| "step": 4580 |
| }, |
| { |
| "epoch": 1.1060368719122786, |
| "grad_norm": 0.24724710916745146, |
| "learning_rate": 7.958738209703004e-06, |
| "loss": 0.2295, |
| "step": 4590 |
| }, |
| { |
| "epoch": 1.1084468008193757, |
| "grad_norm": 0.25180113626386774, |
| "learning_rate": 7.94742578780715e-06, |
| "loss": 0.2287, |
| "step": 4600 |
| }, |
| { |
| "epoch": 1.110856729726473, |
| "grad_norm": 0.19639239274233874, |
| "learning_rate": 7.936090196306925e-06, |
| "loss": 0.2308, |
| "step": 4610 |
| }, |
| { |
| "epoch": 1.1132666586335702, |
| "grad_norm": 0.2232352206466752, |
| "learning_rate": 7.924731524310993e-06, |
| "loss": 0.2301, |
| "step": 4620 |
| }, |
| { |
| "epoch": 1.1156765875406676, |
| "grad_norm": 0.22636794187309509, |
| "learning_rate": 7.91334986110945e-06, |
| "loss": 0.2346, |
| "step": 4630 |
| }, |
| { |
| "epoch": 1.1180865164477647, |
| "grad_norm": 0.256290459496497, |
| "learning_rate": 7.90194529617313e-06, |
| "loss": 0.2309, |
| "step": 4640 |
| }, |
| { |
| "epoch": 1.120496445354862, |
| "grad_norm": 0.2591624292153268, |
| "learning_rate": 7.890517919152892e-06, |
| "loss": 0.232, |
| "step": 4650 |
| }, |
| { |
| "epoch": 1.1229063742619592, |
| "grad_norm": 0.19260606757836757, |
| "learning_rate": 7.879067819878918e-06, |
| "loss": 0.2313, |
| "step": 4660 |
| }, |
| { |
| "epoch": 1.1253163031690565, |
| "grad_norm": 0.19841858423210013, |
| "learning_rate": 7.867595088360016e-06, |
| "loss": 0.2294, |
| "step": 4670 |
| }, |
| { |
| "epoch": 1.1277262320761539, |
| "grad_norm": 0.26595594788967863, |
| "learning_rate": 7.856099814782901e-06, |
| "loss": 0.2314, |
| "step": 4680 |
| }, |
| { |
| "epoch": 1.130136160983251, |
| "grad_norm": 0.21035558207154897, |
| "learning_rate": 7.844582089511486e-06, |
| "loss": 0.229, |
| "step": 4690 |
| }, |
| { |
| "epoch": 1.1325460898903483, |
| "grad_norm": 0.20246625511987365, |
| "learning_rate": 7.833042003086186e-06, |
| "loss": 0.229, |
| "step": 4700 |
| }, |
| { |
| "epoch": 1.1349560187974455, |
| "grad_norm": 0.20841216029515003, |
| "learning_rate": 7.82147964622319e-06, |
| "loss": 0.2304, |
| "step": 4710 |
| }, |
| { |
| "epoch": 1.1373659477045428, |
| "grad_norm": 0.22073742055789836, |
| "learning_rate": 7.809895109813752e-06, |
| "loss": 0.2288, |
| "step": 4720 |
| }, |
| { |
| "epoch": 1.13977587661164, |
| "grad_norm": 0.18142927405365206, |
| "learning_rate": 7.798288484923482e-06, |
| "loss": 0.2297, |
| "step": 4730 |
| }, |
| { |
| "epoch": 1.1421858055187373, |
| "grad_norm": 0.24062642626538192, |
| "learning_rate": 7.786659862791628e-06, |
| "loss": 0.2302, |
| "step": 4740 |
| }, |
| { |
| "epoch": 1.1445957344258344, |
| "grad_norm": 0.22193052781385864, |
| "learning_rate": 7.775009334830354e-06, |
| "loss": 0.2329, |
| "step": 4750 |
| }, |
| { |
| "epoch": 1.1470056633329317, |
| "grad_norm": 0.21856366154677967, |
| "learning_rate": 7.763336992624027e-06, |
| "loss": 0.2314, |
| "step": 4760 |
| }, |
| { |
| "epoch": 1.1494155922400289, |
| "grad_norm": 0.20821223267820507, |
| "learning_rate": 7.751642927928495e-06, |
| "loss": 0.2306, |
| "step": 4770 |
| }, |
| { |
| "epoch": 1.1518255211471262, |
| "grad_norm": 0.21083267374524334, |
| "learning_rate": 7.739927232670363e-06, |
| "loss": 0.2319, |
| "step": 4780 |
| }, |
| { |
| "epoch": 1.1542354500542233, |
| "grad_norm": 0.25813360176828715, |
| "learning_rate": 7.728189998946278e-06, |
| "loss": 0.2302, |
| "step": 4790 |
| }, |
| { |
| "epoch": 1.1566453789613207, |
| "grad_norm": 0.20745737378001733, |
| "learning_rate": 7.716431319022197e-06, |
| "loss": 0.2322, |
| "step": 4800 |
| }, |
| { |
| "epoch": 1.1590553078684178, |
| "grad_norm": 0.21777607886366784, |
| "learning_rate": 7.704651285332662e-06, |
| "loss": 0.2302, |
| "step": 4810 |
| }, |
| { |
| "epoch": 1.1614652367755152, |
| "grad_norm": 0.2500667820522283, |
| "learning_rate": 7.692849990480082e-06, |
| "loss": 0.2319, |
| "step": 4820 |
| }, |
| { |
| "epoch": 1.1638751656826123, |
| "grad_norm": 0.22973690269313038, |
| "learning_rate": 7.681027527233995e-06, |
| "loss": 0.2286, |
| "step": 4830 |
| }, |
| { |
| "epoch": 1.1662850945897096, |
| "grad_norm": 0.2393654973776252, |
| "learning_rate": 7.669183988530346e-06, |
| "loss": 0.2321, |
| "step": 4840 |
| }, |
| { |
| "epoch": 1.1686950234968068, |
| "grad_norm": 0.20135227414103743, |
| "learning_rate": 7.65731946747075e-06, |
| "loss": 0.2322, |
| "step": 4850 |
| }, |
| { |
| "epoch": 1.171104952403904, |
| "grad_norm": 0.2349735209341509, |
| "learning_rate": 7.645434057321765e-06, |
| "loss": 0.2292, |
| "step": 4860 |
| }, |
| { |
| "epoch": 1.1735148813110012, |
| "grad_norm": 0.22188798867882703, |
| "learning_rate": 7.633527851514163e-06, |
| "loss": 0.2295, |
| "step": 4870 |
| }, |
| { |
| "epoch": 1.1759248102180986, |
| "grad_norm": 0.21584337957181052, |
| "learning_rate": 7.621600943642175e-06, |
| "loss": 0.2308, |
| "step": 4880 |
| }, |
| { |
| "epoch": 1.1783347391251957, |
| "grad_norm": 0.21567191870978752, |
| "learning_rate": 7.609653427462789e-06, |
| "loss": 0.2328, |
| "step": 4890 |
| }, |
| { |
| "epoch": 1.180744668032293, |
| "grad_norm": 0.21823427735015707, |
| "learning_rate": 7.5976853968949785e-06, |
| "loss": 0.2306, |
| "step": 4900 |
| }, |
| { |
| "epoch": 1.1831545969393904, |
| "grad_norm": 0.19275894842946628, |
| "learning_rate": 7.585696946018988e-06, |
| "loss": 0.2304, |
| "step": 4910 |
| }, |
| { |
| "epoch": 1.1855645258464875, |
| "grad_norm": 0.1892116420936937, |
| "learning_rate": 7.573688169075584e-06, |
| "loss": 0.2264, |
| "step": 4920 |
| }, |
| { |
| "epoch": 1.1879744547535847, |
| "grad_norm": 0.23178204518007942, |
| "learning_rate": 7.561659160465314e-06, |
| "loss": 0.2267, |
| "step": 4930 |
| }, |
| { |
| "epoch": 1.190384383660682, |
| "grad_norm": 0.18506621548247348, |
| "learning_rate": 7.549610014747769e-06, |
| "loss": 0.2307, |
| "step": 4940 |
| }, |
| { |
| "epoch": 1.1927943125677793, |
| "grad_norm": 0.2110590602040692, |
| "learning_rate": 7.537540826640834e-06, |
| "loss": 0.227, |
| "step": 4950 |
| }, |
| { |
| "epoch": 1.1952042414748765, |
| "grad_norm": 0.19886821016214, |
| "learning_rate": 7.525451691019945e-06, |
| "loss": 0.2277, |
| "step": 4960 |
| }, |
| { |
| "epoch": 1.1976141703819738, |
| "grad_norm": 0.19634675946926783, |
| "learning_rate": 7.513342702917349e-06, |
| "loss": 0.2308, |
| "step": 4970 |
| }, |
| { |
| "epoch": 1.200024099289071, |
| "grad_norm": 0.18955446533332626, |
| "learning_rate": 7.5012139575213505e-06, |
| "loss": 0.2309, |
| "step": 4980 |
| }, |
| { |
| "epoch": 1.2024340281961683, |
| "grad_norm": 0.19340590578019815, |
| "learning_rate": 7.4890655501755634e-06, |
| "loss": 0.2308, |
| "step": 4990 |
| }, |
| { |
| "epoch": 1.2048439571032654, |
| "grad_norm": 0.2374177804445115, |
| "learning_rate": 7.476897576378169e-06, |
| "loss": 0.2296, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.2072538860103628, |
| "grad_norm": 0.18948808744982817, |
| "learning_rate": 7.464710131781154e-06, |
| "loss": 0.2305, |
| "step": 5010 |
| }, |
| { |
| "epoch": 1.20966381491746, |
| "grad_norm": 0.20473350688580597, |
| "learning_rate": 7.452503312189567e-06, |
| "loss": 0.2263, |
| "step": 5020 |
| }, |
| { |
| "epoch": 1.2120737438245572, |
| "grad_norm": 0.2487841384973806, |
| "learning_rate": 7.440277213560763e-06, |
| "loss": 0.2281, |
| "step": 5030 |
| }, |
| { |
| "epoch": 1.2144836727316544, |
| "grad_norm": 0.21207576261257263, |
| "learning_rate": 7.428031932003647e-06, |
| "loss": 0.2284, |
| "step": 5040 |
| }, |
| { |
| "epoch": 1.2168936016387517, |
| "grad_norm": 0.26298990038991266, |
| "learning_rate": 7.415767563777922e-06, |
| "loss": 0.2298, |
| "step": 5050 |
| }, |
| { |
| "epoch": 1.2193035305458488, |
| "grad_norm": 0.19174817934191188, |
| "learning_rate": 7.40348420529333e-06, |
| "loss": 0.2292, |
| "step": 5060 |
| }, |
| { |
| "epoch": 1.2217134594529462, |
| "grad_norm": 0.254206380168484, |
| "learning_rate": 7.3911819531088926e-06, |
| "loss": 0.2303, |
| "step": 5070 |
| }, |
| { |
| "epoch": 1.2241233883600433, |
| "grad_norm": 0.2028578654449949, |
| "learning_rate": 7.378860903932159e-06, |
| "loss": 0.228, |
| "step": 5080 |
| }, |
| { |
| "epoch": 1.2265333172671407, |
| "grad_norm": 0.20991917606285404, |
| "learning_rate": 7.366521154618438e-06, |
| "loss": 0.2294, |
| "step": 5090 |
| }, |
| { |
| "epoch": 1.2289432461742378, |
| "grad_norm": 0.1869168911570863, |
| "learning_rate": 7.354162802170037e-06, |
| "loss": 0.2297, |
| "step": 5100 |
| }, |
| { |
| "epoch": 1.2313531750813351, |
| "grad_norm": 0.19475732673688276, |
| "learning_rate": 7.341785943735507e-06, |
| "loss": 0.23, |
| "step": 5110 |
| }, |
| { |
| "epoch": 1.2337631039884323, |
| "grad_norm": 0.19873278255233023, |
| "learning_rate": 7.3293906766088694e-06, |
| "loss": 0.2286, |
| "step": 5120 |
| }, |
| { |
| "epoch": 1.2361730328955296, |
| "grad_norm": 0.21835927087464604, |
| "learning_rate": 7.316977098228858e-06, |
| "loss": 0.2328, |
| "step": 5130 |
| }, |
| { |
| "epoch": 1.238582961802627, |
| "grad_norm": 0.1962496132128952, |
| "learning_rate": 7.3045453061781504e-06, |
| "loss": 0.2322, |
| "step": 5140 |
| }, |
| { |
| "epoch": 1.240992890709724, |
| "grad_norm": 0.22272783896480952, |
| "learning_rate": 7.292095398182601e-06, |
| "loss": 0.2315, |
| "step": 5150 |
| }, |
| { |
| "epoch": 1.2434028196168212, |
| "grad_norm": 0.20767197551415945, |
| "learning_rate": 7.2796274721104745e-06, |
| "loss": 0.2282, |
| "step": 5160 |
| }, |
| { |
| "epoch": 1.2458127485239185, |
| "grad_norm": 0.25400482777604805, |
| "learning_rate": 7.267141625971672e-06, |
| "loss": 0.2289, |
| "step": 5170 |
| }, |
| { |
| "epoch": 1.248222677431016, |
| "grad_norm": 0.20257802498669103, |
| "learning_rate": 7.254637957916964e-06, |
| "loss": 0.2262, |
| "step": 5180 |
| }, |
| { |
| "epoch": 1.250632606338113, |
| "grad_norm": 0.2310974031539901, |
| "learning_rate": 7.2421165662372216e-06, |
| "loss": 0.2292, |
| "step": 5190 |
| }, |
| { |
| "epoch": 1.2530425352452101, |
| "grad_norm": 0.19336151933192486, |
| "learning_rate": 7.229577549362638e-06, |
| "loss": 0.2286, |
| "step": 5200 |
| }, |
| { |
| "epoch": 1.2554524641523075, |
| "grad_norm": 0.20300052483445732, |
| "learning_rate": 7.217021005861957e-06, |
| "loss": 0.2273, |
| "step": 5210 |
| }, |
| { |
| "epoch": 1.2578623930594048, |
| "grad_norm": 0.18459425837787372, |
| "learning_rate": 7.204447034441699e-06, |
| "loss": 0.2307, |
| "step": 5220 |
| }, |
| { |
| "epoch": 1.260272321966502, |
| "grad_norm": 0.24088235156689228, |
| "learning_rate": 7.191855733945388e-06, |
| "loss": 0.23, |
| "step": 5230 |
| }, |
| { |
| "epoch": 1.2626822508735993, |
| "grad_norm": 0.255264207491406, |
| "learning_rate": 7.179247203352766e-06, |
| "loss": 0.2287, |
| "step": 5240 |
| }, |
| { |
| "epoch": 1.2650921797806964, |
| "grad_norm": 0.20378257098420147, |
| "learning_rate": 7.166621541779023e-06, |
| "loss": 0.2305, |
| "step": 5250 |
| }, |
| { |
| "epoch": 1.2675021086877938, |
| "grad_norm": 0.18810118416785937, |
| "learning_rate": 7.153978848474015e-06, |
| "loss": 0.2266, |
| "step": 5260 |
| }, |
| { |
| "epoch": 1.269912037594891, |
| "grad_norm": 0.2050559840972869, |
| "learning_rate": 7.141319222821483e-06, |
| "loss": 0.228, |
| "step": 5270 |
| }, |
| { |
| "epoch": 1.2723219665019883, |
| "grad_norm": 0.18200273106922982, |
| "learning_rate": 7.128642764338273e-06, |
| "loss": 0.2309, |
| "step": 5280 |
| }, |
| { |
| "epoch": 1.2747318954090854, |
| "grad_norm": 0.2155277251856691, |
| "learning_rate": 7.115949572673552e-06, |
| "loss": 0.226, |
| "step": 5290 |
| }, |
| { |
| "epoch": 1.2771418243161827, |
| "grad_norm": 0.19242590008108504, |
| "learning_rate": 7.1032397476080285e-06, |
| "loss": 0.2278, |
| "step": 5300 |
| }, |
| { |
| "epoch": 1.2795517532232799, |
| "grad_norm": 0.2239440690407902, |
| "learning_rate": 7.090513389053164e-06, |
| "loss": 0.2266, |
| "step": 5310 |
| }, |
| { |
| "epoch": 1.2819616821303772, |
| "grad_norm": 0.24270003968208573, |
| "learning_rate": 7.0777705970503885e-06, |
| "loss": 0.2299, |
| "step": 5320 |
| }, |
| { |
| "epoch": 1.2843716110374743, |
| "grad_norm": 0.23294221068708462, |
| "learning_rate": 7.065011471770316e-06, |
| "loss": 0.2269, |
| "step": 5330 |
| }, |
| { |
| "epoch": 1.2867815399445717, |
| "grad_norm": 0.21053633421688792, |
| "learning_rate": 7.052236113511955e-06, |
| "loss": 0.2241, |
| "step": 5340 |
| }, |
| { |
| "epoch": 1.2891914688516688, |
| "grad_norm": 0.19608862159785578, |
| "learning_rate": 7.039444622701922e-06, |
| "loss": 0.2296, |
| "step": 5350 |
| }, |
| { |
| "epoch": 1.2916013977587661, |
| "grad_norm": 0.22616517954071558, |
| "learning_rate": 7.0266370998936475e-06, |
| "loss": 0.2269, |
| "step": 5360 |
| }, |
| { |
| "epoch": 1.2940113266658635, |
| "grad_norm": 0.20766665105263057, |
| "learning_rate": 7.013813645766593e-06, |
| "loss": 0.226, |
| "step": 5370 |
| }, |
| { |
| "epoch": 1.2964212555729606, |
| "grad_norm": 0.18829088009135178, |
| "learning_rate": 7.000974361125454e-06, |
| "loss": 0.2276, |
| "step": 5380 |
| }, |
| { |
| "epoch": 1.2988311844800577, |
| "grad_norm": 0.18258019162959943, |
| "learning_rate": 6.98811934689937e-06, |
| "loss": 0.2299, |
| "step": 5390 |
| }, |
| { |
| "epoch": 1.301241113387155, |
| "grad_norm": 0.18341245031560655, |
| "learning_rate": 6.975248704141128e-06, |
| "loss": 0.2275, |
| "step": 5400 |
| }, |
| { |
| "epoch": 1.3036510422942524, |
| "grad_norm": 0.24334797713544493, |
| "learning_rate": 6.96236253402637e-06, |
| "loss": 0.2306, |
| "step": 5410 |
| }, |
| { |
| "epoch": 1.3060609712013496, |
| "grad_norm": 0.5233523471468396, |
| "learning_rate": 6.949460937852803e-06, |
| "loss": 0.2276, |
| "step": 5420 |
| }, |
| { |
| "epoch": 1.3084709001084467, |
| "grad_norm": 0.21206752669860374, |
| "learning_rate": 6.936544017039391e-06, |
| "loss": 0.2322, |
| "step": 5430 |
| }, |
| { |
| "epoch": 1.310880829015544, |
| "grad_norm": 0.1896253139379362, |
| "learning_rate": 6.923611873125568e-06, |
| "loss": 0.2277, |
| "step": 5440 |
| }, |
| { |
| "epoch": 1.3132907579226414, |
| "grad_norm": 0.2782655506709367, |
| "learning_rate": 6.910664607770436e-06, |
| "loss": 0.2286, |
| "step": 5450 |
| }, |
| { |
| "epoch": 1.3157006868297385, |
| "grad_norm": 0.18511039089832573, |
| "learning_rate": 6.897702322751968e-06, |
| "loss": 0.2288, |
| "step": 5460 |
| }, |
| { |
| "epoch": 1.3181106157368356, |
| "grad_norm": 0.21295171566695434, |
| "learning_rate": 6.8847251199662025e-06, |
| "loss": 0.2302, |
| "step": 5470 |
| }, |
| { |
| "epoch": 1.320520544643933, |
| "grad_norm": 0.19238310734145622, |
| "learning_rate": 6.871733101426446e-06, |
| "loss": 0.2238, |
| "step": 5480 |
| }, |
| { |
| "epoch": 1.3229304735510303, |
| "grad_norm": 0.20056476380505142, |
| "learning_rate": 6.858726369262474e-06, |
| "loss": 0.226, |
| "step": 5490 |
| }, |
| { |
| "epoch": 1.3253404024581275, |
| "grad_norm": 0.1956017782664204, |
| "learning_rate": 6.8457050257197225e-06, |
| "loss": 0.2281, |
| "step": 5500 |
| }, |
| { |
| "epoch": 1.3277503313652248, |
| "grad_norm": 0.1751541976806092, |
| "learning_rate": 6.832669173158488e-06, |
| "loss": 0.2286, |
| "step": 5510 |
| }, |
| { |
| "epoch": 1.330160260272322, |
| "grad_norm": 0.18240728173049087, |
| "learning_rate": 6.819618914053126e-06, |
| "loss": 0.2257, |
| "step": 5520 |
| }, |
| { |
| "epoch": 1.3325701891794193, |
| "grad_norm": 0.2570130239616969, |
| "learning_rate": 6.806554350991233e-06, |
| "loss": 0.23, |
| "step": 5530 |
| }, |
| { |
| "epoch": 1.3349801180865164, |
| "grad_norm": 0.2084762248164331, |
| "learning_rate": 6.793475586672853e-06, |
| "loss": 0.2295, |
| "step": 5540 |
| }, |
| { |
| "epoch": 1.3373900469936137, |
| "grad_norm": 0.24661934253287357, |
| "learning_rate": 6.780382723909669e-06, |
| "loss": 0.2274, |
| "step": 5550 |
| }, |
| { |
| "epoch": 1.3397999759007109, |
| "grad_norm": 0.23325686225142206, |
| "learning_rate": 6.767275865624183e-06, |
| "loss": 0.2299, |
| "step": 5560 |
| }, |
| { |
| "epoch": 1.3422099048078082, |
| "grad_norm": 0.19812508835006593, |
| "learning_rate": 6.754155114848924e-06, |
| "loss": 0.2298, |
| "step": 5570 |
| }, |
| { |
| "epoch": 1.3446198337149053, |
| "grad_norm": 0.20280170570743694, |
| "learning_rate": 6.741020574725622e-06, |
| "loss": 0.2264, |
| "step": 5580 |
| }, |
| { |
| "epoch": 1.3470297626220027, |
| "grad_norm": 0.20219258329186535, |
| "learning_rate": 6.72787234850441e-06, |
| "loss": 0.2307, |
| "step": 5590 |
| }, |
| { |
| "epoch": 1.3494396915290998, |
| "grad_norm": 0.1960187022815678, |
| "learning_rate": 6.7147105395430045e-06, |
| "loss": 0.2279, |
| "step": 5600 |
| }, |
| { |
| "epoch": 1.3518496204361972, |
| "grad_norm": 0.18457553639130797, |
| "learning_rate": 6.701535251305895e-06, |
| "loss": 0.2259, |
| "step": 5610 |
| }, |
| { |
| "epoch": 1.3542595493432943, |
| "grad_norm": 0.19263285885748785, |
| "learning_rate": 6.688346587363533e-06, |
| "loss": 0.2272, |
| "step": 5620 |
| }, |
| { |
| "epoch": 1.3566694782503916, |
| "grad_norm": 0.2232492652164214, |
| "learning_rate": 6.675144651391511e-06, |
| "loss": 0.2294, |
| "step": 5630 |
| }, |
| { |
| "epoch": 1.359079407157489, |
| "grad_norm": 0.1953074646888988, |
| "learning_rate": 6.661929547169761e-06, |
| "loss": 0.2249, |
| "step": 5640 |
| }, |
| { |
| "epoch": 1.361489336064586, |
| "grad_norm": 0.21587598520170284, |
| "learning_rate": 6.648701378581722e-06, |
| "loss": 0.2295, |
| "step": 5650 |
| }, |
| { |
| "epoch": 1.3638992649716832, |
| "grad_norm": 0.20143584316068605, |
| "learning_rate": 6.635460249613537e-06, |
| "loss": 0.2239, |
| "step": 5660 |
| }, |
| { |
| "epoch": 1.3663091938787806, |
| "grad_norm": 0.19960659463265804, |
| "learning_rate": 6.622206264353225e-06, |
| "loss": 0.2241, |
| "step": 5670 |
| }, |
| { |
| "epoch": 1.368719122785878, |
| "grad_norm": 0.19611685852374722, |
| "learning_rate": 6.6089395269898715e-06, |
| "loss": 0.2239, |
| "step": 5680 |
| }, |
| { |
| "epoch": 1.371129051692975, |
| "grad_norm": 0.1931676123964051, |
| "learning_rate": 6.595660141812806e-06, |
| "loss": 0.2277, |
| "step": 5690 |
| }, |
| { |
| "epoch": 1.3735389806000722, |
| "grad_norm": 0.18944286763165288, |
| "learning_rate": 6.582368213210781e-06, |
| "loss": 0.2262, |
| "step": 5700 |
| }, |
| { |
| "epoch": 1.3759489095071695, |
| "grad_norm": 0.20447646830552416, |
| "learning_rate": 6.569063845671153e-06, |
| "loss": 0.2281, |
| "step": 5710 |
| }, |
| { |
| "epoch": 1.3783588384142669, |
| "grad_norm": 0.19176463481552258, |
| "learning_rate": 6.555747143779058e-06, |
| "loss": 0.2281, |
| "step": 5720 |
| }, |
| { |
| "epoch": 1.380768767321364, |
| "grad_norm": 0.21254229639351263, |
| "learning_rate": 6.542418212216592e-06, |
| "loss": 0.2275, |
| "step": 5730 |
| }, |
| { |
| "epoch": 1.3831786962284611, |
| "grad_norm": 0.20174431066122445, |
| "learning_rate": 6.5290771557619935e-06, |
| "loss": 0.2278, |
| "step": 5740 |
| }, |
| { |
| "epoch": 1.3855886251355585, |
| "grad_norm": 0.197270181914635, |
| "learning_rate": 6.51572407928881e-06, |
| "loss": 0.2273, |
| "step": 5750 |
| }, |
| { |
| "epoch": 1.3879985540426558, |
| "grad_norm": 0.17489037423808987, |
| "learning_rate": 6.502359087765077e-06, |
| "loss": 0.2308, |
| "step": 5760 |
| }, |
| { |
| "epoch": 1.390408482949753, |
| "grad_norm": 0.19168355960493882, |
| "learning_rate": 6.488982286252495e-06, |
| "loss": 0.2292, |
| "step": 5770 |
| }, |
| { |
| "epoch": 1.3928184118568503, |
| "grad_norm": 0.16580497958419663, |
| "learning_rate": 6.475593779905604e-06, |
| "loss": 0.2245, |
| "step": 5780 |
| }, |
| { |
| "epoch": 1.3952283407639474, |
| "grad_norm": 0.19296171404899046, |
| "learning_rate": 6.462193673970954e-06, |
| "loss": 0.227, |
| "step": 5790 |
| }, |
| { |
| "epoch": 1.3976382696710448, |
| "grad_norm": 0.189156083395686, |
| "learning_rate": 6.448782073786276e-06, |
| "loss": 0.2266, |
| "step": 5800 |
| }, |
| { |
| "epoch": 1.400048198578142, |
| "grad_norm": 0.2037692613925673, |
| "learning_rate": 6.435359084779663e-06, |
| "loss": 0.226, |
| "step": 5810 |
| }, |
| { |
| "epoch": 1.4024581274852392, |
| "grad_norm": 0.19808058211512877, |
| "learning_rate": 6.4219248124687295e-06, |
| "loss": 0.2315, |
| "step": 5820 |
| }, |
| { |
| "epoch": 1.4048680563923364, |
| "grad_norm": 0.1794716797971473, |
| "learning_rate": 6.408479362459791e-06, |
| "loss": 0.2242, |
| "step": 5830 |
| }, |
| { |
| "epoch": 1.4072779852994337, |
| "grad_norm": 0.1893744410848405, |
| "learning_rate": 6.39502284044703e-06, |
| "loss": 0.2266, |
| "step": 5840 |
| }, |
| { |
| "epoch": 1.4096879142065308, |
| "grad_norm": 0.1993227055216763, |
| "learning_rate": 6.381555352211663e-06, |
| "loss": 0.2251, |
| "step": 5850 |
| }, |
| { |
| "epoch": 1.4120978431136282, |
| "grad_norm": 0.1965462408573375, |
| "learning_rate": 6.368077003621116e-06, |
| "loss": 0.2234, |
| "step": 5860 |
| }, |
| { |
| "epoch": 1.4145077720207253, |
| "grad_norm": 0.19169760813528383, |
| "learning_rate": 6.354587900628184e-06, |
| "loss": 0.2293, |
| "step": 5870 |
| }, |
| { |
| "epoch": 1.4169177009278227, |
| "grad_norm": 0.22229563732388877, |
| "learning_rate": 6.341088149270204e-06, |
| "loss": 0.2297, |
| "step": 5880 |
| }, |
| { |
| "epoch": 1.4193276298349198, |
| "grad_norm": 0.21237292562708282, |
| "learning_rate": 6.327577855668216e-06, |
| "loss": 0.2274, |
| "step": 5890 |
| }, |
| { |
| "epoch": 1.4217375587420171, |
| "grad_norm": 0.19450312912993603, |
| "learning_rate": 6.3140571260261385e-06, |
| "loss": 0.2297, |
| "step": 5900 |
| }, |
| { |
| "epoch": 1.4241474876491145, |
| "grad_norm": 0.19802196601286834, |
| "learning_rate": 6.300526066629923e-06, |
| "loss": 0.2263, |
| "step": 5910 |
| }, |
| { |
| "epoch": 1.4265574165562116, |
| "grad_norm": 0.1857582079954937, |
| "learning_rate": 6.286984783846723e-06, |
| "loss": 0.2305, |
| "step": 5920 |
| }, |
| { |
| "epoch": 1.4289673454633087, |
| "grad_norm": 0.19993450682119196, |
| "learning_rate": 6.273433384124058e-06, |
| "loss": 0.2286, |
| "step": 5930 |
| }, |
| { |
| "epoch": 1.431377274370406, |
| "grad_norm": 0.2112259520849729, |
| "learning_rate": 6.259871973988977e-06, |
| "loss": 0.2287, |
| "step": 5940 |
| }, |
| { |
| "epoch": 1.4337872032775034, |
| "grad_norm": 0.21191303733205036, |
| "learning_rate": 6.24630066004722e-06, |
| "loss": 0.229, |
| "step": 5950 |
| }, |
| { |
| "epoch": 1.4361971321846005, |
| "grad_norm": 0.24349704964606145, |
| "learning_rate": 6.232719548982381e-06, |
| "loss": 0.2286, |
| "step": 5960 |
| }, |
| { |
| "epoch": 1.4386070610916977, |
| "grad_norm": 0.29189020677335287, |
| "learning_rate": 6.219128747555066e-06, |
| "loss": 0.2259, |
| "step": 5970 |
| }, |
| { |
| "epoch": 1.441016989998795, |
| "grad_norm": 0.23738337856881636, |
| "learning_rate": 6.205528362602064e-06, |
| "loss": 0.2245, |
| "step": 5980 |
| }, |
| { |
| "epoch": 1.4434269189058924, |
| "grad_norm": 0.18345791920291135, |
| "learning_rate": 6.19191850103549e-06, |
| "loss": 0.2264, |
| "step": 5990 |
| }, |
| { |
| "epoch": 1.4458368478129895, |
| "grad_norm": 0.18465260929725813, |
| "learning_rate": 6.1782992698419605e-06, |
| "loss": 0.2268, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.4482467767200868, |
| "grad_norm": 0.19885714580604938, |
| "learning_rate": 6.164670776081746e-06, |
| "loss": 0.2264, |
| "step": 6010 |
| }, |
| { |
| "epoch": 1.450656705627184, |
| "grad_norm": 0.2053985766919299, |
| "learning_rate": 6.151033126887928e-06, |
| "loss": 0.2274, |
| "step": 6020 |
| }, |
| { |
| "epoch": 1.4530666345342813, |
| "grad_norm": 0.2042216984984624, |
| "learning_rate": 6.137386429465557e-06, |
| "loss": 0.2269, |
| "step": 6030 |
| }, |
| { |
| "epoch": 1.4554765634413784, |
| "grad_norm": 0.20791530136282474, |
| "learning_rate": 6.123730791090814e-06, |
| "loss": 0.2252, |
| "step": 6040 |
| }, |
| { |
| "epoch": 1.4578864923484758, |
| "grad_norm": 0.2008627864136984, |
| "learning_rate": 6.1100663191101664e-06, |
| "loss": 0.2282, |
| "step": 6050 |
| }, |
| { |
| "epoch": 1.460296421255573, |
| "grad_norm": 0.18447722635767383, |
| "learning_rate": 6.0963931209395165e-06, |
| "loss": 0.2261, |
| "step": 6060 |
| }, |
| { |
| "epoch": 1.4627063501626703, |
| "grad_norm": 0.17609401887308862, |
| "learning_rate": 6.082711304063369e-06, |
| "loss": 0.2266, |
| "step": 6070 |
| }, |
| { |
| "epoch": 1.4651162790697674, |
| "grad_norm": 0.19142220510019697, |
| "learning_rate": 6.069020976033973e-06, |
| "loss": 0.2262, |
| "step": 6080 |
| }, |
| { |
| "epoch": 1.4675262079768647, |
| "grad_norm": 0.19275591690666521, |
| "learning_rate": 6.055322244470492e-06, |
| "loss": 0.2294, |
| "step": 6090 |
| }, |
| { |
| "epoch": 1.4699361368839619, |
| "grad_norm": 0.21176823643116072, |
| "learning_rate": 6.041615217058141e-06, |
| "loss": 0.2315, |
| "step": 6100 |
| }, |
| { |
| "epoch": 1.4723460657910592, |
| "grad_norm": 0.19486774201967316, |
| "learning_rate": 6.027900001547354e-06, |
| "loss": 0.2252, |
| "step": 6110 |
| }, |
| { |
| "epoch": 1.4747559946981563, |
| "grad_norm": 0.18722173921206922, |
| "learning_rate": 6.014176705752928e-06, |
| "loss": 0.2251, |
| "step": 6120 |
| }, |
| { |
| "epoch": 1.4771659236052537, |
| "grad_norm": 0.20049685353598987, |
| "learning_rate": 6.000445437553182e-06, |
| "loss": 0.23, |
| "step": 6130 |
| }, |
| { |
| "epoch": 1.479575852512351, |
| "grad_norm": 0.18610902051883838, |
| "learning_rate": 5.986706304889103e-06, |
| "loss": 0.2267, |
| "step": 6140 |
| }, |
| { |
| "epoch": 1.4819857814194481, |
| "grad_norm": 0.18659560365834538, |
| "learning_rate": 5.9729594157635e-06, |
| "loss": 0.225, |
| "step": 6150 |
| }, |
| { |
| "epoch": 1.4843957103265453, |
| "grad_norm": 0.2193854420448443, |
| "learning_rate": 5.95920487824016e-06, |
| "loss": 0.2257, |
| "step": 6160 |
| }, |
| { |
| "epoch": 1.4868056392336426, |
| "grad_norm": 0.190100765830309, |
| "learning_rate": 5.945442800442989e-06, |
| "loss": 0.2265, |
| "step": 6170 |
| }, |
| { |
| "epoch": 1.48921556814074, |
| "grad_norm": 0.19881879433053243, |
| "learning_rate": 5.9316732905551655e-06, |
| "loss": 0.2272, |
| "step": 6180 |
| }, |
| { |
| "epoch": 1.491625497047837, |
| "grad_norm": 0.17847404212264667, |
| "learning_rate": 5.917896456818296e-06, |
| "loss": 0.2222, |
| "step": 6190 |
| }, |
| { |
| "epoch": 1.4940354259549342, |
| "grad_norm": 0.1824001838096183, |
| "learning_rate": 5.904112407531558e-06, |
| "loss": 0.2274, |
| "step": 6200 |
| }, |
| { |
| "epoch": 1.4964453548620316, |
| "grad_norm": 0.17796059867318986, |
| "learning_rate": 5.89032125105085e-06, |
| "loss": 0.2258, |
| "step": 6210 |
| }, |
| { |
| "epoch": 1.498855283769129, |
| "grad_norm": 0.22233474474187953, |
| "learning_rate": 5.876523095787938e-06, |
| "loss": 0.2253, |
| "step": 6220 |
| }, |
| { |
| "epoch": 1.501265212676226, |
| "grad_norm": 0.17993439545364096, |
| "learning_rate": 5.862718050209608e-06, |
| "loss": 0.2249, |
| "step": 6230 |
| }, |
| { |
| "epoch": 1.5036751415833232, |
| "grad_norm": 0.20152480587420749, |
| "learning_rate": 5.848906222836808e-06, |
| "loss": 0.2278, |
| "step": 6240 |
| }, |
| { |
| "epoch": 1.5060850704904205, |
| "grad_norm": 0.18840867400342193, |
| "learning_rate": 5.835087722243801e-06, |
| "loss": 0.2242, |
| "step": 6250 |
| }, |
| { |
| "epoch": 1.5084949993975179, |
| "grad_norm": 0.2027634828385987, |
| "learning_rate": 5.821262657057303e-06, |
| "loss": 0.2269, |
| "step": 6260 |
| }, |
| { |
| "epoch": 1.510904928304615, |
| "grad_norm": 0.18720966640230777, |
| "learning_rate": 5.807431135955637e-06, |
| "loss": 0.2271, |
| "step": 6270 |
| }, |
| { |
| "epoch": 1.513314857211712, |
| "grad_norm": 0.1840393105636915, |
| "learning_rate": 5.793593267667876e-06, |
| "loss": 0.2262, |
| "step": 6280 |
| }, |
| { |
| "epoch": 1.5157247861188095, |
| "grad_norm": 0.23049081410939198, |
| "learning_rate": 5.779749160972988e-06, |
| "loss": 0.2279, |
| "step": 6290 |
| }, |
| { |
| "epoch": 1.5181347150259068, |
| "grad_norm": 0.19694083590378286, |
| "learning_rate": 5.76589892469898e-06, |
| "loss": 0.2229, |
| "step": 6300 |
| }, |
| { |
| "epoch": 1.520544643933004, |
| "grad_norm": 0.20401577598098544, |
| "learning_rate": 5.752042667722044e-06, |
| "loss": 0.2267, |
| "step": 6310 |
| }, |
| { |
| "epoch": 1.522954572840101, |
| "grad_norm": 0.20188238838066902, |
| "learning_rate": 5.7381804989656995e-06, |
| "loss": 0.2264, |
| "step": 6320 |
| }, |
| { |
| "epoch": 1.5253645017471986, |
| "grad_norm": 0.18414125355335784, |
| "learning_rate": 5.724312527399939e-06, |
| "loss": 0.2241, |
| "step": 6330 |
| }, |
| { |
| "epoch": 1.5277744306542957, |
| "grad_norm": 0.18776954169886498, |
| "learning_rate": 5.710438862040368e-06, |
| "loss": 0.2221, |
| "step": 6340 |
| }, |
| { |
| "epoch": 1.5301843595613929, |
| "grad_norm": 0.2119314292336933, |
| "learning_rate": 5.696559611947359e-06, |
| "loss": 0.2251, |
| "step": 6350 |
| }, |
| { |
| "epoch": 1.5325942884684902, |
| "grad_norm": 0.18502737108284242, |
| "learning_rate": 5.682674886225175e-06, |
| "loss": 0.2237, |
| "step": 6360 |
| }, |
| { |
| "epoch": 1.5350042173755876, |
| "grad_norm": 0.22919724100407374, |
| "learning_rate": 5.6687847940211304e-06, |
| "loss": 0.225, |
| "step": 6370 |
| }, |
| { |
| "epoch": 1.5374141462826847, |
| "grad_norm": 0.1865313994122086, |
| "learning_rate": 5.654889444524723e-06, |
| "loss": 0.2235, |
| "step": 6380 |
| }, |
| { |
| "epoch": 1.5398240751897818, |
| "grad_norm": 0.19215543319827763, |
| "learning_rate": 5.6409889469667765e-06, |
| "loss": 0.2253, |
| "step": 6390 |
| }, |
| { |
| "epoch": 1.5422340040968792, |
| "grad_norm": 0.19018456354370034, |
| "learning_rate": 5.627083410618585e-06, |
| "loss": 0.2246, |
| "step": 6400 |
| }, |
| { |
| "epoch": 1.5446439330039765, |
| "grad_norm": 0.21580346487141808, |
| "learning_rate": 5.613172944791053e-06, |
| "loss": 0.2244, |
| "step": 6410 |
| }, |
| { |
| "epoch": 1.5470538619110736, |
| "grad_norm": 0.17583022124719408, |
| "learning_rate": 5.599257658833833e-06, |
| "loss": 0.2243, |
| "step": 6420 |
| }, |
| { |
| "epoch": 1.5494637908181708, |
| "grad_norm": 0.19045315684837752, |
| "learning_rate": 5.585337662134471e-06, |
| "loss": 0.2278, |
| "step": 6430 |
| }, |
| { |
| "epoch": 1.551873719725268, |
| "grad_norm": 0.17716555648796484, |
| "learning_rate": 5.571413064117542e-06, |
| "loss": 0.2269, |
| "step": 6440 |
| }, |
| { |
| "epoch": 1.5542836486323655, |
| "grad_norm": 0.19837465081347957, |
| "learning_rate": 5.5574839742437924e-06, |
| "loss": 0.2256, |
| "step": 6450 |
| }, |
| { |
| "epoch": 1.5566935775394626, |
| "grad_norm": 0.18456356011473574, |
| "learning_rate": 5.543550502009279e-06, |
| "loss": 0.2282, |
| "step": 6460 |
| }, |
| { |
| "epoch": 1.5591035064465597, |
| "grad_norm": 0.20336048803130954, |
| "learning_rate": 5.529612756944509e-06, |
| "loss": 0.223, |
| "step": 6470 |
| }, |
| { |
| "epoch": 1.561513435353657, |
| "grad_norm": 0.18181389891852381, |
| "learning_rate": 5.515670848613577e-06, |
| "loss": 0.2248, |
| "step": 6480 |
| }, |
| { |
| "epoch": 1.5639233642607544, |
| "grad_norm": 0.18523380669323106, |
| "learning_rate": 5.501724886613304e-06, |
| "loss": 0.2275, |
| "step": 6490 |
| }, |
| { |
| "epoch": 1.5663332931678515, |
| "grad_norm": 0.19457096677614677, |
| "learning_rate": 5.4877749805723805e-06, |
| "loss": 0.2254, |
| "step": 6500 |
| }, |
| { |
| "epoch": 1.5687432220749487, |
| "grad_norm": 0.27735967175439274, |
| "learning_rate": 5.4738212401504966e-06, |
| "loss": 0.2291, |
| "step": 6510 |
| }, |
| { |
| "epoch": 1.571153150982046, |
| "grad_norm": 0.18179365106635872, |
| "learning_rate": 5.459863775037486e-06, |
| "loss": 0.2292, |
| "step": 6520 |
| }, |
| { |
| "epoch": 1.5735630798891433, |
| "grad_norm": 0.20305157688564834, |
| "learning_rate": 5.445902694952464e-06, |
| "loss": 0.2252, |
| "step": 6530 |
| }, |
| { |
| "epoch": 1.5759730087962405, |
| "grad_norm": 0.19395103431959637, |
| "learning_rate": 5.43193810964296e-06, |
| "loss": 0.2259, |
| "step": 6540 |
| }, |
| { |
| "epoch": 1.5783829377033376, |
| "grad_norm": 0.2075049041420315, |
| "learning_rate": 5.417970128884061e-06, |
| "loss": 0.2266, |
| "step": 6550 |
| }, |
| { |
| "epoch": 1.580792866610435, |
| "grad_norm": 0.19540137566110335, |
| "learning_rate": 5.403998862477538e-06, |
| "loss": 0.2266, |
| "step": 6560 |
| }, |
| { |
| "epoch": 1.5832027955175323, |
| "grad_norm": 0.22939617989477673, |
| "learning_rate": 5.390024420251003e-06, |
| "loss": 0.2229, |
| "step": 6570 |
| }, |
| { |
| "epoch": 1.5856127244246294, |
| "grad_norm": 0.2492743357321698, |
| "learning_rate": 5.376046912057022e-06, |
| "loss": 0.2226, |
| "step": 6580 |
| }, |
| { |
| "epoch": 1.5880226533317268, |
| "grad_norm": 0.22929705915276918, |
| "learning_rate": 5.3620664477722686e-06, |
| "loss": 0.2285, |
| "step": 6590 |
| }, |
| { |
| "epoch": 1.590432582238824, |
| "grad_norm": 0.21813501370107669, |
| "learning_rate": 5.34808313729665e-06, |
| "loss": 0.2223, |
| "step": 6600 |
| }, |
| { |
| "epoch": 1.5928425111459212, |
| "grad_norm": 0.20849814738966235, |
| "learning_rate": 5.3340970905524515e-06, |
| "loss": 0.2277, |
| "step": 6610 |
| }, |
| { |
| "epoch": 1.5952524400530184, |
| "grad_norm": 0.20201921903562342, |
| "learning_rate": 5.3201084174834615e-06, |
| "loss": 0.2262, |
| "step": 6620 |
| }, |
| { |
| "epoch": 1.5976623689601157, |
| "grad_norm": 0.1803455527617659, |
| "learning_rate": 5.306117228054123e-06, |
| "loss": 0.2221, |
| "step": 6630 |
| }, |
| { |
| "epoch": 1.600072297867213, |
| "grad_norm": 0.18552923789191267, |
| "learning_rate": 5.292123632248652e-06, |
| "loss": 0.226, |
| "step": 6640 |
| }, |
| { |
| "epoch": 1.6024822267743102, |
| "grad_norm": 0.2556810550855232, |
| "learning_rate": 5.278127740070187e-06, |
| "loss": 0.2253, |
| "step": 6650 |
| }, |
| { |
| "epoch": 1.6048921556814073, |
| "grad_norm": 0.18900173763140687, |
| "learning_rate": 5.2641296615399116e-06, |
| "loss": 0.2208, |
| "step": 6660 |
| }, |
| { |
| "epoch": 1.6073020845885047, |
| "grad_norm": 0.1985741664662957, |
| "learning_rate": 5.2501295066962035e-06, |
| "loss": 0.2264, |
| "step": 6670 |
| }, |
| { |
| "epoch": 1.609712013495602, |
| "grad_norm": 0.17666088788879142, |
| "learning_rate": 5.236127385593754e-06, |
| "loss": 0.2265, |
| "step": 6680 |
| }, |
| { |
| "epoch": 1.6121219424026991, |
| "grad_norm": 0.21901931213384007, |
| "learning_rate": 5.222123408302722e-06, |
| "loss": 0.2236, |
| "step": 6690 |
| }, |
| { |
| "epoch": 1.6145318713097963, |
| "grad_norm": 0.22203947856380532, |
| "learning_rate": 5.208117684907846e-06, |
| "loss": 0.2271, |
| "step": 6700 |
| }, |
| { |
| "epoch": 1.6169418002168936, |
| "grad_norm": 0.19989460066321701, |
| "learning_rate": 5.194110325507599e-06, |
| "loss": 0.2281, |
| "step": 6710 |
| }, |
| { |
| "epoch": 1.619351729123991, |
| "grad_norm": 0.19557970025488464, |
| "learning_rate": 5.180101440213311e-06, |
| "loss": 0.2248, |
| "step": 6720 |
| }, |
| { |
| "epoch": 1.621761658031088, |
| "grad_norm": 0.20440404306269325, |
| "learning_rate": 5.166091139148307e-06, |
| "loss": 0.227, |
| "step": 6730 |
| }, |
| { |
| "epoch": 1.6241715869381852, |
| "grad_norm": 0.19241051500362122, |
| "learning_rate": 5.152079532447042e-06, |
| "loss": 0.2228, |
| "step": 6740 |
| }, |
| { |
| "epoch": 1.6265815158452825, |
| "grad_norm": 0.1999142505841929, |
| "learning_rate": 5.138066730254236e-06, |
| "loss": 0.2217, |
| "step": 6750 |
| }, |
| { |
| "epoch": 1.62899144475238, |
| "grad_norm": 0.1883189315734027, |
| "learning_rate": 5.124052842724005e-06, |
| "loss": 0.2225, |
| "step": 6760 |
| }, |
| { |
| "epoch": 1.631401373659477, |
| "grad_norm": 0.18244725170752862, |
| "learning_rate": 5.110037980018996e-06, |
| "loss": 0.2285, |
| "step": 6770 |
| }, |
| { |
| "epoch": 1.6338113025665741, |
| "grad_norm": 0.1741959447266688, |
| "learning_rate": 5.0960222523095235e-06, |
| "loss": 0.2222, |
| "step": 6780 |
| }, |
| { |
| "epoch": 1.6362212314736715, |
| "grad_norm": 0.19534121943393284, |
| "learning_rate": 5.0820057697727e-06, |
| "loss": 0.2293, |
| "step": 6790 |
| }, |
| { |
| "epoch": 1.6386311603807688, |
| "grad_norm": 0.17063220118953726, |
| "learning_rate": 5.067988642591575e-06, |
| "loss": 0.2228, |
| "step": 6800 |
| }, |
| { |
| "epoch": 1.641041089287866, |
| "grad_norm": 0.1898996673825106, |
| "learning_rate": 5.053970980954263e-06, |
| "loss": 0.2241, |
| "step": 6810 |
| }, |
| { |
| "epoch": 1.643451018194963, |
| "grad_norm": 0.19810541992436406, |
| "learning_rate": 5.0399528950530776e-06, |
| "loss": 0.2228, |
| "step": 6820 |
| }, |
| { |
| "epoch": 1.6458609471020604, |
| "grad_norm": 0.19615743145739967, |
| "learning_rate": 5.0259344950836715e-06, |
| "loss": 0.2275, |
| "step": 6830 |
| }, |
| { |
| "epoch": 1.6482708760091578, |
| "grad_norm": 0.18410790413561248, |
| "learning_rate": 5.011915891244167e-06, |
| "loss": 0.2242, |
| "step": 6840 |
| }, |
| { |
| "epoch": 1.650680804916255, |
| "grad_norm": 0.17986309985667576, |
| "learning_rate": 4.997897193734285e-06, |
| "loss": 0.2256, |
| "step": 6850 |
| }, |
| { |
| "epoch": 1.6530907338233523, |
| "grad_norm": 0.19626127128989843, |
| "learning_rate": 4.9838785127544826e-06, |
| "loss": 0.2269, |
| "step": 6860 |
| }, |
| { |
| "epoch": 1.6555006627304496, |
| "grad_norm": 0.1948316832083952, |
| "learning_rate": 4.969859958505094e-06, |
| "loss": 0.2222, |
| "step": 6870 |
| }, |
| { |
| "epoch": 1.6579105916375467, |
| "grad_norm": 0.19917619420105298, |
| "learning_rate": 4.955841641185447e-06, |
| "loss": 0.2285, |
| "step": 6880 |
| }, |
| { |
| "epoch": 1.6603205205446439, |
| "grad_norm": 0.21813153794529366, |
| "learning_rate": 4.941823670993016e-06, |
| "loss": 0.2271, |
| "step": 6890 |
| }, |
| { |
| "epoch": 1.6627304494517412, |
| "grad_norm": 0.18905170301252883, |
| "learning_rate": 4.92780615812254e-06, |
| "loss": 0.2225, |
| "step": 6900 |
| }, |
| { |
| "epoch": 1.6651403783588385, |
| "grad_norm": 0.17347979698273475, |
| "learning_rate": 4.913789212765166e-06, |
| "loss": 0.2235, |
| "step": 6910 |
| }, |
| { |
| "epoch": 1.6675503072659357, |
| "grad_norm": 0.21525749133833247, |
| "learning_rate": 4.899772945107583e-06, |
| "loss": 0.224, |
| "step": 6920 |
| }, |
| { |
| "epoch": 1.6699602361730328, |
| "grad_norm": 0.1866707522575154, |
| "learning_rate": 4.885757465331144e-06, |
| "loss": 0.2287, |
| "step": 6930 |
| }, |
| { |
| "epoch": 1.6723701650801301, |
| "grad_norm": 0.18550313186716808, |
| "learning_rate": 4.871742883611018e-06, |
| "loss": 0.2229, |
| "step": 6940 |
| }, |
| { |
| "epoch": 1.6747800939872275, |
| "grad_norm": 0.1802360362786429, |
| "learning_rate": 4.857729310115307e-06, |
| "loss": 0.2231, |
| "step": 6950 |
| }, |
| { |
| "epoch": 1.6771900228943246, |
| "grad_norm": 0.19058873213547273, |
| "learning_rate": 4.843716855004194e-06, |
| "loss": 0.2255, |
| "step": 6960 |
| }, |
| { |
| "epoch": 1.6795999518014217, |
| "grad_norm": 0.18963628222606987, |
| "learning_rate": 4.829705628429061e-06, |
| "loss": 0.2265, |
| "step": 6970 |
| }, |
| { |
| "epoch": 1.682009880708519, |
| "grad_norm": 0.18708287216215744, |
| "learning_rate": 4.815695740531643e-06, |
| "loss": 0.2241, |
| "step": 6980 |
| }, |
| { |
| "epoch": 1.6844198096156164, |
| "grad_norm": 0.1958945372902418, |
| "learning_rate": 4.801687301443149e-06, |
| "loss": 0.2251, |
| "step": 6990 |
| }, |
| { |
| "epoch": 1.6868297385227136, |
| "grad_norm": 0.17987992459683932, |
| "learning_rate": 4.787680421283391e-06, |
| "loss": 0.2221, |
| "step": 7000 |
| }, |
| { |
| "epoch": 1.6892396674298107, |
| "grad_norm": 0.1860519886024203, |
| "learning_rate": 4.773675210159938e-06, |
| "loss": 0.2228, |
| "step": 7010 |
| }, |
| { |
| "epoch": 1.691649596336908, |
| "grad_norm": 0.19306967475728493, |
| "learning_rate": 4.759671778167228e-06, |
| "loss": 0.2229, |
| "step": 7020 |
| }, |
| { |
| "epoch": 1.6940595252440054, |
| "grad_norm": 0.17062441179718144, |
| "learning_rate": 4.745670235385723e-06, |
| "loss": 0.2273, |
| "step": 7030 |
| }, |
| { |
| "epoch": 1.6964694541511025, |
| "grad_norm": 0.18936334423122136, |
| "learning_rate": 4.7316706918810265e-06, |
| "loss": 0.2259, |
| "step": 7040 |
| }, |
| { |
| "epoch": 1.6988793830581996, |
| "grad_norm": 0.1840599548380237, |
| "learning_rate": 4.71767325770303e-06, |
| "loss": 0.2238, |
| "step": 7050 |
| }, |
| { |
| "epoch": 1.701289311965297, |
| "grad_norm": 0.17655524669662206, |
| "learning_rate": 4.703678042885044e-06, |
| "loss": 0.2244, |
| "step": 7060 |
| }, |
| { |
| "epoch": 1.7036992408723943, |
| "grad_norm": 0.20152554192315303, |
| "learning_rate": 4.689685157442927e-06, |
| "loss": 0.22, |
| "step": 7070 |
| }, |
| { |
| "epoch": 1.7061091697794915, |
| "grad_norm": 0.18196398365456765, |
| "learning_rate": 4.675694711374234e-06, |
| "loss": 0.2218, |
| "step": 7080 |
| }, |
| { |
| "epoch": 1.7085190986865888, |
| "grad_norm": 0.17770652308008, |
| "learning_rate": 4.661706814657338e-06, |
| "loss": 0.2248, |
| "step": 7090 |
| }, |
| { |
| "epoch": 1.7109290275936861, |
| "grad_norm": 0.31880757965769346, |
| "learning_rate": 4.647721577250578e-06, |
| "loss": 0.2223, |
| "step": 7100 |
| }, |
| { |
| "epoch": 1.7133389565007833, |
| "grad_norm": 0.18498314392547346, |
| "learning_rate": 4.633739109091379e-06, |
| "loss": 0.2297, |
| "step": 7110 |
| }, |
| { |
| "epoch": 1.7157488854078804, |
| "grad_norm": 0.19087746418387228, |
| "learning_rate": 4.61975952009541e-06, |
| "loss": 0.2226, |
| "step": 7120 |
| }, |
| { |
| "epoch": 1.7181588143149777, |
| "grad_norm": 0.18495262200957335, |
| "learning_rate": 4.6057829201556905e-06, |
| "loss": 0.2245, |
| "step": 7130 |
| }, |
| { |
| "epoch": 1.720568743222075, |
| "grad_norm": 0.17530802321085223, |
| "learning_rate": 4.591809419141758e-06, |
| "loss": 0.2238, |
| "step": 7140 |
| }, |
| { |
| "epoch": 1.7229786721291722, |
| "grad_norm": 0.20450718057983405, |
| "learning_rate": 4.577839126898784e-06, |
| "loss": 0.2257, |
| "step": 7150 |
| }, |
| { |
| "epoch": 1.7253886010362693, |
| "grad_norm": 0.18207109544592398, |
| "learning_rate": 4.5638721532467125e-06, |
| "loss": 0.224, |
| "step": 7160 |
| }, |
| { |
| "epoch": 1.7277985299433667, |
| "grad_norm": 0.17671624106610764, |
| "learning_rate": 4.549908607979407e-06, |
| "loss": 0.2239, |
| "step": 7170 |
| }, |
| { |
| "epoch": 1.730208458850464, |
| "grad_norm": 0.19697657500815421, |
| "learning_rate": 4.535948600863774e-06, |
| "loss": 0.2272, |
| "step": 7180 |
| }, |
| { |
| "epoch": 1.7326183877575612, |
| "grad_norm": 0.1785343432269176, |
| "learning_rate": 4.521992241638912e-06, |
| "loss": 0.222, |
| "step": 7190 |
| }, |
| { |
| "epoch": 1.7350283166646583, |
| "grad_norm": 0.18203567866087253, |
| "learning_rate": 4.508039640015237e-06, |
| "loss": 0.2241, |
| "step": 7200 |
| }, |
| { |
| "epoch": 1.7374382455717556, |
| "grad_norm": 0.1670992860773588, |
| "learning_rate": 4.494090905673634e-06, |
| "loss": 0.2271, |
| "step": 7210 |
| }, |
| { |
| "epoch": 1.739848174478853, |
| "grad_norm": 0.1974541948876522, |
| "learning_rate": 4.480146148264586e-06, |
| "loss": 0.2241, |
| "step": 7220 |
| }, |
| { |
| "epoch": 1.74225810338595, |
| "grad_norm": 0.20086161735901403, |
| "learning_rate": 4.466205477407308e-06, |
| "loss": 0.2237, |
| "step": 7230 |
| }, |
| { |
| "epoch": 1.7446680322930472, |
| "grad_norm": 0.17998346424231534, |
| "learning_rate": 4.452269002688897e-06, |
| "loss": 0.2252, |
| "step": 7240 |
| }, |
| { |
| "epoch": 1.7470779612001446, |
| "grad_norm": 0.18085102911658998, |
| "learning_rate": 4.438336833663459e-06, |
| "loss": 0.2268, |
| "step": 7250 |
| }, |
| { |
| "epoch": 1.749487890107242, |
| "grad_norm": 0.18593488483632506, |
| "learning_rate": 4.424409079851262e-06, |
| "loss": 0.2255, |
| "step": 7260 |
| }, |
| { |
| "epoch": 1.751897819014339, |
| "grad_norm": 0.17804819176418915, |
| "learning_rate": 4.410485850737853e-06, |
| "loss": 0.2254, |
| "step": 7270 |
| }, |
| { |
| "epoch": 1.7543077479214362, |
| "grad_norm": 0.1847683782268045, |
| "learning_rate": 4.39656725577322e-06, |
| "loss": 0.2238, |
| "step": 7280 |
| }, |
| { |
| "epoch": 1.7567176768285335, |
| "grad_norm": 0.18441987945987579, |
| "learning_rate": 4.382653404370922e-06, |
| "loss": 0.2243, |
| "step": 7290 |
| }, |
| { |
| "epoch": 1.7591276057356309, |
| "grad_norm": 0.16499681867454, |
| "learning_rate": 4.368744405907224e-06, |
| "loss": 0.2255, |
| "step": 7300 |
| }, |
| { |
| "epoch": 1.761537534642728, |
| "grad_norm": 0.17614665034679972, |
| "learning_rate": 4.354840369720249e-06, |
| "loss": 0.2256, |
| "step": 7310 |
| }, |
| { |
| "epoch": 1.7639474635498251, |
| "grad_norm": 0.18039379340034864, |
| "learning_rate": 4.340941405109102e-06, |
| "loss": 0.2273, |
| "step": 7320 |
| }, |
| { |
| "epoch": 1.7663573924569225, |
| "grad_norm": 0.19238344379534025, |
| "learning_rate": 4.327047621333031e-06, |
| "loss": 0.2238, |
| "step": 7330 |
| }, |
| { |
| "epoch": 1.7687673213640198, |
| "grad_norm": 0.17065564380742512, |
| "learning_rate": 4.31315912761055e-06, |
| "loss": 0.2216, |
| "step": 7340 |
| }, |
| { |
| "epoch": 1.771177250271117, |
| "grad_norm": 0.18611762682949998, |
| "learning_rate": 4.299276033118592e-06, |
| "loss": 0.2253, |
| "step": 7350 |
| }, |
| { |
| "epoch": 1.7735871791782143, |
| "grad_norm": 0.16105936235245313, |
| "learning_rate": 4.285398446991648e-06, |
| "loss": 0.2258, |
| "step": 7360 |
| }, |
| { |
| "epoch": 1.7759971080853116, |
| "grad_norm": 0.19660246043412044, |
| "learning_rate": 4.271526478320901e-06, |
| "loss": 0.2236, |
| "step": 7370 |
| }, |
| { |
| "epoch": 1.7784070369924088, |
| "grad_norm": 0.16366026015308957, |
| "learning_rate": 4.257660236153387e-06, |
| "loss": 0.2208, |
| "step": 7380 |
| }, |
| { |
| "epoch": 1.7808169658995059, |
| "grad_norm": 0.17419982693568545, |
| "learning_rate": 4.2437998294911114e-06, |
| "loss": 0.2251, |
| "step": 7390 |
| }, |
| { |
| "epoch": 1.7832268948066032, |
| "grad_norm": 0.16892847652220977, |
| "learning_rate": 4.229945367290222e-06, |
| "loss": 0.2247, |
| "step": 7400 |
| }, |
| { |
| "epoch": 1.7856368237137006, |
| "grad_norm": 0.20361521580127576, |
| "learning_rate": 4.216096958460126e-06, |
| "loss": 0.2229, |
| "step": 7410 |
| }, |
| { |
| "epoch": 1.7880467526207977, |
| "grad_norm": 0.1775285583270596, |
| "learning_rate": 4.2022547118626515e-06, |
| "loss": 0.2253, |
| "step": 7420 |
| }, |
| { |
| "epoch": 1.7904566815278948, |
| "grad_norm": 0.1808904694092097, |
| "learning_rate": 4.18841873631118e-06, |
| "loss": 0.225, |
| "step": 7430 |
| }, |
| { |
| "epoch": 1.7928666104349922, |
| "grad_norm": 0.179679581955328, |
| "learning_rate": 4.174589140569805e-06, |
| "loss": 0.2247, |
| "step": 7440 |
| }, |
| { |
| "epoch": 1.7952765393420895, |
| "grad_norm": 0.18544401063113647, |
| "learning_rate": 4.160766033352462e-06, |
| "loss": 0.2239, |
| "step": 7450 |
| }, |
| { |
| "epoch": 1.7976864682491867, |
| "grad_norm": 0.16930775632787115, |
| "learning_rate": 4.14694952332208e-06, |
| "loss": 0.2217, |
| "step": 7460 |
| }, |
| { |
| "epoch": 1.8000963971562838, |
| "grad_norm": 0.21077077331059035, |
| "learning_rate": 4.133139719089735e-06, |
| "loss": 0.2202, |
| "step": 7470 |
| }, |
| { |
| "epoch": 1.8025063260633811, |
| "grad_norm": 0.17033669932151718, |
| "learning_rate": 4.119336729213778e-06, |
| "loss": 0.2269, |
| "step": 7480 |
| }, |
| { |
| "epoch": 1.8049162549704785, |
| "grad_norm": 0.16731905701849073, |
| "learning_rate": 4.105540662199002e-06, |
| "loss": 0.2256, |
| "step": 7490 |
| }, |
| { |
| "epoch": 1.8073261838775756, |
| "grad_norm": 0.17889834249912376, |
| "learning_rate": 4.0917516264957735e-06, |
| "loss": 0.2208, |
| "step": 7500 |
| }, |
| { |
| "epoch": 1.8097361127846727, |
| "grad_norm": 0.1735614032658692, |
| "learning_rate": 4.07796973049919e-06, |
| "loss": 0.2239, |
| "step": 7510 |
| }, |
| { |
| "epoch": 1.81214604169177, |
| "grad_norm": 0.18611565785055215, |
| "learning_rate": 4.0641950825482265e-06, |
| "loss": 0.2236, |
| "step": 7520 |
| }, |
| { |
| "epoch": 1.8145559705988674, |
| "grad_norm": 0.17672187738278908, |
| "learning_rate": 4.0504277909248715e-06, |
| "loss": 0.2217, |
| "step": 7530 |
| }, |
| { |
| "epoch": 1.8169658995059645, |
| "grad_norm": 0.18369567766938746, |
| "learning_rate": 4.036667963853296e-06, |
| "loss": 0.2254, |
| "step": 7540 |
| }, |
| { |
| "epoch": 1.8193758284130617, |
| "grad_norm": 0.18448055701952337, |
| "learning_rate": 4.022915709498985e-06, |
| "loss": 0.2244, |
| "step": 7550 |
| }, |
| { |
| "epoch": 1.821785757320159, |
| "grad_norm": 0.2013693032332627, |
| "learning_rate": 4.009171135967902e-06, |
| "loss": 0.2239, |
| "step": 7560 |
| }, |
| { |
| "epoch": 1.8241956862272564, |
| "grad_norm": 0.18294968080752183, |
| "learning_rate": 3.9954343513056236e-06, |
| "loss": 0.224, |
| "step": 7570 |
| }, |
| { |
| "epoch": 1.8266056151343535, |
| "grad_norm": 0.1796825438704204, |
| "learning_rate": 3.981705463496504e-06, |
| "loss": 0.2214, |
| "step": 7580 |
| }, |
| { |
| "epoch": 1.8290155440414506, |
| "grad_norm": 0.17589549129978024, |
| "learning_rate": 3.967984580462821e-06, |
| "loss": 0.2251, |
| "step": 7590 |
| }, |
| { |
| "epoch": 1.8314254729485482, |
| "grad_norm": 0.18972595411666385, |
| "learning_rate": 3.954271810063922e-06, |
| "loss": 0.226, |
| "step": 7600 |
| }, |
| { |
| "epoch": 1.8338354018556453, |
| "grad_norm": 0.20036647780013492, |
| "learning_rate": 3.940567260095389e-06, |
| "loss": 0.2248, |
| "step": 7610 |
| }, |
| { |
| "epoch": 1.8362453307627424, |
| "grad_norm": 0.23575531902953417, |
| "learning_rate": 3.926871038288173e-06, |
| "loss": 0.2226, |
| "step": 7620 |
| }, |
| { |
| "epoch": 1.8386552596698398, |
| "grad_norm": 0.16814570911780818, |
| "learning_rate": 3.9131832523077685e-06, |
| "loss": 0.2239, |
| "step": 7630 |
| }, |
| { |
| "epoch": 1.8410651885769371, |
| "grad_norm": 0.1755996654649451, |
| "learning_rate": 3.8995040097533495e-06, |
| "loss": 0.2247, |
| "step": 7640 |
| }, |
| { |
| "epoch": 1.8434751174840343, |
| "grad_norm": 0.18358495829429392, |
| "learning_rate": 3.885833418156932e-06, |
| "loss": 0.225, |
| "step": 7650 |
| }, |
| { |
| "epoch": 1.8458850463911314, |
| "grad_norm": 0.17793116646513968, |
| "learning_rate": 3.8721715849825305e-06, |
| "loss": 0.2234, |
| "step": 7660 |
| }, |
| { |
| "epoch": 1.8482949752982287, |
| "grad_norm": 0.15909506204464002, |
| "learning_rate": 3.858518617625301e-06, |
| "loss": 0.223, |
| "step": 7670 |
| }, |
| { |
| "epoch": 1.850704904205326, |
| "grad_norm": 0.1812421140160083, |
| "learning_rate": 3.844874623410718e-06, |
| "loss": 0.2225, |
| "step": 7680 |
| }, |
| { |
| "epoch": 1.8531148331124232, |
| "grad_norm": 0.16679844130510826, |
| "learning_rate": 3.831239709593707e-06, |
| "loss": 0.2228, |
| "step": 7690 |
| }, |
| { |
| "epoch": 1.8555247620195203, |
| "grad_norm": 0.19908033100116543, |
| "learning_rate": 3.8176139833578215e-06, |
| "loss": 0.2224, |
| "step": 7700 |
| }, |
| { |
| "epoch": 1.8579346909266177, |
| "grad_norm": 0.19064123442148453, |
| "learning_rate": 3.8039975518143862e-06, |
| "loss": 0.2227, |
| "step": 7710 |
| }, |
| { |
| "epoch": 1.860344619833715, |
| "grad_norm": 0.21898928257467018, |
| "learning_rate": 3.790390522001662e-06, |
| "loss": 0.2241, |
| "step": 7720 |
| }, |
| { |
| "epoch": 1.8627545487408121, |
| "grad_norm": 0.1681259532863957, |
| "learning_rate": 3.7767930008840055e-06, |
| "loss": 0.2223, |
| "step": 7730 |
| }, |
| { |
| "epoch": 1.8651644776479093, |
| "grad_norm": 0.1794902111657126, |
| "learning_rate": 3.763205095351021e-06, |
| "loss": 0.2217, |
| "step": 7740 |
| }, |
| { |
| "epoch": 1.8675744065550066, |
| "grad_norm": 0.17102807562479017, |
| "learning_rate": 3.7496269122167306e-06, |
| "loss": 0.2242, |
| "step": 7750 |
| }, |
| { |
| "epoch": 1.869984335462104, |
| "grad_norm": 0.18806004637499396, |
| "learning_rate": 3.7360585582187246e-06, |
| "loss": 0.2265, |
| "step": 7760 |
| }, |
| { |
| "epoch": 1.872394264369201, |
| "grad_norm": 0.1707242907449845, |
| "learning_rate": 3.7225001400173303e-06, |
| "loss": 0.2225, |
| "step": 7770 |
| }, |
| { |
| "epoch": 1.8748041932762982, |
| "grad_norm": 0.19734700974365144, |
| "learning_rate": 3.708951764194767e-06, |
| "loss": 0.222, |
| "step": 7780 |
| }, |
| { |
| "epoch": 1.8772141221833956, |
| "grad_norm": 0.1728393308698463, |
| "learning_rate": 3.6954135372543133e-06, |
| "loss": 0.2229, |
| "step": 7790 |
| }, |
| { |
| "epoch": 1.879624051090493, |
| "grad_norm": 0.16648912989483883, |
| "learning_rate": 3.681885565619465e-06, |
| "loss": 0.2233, |
| "step": 7800 |
| }, |
| { |
| "epoch": 1.88203397999759, |
| "grad_norm": 0.16395732152739753, |
| "learning_rate": 3.668367955633107e-06, |
| "loss": 0.2236, |
| "step": 7810 |
| }, |
| { |
| "epoch": 1.8844439089046872, |
| "grad_norm": 0.1694565515257199, |
| "learning_rate": 3.654860813556666e-06, |
| "loss": 0.2251, |
| "step": 7820 |
| }, |
| { |
| "epoch": 1.8868538378117845, |
| "grad_norm": 0.1716386768781902, |
| "learning_rate": 3.6413642455692826e-06, |
| "loss": 0.2189, |
| "step": 7830 |
| }, |
| { |
| "epoch": 1.8892637667188819, |
| "grad_norm": 0.20682300301341583, |
| "learning_rate": 3.6278783577669762e-06, |
| "loss": 0.2247, |
| "step": 7840 |
| }, |
| { |
| "epoch": 1.891673695625979, |
| "grad_norm": 0.1775688282488739, |
| "learning_rate": 3.614403256161807e-06, |
| "loss": 0.2216, |
| "step": 7850 |
| }, |
| { |
| "epoch": 1.8940836245330763, |
| "grad_norm": 0.17634851448940067, |
| "learning_rate": 3.60093904668105e-06, |
| "loss": 0.2228, |
| "step": 7860 |
| }, |
| { |
| "epoch": 1.8964935534401737, |
| "grad_norm": 0.16399162009507923, |
| "learning_rate": 3.5874858351663513e-06, |
| "loss": 0.2226, |
| "step": 7870 |
| }, |
| { |
| "epoch": 1.8989034823472708, |
| "grad_norm": 0.1730805519369056, |
| "learning_rate": 3.5740437273729074e-06, |
| "loss": 0.2241, |
| "step": 7880 |
| }, |
| { |
| "epoch": 1.901313411254368, |
| "grad_norm": 0.17978445360647452, |
| "learning_rate": 3.560612828968627e-06, |
| "loss": 0.2224, |
| "step": 7890 |
| }, |
| { |
| "epoch": 1.9037233401614653, |
| "grad_norm": 0.1874874144673897, |
| "learning_rate": 3.5471932455333013e-06, |
| "loss": 0.2231, |
| "step": 7900 |
| }, |
| { |
| "epoch": 1.9061332690685626, |
| "grad_norm": 0.17097357070938057, |
| "learning_rate": 3.533785082557779e-06, |
| "loss": 0.2233, |
| "step": 7910 |
| }, |
| { |
| "epoch": 1.9085431979756597, |
| "grad_norm": 0.19446460452256684, |
| "learning_rate": 3.520388445443126e-06, |
| "loss": 0.224, |
| "step": 7920 |
| }, |
| { |
| "epoch": 1.9109531268827569, |
| "grad_norm": 0.17779215047148145, |
| "learning_rate": 3.5070034394998108e-06, |
| "loss": 0.2212, |
| "step": 7930 |
| }, |
| { |
| "epoch": 1.9133630557898542, |
| "grad_norm": 0.16935165552714176, |
| "learning_rate": 3.4936301699468646e-06, |
| "loss": 0.2227, |
| "step": 7940 |
| }, |
| { |
| "epoch": 1.9157729846969516, |
| "grad_norm": 0.2979242189337486, |
| "learning_rate": 3.4802687419110635e-06, |
| "loss": 0.22, |
| "step": 7950 |
| }, |
| { |
| "epoch": 1.9181829136040487, |
| "grad_norm": 0.1684493452126008, |
| "learning_rate": 3.466919260426095e-06, |
| "loss": 0.2238, |
| "step": 7960 |
| }, |
| { |
| "epoch": 1.9205928425111458, |
| "grad_norm": 0.17072882225032673, |
| "learning_rate": 3.4535818304317338e-06, |
| "loss": 0.2208, |
| "step": 7970 |
| }, |
| { |
| "epoch": 1.9230027714182432, |
| "grad_norm": 0.18637622372482193, |
| "learning_rate": 3.440256556773025e-06, |
| "loss": 0.2202, |
| "step": 7980 |
| }, |
| { |
| "epoch": 1.9254127003253405, |
| "grad_norm": 0.1682864817905964, |
| "learning_rate": 3.426943544199444e-06, |
| "loss": 0.2233, |
| "step": 7990 |
| }, |
| { |
| "epoch": 1.9278226292324376, |
| "grad_norm": 0.17853901830012536, |
| "learning_rate": 3.413642897364091e-06, |
| "loss": 0.2231, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.9302325581395348, |
| "grad_norm": 0.17559136987967267, |
| "learning_rate": 3.400354720822851e-06, |
| "loss": 0.2244, |
| "step": 8010 |
| }, |
| { |
| "epoch": 1.932642487046632, |
| "grad_norm": 0.17952527502600563, |
| "learning_rate": 3.38707911903359e-06, |
| "loss": 0.226, |
| "step": 8020 |
| }, |
| { |
| "epoch": 1.9350524159537295, |
| "grad_norm": 0.17835478587888995, |
| "learning_rate": 3.373816196355315e-06, |
| "loss": 0.221, |
| "step": 8030 |
| }, |
| { |
| "epoch": 1.9374623448608266, |
| "grad_norm": 0.1726943531694181, |
| "learning_rate": 3.3605660570473687e-06, |
| "loss": 0.2226, |
| "step": 8040 |
| }, |
| { |
| "epoch": 1.9398722737679237, |
| "grad_norm": 0.16742660057488765, |
| "learning_rate": 3.3473288052686055e-06, |
| "loss": 0.2238, |
| "step": 8050 |
| }, |
| { |
| "epoch": 1.942282202675021, |
| "grad_norm": 0.16448937520786722, |
| "learning_rate": 3.334104545076564e-06, |
| "loss": 0.2214, |
| "step": 8060 |
| }, |
| { |
| "epoch": 1.9446921315821184, |
| "grad_norm": 0.16720270952416597, |
| "learning_rate": 3.320893380426667e-06, |
| "loss": 0.2241, |
| "step": 8070 |
| }, |
| { |
| "epoch": 1.9471020604892155, |
| "grad_norm": 0.310561498171491, |
| "learning_rate": 3.3076954151713815e-06, |
| "loss": 0.2241, |
| "step": 8080 |
| }, |
| { |
| "epoch": 1.9495119893963127, |
| "grad_norm": 0.16403397557520163, |
| "learning_rate": 3.294510753059427e-06, |
| "loss": 0.2191, |
| "step": 8090 |
| }, |
| { |
| "epoch": 1.95192191830341, |
| "grad_norm": 0.17149544218196924, |
| "learning_rate": 3.2813394977349356e-06, |
| "loss": 0.2198, |
| "step": 8100 |
| }, |
| { |
| "epoch": 1.9543318472105073, |
| "grad_norm": 0.18045930837078897, |
| "learning_rate": 3.2681817527366575e-06, |
| "loss": 0.2206, |
| "step": 8110 |
| }, |
| { |
| "epoch": 1.9567417761176045, |
| "grad_norm": 0.17619845608114204, |
| "learning_rate": 3.2550376214971395e-06, |
| "loss": 0.2242, |
| "step": 8120 |
| }, |
| { |
| "epoch": 1.9591517050247018, |
| "grad_norm": 0.1631192758221241, |
| "learning_rate": 3.241907207341902e-06, |
| "loss": 0.2216, |
| "step": 8130 |
| }, |
| { |
| "epoch": 1.9615616339317992, |
| "grad_norm": 0.16547799093940346, |
| "learning_rate": 3.2287906134886483e-06, |
| "loss": 0.2203, |
| "step": 8140 |
| }, |
| { |
| "epoch": 1.9639715628388963, |
| "grad_norm": 0.16899930757123302, |
| "learning_rate": 3.215687943046427e-06, |
| "loss": 0.2211, |
| "step": 8150 |
| }, |
| { |
| "epoch": 1.9663814917459934, |
| "grad_norm": 0.18011488236625958, |
| "learning_rate": 3.202599299014849e-06, |
| "loss": 0.2252, |
| "step": 8160 |
| }, |
| { |
| "epoch": 1.9687914206530908, |
| "grad_norm": 0.17363559346316712, |
| "learning_rate": 3.1895247842832523e-06, |
| "loss": 0.2229, |
| "step": 8170 |
| }, |
| { |
| "epoch": 1.971201349560188, |
| "grad_norm": 0.19772713108336182, |
| "learning_rate": 3.1764645016299133e-06, |
| "loss": 0.2161, |
| "step": 8180 |
| }, |
| { |
| "epoch": 1.9736112784672852, |
| "grad_norm": 0.18583640048149713, |
| "learning_rate": 3.163418553721229e-06, |
| "loss": 0.2223, |
| "step": 8190 |
| }, |
| { |
| "epoch": 1.9760212073743824, |
| "grad_norm": 0.17640639663201332, |
| "learning_rate": 3.1503870431109067e-06, |
| "loss": 0.2253, |
| "step": 8200 |
| }, |
| { |
| "epoch": 1.9784311362814797, |
| "grad_norm": 0.1674920718152874, |
| "learning_rate": 3.1373700722391696e-06, |
| "loss": 0.22, |
| "step": 8210 |
| }, |
| { |
| "epoch": 1.980841065188577, |
| "grad_norm": 0.17975281665397086, |
| "learning_rate": 3.1243677434319373e-06, |
| "loss": 0.2239, |
| "step": 8220 |
| }, |
| { |
| "epoch": 1.9832509940956742, |
| "grad_norm": 0.1840902808197311, |
| "learning_rate": 3.111380158900037e-06, |
| "loss": 0.2203, |
| "step": 8230 |
| }, |
| { |
| "epoch": 1.9856609230027713, |
| "grad_norm": 0.19795445591857247, |
| "learning_rate": 3.098407420738382e-06, |
| "loss": 0.2218, |
| "step": 8240 |
| }, |
| { |
| "epoch": 1.9880708519098687, |
| "grad_norm": 0.18833669666391664, |
| "learning_rate": 3.0854496309251857e-06, |
| "loss": 0.2229, |
| "step": 8250 |
| }, |
| { |
| "epoch": 1.990480780816966, |
| "grad_norm": 0.18567799265574716, |
| "learning_rate": 3.0725068913211546e-06, |
| "loss": 0.2203, |
| "step": 8260 |
| }, |
| { |
| "epoch": 1.9928907097240631, |
| "grad_norm": 0.161711200487969, |
| "learning_rate": 3.059579303668678e-06, |
| "loss": 0.2241, |
| "step": 8270 |
| }, |
| { |
| "epoch": 1.9953006386311603, |
| "grad_norm": 0.18934300415156607, |
| "learning_rate": 3.046666969591046e-06, |
| "loss": 0.2223, |
| "step": 8280 |
| }, |
| { |
| "epoch": 1.9977105675382576, |
| "grad_norm": 0.17067235022289873, |
| "learning_rate": 3.0337699905916308e-06, |
| "loss": 0.2235, |
| "step": 8290 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.2287907262213035, |
| "learning_rate": 3.020888468053109e-06, |
| "loss": 0.2196, |
| "step": 8300 |
| }, |
| { |
| "epoch": 2.002409928907097, |
| "grad_norm": 0.17380593294798988, |
| "learning_rate": 3.0080225032366443e-06, |
| "loss": 0.2175, |
| "step": 8310 |
| }, |
| { |
| "epoch": 2.0048198578141947, |
| "grad_norm": 0.17856012160948653, |
| "learning_rate": 2.9951721972811133e-06, |
| "loss": 0.2161, |
| "step": 8320 |
| }, |
| { |
| "epoch": 2.007229786721292, |
| "grad_norm": 0.17306415633708103, |
| "learning_rate": 2.982337651202286e-06, |
| "loss": 0.2137, |
| "step": 8330 |
| }, |
| { |
| "epoch": 2.009639715628389, |
| "grad_norm": 0.1753178866495624, |
| "learning_rate": 2.9695189658920555e-06, |
| "loss": 0.2169, |
| "step": 8340 |
| }, |
| { |
| "epoch": 2.012049644535486, |
| "grad_norm": 0.20368687273238206, |
| "learning_rate": 2.95671624211763e-06, |
| "loss": 0.2178, |
| "step": 8350 |
| }, |
| { |
| "epoch": 2.0144595734425836, |
| "grad_norm": 0.1755146229130234, |
| "learning_rate": 2.9439295805207415e-06, |
| "loss": 0.2139, |
| "step": 8360 |
| }, |
| { |
| "epoch": 2.0168695023496808, |
| "grad_norm": 0.3060582694141564, |
| "learning_rate": 2.9311590816168646e-06, |
| "loss": 0.2155, |
| "step": 8370 |
| }, |
| { |
| "epoch": 2.019279431256778, |
| "grad_norm": 0.17867123362736373, |
| "learning_rate": 2.918404845794411e-06, |
| "loss": 0.2147, |
| "step": 8380 |
| }, |
| { |
| "epoch": 2.021689360163875, |
| "grad_norm": 0.17255262439737776, |
| "learning_rate": 2.905666973313957e-06, |
| "loss": 0.2153, |
| "step": 8390 |
| }, |
| { |
| "epoch": 2.0240992890709726, |
| "grad_norm": 0.16978024384357635, |
| "learning_rate": 2.8929455643074433e-06, |
| "loss": 0.2153, |
| "step": 8400 |
| }, |
| { |
| "epoch": 2.0265092179780697, |
| "grad_norm": 0.17816549745269436, |
| "learning_rate": 2.8802407187773917e-06, |
| "loss": 0.2183, |
| "step": 8410 |
| }, |
| { |
| "epoch": 2.028919146885167, |
| "grad_norm": 0.18110851567573646, |
| "learning_rate": 2.86755253659612e-06, |
| "loss": 0.2171, |
| "step": 8420 |
| }, |
| { |
| "epoch": 2.031329075792264, |
| "grad_norm": 0.1688476927906645, |
| "learning_rate": 2.854881117504954e-06, |
| "loss": 0.2176, |
| "step": 8430 |
| }, |
| { |
| "epoch": 2.0337390046993615, |
| "grad_norm": 0.16214314468077717, |
| "learning_rate": 2.8422265611134535e-06, |
| "loss": 0.218, |
| "step": 8440 |
| }, |
| { |
| "epoch": 2.0361489336064587, |
| "grad_norm": 0.17334756127095827, |
| "learning_rate": 2.829588966898607e-06, |
| "loss": 0.2158, |
| "step": 8450 |
| }, |
| { |
| "epoch": 2.038558862513556, |
| "grad_norm": 0.17925427677927808, |
| "learning_rate": 2.8169684342040802e-06, |
| "loss": 0.2171, |
| "step": 8460 |
| }, |
| { |
| "epoch": 2.040968791420653, |
| "grad_norm": 0.17577266218362214, |
| "learning_rate": 2.8043650622394023e-06, |
| "loss": 0.2171, |
| "step": 8470 |
| }, |
| { |
| "epoch": 2.0433787203277505, |
| "grad_norm": 0.1642561949193348, |
| "learning_rate": 2.791778950079217e-06, |
| "loss": 0.2159, |
| "step": 8480 |
| }, |
| { |
| "epoch": 2.0457886492348476, |
| "grad_norm": 0.16498503901124467, |
| "learning_rate": 2.779210196662482e-06, |
| "loss": 0.2156, |
| "step": 8490 |
| }, |
| { |
| "epoch": 2.0481985781419447, |
| "grad_norm": 0.21655161258737973, |
| "learning_rate": 2.766658900791699e-06, |
| "loss": 0.218, |
| "step": 8500 |
| }, |
| { |
| "epoch": 2.050608507049042, |
| "grad_norm": 0.16932876786189752, |
| "learning_rate": 2.7541251611321385e-06, |
| "loss": 0.2191, |
| "step": 8510 |
| }, |
| { |
| "epoch": 2.0530184359561394, |
| "grad_norm": 0.16740404283620977, |
| "learning_rate": 2.7416090762110603e-06, |
| "loss": 0.2162, |
| "step": 8520 |
| }, |
| { |
| "epoch": 2.0554283648632365, |
| "grad_norm": 0.2005816431066049, |
| "learning_rate": 2.729110744416943e-06, |
| "loss": 0.2176, |
| "step": 8530 |
| }, |
| { |
| "epoch": 2.0578382937703337, |
| "grad_norm": 0.17332780605061382, |
| "learning_rate": 2.716630263998706e-06, |
| "loss": 0.2158, |
| "step": 8540 |
| }, |
| { |
| "epoch": 2.060248222677431, |
| "grad_norm": 0.17912431633062656, |
| "learning_rate": 2.7041677330649408e-06, |
| "loss": 0.2203, |
| "step": 8550 |
| }, |
| { |
| "epoch": 2.0626581515845284, |
| "grad_norm": 0.18464270915555614, |
| "learning_rate": 2.6917232495831436e-06, |
| "loss": 0.2151, |
| "step": 8560 |
| }, |
| { |
| "epoch": 2.0650680804916255, |
| "grad_norm": 0.16531551068221872, |
| "learning_rate": 2.6792969113789285e-06, |
| "loss": 0.2191, |
| "step": 8570 |
| }, |
| { |
| "epoch": 2.0674780093987226, |
| "grad_norm": 0.15912695139805036, |
| "learning_rate": 2.666888816135285e-06, |
| "loss": 0.214, |
| "step": 8580 |
| }, |
| { |
| "epoch": 2.06988793830582, |
| "grad_norm": 0.18551106929744193, |
| "learning_rate": 2.6544990613917803e-06, |
| "loss": 0.2161, |
| "step": 8590 |
| }, |
| { |
| "epoch": 2.0722978672129173, |
| "grad_norm": 0.1667801868250344, |
| "learning_rate": 2.642127744543823e-06, |
| "loss": 0.2186, |
| "step": 8600 |
| }, |
| { |
| "epoch": 2.0747077961200144, |
| "grad_norm": 0.16806458954443468, |
| "learning_rate": 2.6297749628418654e-06, |
| "loss": 0.2154, |
| "step": 8610 |
| }, |
| { |
| "epoch": 2.0771177250271116, |
| "grad_norm": 0.17810232628165085, |
| "learning_rate": 2.617440813390674e-06, |
| "loss": 0.2172, |
| "step": 8620 |
| }, |
| { |
| "epoch": 2.079527653934209, |
| "grad_norm": 0.17496292769430696, |
| "learning_rate": 2.605125393148529e-06, |
| "loss": 0.2149, |
| "step": 8630 |
| }, |
| { |
| "epoch": 2.0819375828413063, |
| "grad_norm": 0.15911822350764088, |
| "learning_rate": 2.592828798926496e-06, |
| "loss": 0.212, |
| "step": 8640 |
| }, |
| { |
| "epoch": 2.0843475117484034, |
| "grad_norm": 0.17420196250002615, |
| "learning_rate": 2.580551127387644e-06, |
| "loss": 0.2173, |
| "step": 8650 |
| }, |
| { |
| "epoch": 2.0867574406555005, |
| "grad_norm": 0.1662547058151516, |
| "learning_rate": 2.5682924750462907e-06, |
| "loss": 0.2158, |
| "step": 8660 |
| }, |
| { |
| "epoch": 2.089167369562598, |
| "grad_norm": 0.1613429520475328, |
| "learning_rate": 2.5560529382672462e-06, |
| "loss": 0.2152, |
| "step": 8670 |
| }, |
| { |
| "epoch": 2.091577298469695, |
| "grad_norm": 0.16954707749177175, |
| "learning_rate": 2.5438326132650524e-06, |
| "loss": 0.2173, |
| "step": 8680 |
| }, |
| { |
| "epoch": 2.0939872273767923, |
| "grad_norm": 0.16913124560212983, |
| "learning_rate": 2.531631596103231e-06, |
| "loss": 0.2156, |
| "step": 8690 |
| }, |
| { |
| "epoch": 2.0963971562838895, |
| "grad_norm": 0.16879141003032017, |
| "learning_rate": 2.5194499826935216e-06, |
| "loss": 0.2143, |
| "step": 8700 |
| }, |
| { |
| "epoch": 2.098807085190987, |
| "grad_norm": 0.16315254865336404, |
| "learning_rate": 2.507287868795133e-06, |
| "loss": 0.216, |
| "step": 8710 |
| }, |
| { |
| "epoch": 2.101217014098084, |
| "grad_norm": 0.16388215305040107, |
| "learning_rate": 2.495145350013995e-06, |
| "loss": 0.2146, |
| "step": 8720 |
| }, |
| { |
| "epoch": 2.1036269430051813, |
| "grad_norm": 0.16534824958304595, |
| "learning_rate": 2.4830225218019878e-06, |
| "loss": 0.2171, |
| "step": 8730 |
| }, |
| { |
| "epoch": 2.1060368719122784, |
| "grad_norm": 0.15905688332066786, |
| "learning_rate": 2.4709194794562204e-06, |
| "loss": 0.2159, |
| "step": 8740 |
| }, |
| { |
| "epoch": 2.108446800819376, |
| "grad_norm": 0.15904245633268263, |
| "learning_rate": 2.45883631811825e-06, |
| "loss": 0.216, |
| "step": 8750 |
| }, |
| { |
| "epoch": 2.110856729726473, |
| "grad_norm": 0.16054858978224282, |
| "learning_rate": 2.4467731327733665e-06, |
| "loss": 0.2142, |
| "step": 8760 |
| }, |
| { |
| "epoch": 2.11326665863357, |
| "grad_norm": 0.16784446070573833, |
| "learning_rate": 2.4347300182498116e-06, |
| "loss": 0.2165, |
| "step": 8770 |
| }, |
| { |
| "epoch": 2.1156765875406673, |
| "grad_norm": 0.1673268741605228, |
| "learning_rate": 2.422707069218068e-06, |
| "loss": 0.2169, |
| "step": 8780 |
| }, |
| { |
| "epoch": 2.118086516447765, |
| "grad_norm": 0.15872613245790043, |
| "learning_rate": 2.4107043801900863e-06, |
| "loss": 0.2148, |
| "step": 8790 |
| }, |
| { |
| "epoch": 2.120496445354862, |
| "grad_norm": 0.17281567057815952, |
| "learning_rate": 2.39872204551856e-06, |
| "loss": 0.2151, |
| "step": 8800 |
| }, |
| { |
| "epoch": 2.122906374261959, |
| "grad_norm": 0.1724472046576275, |
| "learning_rate": 2.3867601593961744e-06, |
| "loss": 0.2198, |
| "step": 8810 |
| }, |
| { |
| "epoch": 2.1253163031690567, |
| "grad_norm": 0.16333811582659072, |
| "learning_rate": 2.374818815854871e-06, |
| "loss": 0.2144, |
| "step": 8820 |
| }, |
| { |
| "epoch": 2.127726232076154, |
| "grad_norm": 0.16371139176236396, |
| "learning_rate": 2.3628981087651073e-06, |
| "loss": 0.2141, |
| "step": 8830 |
| }, |
| { |
| "epoch": 2.130136160983251, |
| "grad_norm": 0.17914701375544792, |
| "learning_rate": 2.350998131835117e-06, |
| "loss": 0.2182, |
| "step": 8840 |
| }, |
| { |
| "epoch": 2.132546089890348, |
| "grad_norm": 0.18359917351190413, |
| "learning_rate": 2.339118978610175e-06, |
| "loss": 0.219, |
| "step": 8850 |
| }, |
| { |
| "epoch": 2.1349560187974457, |
| "grad_norm": 0.16535504532578257, |
| "learning_rate": 2.3272607424718675e-06, |
| "loss": 0.2145, |
| "step": 8860 |
| }, |
| { |
| "epoch": 2.137365947704543, |
| "grad_norm": 0.17201129354733538, |
| "learning_rate": 2.315423516637339e-06, |
| "loss": 0.217, |
| "step": 8870 |
| }, |
| { |
| "epoch": 2.13977587661164, |
| "grad_norm": 0.1588995828237807, |
| "learning_rate": 2.3036073941585898e-06, |
| "loss": 0.2141, |
| "step": 8880 |
| }, |
| { |
| "epoch": 2.142185805518737, |
| "grad_norm": 0.16261768738355556, |
| "learning_rate": 2.2918124679217106e-06, |
| "loss": 0.2118, |
| "step": 8890 |
| }, |
| { |
| "epoch": 2.1445957344258346, |
| "grad_norm": 0.167527249342233, |
| "learning_rate": 2.2800388306461847e-06, |
| "loss": 0.2152, |
| "step": 8900 |
| }, |
| { |
| "epoch": 2.1470056633329317, |
| "grad_norm": 0.20249631878757005, |
| "learning_rate": 2.2682865748841293e-06, |
| "loss": 0.2145, |
| "step": 8910 |
| }, |
| { |
| "epoch": 2.149415592240029, |
| "grad_norm": 0.16486526747031777, |
| "learning_rate": 2.2565557930195963e-06, |
| "loss": 0.2183, |
| "step": 8920 |
| }, |
| { |
| "epoch": 2.151825521147126, |
| "grad_norm": 0.16944793274222641, |
| "learning_rate": 2.244846577267818e-06, |
| "loss": 0.216, |
| "step": 8930 |
| }, |
| { |
| "epoch": 2.1542354500542236, |
| "grad_norm": 0.15983493156003722, |
| "learning_rate": 2.2331590196745094e-06, |
| "loss": 0.2177, |
| "step": 8940 |
| }, |
| { |
| "epoch": 2.1566453789613207, |
| "grad_norm": 0.1545123343194527, |
| "learning_rate": 2.221493212115123e-06, |
| "loss": 0.2182, |
| "step": 8950 |
| }, |
| { |
| "epoch": 2.159055307868418, |
| "grad_norm": 0.1619577230120136, |
| "learning_rate": 2.209849246294138e-06, |
| "loss": 0.2165, |
| "step": 8960 |
| }, |
| { |
| "epoch": 2.161465236775515, |
| "grad_norm": 0.24442185861489918, |
| "learning_rate": 2.1982272137443356e-06, |
| "loss": 0.2146, |
| "step": 8970 |
| }, |
| { |
| "epoch": 2.1638751656826125, |
| "grad_norm": 0.15921527725825574, |
| "learning_rate": 2.186627205826082e-06, |
| "loss": 0.2164, |
| "step": 8980 |
| }, |
| { |
| "epoch": 2.1662850945897096, |
| "grad_norm": 0.16043526153345314, |
| "learning_rate": 2.1750493137266064e-06, |
| "loss": 0.2199, |
| "step": 8990 |
| }, |
| { |
| "epoch": 2.1686950234968068, |
| "grad_norm": 0.15627209243370632, |
| "learning_rate": 2.1634936284592882e-06, |
| "loss": 0.2179, |
| "step": 9000 |
| }, |
| { |
| "epoch": 2.171104952403904, |
| "grad_norm": 0.16964769263007193, |
| "learning_rate": 2.151960240862937e-06, |
| "loss": 0.2187, |
| "step": 9010 |
| }, |
| { |
| "epoch": 2.1735148813110015, |
| "grad_norm": 0.16431980235812013, |
| "learning_rate": 2.1404492416010885e-06, |
| "loss": 0.2147, |
| "step": 9020 |
| }, |
| { |
| "epoch": 2.1759248102180986, |
| "grad_norm": 0.17463699702695645, |
| "learning_rate": 2.128960721161273e-06, |
| "loss": 0.2176, |
| "step": 9030 |
| }, |
| { |
| "epoch": 2.1783347391251957, |
| "grad_norm": 0.17530300865667098, |
| "learning_rate": 2.1174947698543276e-06, |
| "loss": 0.2151, |
| "step": 9040 |
| }, |
| { |
| "epoch": 2.1807446680322933, |
| "grad_norm": 0.16166935143888916, |
| "learning_rate": 2.106051477813662e-06, |
| "loss": 0.2179, |
| "step": 9050 |
| }, |
| { |
| "epoch": 2.1831545969393904, |
| "grad_norm": 0.16238872449097044, |
| "learning_rate": 2.0946309349945764e-06, |
| "loss": 0.2167, |
| "step": 9060 |
| }, |
| { |
| "epoch": 2.1855645258464875, |
| "grad_norm": 0.16039482488274223, |
| "learning_rate": 2.0832332311735255e-06, |
| "loss": 0.2144, |
| "step": 9070 |
| }, |
| { |
| "epoch": 2.1879744547535847, |
| "grad_norm": 0.16505377352940512, |
| "learning_rate": 2.071858455947439e-06, |
| "loss": 0.2181, |
| "step": 9080 |
| }, |
| { |
| "epoch": 2.1903843836606818, |
| "grad_norm": 0.1675497829557917, |
| "learning_rate": 2.0605066987330015e-06, |
| "loss": 0.2207, |
| "step": 9090 |
| }, |
| { |
| "epoch": 2.1927943125677793, |
| "grad_norm": 0.1572991482149228, |
| "learning_rate": 2.0491780487659518e-06, |
| "loss": 0.2173, |
| "step": 9100 |
| }, |
| { |
| "epoch": 2.1952042414748765, |
| "grad_norm": 0.16912848336632816, |
| "learning_rate": 2.0378725951003863e-06, |
| "loss": 0.2186, |
| "step": 9110 |
| }, |
| { |
| "epoch": 2.1976141703819736, |
| "grad_norm": 0.18850643446777107, |
| "learning_rate": 2.0265904266080553e-06, |
| "loss": 0.2149, |
| "step": 9120 |
| }, |
| { |
| "epoch": 2.200024099289071, |
| "grad_norm": 0.14826409894986128, |
| "learning_rate": 2.0153316319776663e-06, |
| "loss": 0.2142, |
| "step": 9130 |
| }, |
| { |
| "epoch": 2.2024340281961683, |
| "grad_norm": 0.17082868050396657, |
| "learning_rate": 2.004096299714182e-06, |
| "loss": 0.2127, |
| "step": 9140 |
| }, |
| { |
| "epoch": 2.2048439571032654, |
| "grad_norm": 0.1719463725331706, |
| "learning_rate": 1.992884518138132e-06, |
| "loss": 0.2156, |
| "step": 9150 |
| }, |
| { |
| "epoch": 2.2072538860103625, |
| "grad_norm": 0.1610758282639183, |
| "learning_rate": 1.9816963753849173e-06, |
| "loss": 0.2148, |
| "step": 9160 |
| }, |
| { |
| "epoch": 2.20966381491746, |
| "grad_norm": 0.1554002287635683, |
| "learning_rate": 1.9705319594041055e-06, |
| "loss": 0.2183, |
| "step": 9170 |
| }, |
| { |
| "epoch": 2.2120737438245572, |
| "grad_norm": 0.17242910483030338, |
| "learning_rate": 1.959391357958761e-06, |
| "loss": 0.2164, |
| "step": 9180 |
| }, |
| { |
| "epoch": 2.2144836727316544, |
| "grad_norm": 0.16608068532952636, |
| "learning_rate": 1.9482746586247307e-06, |
| "loss": 0.2167, |
| "step": 9190 |
| }, |
| { |
| "epoch": 2.2168936016387515, |
| "grad_norm": 0.16352782662612533, |
| "learning_rate": 1.937181948789979e-06, |
| "loss": 0.2169, |
| "step": 9200 |
| }, |
| { |
| "epoch": 2.219303530545849, |
| "grad_norm": 0.16152274881546919, |
| "learning_rate": 1.926113315653883e-06, |
| "loss": 0.2148, |
| "step": 9210 |
| }, |
| { |
| "epoch": 2.221713459452946, |
| "grad_norm": 0.16365516218565154, |
| "learning_rate": 1.9150688462265567e-06, |
| "loss": 0.214, |
| "step": 9220 |
| }, |
| { |
| "epoch": 2.2241233883600433, |
| "grad_norm": 0.1735669877552533, |
| "learning_rate": 1.9040486273281611e-06, |
| "loss": 0.2149, |
| "step": 9230 |
| }, |
| { |
| "epoch": 2.2265333172671404, |
| "grad_norm": 0.17065946042939198, |
| "learning_rate": 1.8930527455882285e-06, |
| "loss": 0.2169, |
| "step": 9240 |
| }, |
| { |
| "epoch": 2.228943246174238, |
| "grad_norm": 0.1555338243111709, |
| "learning_rate": 1.8820812874449745e-06, |
| "loss": 0.2183, |
| "step": 9250 |
| }, |
| { |
| "epoch": 2.231353175081335, |
| "grad_norm": 0.21359053652417603, |
| "learning_rate": 1.871134339144624e-06, |
| "loss": 0.2167, |
| "step": 9260 |
| }, |
| { |
| "epoch": 2.2337631039884323, |
| "grad_norm": 0.15566089867920296, |
| "learning_rate": 1.8602119867407293e-06, |
| "loss": 0.217, |
| "step": 9270 |
| }, |
| { |
| "epoch": 2.2361730328955294, |
| "grad_norm": 0.14838955803193682, |
| "learning_rate": 1.8493143160934962e-06, |
| "loss": 0.2148, |
| "step": 9280 |
| }, |
| { |
| "epoch": 2.238582961802627, |
| "grad_norm": 0.15951732160631296, |
| "learning_rate": 1.838441412869108e-06, |
| "loss": 0.2162, |
| "step": 9290 |
| }, |
| { |
| "epoch": 2.240992890709724, |
| "grad_norm": 0.15631738955988378, |
| "learning_rate": 1.827593362539052e-06, |
| "loss": 0.2167, |
| "step": 9300 |
| }, |
| { |
| "epoch": 2.243402819616821, |
| "grad_norm": 0.15828824954437395, |
| "learning_rate": 1.816770250379446e-06, |
| "loss": 0.2142, |
| "step": 9310 |
| }, |
| { |
| "epoch": 2.2458127485239183, |
| "grad_norm": 0.15486252175299592, |
| "learning_rate": 1.8059721614703757e-06, |
| "loss": 0.2175, |
| "step": 9320 |
| }, |
| { |
| "epoch": 2.248222677431016, |
| "grad_norm": 0.17717797925756032, |
| "learning_rate": 1.7951991806952135e-06, |
| "loss": 0.2174, |
| "step": 9330 |
| }, |
| { |
| "epoch": 2.250632606338113, |
| "grad_norm": 0.170954254933961, |
| "learning_rate": 1.7844513927399605e-06, |
| "loss": 0.2145, |
| "step": 9340 |
| }, |
| { |
| "epoch": 2.25304253524521, |
| "grad_norm": 0.1638966110114353, |
| "learning_rate": 1.7737288820925775e-06, |
| "loss": 0.2166, |
| "step": 9350 |
| }, |
| { |
| "epoch": 2.2554524641523077, |
| "grad_norm": 0.17459885896163269, |
| "learning_rate": 1.7630317330423213e-06, |
| "loss": 0.2145, |
| "step": 9360 |
| }, |
| { |
| "epoch": 2.257862393059405, |
| "grad_norm": 0.16546769479523815, |
| "learning_rate": 1.7523600296790827e-06, |
| "loss": 0.2155, |
| "step": 9370 |
| }, |
| { |
| "epoch": 2.260272321966502, |
| "grad_norm": 0.15492608425825188, |
| "learning_rate": 1.7417138558927244e-06, |
| "loss": 0.2178, |
| "step": 9380 |
| }, |
| { |
| "epoch": 2.262682250873599, |
| "grad_norm": 0.1715375758716431, |
| "learning_rate": 1.731093295372422e-06, |
| "loss": 0.2156, |
| "step": 9390 |
| }, |
| { |
| "epoch": 2.2650921797806967, |
| "grad_norm": 0.15847482780437644, |
| "learning_rate": 1.7204984316060063e-06, |
| "loss": 0.2138, |
| "step": 9400 |
| }, |
| { |
| "epoch": 2.267502108687794, |
| "grad_norm": 0.15911470696042906, |
| "learning_rate": 1.7099293478793066e-06, |
| "loss": 0.2178, |
| "step": 9410 |
| }, |
| { |
| "epoch": 2.269912037594891, |
| "grad_norm": 0.16348878980019585, |
| "learning_rate": 1.699386127275497e-06, |
| "loss": 0.2171, |
| "step": 9420 |
| }, |
| { |
| "epoch": 2.272321966501988, |
| "grad_norm": 0.16658170181352588, |
| "learning_rate": 1.6888688526744419e-06, |
| "loss": 0.217, |
| "step": 9430 |
| }, |
| { |
| "epoch": 2.2747318954090856, |
| "grad_norm": 0.16066213045543976, |
| "learning_rate": 1.6783776067520435e-06, |
| "loss": 0.2154, |
| "step": 9440 |
| }, |
| { |
| "epoch": 2.2771418243161827, |
| "grad_norm": 0.16082101620948763, |
| "learning_rate": 1.667912471979599e-06, |
| "loss": 0.2164, |
| "step": 9450 |
| }, |
| { |
| "epoch": 2.27955175322328, |
| "grad_norm": 0.17881678098179862, |
| "learning_rate": 1.6574735306231415e-06, |
| "loss": 0.2145, |
| "step": 9460 |
| }, |
| { |
| "epoch": 2.281961682130377, |
| "grad_norm": 0.1537018703658371, |
| "learning_rate": 1.6470608647427994e-06, |
| "loss": 0.2177, |
| "step": 9470 |
| }, |
| { |
| "epoch": 2.2843716110374745, |
| "grad_norm": 0.1701217571070372, |
| "learning_rate": 1.6366745561921526e-06, |
| "loss": 0.2187, |
| "step": 9480 |
| }, |
| { |
| "epoch": 2.2867815399445717, |
| "grad_norm": 0.16907410414510796, |
| "learning_rate": 1.626314686617586e-06, |
| "loss": 0.2193, |
| "step": 9490 |
| }, |
| { |
| "epoch": 2.289191468851669, |
| "grad_norm": 0.14941652687909515, |
| "learning_rate": 1.6159813374576473e-06, |
| "loss": 0.2158, |
| "step": 9500 |
| }, |
| { |
| "epoch": 2.291601397758766, |
| "grad_norm": 0.16442210723302128, |
| "learning_rate": 1.605674589942411e-06, |
| "loss": 0.215, |
| "step": 9510 |
| }, |
| { |
| "epoch": 2.2940113266658635, |
| "grad_norm": 0.1468941473576951, |
| "learning_rate": 1.5953945250928337e-06, |
| "loss": 0.2172, |
| "step": 9520 |
| }, |
| { |
| "epoch": 2.2964212555729606, |
| "grad_norm": 0.1563069184300272, |
| "learning_rate": 1.5851412237201241e-06, |
| "loss": 0.2191, |
| "step": 9530 |
| }, |
| { |
| "epoch": 2.2988311844800577, |
| "grad_norm": 0.17864469161641042, |
| "learning_rate": 1.5749147664251008e-06, |
| "loss": 0.2141, |
| "step": 9540 |
| }, |
| { |
| "epoch": 2.301241113387155, |
| "grad_norm": 0.15936407627764404, |
| "learning_rate": 1.5647152335975675e-06, |
| "loss": 0.2175, |
| "step": 9550 |
| }, |
| { |
| "epoch": 2.3036510422942524, |
| "grad_norm": 0.1786482233152988, |
| "learning_rate": 1.5545427054156659e-06, |
| "loss": 0.214, |
| "step": 9560 |
| }, |
| { |
| "epoch": 2.3060609712013496, |
| "grad_norm": 0.3481171659056702, |
| "learning_rate": 1.5443972618452685e-06, |
| "loss": 0.2126, |
| "step": 9570 |
| }, |
| { |
| "epoch": 2.3084709001084467, |
| "grad_norm": 0.1554740296898682, |
| "learning_rate": 1.5342789826393223e-06, |
| "loss": 0.2162, |
| "step": 9580 |
| }, |
| { |
| "epoch": 2.3108808290155443, |
| "grad_norm": 0.1530966450179755, |
| "learning_rate": 1.5241879473372501e-06, |
| "loss": 0.2147, |
| "step": 9590 |
| }, |
| { |
| "epoch": 2.3132907579226414, |
| "grad_norm": 0.16046256248741983, |
| "learning_rate": 1.5141242352642975e-06, |
| "loss": 0.2144, |
| "step": 9600 |
| }, |
| { |
| "epoch": 2.3157006868297385, |
| "grad_norm": 0.15279090220025765, |
| "learning_rate": 1.5040879255309366e-06, |
| "loss": 0.2136, |
| "step": 9610 |
| }, |
| { |
| "epoch": 2.3181106157368356, |
| "grad_norm": 0.1535195155590413, |
| "learning_rate": 1.4940790970322217e-06, |
| "loss": 0.2149, |
| "step": 9620 |
| }, |
| { |
| "epoch": 2.3205205446439328, |
| "grad_norm": 0.15539258455309055, |
| "learning_rate": 1.4840978284471818e-06, |
| "loss": 0.2126, |
| "step": 9630 |
| }, |
| { |
| "epoch": 2.3229304735510303, |
| "grad_norm": 0.16168682210373636, |
| "learning_rate": 1.4741441982381965e-06, |
| "loss": 0.2164, |
| "step": 9640 |
| }, |
| { |
| "epoch": 2.3253404024581275, |
| "grad_norm": 0.17188373165148013, |
| "learning_rate": 1.4642182846503834e-06, |
| "loss": 0.2147, |
| "step": 9650 |
| }, |
| { |
| "epoch": 2.3277503313652246, |
| "grad_norm": 0.16379929980091218, |
| "learning_rate": 1.454320165710979e-06, |
| "loss": 0.2181, |
| "step": 9660 |
| }, |
| { |
| "epoch": 2.330160260272322, |
| "grad_norm": 0.1521292345680125, |
| "learning_rate": 1.4444499192287275e-06, |
| "loss": 0.215, |
| "step": 9670 |
| }, |
| { |
| "epoch": 2.3325701891794193, |
| "grad_norm": 0.19323544915918778, |
| "learning_rate": 1.434607622793268e-06, |
| "loss": 0.2182, |
| "step": 9680 |
| }, |
| { |
| "epoch": 2.3349801180865164, |
| "grad_norm": 0.15582009476234696, |
| "learning_rate": 1.4247933537745312e-06, |
| "loss": 0.216, |
| "step": 9690 |
| }, |
| { |
| "epoch": 2.3373900469936135, |
| "grad_norm": 0.15708094972066383, |
| "learning_rate": 1.4150071893221134e-06, |
| "loss": 0.2159, |
| "step": 9700 |
| }, |
| { |
| "epoch": 2.339799975900711, |
| "grad_norm": 0.1737618881263453, |
| "learning_rate": 1.4052492063646954e-06, |
| "loss": 0.2148, |
| "step": 9710 |
| }, |
| { |
| "epoch": 2.342209904807808, |
| "grad_norm": 0.15317616467027403, |
| "learning_rate": 1.395519481609412e-06, |
| "loss": 0.2141, |
| "step": 9720 |
| }, |
| { |
| "epoch": 2.3446198337149053, |
| "grad_norm": 0.15366195231866672, |
| "learning_rate": 1.3858180915412733e-06, |
| "loss": 0.2139, |
| "step": 9730 |
| }, |
| { |
| "epoch": 2.3470297626220025, |
| "grad_norm": 0.16986901286246847, |
| "learning_rate": 1.376145112422539e-06, |
| "loss": 0.2189, |
| "step": 9740 |
| }, |
| { |
| "epoch": 2.3494396915291, |
| "grad_norm": 0.17802400709398805, |
| "learning_rate": 1.3665006202921422e-06, |
| "loss": 0.2158, |
| "step": 9750 |
| }, |
| { |
| "epoch": 2.351849620436197, |
| "grad_norm": 0.16650587609111575, |
| "learning_rate": 1.3568846909650757e-06, |
| "loss": 0.2147, |
| "step": 9760 |
| }, |
| { |
| "epoch": 2.3542595493432943, |
| "grad_norm": 0.15886679018003938, |
| "learning_rate": 1.347297400031801e-06, |
| "loss": 0.216, |
| "step": 9770 |
| }, |
| { |
| "epoch": 2.3566694782503914, |
| "grad_norm": 0.1558887168853167, |
| "learning_rate": 1.337738822857656e-06, |
| "loss": 0.2144, |
| "step": 9780 |
| }, |
| { |
| "epoch": 2.359079407157489, |
| "grad_norm": 0.17399136027452464, |
| "learning_rate": 1.3282090345822591e-06, |
| "loss": 0.2172, |
| "step": 9790 |
| }, |
| { |
| "epoch": 2.361489336064586, |
| "grad_norm": 0.15809538352206384, |
| "learning_rate": 1.3187081101189215e-06, |
| "loss": 0.2148, |
| "step": 9800 |
| }, |
| { |
| "epoch": 2.3638992649716832, |
| "grad_norm": 0.15410052322227458, |
| "learning_rate": 1.309236124154057e-06, |
| "loss": 0.2198, |
| "step": 9810 |
| }, |
| { |
| "epoch": 2.366309193878781, |
| "grad_norm": 0.153785139042308, |
| "learning_rate": 1.299793151146594e-06, |
| "loss": 0.2139, |
| "step": 9820 |
| }, |
| { |
| "epoch": 2.368719122785878, |
| "grad_norm": 0.15562022157098584, |
| "learning_rate": 1.2903792653273916e-06, |
| "loss": 0.216, |
| "step": 9830 |
| }, |
| { |
| "epoch": 2.371129051692975, |
| "grad_norm": 0.17251767111709934, |
| "learning_rate": 1.2809945406986546e-06, |
| "loss": 0.2171, |
| "step": 9840 |
| }, |
| { |
| "epoch": 2.373538980600072, |
| "grad_norm": 0.1731800579302401, |
| "learning_rate": 1.2716390510333586e-06, |
| "loss": 0.2175, |
| "step": 9850 |
| }, |
| { |
| "epoch": 2.3759489095071693, |
| "grad_norm": 0.16473859545425532, |
| "learning_rate": 1.2623128698746527e-06, |
| "loss": 0.2177, |
| "step": 9860 |
| }, |
| { |
| "epoch": 2.378358838414267, |
| "grad_norm": 0.16059203541442638, |
| "learning_rate": 1.2530160705353068e-06, |
| "loss": 0.2123, |
| "step": 9870 |
| }, |
| { |
| "epoch": 2.380768767321364, |
| "grad_norm": 0.15004803332571034, |
| "learning_rate": 1.243748726097107e-06, |
| "loss": 0.2127, |
| "step": 9880 |
| }, |
| { |
| "epoch": 2.383178696228461, |
| "grad_norm": 0.15582484533929308, |
| "learning_rate": 1.2345109094103102e-06, |
| "loss": 0.2142, |
| "step": 9890 |
| }, |
| { |
| "epoch": 2.3855886251355587, |
| "grad_norm": 0.1517333606391172, |
| "learning_rate": 1.2253026930930422e-06, |
| "loss": 0.2174, |
| "step": 9900 |
| }, |
| { |
| "epoch": 2.387998554042656, |
| "grad_norm": 0.15307520166486493, |
| "learning_rate": 1.2161241495307546e-06, |
| "loss": 0.2177, |
| "step": 9910 |
| }, |
| { |
| "epoch": 2.390408482949753, |
| "grad_norm": 0.1627551849491432, |
| "learning_rate": 1.2069753508756332e-06, |
| "loss": 0.2182, |
| "step": 9920 |
| }, |
| { |
| "epoch": 2.39281841185685, |
| "grad_norm": 0.1593537445472167, |
| "learning_rate": 1.1978563690460454e-06, |
| "loss": 0.2152, |
| "step": 9930 |
| }, |
| { |
| "epoch": 2.3952283407639476, |
| "grad_norm": 0.15471800921328202, |
| "learning_rate": 1.188767275725966e-06, |
| "loss": 0.215, |
| "step": 9940 |
| }, |
| { |
| "epoch": 2.3976382696710448, |
| "grad_norm": 0.16330972159039678, |
| "learning_rate": 1.1797081423644207e-06, |
| "loss": 0.2167, |
| "step": 9950 |
| }, |
| { |
| "epoch": 2.400048198578142, |
| "grad_norm": 0.1552958638526077, |
| "learning_rate": 1.1706790401749191e-06, |
| "loss": 0.2139, |
| "step": 9960 |
| }, |
| { |
| "epoch": 2.402458127485239, |
| "grad_norm": 0.165985184347308, |
| "learning_rate": 1.161680040134897e-06, |
| "loss": 0.2171, |
| "step": 9970 |
| }, |
| { |
| "epoch": 2.4048680563923366, |
| "grad_norm": 0.16849891565784433, |
| "learning_rate": 1.152711212985157e-06, |
| "loss": 0.2187, |
| "step": 9980 |
| }, |
| { |
| "epoch": 2.4072779852994337, |
| "grad_norm": 0.16338319099698362, |
| "learning_rate": 1.1437726292293205e-06, |
| "loss": 0.2184, |
| "step": 9990 |
| }, |
| { |
| "epoch": 2.409687914206531, |
| "grad_norm": 0.15473961627507304, |
| "learning_rate": 1.1348643591332554e-06, |
| "loss": 0.2155, |
| "step": 10000 |
| }, |
| { |
| "epoch": 2.412097843113628, |
| "grad_norm": 0.1524426438326065, |
| "learning_rate": 1.1259864727245462e-06, |
| "loss": 0.2187, |
| "step": 10010 |
| }, |
| { |
| "epoch": 2.4145077720207255, |
| "grad_norm": 0.15125931354484692, |
| "learning_rate": 1.117139039791923e-06, |
| "loss": 0.2181, |
| "step": 10020 |
| }, |
| { |
| "epoch": 2.4169177009278227, |
| "grad_norm": 0.15930480589154564, |
| "learning_rate": 1.1083221298847318e-06, |
| "loss": 0.2169, |
| "step": 10030 |
| }, |
| { |
| "epoch": 2.41932762983492, |
| "grad_norm": 0.17114397481137986, |
| "learning_rate": 1.0995358123123672e-06, |
| "loss": 0.2155, |
| "step": 10040 |
| }, |
| { |
| "epoch": 2.4217375587420173, |
| "grad_norm": 0.1490004076876234, |
| "learning_rate": 1.0907801561437487e-06, |
| "loss": 0.215, |
| "step": 10050 |
| }, |
| { |
| "epoch": 2.4241474876491145, |
| "grad_norm": 0.16823086572880896, |
| "learning_rate": 1.0820552302067626e-06, |
| "loss": 0.2179, |
| "step": 10060 |
| }, |
| { |
| "epoch": 2.4265574165562116, |
| "grad_norm": 0.15240246304558344, |
| "learning_rate": 1.0733611030877261e-06, |
| "loss": 0.214, |
| "step": 10070 |
| }, |
| { |
| "epoch": 2.4289673454633087, |
| "grad_norm": 0.15789303726910206, |
| "learning_rate": 1.064697843130849e-06, |
| "loss": 0.2176, |
| "step": 10080 |
| }, |
| { |
| "epoch": 2.431377274370406, |
| "grad_norm": 0.16480605547265204, |
| "learning_rate": 1.0560655184376956e-06, |
| "loss": 0.2146, |
| "step": 10090 |
| }, |
| { |
| "epoch": 2.4337872032775034, |
| "grad_norm": 0.1548833111983981, |
| "learning_rate": 1.0474641968666482e-06, |
| "loss": 0.2141, |
| "step": 10100 |
| }, |
| { |
| "epoch": 2.4361971321846005, |
| "grad_norm": 0.23199817443932189, |
| "learning_rate": 1.0388939460323761e-06, |
| "loss": 0.2181, |
| "step": 10110 |
| }, |
| { |
| "epoch": 2.4386070610916977, |
| "grad_norm": 0.1524674301408952, |
| "learning_rate": 1.030354833305301e-06, |
| "loss": 0.2182, |
| "step": 10120 |
| }, |
| { |
| "epoch": 2.4410169899987952, |
| "grad_norm": 0.14792064329170312, |
| "learning_rate": 1.0218469258110713e-06, |
| "loss": 0.2207, |
| "step": 10130 |
| }, |
| { |
| "epoch": 2.4434269189058924, |
| "grad_norm": 0.14857023276356696, |
| "learning_rate": 1.013370290430029e-06, |
| "loss": 0.216, |
| "step": 10140 |
| }, |
| { |
| "epoch": 2.4458368478129895, |
| "grad_norm": 0.1518631715154245, |
| "learning_rate": 1.0049249937966938e-06, |
| "loss": 0.2202, |
| "step": 10150 |
| }, |
| { |
| "epoch": 2.4482467767200866, |
| "grad_norm": 0.15219218159498268, |
| "learning_rate": 9.96511102299222e-07, |
| "loss": 0.2156, |
| "step": 10160 |
| }, |
| { |
| "epoch": 2.450656705627184, |
| "grad_norm": 0.17439739208924204, |
| "learning_rate": 9.881286820789066e-07, |
| "loss": 0.2139, |
| "step": 10170 |
| }, |
| { |
| "epoch": 2.4530666345342813, |
| "grad_norm": 0.1624679939124629, |
| "learning_rate": 9.797777990296364e-07, |
| "loss": 0.2153, |
| "step": 10180 |
| }, |
| { |
| "epoch": 2.4554765634413784, |
| "grad_norm": 0.14825527179599532, |
| "learning_rate": 9.714585187973962e-07, |
| "loss": 0.2172, |
| "step": 10190 |
| }, |
| { |
| "epoch": 2.4578864923484756, |
| "grad_norm": 0.15110347253093098, |
| "learning_rate": 9.631709067797346e-07, |
| "loss": 0.2154, |
| "step": 10200 |
| }, |
| { |
| "epoch": 2.460296421255573, |
| "grad_norm": 0.1551914074026394, |
| "learning_rate": 9.549150281252633e-07, |
| "loss": 0.2132, |
| "step": 10210 |
| }, |
| { |
| "epoch": 2.4627063501626703, |
| "grad_norm": 0.14944505271933253, |
| "learning_rate": 9.466909477331365e-07, |
| "loss": 0.2124, |
| "step": 10220 |
| }, |
| { |
| "epoch": 2.4651162790697674, |
| "grad_norm": 0.43543381337712755, |
| "learning_rate": 9.384987302525439e-07, |
| "loss": 0.2146, |
| "step": 10230 |
| }, |
| { |
| "epoch": 2.4675262079768645, |
| "grad_norm": 0.14743881553761198, |
| "learning_rate": 9.303384400822019e-07, |
| "loss": 0.2149, |
| "step": 10240 |
| }, |
| { |
| "epoch": 2.469936136883962, |
| "grad_norm": 0.16839266721051543, |
| "learning_rate": 9.222101413698475e-07, |
| "loss": 0.217, |
| "step": 10250 |
| }, |
| { |
| "epoch": 2.472346065791059, |
| "grad_norm": 0.15512130900350077, |
| "learning_rate": 9.141138980117348e-07, |
| "loss": 0.2135, |
| "step": 10260 |
| }, |
| { |
| "epoch": 2.4747559946981563, |
| "grad_norm": 0.17547285221057424, |
| "learning_rate": 9.060497736521312e-07, |
| "loss": 0.217, |
| "step": 10270 |
| }, |
| { |
| "epoch": 2.477165923605254, |
| "grad_norm": 0.16694964929506417, |
| "learning_rate": 8.980178316828158e-07, |
| "loss": 0.2133, |
| "step": 10280 |
| }, |
| { |
| "epoch": 2.479575852512351, |
| "grad_norm": 0.1555523115039673, |
| "learning_rate": 8.900181352425907e-07, |
| "loss": 0.2162, |
| "step": 10290 |
| }, |
| { |
| "epoch": 2.481985781419448, |
| "grad_norm": 0.15050122564493998, |
| "learning_rate": 8.82050747216766e-07, |
| "loss": 0.2141, |
| "step": 10300 |
| }, |
| { |
| "epoch": 2.4843957103265453, |
| "grad_norm": 0.15441377443885543, |
| "learning_rate": 8.741157302366859e-07, |
| "loss": 0.2139, |
| "step": 10310 |
| }, |
| { |
| "epoch": 2.4868056392336424, |
| "grad_norm": 0.15380456685917115, |
| "learning_rate": 8.662131466792217e-07, |
| "loss": 0.2149, |
| "step": 10320 |
| }, |
| { |
| "epoch": 2.48921556814074, |
| "grad_norm": 0.23664151878761275, |
| "learning_rate": 8.5834305866629e-07, |
| "loss": 0.2173, |
| "step": 10330 |
| }, |
| { |
| "epoch": 2.491625497047837, |
| "grad_norm": 0.14724309183290407, |
| "learning_rate": 8.505055280643582e-07, |
| "loss": 0.2132, |
| "step": 10340 |
| }, |
| { |
| "epoch": 2.494035425954934, |
| "grad_norm": 0.15466217921216077, |
| "learning_rate": 8.42700616483963e-07, |
| "loss": 0.215, |
| "step": 10350 |
| }, |
| { |
| "epoch": 2.496445354862032, |
| "grad_norm": 0.1501541798478477, |
| "learning_rate": 8.34928385279224e-07, |
| "loss": 0.2156, |
| "step": 10360 |
| }, |
| { |
| "epoch": 2.498855283769129, |
| "grad_norm": 0.15799685088693785, |
| "learning_rate": 8.271888955473606e-07, |
| "loss": 0.2159, |
| "step": 10370 |
| }, |
| { |
| "epoch": 2.501265212676226, |
| "grad_norm": 0.15549087871964487, |
| "learning_rate": 8.194822081282144e-07, |
| "loss": 0.2171, |
| "step": 10380 |
| }, |
| { |
| "epoch": 2.503675141583323, |
| "grad_norm": 0.15827715861036482, |
| "learning_rate": 8.118083836037677e-07, |
| "loss": 0.2153, |
| "step": 10390 |
| }, |
| { |
| "epoch": 2.5060850704904203, |
| "grad_norm": 0.16063224870974138, |
| "learning_rate": 8.041674822976686e-07, |
| "loss": 0.2163, |
| "step": 10400 |
| }, |
| { |
| "epoch": 2.508494999397518, |
| "grad_norm": 0.14645411429716856, |
| "learning_rate": 7.965595642747593e-07, |
| "loss": 0.2188, |
| "step": 10410 |
| }, |
| { |
| "epoch": 2.510904928304615, |
| "grad_norm": 0.15056214346482824, |
| "learning_rate": 7.889846893405978e-07, |
| "loss": 0.2142, |
| "step": 10420 |
| }, |
| { |
| "epoch": 2.513314857211712, |
| "grad_norm": 0.1506232491064962, |
| "learning_rate": 7.814429170409965e-07, |
| "loss": 0.2167, |
| "step": 10430 |
| }, |
| { |
| "epoch": 2.5157247861188097, |
| "grad_norm": 0.1567734434906103, |
| "learning_rate": 7.739343066615457e-07, |
| "loss": 0.2144, |
| "step": 10440 |
| }, |
| { |
| "epoch": 2.518134715025907, |
| "grad_norm": 0.14321024474995245, |
| "learning_rate": 7.664589172271519e-07, |
| "loss": 0.2152, |
| "step": 10450 |
| }, |
| { |
| "epoch": 2.520544643933004, |
| "grad_norm": 0.15239216489963886, |
| "learning_rate": 7.590168075015725e-07, |
| "loss": 0.2138, |
| "step": 10460 |
| }, |
| { |
| "epoch": 2.522954572840101, |
| "grad_norm": 0.15562793629546276, |
| "learning_rate": 7.51608035986956e-07, |
| "loss": 0.214, |
| "step": 10470 |
| }, |
| { |
| "epoch": 2.5253645017471986, |
| "grad_norm": 0.19653172860047724, |
| "learning_rate": 7.442326609233786e-07, |
| "loss": 0.215, |
| "step": 10480 |
| }, |
| { |
| "epoch": 2.5277744306542957, |
| "grad_norm": 0.18176462712911917, |
| "learning_rate": 7.368907402883896e-07, |
| "loss": 0.2158, |
| "step": 10490 |
| }, |
| { |
| "epoch": 2.530184359561393, |
| "grad_norm": 0.15366550809542798, |
| "learning_rate": 7.295823317965533e-07, |
| "loss": 0.2204, |
| "step": 10500 |
| }, |
| { |
| "epoch": 2.5325942884684904, |
| "grad_norm": 0.16683906973897664, |
| "learning_rate": 7.223074928989971e-07, |
| "loss": 0.2156, |
| "step": 10510 |
| }, |
| { |
| "epoch": 2.5350042173755876, |
| "grad_norm": 0.1456531060733591, |
| "learning_rate": 7.150662807829584e-07, |
| "loss": 0.2166, |
| "step": 10520 |
| }, |
| { |
| "epoch": 2.5374141462826847, |
| "grad_norm": 0.14967535572296944, |
| "learning_rate": 7.07858752371336e-07, |
| "loss": 0.2163, |
| "step": 10530 |
| }, |
| { |
| "epoch": 2.539824075189782, |
| "grad_norm": 0.15185919957957109, |
| "learning_rate": 7.006849643222425e-07, |
| "loss": 0.2126, |
| "step": 10540 |
| }, |
| { |
| "epoch": 2.542234004096879, |
| "grad_norm": 0.15321061069930217, |
| "learning_rate": 6.935449730285576e-07, |
| "loss": 0.2173, |
| "step": 10550 |
| }, |
| { |
| "epoch": 2.5446439330039765, |
| "grad_norm": 0.15461884016543473, |
| "learning_rate": 6.864388346174899e-07, |
| "loss": 0.2135, |
| "step": 10560 |
| }, |
| { |
| "epoch": 2.5470538619110736, |
| "grad_norm": 0.15247762863206255, |
| "learning_rate": 6.793666049501252e-07, |
| "loss": 0.2157, |
| "step": 10570 |
| }, |
| { |
| "epoch": 2.5494637908181708, |
| "grad_norm": 0.153138733256644, |
| "learning_rate": 6.723283396210006e-07, |
| "loss": 0.2164, |
| "step": 10580 |
| }, |
| { |
| "epoch": 2.5518737197252683, |
| "grad_norm": 0.15762097259500046, |
| "learning_rate": 6.65324093957656e-07, |
| "loss": 0.2167, |
| "step": 10590 |
| }, |
| { |
| "epoch": 2.5542836486323655, |
| "grad_norm": 0.1452931268503044, |
| "learning_rate": 6.583539230202062e-07, |
| "loss": 0.2164, |
| "step": 10600 |
| }, |
| { |
| "epoch": 2.5566935775394626, |
| "grad_norm": 0.17760042076163413, |
| "learning_rate": 6.514178816009059e-07, |
| "loss": 0.2152, |
| "step": 10610 |
| }, |
| { |
| "epoch": 2.5591035064465597, |
| "grad_norm": 0.143344197207501, |
| "learning_rate": 6.445160242237181e-07, |
| "loss": 0.2137, |
| "step": 10620 |
| }, |
| { |
| "epoch": 2.561513435353657, |
| "grad_norm": 0.15195117677297604, |
| "learning_rate": 6.376484051438864e-07, |
| "loss": 0.2139, |
| "step": 10630 |
| }, |
| { |
| "epoch": 2.5639233642607544, |
| "grad_norm": 0.15428231381166374, |
| "learning_rate": 6.308150783475086e-07, |
| "loss": 0.2176, |
| "step": 10640 |
| }, |
| { |
| "epoch": 2.5663332931678515, |
| "grad_norm": 0.14082720147844494, |
| "learning_rate": 6.240160975511117e-07, |
| "loss": 0.2146, |
| "step": 10650 |
| }, |
| { |
| "epoch": 2.5687432220749487, |
| "grad_norm": 0.14947632811048964, |
| "learning_rate": 6.172515162012332e-07, |
| "loss": 0.215, |
| "step": 10660 |
| }, |
| { |
| "epoch": 2.571153150982046, |
| "grad_norm": 0.14709972300496124, |
| "learning_rate": 6.105213874739913e-07, |
| "loss": 0.2185, |
| "step": 10670 |
| }, |
| { |
| "epoch": 2.5735630798891433, |
| "grad_norm": 0.1514336436344568, |
| "learning_rate": 6.038257642746815e-07, |
| "loss": 0.2156, |
| "step": 10680 |
| }, |
| { |
| "epoch": 2.5759730087962405, |
| "grad_norm": 0.160201390531587, |
| "learning_rate": 5.971646992373442e-07, |
| "loss": 0.2186, |
| "step": 10690 |
| }, |
| { |
| "epoch": 2.5783829377033376, |
| "grad_norm": 0.14628128374580812, |
| "learning_rate": 5.905382447243679e-07, |
| "loss": 0.2144, |
| "step": 10700 |
| }, |
| { |
| "epoch": 2.5807928666104347, |
| "grad_norm": 0.14637092358991383, |
| "learning_rate": 5.839464528260602e-07, |
| "loss": 0.2125, |
| "step": 10710 |
| }, |
| { |
| "epoch": 2.5832027955175323, |
| "grad_norm": 0.1445507935886425, |
| "learning_rate": 5.773893753602556e-07, |
| "loss": 0.2157, |
| "step": 10720 |
| }, |
| { |
| "epoch": 2.5856127244246294, |
| "grad_norm": 0.1486312397473014, |
| "learning_rate": 5.708670638718944e-07, |
| "loss": 0.2148, |
| "step": 10730 |
| }, |
| { |
| "epoch": 2.588022653331727, |
| "grad_norm": 0.14196807854013832, |
| "learning_rate": 5.643795696326248e-07, |
| "loss": 0.2161, |
| "step": 10740 |
| }, |
| { |
| "epoch": 2.590432582238824, |
| "grad_norm": 0.15384901135968845, |
| "learning_rate": 5.579269436403967e-07, |
| "loss": 0.2204, |
| "step": 10750 |
| }, |
| { |
| "epoch": 2.5928425111459212, |
| "grad_norm": 0.1470650921033427, |
| "learning_rate": 5.515092366190633e-07, |
| "loss": 0.2158, |
| "step": 10760 |
| }, |
| { |
| "epoch": 2.5952524400530184, |
| "grad_norm": 0.1681803787313935, |
| "learning_rate": 5.451264990179806e-07, |
| "loss": 0.2174, |
| "step": 10770 |
| }, |
| { |
| "epoch": 2.5976623689601155, |
| "grad_norm": 0.16948890769571248, |
| "learning_rate": 5.387787810116107e-07, |
| "loss": 0.2147, |
| "step": 10780 |
| }, |
| { |
| "epoch": 2.600072297867213, |
| "grad_norm": 0.1498800616401931, |
| "learning_rate": 5.324661324991287e-07, |
| "loss": 0.2147, |
| "step": 10790 |
| }, |
| { |
| "epoch": 2.60248222677431, |
| "grad_norm": 0.14650403620935654, |
| "learning_rate": 5.261886031040297e-07, |
| "loss": 0.2128, |
| "step": 10800 |
| }, |
| { |
| "epoch": 2.6048921556814073, |
| "grad_norm": 0.1403119870002928, |
| "learning_rate": 5.199462421737378e-07, |
| "loss": 0.2154, |
| "step": 10810 |
| }, |
| { |
| "epoch": 2.607302084588505, |
| "grad_norm": 0.15026684131252868, |
| "learning_rate": 5.137390987792224e-07, |
| "loss": 0.2151, |
| "step": 10820 |
| }, |
| { |
| "epoch": 2.609712013495602, |
| "grad_norm": 0.1471904393058862, |
| "learning_rate": 5.075672217146021e-07, |
| "loss": 0.2139, |
| "step": 10830 |
| }, |
| { |
| "epoch": 2.612121942402699, |
| "grad_norm": 0.15677160811261384, |
| "learning_rate": 5.014306594967777e-07, |
| "loss": 0.2141, |
| "step": 10840 |
| }, |
| { |
| "epoch": 2.6145318713097963, |
| "grad_norm": 0.1481214708670388, |
| "learning_rate": 4.953294603650321e-07, |
| "loss": 0.214, |
| "step": 10850 |
| }, |
| { |
| "epoch": 2.6169418002168934, |
| "grad_norm": 0.18119119284756185, |
| "learning_rate": 4.892636722806681e-07, |
| "loss": 0.2165, |
| "step": 10860 |
| }, |
| { |
| "epoch": 2.619351729123991, |
| "grad_norm": 0.1477781304653272, |
| "learning_rate": 4.832333429266162e-07, |
| "loss": 0.216, |
| "step": 10870 |
| }, |
| { |
| "epoch": 2.621761658031088, |
| "grad_norm": 0.16079097682910412, |
| "learning_rate": 4.772385197070734e-07, |
| "loss": 0.2177, |
| "step": 10880 |
| }, |
| { |
| "epoch": 2.624171586938185, |
| "grad_norm": 0.14713561111355702, |
| "learning_rate": 4.712792497471219e-07, |
| "loss": 0.2166, |
| "step": 10890 |
| }, |
| { |
| "epoch": 2.6265815158452828, |
| "grad_norm": 0.15448363786407995, |
| "learning_rate": 4.653555798923598e-07, |
| "loss": 0.2141, |
| "step": 10900 |
| }, |
| { |
| "epoch": 2.62899144475238, |
| "grad_norm": 0.1849024805996653, |
| "learning_rate": 4.59467556708536e-07, |
| "loss": 0.2147, |
| "step": 10910 |
| }, |
| { |
| "epoch": 2.631401373659477, |
| "grad_norm": 0.15909490607189156, |
| "learning_rate": 4.5361522648118163e-07, |
| "loss": 0.2168, |
| "step": 10920 |
| }, |
| { |
| "epoch": 2.633811302566574, |
| "grad_norm": 0.1626346191674306, |
| "learning_rate": 4.477986352152458e-07, |
| "loss": 0.2167, |
| "step": 10930 |
| }, |
| { |
| "epoch": 2.6362212314736713, |
| "grad_norm": 0.14666355397322436, |
| "learning_rate": 4.420178286347365e-07, |
| "loss": 0.2137, |
| "step": 10940 |
| }, |
| { |
| "epoch": 2.638631160380769, |
| "grad_norm": 0.1490210269785957, |
| "learning_rate": 4.3627285218235836e-07, |
| "loss": 0.2149, |
| "step": 10950 |
| }, |
| { |
| "epoch": 2.641041089287866, |
| "grad_norm": 0.14461180157591294, |
| "learning_rate": 4.305637510191596e-07, |
| "loss": 0.2181, |
| "step": 10960 |
| }, |
| { |
| "epoch": 2.643451018194963, |
| "grad_norm": 0.14527442465310253, |
| "learning_rate": 4.248905700241679e-07, |
| "loss": 0.2173, |
| "step": 10970 |
| }, |
| { |
| "epoch": 2.6458609471020607, |
| "grad_norm": 0.14302406763640138, |
| "learning_rate": 4.192533537940524e-07, |
| "loss": 0.2159, |
| "step": 10980 |
| }, |
| { |
| "epoch": 2.648270876009158, |
| "grad_norm": 0.1434474709891144, |
| "learning_rate": 4.1365214664275624e-07, |
| "loss": 0.2168, |
| "step": 10990 |
| }, |
| { |
| "epoch": 2.650680804916255, |
| "grad_norm": 0.15007622042696891, |
| "learning_rate": 4.0808699260116267e-07, |
| "loss": 0.2172, |
| "step": 11000 |
| }, |
| { |
| "epoch": 2.653090733823352, |
| "grad_norm": 0.14678548126536323, |
| "learning_rate": 4.025579354167386e-07, |
| "loss": 0.2154, |
| "step": 11010 |
| }, |
| { |
| "epoch": 2.6555006627304496, |
| "grad_norm": 0.16699962459270185, |
| "learning_rate": 3.9706501855319767e-07, |
| "loss": 0.2152, |
| "step": 11020 |
| }, |
| { |
| "epoch": 2.6579105916375467, |
| "grad_norm": 0.1559363237731874, |
| "learning_rate": 3.9160828519015537e-07, |
| "loss": 0.2138, |
| "step": 11030 |
| }, |
| { |
| "epoch": 2.660320520544644, |
| "grad_norm": 0.15984795170652535, |
| "learning_rate": 3.8618777822278854e-07, |
| "loss": 0.2187, |
| "step": 11040 |
| }, |
| { |
| "epoch": 2.6627304494517414, |
| "grad_norm": 0.14956153654611046, |
| "learning_rate": 3.8080354026150067e-07, |
| "loss": 0.2142, |
| "step": 11050 |
| }, |
| { |
| "epoch": 2.6651403783588385, |
| "grad_norm": 0.15077480237694924, |
| "learning_rate": 3.754556136315862e-07, |
| "loss": 0.2147, |
| "step": 11060 |
| }, |
| { |
| "epoch": 2.6675503072659357, |
| "grad_norm": 0.14577843988899974, |
| "learning_rate": 3.701440403728973e-07, |
| "loss": 0.2139, |
| "step": 11070 |
| }, |
| { |
| "epoch": 2.669960236173033, |
| "grad_norm": 0.15153298255218833, |
| "learning_rate": 3.6486886223951356e-07, |
| "loss": 0.2136, |
| "step": 11080 |
| }, |
| { |
| "epoch": 2.67237016508013, |
| "grad_norm": 0.14377829877976545, |
| "learning_rate": 3.596301206994135e-07, |
| "loss": 0.2125, |
| "step": 11090 |
| }, |
| { |
| "epoch": 2.6747800939872275, |
| "grad_norm": 0.14107151261528034, |
| "learning_rate": 3.5442785693414916e-07, |
| "loss": 0.2145, |
| "step": 11100 |
| }, |
| { |
| "epoch": 2.6771900228943246, |
| "grad_norm": 1.3306074378302106, |
| "learning_rate": 3.4926211183852257e-07, |
| "loss": 0.2124, |
| "step": 11110 |
| }, |
| { |
| "epoch": 2.6795999518014217, |
| "grad_norm": 0.15076834202394918, |
| "learning_rate": 3.441329260202647e-07, |
| "loss": 0.2125, |
| "step": 11120 |
| }, |
| { |
| "epoch": 2.6820098807085193, |
| "grad_norm": 0.14596138661005686, |
| "learning_rate": 3.390403397997116e-07, |
| "loss": 0.2168, |
| "step": 11130 |
| }, |
| { |
| "epoch": 2.6844198096156164, |
| "grad_norm": 0.14575454258129183, |
| "learning_rate": 3.339843932094977e-07, |
| "loss": 0.2144, |
| "step": 11140 |
| }, |
| { |
| "epoch": 2.6868297385227136, |
| "grad_norm": 0.14296980394600473, |
| "learning_rate": 3.289651259942267e-07, |
| "loss": 0.2114, |
| "step": 11150 |
| }, |
| { |
| "epoch": 2.6892396674298107, |
| "grad_norm": 0.14448898833624593, |
| "learning_rate": 3.2398257761017516e-07, |
| "loss": 0.2118, |
| "step": 11160 |
| }, |
| { |
| "epoch": 2.691649596336908, |
| "grad_norm": 0.1455164505488364, |
| "learning_rate": 3.190367872249672e-07, |
| "loss": 0.2158, |
| "step": 11170 |
| }, |
| { |
| "epoch": 2.6940595252440054, |
| "grad_norm": 0.1480669347720248, |
| "learning_rate": 3.1412779371727873e-07, |
| "loss": 0.212, |
| "step": 11180 |
| }, |
| { |
| "epoch": 2.6964694541511025, |
| "grad_norm": 0.14678567604025397, |
| "learning_rate": 3.0925563567652474e-07, |
| "loss": 0.216, |
| "step": 11190 |
| }, |
| { |
| "epoch": 2.6988793830581996, |
| "grad_norm": 0.14111967346571289, |
| "learning_rate": 3.044203514025579e-07, |
| "loss": 0.2137, |
| "step": 11200 |
| }, |
| { |
| "epoch": 2.701289311965297, |
| "grad_norm": 0.15439891087860655, |
| "learning_rate": 2.996219789053678e-07, |
| "loss": 0.2153, |
| "step": 11210 |
| }, |
| { |
| "epoch": 2.7036992408723943, |
| "grad_norm": 0.1449692199656288, |
| "learning_rate": 2.948605559047818e-07, |
| "loss": 0.2164, |
| "step": 11220 |
| }, |
| { |
| "epoch": 2.7061091697794915, |
| "grad_norm": 0.1510671968535754, |
| "learning_rate": 2.9013611983016887e-07, |
| "loss": 0.2153, |
| "step": 11230 |
| }, |
| { |
| "epoch": 2.7085190986865886, |
| "grad_norm": 0.14155774109614394, |
| "learning_rate": 2.8544870782014566e-07, |
| "loss": 0.2167, |
| "step": 11240 |
| }, |
| { |
| "epoch": 2.710929027593686, |
| "grad_norm": 0.1459104749006618, |
| "learning_rate": 2.807983567222822e-07, |
| "loss": 0.2146, |
| "step": 11250 |
| }, |
| { |
| "epoch": 2.7133389565007833, |
| "grad_norm": 0.13975396752983146, |
| "learning_rate": 2.7618510309281756e-07, |
| "loss": 0.215, |
| "step": 11260 |
| }, |
| { |
| "epoch": 2.7157488854078804, |
| "grad_norm": 0.1505993142688404, |
| "learning_rate": 2.716089831963636e-07, |
| "loss": 0.2136, |
| "step": 11270 |
| }, |
| { |
| "epoch": 2.718158814314978, |
| "grad_norm": 0.15617080766090627, |
| "learning_rate": 2.6707003300563196e-07, |
| "loss": 0.2195, |
| "step": 11280 |
| }, |
| { |
| "epoch": 2.720568743222075, |
| "grad_norm": 0.15688756378161997, |
| "learning_rate": 2.6256828820113765e-07, |
| "loss": 0.2149, |
| "step": 11290 |
| }, |
| { |
| "epoch": 2.722978672129172, |
| "grad_norm": 0.1402875123126838, |
| "learning_rate": 2.581037841709322e-07, |
| "loss": 0.2148, |
| "step": 11300 |
| }, |
| { |
| "epoch": 2.7253886010362693, |
| "grad_norm": 0.14305298344924777, |
| "learning_rate": 2.536765560103122e-07, |
| "loss": 0.2159, |
| "step": 11310 |
| }, |
| { |
| "epoch": 2.7277985299433665, |
| "grad_norm": 0.14541053689623268, |
| "learning_rate": 2.492866385215559e-07, |
| "loss": 0.2154, |
| "step": 11320 |
| }, |
| { |
| "epoch": 2.730208458850464, |
| "grad_norm": 0.144441399188923, |
| "learning_rate": 2.449340662136407e-07, |
| "loss": 0.2121, |
| "step": 11330 |
| }, |
| { |
| "epoch": 2.732618387757561, |
| "grad_norm": 0.1445589057960872, |
| "learning_rate": 2.4061887330197485e-07, |
| "loss": 0.2146, |
| "step": 11340 |
| }, |
| { |
| "epoch": 2.7350283166646583, |
| "grad_norm": 0.1485865897290383, |
| "learning_rate": 2.3634109370813008e-07, |
| "loss": 0.2157, |
| "step": 11350 |
| }, |
| { |
| "epoch": 2.737438245571756, |
| "grad_norm": 0.1572861186189694, |
| "learning_rate": 2.3210076105957103e-07, |
| "loss": 0.2174, |
| "step": 11360 |
| }, |
| { |
| "epoch": 2.739848174478853, |
| "grad_norm": 0.14412296947468073, |
| "learning_rate": 2.278979086893962e-07, |
| "loss": 0.2174, |
| "step": 11370 |
| }, |
| { |
| "epoch": 2.74225810338595, |
| "grad_norm": 0.2303808996033888, |
| "learning_rate": 2.2373256963607093e-07, |
| "loss": 0.2162, |
| "step": 11380 |
| }, |
| { |
| "epoch": 2.7446680322930472, |
| "grad_norm": 0.14283031310782396, |
| "learning_rate": 2.1960477664317027e-07, |
| "loss": 0.2164, |
| "step": 11390 |
| }, |
| { |
| "epoch": 2.7470779612001444, |
| "grad_norm": 0.141104617011811, |
| "learning_rate": 2.1551456215912147e-07, |
| "loss": 0.2142, |
| "step": 11400 |
| }, |
| { |
| "epoch": 2.749487890107242, |
| "grad_norm": 0.13750087182045279, |
| "learning_rate": 2.114619583369476e-07, |
| "loss": 0.2172, |
| "step": 11410 |
| }, |
| { |
| "epoch": 2.751897819014339, |
| "grad_norm": 0.15008331895770075, |
| "learning_rate": 2.0744699703401817e-07, |
| "loss": 0.2128, |
| "step": 11420 |
| }, |
| { |
| "epoch": 2.754307747921436, |
| "grad_norm": 0.15087694733855953, |
| "learning_rate": 2.034697098117927e-07, |
| "loss": 0.2141, |
| "step": 11430 |
| }, |
| { |
| "epoch": 2.7567176768285337, |
| "grad_norm": 0.14772144208511126, |
| "learning_rate": 1.995301279355788e-07, |
| "loss": 0.2141, |
| "step": 11440 |
| }, |
| { |
| "epoch": 2.759127605735631, |
| "grad_norm": 0.1420236301894538, |
| "learning_rate": 1.9562828237428332e-07, |
| "loss": 0.2165, |
| "step": 11450 |
| }, |
| { |
| "epoch": 2.761537534642728, |
| "grad_norm": 0.17705266292955907, |
| "learning_rate": 1.917642038001677e-07, |
| "loss": 0.2186, |
| "step": 11460 |
| }, |
| { |
| "epoch": 2.763947463549825, |
| "grad_norm": 0.14736498204087187, |
| "learning_rate": 1.8793792258861077e-07, |
| "loss": 0.2143, |
| "step": 11470 |
| }, |
| { |
| "epoch": 2.7663573924569222, |
| "grad_norm": 0.1462058213629411, |
| "learning_rate": 1.8414946881786634e-07, |
| "loss": 0.2173, |
| "step": 11480 |
| }, |
| { |
| "epoch": 2.76876732136402, |
| "grad_norm": 0.14005297751119966, |
| "learning_rate": 1.8039887226882823e-07, |
| "loss": 0.2139, |
| "step": 11490 |
| }, |
| { |
| "epoch": 2.771177250271117, |
| "grad_norm": 0.14161333941981297, |
| "learning_rate": 1.7668616242479618e-07, |
| "loss": 0.2171, |
| "step": 11500 |
| }, |
| { |
| "epoch": 2.7735871791782145, |
| "grad_norm": 0.1669392687033115, |
| "learning_rate": 1.7301136847124477e-07, |
| "loss": 0.214, |
| "step": 11510 |
| }, |
| { |
| "epoch": 2.7759971080853116, |
| "grad_norm": 0.1564908936538617, |
| "learning_rate": 1.6937451929559147e-07, |
| "loss": 0.2158, |
| "step": 11520 |
| }, |
| { |
| "epoch": 2.7784070369924088, |
| "grad_norm": 0.14276676220504247, |
| "learning_rate": 1.6577564348697284e-07, |
| "loss": 0.2164, |
| "step": 11530 |
| }, |
| { |
| "epoch": 2.780816965899506, |
| "grad_norm": 0.1456887593652444, |
| "learning_rate": 1.622147693360171e-07, |
| "loss": 0.2155, |
| "step": 11540 |
| }, |
| { |
| "epoch": 2.783226894806603, |
| "grad_norm": 0.14609302357413262, |
| "learning_rate": 1.586919248346236e-07, |
| "loss": 0.2161, |
| "step": 11550 |
| }, |
| { |
| "epoch": 2.7856368237137006, |
| "grad_norm": 0.14773505784893998, |
| "learning_rate": 1.5520713767574247e-07, |
| "loss": 0.216, |
| "step": 11560 |
| }, |
| { |
| "epoch": 2.7880467526207977, |
| "grad_norm": 0.14143063951451698, |
| "learning_rate": 1.5176043525315543e-07, |
| "loss": 0.2159, |
| "step": 11570 |
| }, |
| { |
| "epoch": 2.790456681527895, |
| "grad_norm": 0.144146569653416, |
| "learning_rate": 1.483518446612614e-07, |
| "loss": 0.2139, |
| "step": 11580 |
| }, |
| { |
| "epoch": 2.7928666104349924, |
| "grad_norm": 0.15030088879637246, |
| "learning_rate": 1.4498139269486455e-07, |
| "loss": 0.2172, |
| "step": 11590 |
| }, |
| { |
| "epoch": 2.7952765393420895, |
| "grad_norm": 0.15073132705147213, |
| "learning_rate": 1.4164910584896163e-07, |
| "loss": 0.2153, |
| "step": 11600 |
| }, |
| { |
| "epoch": 2.7976864682491867, |
| "grad_norm": 0.1447578154197937, |
| "learning_rate": 1.383550103185366e-07, |
| "loss": 0.2135, |
| "step": 11610 |
| }, |
| { |
| "epoch": 2.800096397156284, |
| "grad_norm": 0.14760512710856533, |
| "learning_rate": 1.350991319983508e-07, |
| "loss": 0.2156, |
| "step": 11620 |
| }, |
| { |
| "epoch": 2.802506326063381, |
| "grad_norm": 0.14375223022331937, |
| "learning_rate": 1.3188149648274307e-07, |
| "loss": 0.2169, |
| "step": 11630 |
| }, |
| { |
| "epoch": 2.8049162549704785, |
| "grad_norm": 0.1431750582618315, |
| "learning_rate": 1.2870212906542612e-07, |
| "loss": 0.2144, |
| "step": 11640 |
| }, |
| { |
| "epoch": 2.8073261838775756, |
| "grad_norm": 0.1465298553349756, |
| "learning_rate": 1.2556105473928824e-07, |
| "loss": 0.2158, |
| "step": 11650 |
| }, |
| { |
| "epoch": 2.8097361127846727, |
| "grad_norm": 0.14429867980235148, |
| "learning_rate": 1.2245829819619858e-07, |
| "loss": 0.2158, |
| "step": 11660 |
| }, |
| { |
| "epoch": 2.8121460416917703, |
| "grad_norm": 0.1605621598018221, |
| "learning_rate": 1.1939388382681106e-07, |
| "loss": 0.2162, |
| "step": 11670 |
| }, |
| { |
| "epoch": 2.8145559705988674, |
| "grad_norm": 0.14490726186504946, |
| "learning_rate": 1.163678357203718e-07, |
| "loss": 0.2163, |
| "step": 11680 |
| }, |
| { |
| "epoch": 2.8169658995059645, |
| "grad_norm": 0.14265026765792224, |
| "learning_rate": 1.133801776645338e-07, |
| "loss": 0.2172, |
| "step": 11690 |
| }, |
| { |
| "epoch": 2.8193758284130617, |
| "grad_norm": 0.14252007202379896, |
| "learning_rate": 1.1043093314516418e-07, |
| "loss": 0.2144, |
| "step": 11700 |
| }, |
| { |
| "epoch": 2.821785757320159, |
| "grad_norm": 0.14018366242782945, |
| "learning_rate": 1.0752012534616496e-07, |
| "loss": 0.2146, |
| "step": 11710 |
| }, |
| { |
| "epoch": 2.8241956862272564, |
| "grad_norm": 0.14924691913698007, |
| "learning_rate": 1.046477771492882e-07, |
| "loss": 0.2137, |
| "step": 11720 |
| }, |
| { |
| "epoch": 2.8266056151343535, |
| "grad_norm": 0.14325563438103756, |
| "learning_rate": 1.0181391113395611e-07, |
| "loss": 0.2167, |
| "step": 11730 |
| }, |
| { |
| "epoch": 2.8290155440414506, |
| "grad_norm": 0.14560134689755475, |
| "learning_rate": 9.901854957708345e-08, |
| "loss": 0.2154, |
| "step": 11740 |
| }, |
| { |
| "epoch": 2.831425472948548, |
| "grad_norm": 0.13992789696897817, |
| "learning_rate": 9.626171445290378e-08, |
| "loss": 0.2127, |
| "step": 11750 |
| }, |
| { |
| "epoch": 2.8338354018556453, |
| "grad_norm": 0.14120716884520956, |
| "learning_rate": 9.354342743279455e-08, |
| "loss": 0.2174, |
| "step": 11760 |
| }, |
| { |
| "epoch": 2.8362453307627424, |
| "grad_norm": 0.14661882985075364, |
| "learning_rate": 9.086370988511006e-08, |
| "loss": 0.2145, |
| "step": 11770 |
| }, |
| { |
| "epoch": 2.8386552596698396, |
| "grad_norm": 0.138746176420656, |
| "learning_rate": 8.822258287500829e-08, |
| "loss": 0.2156, |
| "step": 11780 |
| }, |
| { |
| "epoch": 2.841065188576937, |
| "grad_norm": 0.14525313097858822, |
| "learning_rate": 8.562006716429316e-08, |
| "loss": 0.2158, |
| "step": 11790 |
| }, |
| { |
| "epoch": 2.8434751174840343, |
| "grad_norm": 0.13730154763188282, |
| "learning_rate": 8.305618321124087e-08, |
| "loss": 0.2147, |
| "step": 11800 |
| }, |
| { |
| "epoch": 2.8458850463911314, |
| "grad_norm": 0.14676730291717807, |
| "learning_rate": 8.053095117044995e-08, |
| "loss": 0.2164, |
| "step": 11810 |
| }, |
| { |
| "epoch": 2.848294975298229, |
| "grad_norm": 0.15785725014839885, |
| "learning_rate": 7.804439089267368e-08, |
| "loss": 0.2187, |
| "step": 11820 |
| }, |
| { |
| "epoch": 2.850704904205326, |
| "grad_norm": 0.16489312248668136, |
| "learning_rate": 7.559652192467127e-08, |
| "loss": 0.2157, |
| "step": 11830 |
| }, |
| { |
| "epoch": 2.853114833112423, |
| "grad_norm": 0.15245759469516776, |
| "learning_rate": 7.318736350904798e-08, |
| "loss": 0.2156, |
| "step": 11840 |
| }, |
| { |
| "epoch": 2.8555247620195203, |
| "grad_norm": 0.14008176695929778, |
| "learning_rate": 7.081693458410977e-08, |
| "loss": 0.2171, |
| "step": 11850 |
| }, |
| { |
| "epoch": 2.8579346909266174, |
| "grad_norm": 0.19455288597299314, |
| "learning_rate": 6.848525378370995e-08, |
| "loss": 0.2136, |
| "step": 11860 |
| }, |
| { |
| "epoch": 2.860344619833715, |
| "grad_norm": 0.1483545090144154, |
| "learning_rate": 6.61923394371039e-08, |
| "loss": 0.2145, |
| "step": 11870 |
| }, |
| { |
| "epoch": 2.862754548740812, |
| "grad_norm": 0.1384841405906939, |
| "learning_rate": 6.393820956880681e-08, |
| "loss": 0.2154, |
| "step": 11880 |
| }, |
| { |
| "epoch": 2.8651644776479093, |
| "grad_norm": 0.14038389430329315, |
| "learning_rate": 6.172288189844833e-08, |
| "loss": 0.2167, |
| "step": 11890 |
| }, |
| { |
| "epoch": 2.867574406555007, |
| "grad_norm": 0.14544988312585755, |
| "learning_rate": 5.954637384063766e-08, |
| "loss": 0.2185, |
| "step": 11900 |
| }, |
| { |
| "epoch": 2.869984335462104, |
| "grad_norm": 0.1409651272218272, |
| "learning_rate": 5.740870250482367e-08, |
| "loss": 0.216, |
| "step": 11910 |
| }, |
| { |
| "epoch": 2.872394264369201, |
| "grad_norm": 0.1447081545989431, |
| "learning_rate": 5.530988469516052e-08, |
| "loss": 0.2165, |
| "step": 11920 |
| }, |
| { |
| "epoch": 2.874804193276298, |
| "grad_norm": 0.14768819625895518, |
| "learning_rate": 5.324993691037783e-08, |
| "loss": 0.2153, |
| "step": 11930 |
| }, |
| { |
| "epoch": 2.8772141221833953, |
| "grad_norm": 0.15084497940541317, |
| "learning_rate": 5.12288753436474e-08, |
| "loss": 0.2144, |
| "step": 11940 |
| }, |
| { |
| "epoch": 2.879624051090493, |
| "grad_norm": 0.13818767890667225, |
| "learning_rate": 4.924671588246e-08, |
| "loss": 0.2179, |
| "step": 11950 |
| }, |
| { |
| "epoch": 2.88203397999759, |
| "grad_norm": 0.14294996691891518, |
| "learning_rate": 4.7303474108496e-08, |
| "loss": 0.2154, |
| "step": 11960 |
| }, |
| { |
| "epoch": 2.884443908904687, |
| "grad_norm": 0.1370553782790757, |
| "learning_rate": 4.539916529750832e-08, |
| "loss": 0.2105, |
| "step": 11970 |
| }, |
| { |
| "epoch": 2.8868538378117847, |
| "grad_norm": 0.14448370169663277, |
| "learning_rate": 4.353380441919575e-08, |
| "loss": 0.2133, |
| "step": 11980 |
| }, |
| { |
| "epoch": 2.889263766718882, |
| "grad_norm": 0.14358287400094485, |
| "learning_rate": 4.170740613709201e-08, |
| "loss": 0.2157, |
| "step": 11990 |
| }, |
| { |
| "epoch": 2.891673695625979, |
| "grad_norm": 0.145180918805608, |
| "learning_rate": 3.9919984808445836e-08, |
| "loss": 0.2148, |
| "step": 12000 |
| }, |
| { |
| "epoch": 2.894083624533076, |
| "grad_norm": 0.14916763417111534, |
| "learning_rate": 3.817155448410936e-08, |
| "loss": 0.214, |
| "step": 12010 |
| }, |
| { |
| "epoch": 2.8964935534401737, |
| "grad_norm": 0.1415009494551968, |
| "learning_rate": 3.6462128908428265e-08, |
| "loss": 0.2156, |
| "step": 12020 |
| }, |
| { |
| "epoch": 2.898903482347271, |
| "grad_norm": 0.14311257237621142, |
| "learning_rate": 3.479172151913346e-08, |
| "loss": 0.2149, |
| "step": 12030 |
| }, |
| { |
| "epoch": 2.901313411254368, |
| "grad_norm": 0.13942011766898588, |
| "learning_rate": 3.3160345447235674e-08, |
| "loss": 0.2108, |
| "step": 12040 |
| }, |
| { |
| "epoch": 2.9037233401614655, |
| "grad_norm": 0.1455508593123364, |
| "learning_rate": 3.156801351692051e-08, |
| "loss": 0.212, |
| "step": 12050 |
| }, |
| { |
| "epoch": 2.9061332690685626, |
| "grad_norm": 0.142658279173268, |
| "learning_rate": 3.0014738245450756e-08, |
| "loss": 0.2161, |
| "step": 12060 |
| }, |
| { |
| "epoch": 2.9085431979756597, |
| "grad_norm": 0.14186986012795527, |
| "learning_rate": 2.8500531843065893e-08, |
| "loss": 0.2132, |
| "step": 12070 |
| }, |
| { |
| "epoch": 2.910953126882757, |
| "grad_norm": 0.1375505945583521, |
| "learning_rate": 2.702540621288441e-08, |
| "loss": 0.2121, |
| "step": 12080 |
| }, |
| { |
| "epoch": 2.913363055789854, |
| "grad_norm": 0.15782136115572284, |
| "learning_rate": 2.5589372950815538e-08, |
| "loss": 0.2144, |
| "step": 12090 |
| }, |
| { |
| "epoch": 2.9157729846969516, |
| "grad_norm": 0.14098169270541844, |
| "learning_rate": 2.4192443345462667e-08, |
| "loss": 0.2166, |
| "step": 12100 |
| }, |
| { |
| "epoch": 2.9181829136040487, |
| "grad_norm": 0.14156996614525538, |
| "learning_rate": 2.2834628378037848e-08, |
| "loss": 0.2164, |
| "step": 12110 |
| }, |
| { |
| "epoch": 2.920592842511146, |
| "grad_norm": 0.14370423717812583, |
| "learning_rate": 2.1515938722272977e-08, |
| "loss": 0.2131, |
| "step": 12120 |
| }, |
| { |
| "epoch": 2.9230027714182434, |
| "grad_norm": 0.1657480952048383, |
| "learning_rate": 2.023638474433931e-08, |
| "loss": 0.2196, |
| "step": 12130 |
| }, |
| { |
| "epoch": 2.9254127003253405, |
| "grad_norm": 0.14299831047119618, |
| "learning_rate": 1.8995976502762526e-08, |
| "loss": 0.2139, |
| "step": 12140 |
| }, |
| { |
| "epoch": 2.9278226292324376, |
| "grad_norm": 0.1390493882381512, |
| "learning_rate": 1.779472374834612e-08, |
| "loss": 0.215, |
| "step": 12150 |
| }, |
| { |
| "epoch": 2.9302325581395348, |
| "grad_norm": 0.14273059733831805, |
| "learning_rate": 1.6632635924092587e-08, |
| "loss": 0.216, |
| "step": 12160 |
| }, |
| { |
| "epoch": 2.932642487046632, |
| "grad_norm": 0.14032313657944795, |
| "learning_rate": 1.5509722165131246e-08, |
| "loss": 0.215, |
| "step": 12170 |
| }, |
| { |
| "epoch": 2.9350524159537295, |
| "grad_norm": 0.14381047814039138, |
| "learning_rate": 1.4425991298645525e-08, |
| "loss": 0.2189, |
| "step": 12180 |
| }, |
| { |
| "epoch": 2.9374623448608266, |
| "grad_norm": 0.44016255112713637, |
| "learning_rate": 1.3381451843803572e-08, |
| "loss": 0.2142, |
| "step": 12190 |
| }, |
| { |
| "epoch": 2.9398722737679237, |
| "grad_norm": 0.14870582430227164, |
| "learning_rate": 1.2376112011691088e-08, |
| "loss": 0.2173, |
| "step": 12200 |
| }, |
| { |
| "epoch": 2.9422822026750213, |
| "grad_norm": 0.1474609049741924, |
| "learning_rate": 1.1409979705246932e-08, |
| "loss": 0.2162, |
| "step": 12210 |
| }, |
| { |
| "epoch": 2.9446921315821184, |
| "grad_norm": 0.1525187955812854, |
| "learning_rate": 1.0483062519200949e-08, |
| "loss": 0.2138, |
| "step": 12220 |
| }, |
| { |
| "epoch": 2.9471020604892155, |
| "grad_norm": 0.1399809610236953, |
| "learning_rate": 9.595367740014572e-09, |
| "loss": 0.2162, |
| "step": 12230 |
| }, |
| { |
| "epoch": 2.9495119893963127, |
| "grad_norm": 0.15187636342041655, |
| "learning_rate": 8.746902345824204e-09, |
| "loss": 0.2154, |
| "step": 12240 |
| }, |
| { |
| "epoch": 2.9519219183034098, |
| "grad_norm": 0.14279765689859514, |
| "learning_rate": 7.937673006384039e-09, |
| "loss": 0.2171, |
| "step": 12250 |
| }, |
| { |
| "epoch": 2.9543318472105073, |
| "grad_norm": 0.14857358256137754, |
| "learning_rate": 7.167686083015546e-09, |
| "loss": 0.2129, |
| "step": 12260 |
| }, |
| { |
| "epoch": 2.9567417761176045, |
| "grad_norm": 0.2990910657016441, |
| "learning_rate": 6.4369476285580656e-09, |
| "loss": 0.2143, |
| "step": 12270 |
| }, |
| { |
| "epoch": 2.959151705024702, |
| "grad_norm": 0.14282301773767403, |
| "learning_rate": 5.7454633873188505e-09, |
| "loss": 0.2157, |
| "step": 12280 |
| }, |
| { |
| "epoch": 2.961561633931799, |
| "grad_norm": 0.14403641334166958, |
| "learning_rate": 5.09323879503032e-09, |
| "loss": 0.2174, |
| "step": 12290 |
| }, |
| { |
| "epoch": 2.9639715628388963, |
| "grad_norm": 0.1508006924924947, |
| "learning_rate": 4.480278978804542e-09, |
| "loss": 0.2207, |
| "step": 12300 |
| }, |
| { |
| "epoch": 2.9663814917459934, |
| "grad_norm": 0.22678365631195488, |
| "learning_rate": 3.906588757097152e-09, |
| "loss": 0.2136, |
| "step": 12310 |
| }, |
| { |
| "epoch": 2.9687914206530905, |
| "grad_norm": 0.14213637464433346, |
| "learning_rate": 3.372172639664606e-09, |
| "loss": 0.215, |
| "step": 12320 |
| }, |
| { |
| "epoch": 2.971201349560188, |
| "grad_norm": 0.145959676319899, |
| "learning_rate": 2.877034827532543e-09, |
| "loss": 0.2153, |
| "step": 12330 |
| }, |
| { |
| "epoch": 2.9736112784672852, |
| "grad_norm": 0.14204231278835014, |
| "learning_rate": 2.4211792129608112e-09, |
| "loss": 0.2131, |
| "step": 12340 |
| }, |
| { |
| "epoch": 2.9760212073743824, |
| "grad_norm": 0.14406447672460498, |
| "learning_rate": 2.004609379413491e-09, |
| "loss": 0.2165, |
| "step": 12350 |
| }, |
| { |
| "epoch": 2.97843113628148, |
| "grad_norm": 0.14859502925256113, |
| "learning_rate": 1.6273286015305866e-09, |
| "loss": 0.2135, |
| "step": 12360 |
| }, |
| { |
| "epoch": 2.980841065188577, |
| "grad_norm": 0.20087252924258778, |
| "learning_rate": 1.2893398451024886e-09, |
| "loss": 0.2181, |
| "step": 12370 |
| }, |
| { |
| "epoch": 2.983250994095674, |
| "grad_norm": 0.1400785047430275, |
| "learning_rate": 9.906457670449953e-10, |
| "loss": 0.2103, |
| "step": 12380 |
| }, |
| { |
| "epoch": 2.9856609230027713, |
| "grad_norm": 0.13899773050364858, |
| "learning_rate": 7.312487153826597e-10, |
| "loss": 0.2149, |
| "step": 12390 |
| }, |
| { |
| "epoch": 2.9880708519098684, |
| "grad_norm": 0.14169054549532648, |
| "learning_rate": 5.111507292254736e-10, |
| "loss": 0.2156, |
| "step": 12400 |
| }, |
| { |
| "epoch": 2.990480780816966, |
| "grad_norm": 0.1412036059078898, |
| "learning_rate": 3.3035353875499056e-10, |
| "loss": 0.2121, |
| "step": 12410 |
| }, |
| { |
| "epoch": 2.992890709724063, |
| "grad_norm": 0.22298946462666086, |
| "learning_rate": 1.8885856521211333e-10, |
| "loss": 0.215, |
| "step": 12420 |
| }, |
| { |
| "epoch": 2.9953006386311603, |
| "grad_norm": 0.1487129856310578, |
| "learning_rate": 8.666692088266094e-11, |
| "loss": 0.2173, |
| "step": 12430 |
| }, |
| { |
| "epoch": 2.997710567538258, |
| "grad_norm": 1.0800677698904297, |
| "learning_rate": 2.377940909237264e-11, |
| "loss": 0.217, |
| "step": 12440 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.2807539396724425, |
| "learning_rate": 1.9652419636084773e-13, |
| "loss": 0.2181, |
| "step": 12450 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 12450, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 10000000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 8.93277535469568e+16, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|