{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.999574721442545, "eval_steps": 500, "global_step": 29390, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0017011142298205325, "grad_norm": 8.1242036819458, "learning_rate": 1.524500175587422e-07, "loss": 4.5114, "step": 10 }, { "epoch": 0.003402228459641065, "grad_norm": 9.024007797241211, "learning_rate": 3.049000351174844e-07, "loss": 4.2617, "step": 20 }, { "epoch": 0.0051033426894615975, "grad_norm": 7.401093006134033, "learning_rate": 4.5735005267622656e-07, "loss": 4.5417, "step": 30 }, { "epoch": 0.00680445691928213, "grad_norm": 8.14155101776123, "learning_rate": 6.098000702349687e-07, "loss": 4.5044, "step": 40 }, { "epoch": 0.008505571149102662, "grad_norm": 6.201820373535156, "learning_rate": 7.622500877937109e-07, "loss": 4.3812, "step": 50 }, { "epoch": 0.010206685378923195, "grad_norm": 8.932755470275879, "learning_rate": 9.147001053524531e-07, "loss": 4.2968, "step": 60 }, { "epoch": 0.011907799608743727, "grad_norm": 7.06637716293335, "learning_rate": 1.0671501229111953e-06, "loss": 4.1351, "step": 70 }, { "epoch": 0.01360891383856426, "grad_norm": 8.479144096374512, "learning_rate": 1.2196001404699375e-06, "loss": 4.1493, "step": 80 }, { "epoch": 0.015310028068384792, "grad_norm": 8.668302536010742, "learning_rate": 1.3720501580286797e-06, "loss": 3.9006, "step": 90 }, { "epoch": 0.017011142298205325, "grad_norm": 7.180272579193115, "learning_rate": 1.5245001755874219e-06, "loss": 3.932, "step": 100 }, { "epoch": 0.018712256528025856, "grad_norm": 8.624896049499512, "learning_rate": 1.6769501931461638e-06, "loss": 3.888, "step": 110 }, { "epoch": 0.02041337075784639, "grad_norm": 7.598726272583008, "learning_rate": 1.8294002107049062e-06, "loss": 3.7444, "step": 120 }, { "epoch": 0.02211448498766692, "grad_norm": 7.890979290008545, "learning_rate": 1.9818502282636482e-06, "loss": 3.7337, "step": 130 }, { "epoch": 0.023815599217487455, "grad_norm": 6.916543960571289, "learning_rate": 2.1343002458223906e-06, "loss": 3.4046, "step": 140 }, { "epoch": 0.025516713447307986, "grad_norm": 8.39304256439209, "learning_rate": 2.2867502633811326e-06, "loss": 3.3585, "step": 150 }, { "epoch": 0.02721782767712852, "grad_norm": 10.390460968017578, "learning_rate": 2.439200280939875e-06, "loss": 3.1896, "step": 160 }, { "epoch": 0.02891894190694905, "grad_norm": 6.750011920928955, "learning_rate": 2.591650298498617e-06, "loss": 3.0233, "step": 170 }, { "epoch": 0.030620056136769585, "grad_norm": 7.048929691314697, "learning_rate": 2.7441003160573594e-06, "loss": 3.0611, "step": 180 }, { "epoch": 0.032321170366590116, "grad_norm": 5.937943458557129, "learning_rate": 2.8965503336161013e-06, "loss": 2.8964, "step": 190 }, { "epoch": 0.03402228459641065, "grad_norm": 4.653339862823486, "learning_rate": 3.0490003511748437e-06, "loss": 2.7357, "step": 200 }, { "epoch": 0.035723398826231184, "grad_norm": 2.970280885696411, "learning_rate": 3.2014503687335857e-06, "loss": 2.5882, "step": 210 }, { "epoch": 0.03742451305605171, "grad_norm": 2.7442290782928467, "learning_rate": 3.3539003862923277e-06, "loss": 2.6236, "step": 220 }, { "epoch": 0.039125627285872246, "grad_norm": 2.7237141132354736, "learning_rate": 3.50635040385107e-06, "loss": 2.6678, "step": 230 }, { "epoch": 0.04082674151569278, "grad_norm": 2.7791314125061035, "learning_rate": 3.6588004214098125e-06, "loss": 2.6291, "step": 240 }, { "epoch": 0.042527855745513314, "grad_norm": 2.6336171627044678, "learning_rate": 3.8112504389685545e-06, "loss": 2.5533, "step": 250 }, { "epoch": 0.04422896997533384, "grad_norm": 2.9241085052490234, "learning_rate": 3.9637004565272964e-06, "loss": 2.4647, "step": 260 }, { "epoch": 0.045930084205154376, "grad_norm": 3.389188289642334, "learning_rate": 4.116150474086039e-06, "loss": 2.414, "step": 270 }, { "epoch": 0.04763119843497491, "grad_norm": 3.224665641784668, "learning_rate": 4.268600491644781e-06, "loss": 2.472, "step": 280 }, { "epoch": 0.049332312664795444, "grad_norm": 3.153616189956665, "learning_rate": 4.421050509203523e-06, "loss": 2.4434, "step": 290 }, { "epoch": 0.05103342689461597, "grad_norm": 3.551027774810791, "learning_rate": 4.573500526762265e-06, "loss": 2.4699, "step": 300 }, { "epoch": 0.052734541124436506, "grad_norm": 3.2293620109558105, "learning_rate": 4.725950544321008e-06, "loss": 2.3638, "step": 310 }, { "epoch": 0.05443565535425704, "grad_norm": 2.619767665863037, "learning_rate": 4.87840056187975e-06, "loss": 2.3004, "step": 320 }, { "epoch": 0.056136769584077574, "grad_norm": 3.1208813190460205, "learning_rate": 5.030850579438492e-06, "loss": 2.2602, "step": 330 }, { "epoch": 0.0578378838138981, "grad_norm": 3.030879259109497, "learning_rate": 5.183300596997234e-06, "loss": 2.2759, "step": 340 }, { "epoch": 0.059538998043718636, "grad_norm": 3.025357723236084, "learning_rate": 5.335750614555976e-06, "loss": 2.3363, "step": 350 }, { "epoch": 0.06124011227353917, "grad_norm": 2.8321595191955566, "learning_rate": 5.488200632114719e-06, "loss": 2.3441, "step": 360 }, { "epoch": 0.0629412265033597, "grad_norm": 2.955768346786499, "learning_rate": 5.640650649673461e-06, "loss": 2.3207, "step": 370 }, { "epoch": 0.06464234073318023, "grad_norm": 2.877669095993042, "learning_rate": 5.793100667232203e-06, "loss": 2.26, "step": 380 }, { "epoch": 0.06634345496300076, "grad_norm": 2.615410566329956, "learning_rate": 5.945550684790945e-06, "loss": 2.2476, "step": 390 }, { "epoch": 0.0680445691928213, "grad_norm": 2.787527322769165, "learning_rate": 6.0980007023496875e-06, "loss": 2.205, "step": 400 }, { "epoch": 0.06974568342264183, "grad_norm": 2.9535741806030273, "learning_rate": 6.2504507199084295e-06, "loss": 2.2943, "step": 410 }, { "epoch": 0.07144679765246237, "grad_norm": 3.091892719268799, "learning_rate": 6.4029007374671714e-06, "loss": 2.3296, "step": 420 }, { "epoch": 0.0731479118822829, "grad_norm": 3.5832443237304688, "learning_rate": 6.555350755025914e-06, "loss": 2.2661, "step": 430 }, { "epoch": 0.07484902611210342, "grad_norm": 3.0930492877960205, "learning_rate": 6.707800772584655e-06, "loss": 2.2862, "step": 440 }, { "epoch": 0.07655014034192396, "grad_norm": 3.2023990154266357, "learning_rate": 6.860250790143398e-06, "loss": 2.301, "step": 450 }, { "epoch": 0.07825125457174449, "grad_norm": 3.0322978496551514, "learning_rate": 7.01270080770214e-06, "loss": 2.1735, "step": 460 }, { "epoch": 0.07995236880156502, "grad_norm": 3.60652232170105, "learning_rate": 7.165150825260882e-06, "loss": 2.2559, "step": 470 }, { "epoch": 0.08165348303138556, "grad_norm": 2.929839849472046, "learning_rate": 7.317600842819625e-06, "loss": 2.2043, "step": 480 }, { "epoch": 0.08335459726120609, "grad_norm": 3.3258023262023926, "learning_rate": 7.470050860378367e-06, "loss": 2.2505, "step": 490 }, { "epoch": 0.08505571149102663, "grad_norm": 3.416071653366089, "learning_rate": 7.622500877937109e-06, "loss": 2.1877, "step": 500 }, { "epoch": 0.08675682572084716, "grad_norm": 2.9309732913970947, "learning_rate": 7.77495089549585e-06, "loss": 2.2404, "step": 510 }, { "epoch": 0.08845793995066768, "grad_norm": 3.613666534423828, "learning_rate": 7.927400913054593e-06, "loss": 2.2869, "step": 520 }, { "epoch": 0.09015905418048822, "grad_norm": 4.01043176651001, "learning_rate": 8.079850930613336e-06, "loss": 2.2802, "step": 530 }, { "epoch": 0.09186016841030875, "grad_norm": 3.47981333732605, "learning_rate": 8.232300948172079e-06, "loss": 2.2599, "step": 540 }, { "epoch": 0.09356128264012928, "grad_norm": 2.9870450496673584, "learning_rate": 8.38475096573082e-06, "loss": 2.239, "step": 550 }, { "epoch": 0.09526239686994982, "grad_norm": 2.7835795879364014, "learning_rate": 8.537200983289562e-06, "loss": 2.2282, "step": 560 }, { "epoch": 0.09696351109977035, "grad_norm": 3.1672210693359375, "learning_rate": 8.689651000848305e-06, "loss": 2.2323, "step": 570 }, { "epoch": 0.09866462532959089, "grad_norm": 3.8526771068573, "learning_rate": 8.842101018407046e-06, "loss": 2.1846, "step": 580 }, { "epoch": 0.10036573955941142, "grad_norm": 3.2321290969848633, "learning_rate": 8.99455103596579e-06, "loss": 2.217, "step": 590 }, { "epoch": 0.10206685378923194, "grad_norm": 3.663644790649414, "learning_rate": 9.14700105352453e-06, "loss": 2.2593, "step": 600 }, { "epoch": 0.10376796801905248, "grad_norm": 3.2919960021972656, "learning_rate": 9.299451071083273e-06, "loss": 2.1836, "step": 610 }, { "epoch": 0.10546908224887301, "grad_norm": 3.550652503967285, "learning_rate": 9.451901088642016e-06, "loss": 2.2548, "step": 620 }, { "epoch": 0.10717019647869354, "grad_norm": 3.35884690284729, "learning_rate": 9.604351106200757e-06, "loss": 2.2009, "step": 630 }, { "epoch": 0.10887131070851408, "grad_norm": 3.3775312900543213, "learning_rate": 9.7568011237595e-06, "loss": 2.2623, "step": 640 }, { "epoch": 0.11057242493833461, "grad_norm": 3.32918381690979, "learning_rate": 9.909251141318241e-06, "loss": 2.1882, "step": 650 }, { "epoch": 0.11227353916815515, "grad_norm": 3.5951883792877197, "learning_rate": 1.0061701158876984e-05, "loss": 2.1755, "step": 660 }, { "epoch": 0.11397465339797568, "grad_norm": 3.7706947326660156, "learning_rate": 1.0214151176435727e-05, "loss": 2.1684, "step": 670 }, { "epoch": 0.1156757676277962, "grad_norm": 3.506378412246704, "learning_rate": 1.0366601193994468e-05, "loss": 2.1198, "step": 680 }, { "epoch": 0.11737688185761674, "grad_norm": 3.8458452224731445, "learning_rate": 1.051905121155321e-05, "loss": 2.2457, "step": 690 }, { "epoch": 0.11907799608743727, "grad_norm": 3.462367296218872, "learning_rate": 1.0671501229111952e-05, "loss": 2.1286, "step": 700 }, { "epoch": 0.1207791103172578, "grad_norm": 3.5375754833221436, "learning_rate": 1.0823951246670695e-05, "loss": 2.1568, "step": 710 }, { "epoch": 0.12248022454707834, "grad_norm": 4.020713806152344, "learning_rate": 1.0976401264229437e-05, "loss": 2.1467, "step": 720 }, { "epoch": 0.12418133877689887, "grad_norm": 3.4603312015533447, "learning_rate": 1.1128851281788179e-05, "loss": 2.1809, "step": 730 }, { "epoch": 0.1258824530067194, "grad_norm": 3.5019948482513428, "learning_rate": 1.1281301299346921e-05, "loss": 2.1061, "step": 740 }, { "epoch": 0.12758356723653994, "grad_norm": 3.1017353534698486, "learning_rate": 1.1433751316905663e-05, "loss": 2.2428, "step": 750 }, { "epoch": 0.12928468146636046, "grad_norm": 3.2938976287841797, "learning_rate": 1.1586201334464405e-05, "loss": 2.2237, "step": 760 }, { "epoch": 0.130985795696181, "grad_norm": 3.668274164199829, "learning_rate": 1.1738651352023148e-05, "loss": 2.1496, "step": 770 }, { "epoch": 0.13268690992600152, "grad_norm": 3.8302762508392334, "learning_rate": 1.189110136958189e-05, "loss": 2.2023, "step": 780 }, { "epoch": 0.13438802415582207, "grad_norm": 3.526839017868042, "learning_rate": 1.2043551387140632e-05, "loss": 2.1561, "step": 790 }, { "epoch": 0.1360891383856426, "grad_norm": 2.987562417984009, "learning_rate": 1.2196001404699375e-05, "loss": 2.1617, "step": 800 }, { "epoch": 0.13779025261546313, "grad_norm": 3.2640066146850586, "learning_rate": 1.2348451422258116e-05, "loss": 2.082, "step": 810 }, { "epoch": 0.13949136684528365, "grad_norm": 3.5792763233184814, "learning_rate": 1.2500901439816859e-05, "loss": 2.1362, "step": 820 }, { "epoch": 0.14119248107510418, "grad_norm": 3.7467305660247803, "learning_rate": 1.26533514573756e-05, "loss": 2.1767, "step": 830 }, { "epoch": 0.14289359530492474, "grad_norm": 3.8871824741363525, "learning_rate": 1.2805801474934343e-05, "loss": 2.1361, "step": 840 }, { "epoch": 0.14459470953474526, "grad_norm": 4.198888301849365, "learning_rate": 1.2958251492493086e-05, "loss": 2.1379, "step": 850 }, { "epoch": 0.1462958237645658, "grad_norm": 3.3561158180236816, "learning_rate": 1.3110701510051829e-05, "loss": 2.0982, "step": 860 }, { "epoch": 0.14799693799438632, "grad_norm": 3.4034993648529053, "learning_rate": 1.326315152761057e-05, "loss": 2.1312, "step": 870 }, { "epoch": 0.14969805222420685, "grad_norm": 3.768439531326294, "learning_rate": 1.341560154516931e-05, "loss": 2.0818, "step": 880 }, { "epoch": 0.1513991664540274, "grad_norm": 4.092297554016113, "learning_rate": 1.3446088936024867e-05, "loss": 2.1015, "step": 890 }, { "epoch": 0.15310028068384793, "grad_norm": 3.9114010334014893, "learning_rate": 1.344607832211256e-05, "loss": 2.1479, "step": 900 }, { "epoch": 0.15480139491366846, "grad_norm": 3.8456695079803467, "learning_rate": 1.344605954366601e-05, "loss": 2.107, "step": 910 }, { "epoch": 0.15650250914348898, "grad_norm": 3.279608726501465, "learning_rate": 1.3446032600708018e-05, "loss": 2.1185, "step": 920 }, { "epoch": 0.1582036233733095, "grad_norm": 4.147802829742432, "learning_rate": 1.3445997493271305e-05, "loss": 2.1371, "step": 930 }, { "epoch": 0.15990473760313004, "grad_norm": 3.8674826622009277, "learning_rate": 1.3445954221398507e-05, "loss": 2.204, "step": 940 }, { "epoch": 0.1616058518329506, "grad_norm": 3.8814809322357178, "learning_rate": 1.3445902785142174e-05, "loss": 2.1347, "step": 950 }, { "epoch": 0.16330696606277112, "grad_norm": 3.7449662685394287, "learning_rate": 1.344584318456477e-05, "loss": 2.0845, "step": 960 }, { "epoch": 0.16500808029259165, "grad_norm": 4.558581829071045, "learning_rate": 1.3445775419738675e-05, "loss": 2.0412, "step": 970 }, { "epoch": 0.16670919452241217, "grad_norm": 3.170236825942993, "learning_rate": 1.3445699490746183e-05, "loss": 2.1486, "step": 980 }, { "epoch": 0.1684103087522327, "grad_norm": 3.613136053085327, "learning_rate": 1.3445615397679502e-05, "loss": 2.0369, "step": 990 }, { "epoch": 0.17011142298205326, "grad_norm": 3.954437017440796, "learning_rate": 1.3445523140640761e-05, "loss": 2.0846, "step": 1000 }, { "epoch": 0.17181253721187378, "grad_norm": 3.777693510055542, "learning_rate": 1.3445422719741992e-05, "loss": 2.1255, "step": 1010 }, { "epoch": 0.1735136514416943, "grad_norm": 3.3416199684143066, "learning_rate": 1.344531413510515e-05, "loss": 2.1268, "step": 1020 }, { "epoch": 0.17521476567151484, "grad_norm": 3.943105459213257, "learning_rate": 1.3445197386862098e-05, "loss": 2.0934, "step": 1030 }, { "epoch": 0.17691587990133537, "grad_norm": 4.644258499145508, "learning_rate": 1.3445072475154622e-05, "loss": 2.0692, "step": 1040 }, { "epoch": 0.17861699413115592, "grad_norm": 3.2973544597625732, "learning_rate": 1.3444939400134416e-05, "loss": 2.1838, "step": 1050 }, { "epoch": 0.18031810836097645, "grad_norm": 3.7182607650756836, "learning_rate": 1.3444798161963082e-05, "loss": 2.0951, "step": 1060 }, { "epoch": 0.18201922259079698, "grad_norm": 3.6397881507873535, "learning_rate": 1.3444648760812149e-05, "loss": 2.1331, "step": 1070 }, { "epoch": 0.1837203368206175, "grad_norm": 4.1129021644592285, "learning_rate": 1.3444491196863046e-05, "loss": 2.0526, "step": 1080 }, { "epoch": 0.18542145105043803, "grad_norm": 4.195235729217529, "learning_rate": 1.3444325470307122e-05, "loss": 2.1748, "step": 1090 }, { "epoch": 0.18712256528025856, "grad_norm": 3.746614694595337, "learning_rate": 1.344415158134564e-05, "loss": 2.1016, "step": 1100 }, { "epoch": 0.1888236795100791, "grad_norm": 3.955183982849121, "learning_rate": 1.3443969530189769e-05, "loss": 2.1415, "step": 1110 }, { "epoch": 0.19052479373989964, "grad_norm": 4.204941272735596, "learning_rate": 1.3443779317060598e-05, "loss": 2.1487, "step": 1120 }, { "epoch": 0.19222590796972017, "grad_norm": 3.8602209091186523, "learning_rate": 1.344358094218912e-05, "loss": 2.0946, "step": 1130 }, { "epoch": 0.1939270221995407, "grad_norm": 3.87605881690979, "learning_rate": 1.3443374405816248e-05, "loss": 2.1114, "step": 1140 }, { "epoch": 0.19562813642936122, "grad_norm": 4.242985725402832, "learning_rate": 1.3443159708192799e-05, "loss": 2.1148, "step": 1150 }, { "epoch": 0.19732925065918178, "grad_norm": 3.853022575378418, "learning_rate": 1.3442936849579506e-05, "loss": 2.0633, "step": 1160 }, { "epoch": 0.1990303648890023, "grad_norm": 4.261999130249023, "learning_rate": 1.344270583024701e-05, "loss": 2.0983, "step": 1170 }, { "epoch": 0.20073147911882283, "grad_norm": 3.767235279083252, "learning_rate": 1.3442466650475868e-05, "loss": 2.1592, "step": 1180 }, { "epoch": 0.20243259334864336, "grad_norm": 4.7613205909729, "learning_rate": 1.3442219310556538e-05, "loss": 2.089, "step": 1190 }, { "epoch": 0.20413370757846389, "grad_norm": 3.825859308242798, "learning_rate": 1.3441963810789397e-05, "loss": 2.1249, "step": 1200 }, { "epoch": 0.2058348218082844, "grad_norm": 3.648253917694092, "learning_rate": 1.3441700151484721e-05, "loss": 2.0832, "step": 1210 }, { "epoch": 0.20753593603810497, "grad_norm": 3.996825695037842, "learning_rate": 1.3441428332962705e-05, "loss": 2.1009, "step": 1220 }, { "epoch": 0.2092370502679255, "grad_norm": 3.444505214691162, "learning_rate": 1.344114835555345e-05, "loss": 2.1229, "step": 1230 }, { "epoch": 0.21093816449774602, "grad_norm": 3.9788694381713867, "learning_rate": 1.3440860219596964e-05, "loss": 2.0401, "step": 1240 }, { "epoch": 0.21263927872756655, "grad_norm": 3.823482036590576, "learning_rate": 1.3440563925443158e-05, "loss": 2.0676, "step": 1250 }, { "epoch": 0.21434039295738708, "grad_norm": 3.833479166030884, "learning_rate": 1.344025947345186e-05, "loss": 2.138, "step": 1260 }, { "epoch": 0.21604150718720763, "grad_norm": 3.986558675765991, "learning_rate": 1.3439946863992799e-05, "loss": 2.0968, "step": 1270 }, { "epoch": 0.21774262141702816, "grad_norm": 3.4136123657226562, "learning_rate": 1.3439626097445615e-05, "loss": 2.0349, "step": 1280 }, { "epoch": 0.2194437356468487, "grad_norm": 4.327932357788086, "learning_rate": 1.3439297174199843e-05, "loss": 2.0767, "step": 1290 }, { "epoch": 0.22114484987666921, "grad_norm": 4.868990898132324, "learning_rate": 1.3438960094654938e-05, "loss": 2.1006, "step": 1300 }, { "epoch": 0.22284596410648974, "grad_norm": 4.4454498291015625, "learning_rate": 1.3438614859220252e-05, "loss": 2.0865, "step": 1310 }, { "epoch": 0.2245470783363103, "grad_norm": 4.084188461303711, "learning_rate": 1.3438261468315045e-05, "loss": 2.1575, "step": 1320 }, { "epoch": 0.22624819256613082, "grad_norm": 3.5140950679779053, "learning_rate": 1.3437899922368474e-05, "loss": 2.112, "step": 1330 }, { "epoch": 0.22794930679595135, "grad_norm": 4.531380653381348, "learning_rate": 1.3437530221819608e-05, "loss": 2.0994, "step": 1340 }, { "epoch": 0.22965042102577188, "grad_norm": 4.098215103149414, "learning_rate": 1.343715236711742e-05, "loss": 2.1288, "step": 1350 }, { "epoch": 0.2313515352555924, "grad_norm": 4.336173057556152, "learning_rate": 1.3436766358720774e-05, "loss": 2.0744, "step": 1360 }, { "epoch": 0.23305264948541293, "grad_norm": 4.105205535888672, "learning_rate": 1.3436372197098449e-05, "loss": 2.2001, "step": 1370 }, { "epoch": 0.2347537637152335, "grad_norm": 4.088197708129883, "learning_rate": 1.3435969882729115e-05, "loss": 2.1039, "step": 1380 }, { "epoch": 0.23645487794505402, "grad_norm": 4.016329765319824, "learning_rate": 1.343555941610135e-05, "loss": 2.1743, "step": 1390 }, { "epoch": 0.23815599217487454, "grad_norm": 3.8832547664642334, "learning_rate": 1.3435140797713633e-05, "loss": 2.0029, "step": 1400 }, { "epoch": 0.23985710640469507, "grad_norm": 3.58616304397583, "learning_rate": 1.3434714028074338e-05, "loss": 2.0792, "step": 1410 }, { "epoch": 0.2415582206345156, "grad_norm": 3.839970827102661, "learning_rate": 1.3434279107701734e-05, "loss": 2.1073, "step": 1420 }, { "epoch": 0.24325933486433615, "grad_norm": 4.0038323402404785, "learning_rate": 1.3433836037123999e-05, "loss": 2.1397, "step": 1430 }, { "epoch": 0.24496044909415668, "grad_norm": 4.53272819519043, "learning_rate": 1.3433384816879202e-05, "loss": 2.155, "step": 1440 }, { "epoch": 0.2466615633239772, "grad_norm": 3.404862642288208, "learning_rate": 1.3432925447515309e-05, "loss": 2.0471, "step": 1450 }, { "epoch": 0.24836267755379773, "grad_norm": 3.97717547416687, "learning_rate": 1.3432457929590187e-05, "loss": 2.1222, "step": 1460 }, { "epoch": 0.25006379178361826, "grad_norm": 4.303969383239746, "learning_rate": 1.3431982263671594e-05, "loss": 2.0641, "step": 1470 }, { "epoch": 0.2517649060134388, "grad_norm": 3.92305850982666, "learning_rate": 1.3431498450337182e-05, "loss": 2.0516, "step": 1480 }, { "epoch": 0.2534660202432593, "grad_norm": 3.997673988342285, "learning_rate": 1.3431006490174504e-05, "loss": 2.0984, "step": 1490 }, { "epoch": 0.25516713447307987, "grad_norm": 4.106938362121582, "learning_rate": 1.3430506383781e-05, "loss": 2.0811, "step": 1500 }, { "epoch": 0.2568682487029004, "grad_norm": 4.152285575866699, "learning_rate": 1.3429998131764008e-05, "loss": 2.0867, "step": 1510 }, { "epoch": 0.2585693629327209, "grad_norm": 3.77607798576355, "learning_rate": 1.3429481734740751e-05, "loss": 2.0138, "step": 1520 }, { "epoch": 0.2602704771625415, "grad_norm": 3.8017027378082275, "learning_rate": 1.3428957193338354e-05, "loss": 2.1102, "step": 1530 }, { "epoch": 0.261971591392362, "grad_norm": 4.08286190032959, "learning_rate": 1.3428424508193823e-05, "loss": 2.1213, "step": 1540 }, { "epoch": 0.26367270562218253, "grad_norm": 4.082574367523193, "learning_rate": 1.3427883679954056e-05, "loss": 1.9399, "step": 1550 }, { "epoch": 0.26537381985200303, "grad_norm": 4.214354991912842, "learning_rate": 1.3427334709275845e-05, "loss": 2.1207, "step": 1560 }, { "epoch": 0.2670749340818236, "grad_norm": 3.9730758666992188, "learning_rate": 1.3426777596825863e-05, "loss": 2.1379, "step": 1570 }, { "epoch": 0.26877604831164414, "grad_norm": 3.152505397796631, "learning_rate": 1.3426212343280678e-05, "loss": 2.1091, "step": 1580 }, { "epoch": 0.27047716254146464, "grad_norm": 3.7805302143096924, "learning_rate": 1.342563894932674e-05, "loss": 2.0486, "step": 1590 }, { "epoch": 0.2721782767712852, "grad_norm": 3.5900185108184814, "learning_rate": 1.3425057415660384e-05, "loss": 2.0382, "step": 1600 }, { "epoch": 0.2738793910011057, "grad_norm": 3.6163578033447266, "learning_rate": 1.3424467742987833e-05, "loss": 2.052, "step": 1610 }, { "epoch": 0.27558050523092625, "grad_norm": 4.680826187133789, "learning_rate": 1.3423869932025193e-05, "loss": 2.0847, "step": 1620 }, { "epoch": 0.2772816194607468, "grad_norm": 4.031846046447754, "learning_rate": 1.3423263983498454e-05, "loss": 2.0639, "step": 1630 }, { "epoch": 0.2789827336905673, "grad_norm": 3.679631471633911, "learning_rate": 1.3422649898143484e-05, "loss": 1.9937, "step": 1640 }, { "epoch": 0.28068384792038786, "grad_norm": 4.050079345703125, "learning_rate": 1.3422027676706038e-05, "loss": 2.0676, "step": 1650 }, { "epoch": 0.28238496215020836, "grad_norm": 4.256223678588867, "learning_rate": 1.342139731994175e-05, "loss": 2.0506, "step": 1660 }, { "epoch": 0.2840860763800289, "grad_norm": 4.579814434051514, "learning_rate": 1.3420758828616134e-05, "loss": 2.0044, "step": 1670 }, { "epoch": 0.2857871906098495, "grad_norm": 4.356644153594971, "learning_rate": 1.342011220350458e-05, "loss": 2.0261, "step": 1680 }, { "epoch": 0.28748830483967, "grad_norm": 3.2434017658233643, "learning_rate": 1.3419457445392356e-05, "loss": 2.045, "step": 1690 }, { "epoch": 0.2891894190694905, "grad_norm": 4.5175323486328125, "learning_rate": 1.3418794555074613e-05, "loss": 1.9873, "step": 1700 }, { "epoch": 0.290890533299311, "grad_norm": 3.9216480255126953, "learning_rate": 1.3418123533356368e-05, "loss": 2.0781, "step": 1710 }, { "epoch": 0.2925916475291316, "grad_norm": 4.742781162261963, "learning_rate": 1.3417444381052523e-05, "loss": 2.028, "step": 1720 }, { "epoch": 0.29429276175895214, "grad_norm": 3.6140594482421875, "learning_rate": 1.3416757098987845e-05, "loss": 2.1452, "step": 1730 }, { "epoch": 0.29599387598877264, "grad_norm": 4.244185447692871, "learning_rate": 1.3416061687996979e-05, "loss": 2.0183, "step": 1740 }, { "epoch": 0.2976949902185932, "grad_norm": 4.613598823547363, "learning_rate": 1.3415358148924444e-05, "loss": 2.0209, "step": 1750 }, { "epoch": 0.2993961044484137, "grad_norm": 3.735178232192993, "learning_rate": 1.3414646482624621e-05, "loss": 2.1145, "step": 1760 }, { "epoch": 0.30109721867823425, "grad_norm": 3.9842472076416016, "learning_rate": 1.341392668996177e-05, "loss": 1.9653, "step": 1770 }, { "epoch": 0.3027983329080548, "grad_norm": 4.243276119232178, "learning_rate": 1.3413198771810016e-05, "loss": 2.057, "step": 1780 }, { "epoch": 0.3044994471378753, "grad_norm": 4.055215358734131, "learning_rate": 1.3412462729053352e-05, "loss": 2.1238, "step": 1790 }, { "epoch": 0.30620056136769586, "grad_norm": 3.9792640209198, "learning_rate": 1.3411718562585636e-05, "loss": 2.0256, "step": 1800 }, { "epoch": 0.30790167559751636, "grad_norm": 4.385422229766846, "learning_rate": 1.3410966273310594e-05, "loss": 2.0131, "step": 1810 }, { "epoch": 0.3096027898273369, "grad_norm": 4.1103291511535645, "learning_rate": 1.3410205862141814e-05, "loss": 2.0393, "step": 1820 }, { "epoch": 0.31130390405715747, "grad_norm": 3.9303791522979736, "learning_rate": 1.340943733000275e-05, "loss": 2.0703, "step": 1830 }, { "epoch": 0.31300501828697797, "grad_norm": 4.296981334686279, "learning_rate": 1.3408660677826718e-05, "loss": 2.0678, "step": 1840 }, { "epoch": 0.3147061325167985, "grad_norm": 3.9926674365997314, "learning_rate": 1.3407875906556892e-05, "loss": 2.0054, "step": 1850 }, { "epoch": 0.316407246746619, "grad_norm": 4.080948829650879, "learning_rate": 1.340708301714631e-05, "loss": 2.07, "step": 1860 }, { "epoch": 0.3181083609764396, "grad_norm": 4.116609573364258, "learning_rate": 1.3406282010557866e-05, "loss": 2.0026, "step": 1870 }, { "epoch": 0.3198094752062601, "grad_norm": 4.65035343170166, "learning_rate": 1.3405472887764311e-05, "loss": 2.0698, "step": 1880 }, { "epoch": 0.32151058943608063, "grad_norm": 3.91961407661438, "learning_rate": 1.3404655649748256e-05, "loss": 1.9695, "step": 1890 }, { "epoch": 0.3232117036659012, "grad_norm": 3.994669198989868, "learning_rate": 1.3403830297502163e-05, "loss": 2.004, "step": 1900 }, { "epoch": 0.3249128178957217, "grad_norm": 3.5440726280212402, "learning_rate": 1.340299683202835e-05, "loss": 2.0409, "step": 1910 }, { "epoch": 0.32661393212554224, "grad_norm": 4.20074462890625, "learning_rate": 1.3402155254338988e-05, "loss": 2.0632, "step": 1920 }, { "epoch": 0.32831504635536274, "grad_norm": 4.328433990478516, "learning_rate": 1.3401305565456102e-05, "loss": 1.9827, "step": 1930 }, { "epoch": 0.3300161605851833, "grad_norm": 3.7840631008148193, "learning_rate": 1.3400447766411563e-05, "loss": 2.1697, "step": 1940 }, { "epoch": 0.33171727481500385, "grad_norm": 4.054955005645752, "learning_rate": 1.3399581858247094e-05, "loss": 2.0077, "step": 1950 }, { "epoch": 0.33341838904482435, "grad_norm": 4.07275390625, "learning_rate": 1.339870784201426e-05, "loss": 2.011, "step": 1960 }, { "epoch": 0.3351195032746449, "grad_norm": 3.921703815460205, "learning_rate": 1.339782571877448e-05, "loss": 2.1012, "step": 1970 }, { "epoch": 0.3368206175044654, "grad_norm": 3.991225481033325, "learning_rate": 1.3396935489599019e-05, "loss": 2.0105, "step": 1980 }, { "epoch": 0.33852173173428596, "grad_norm": 4.40214204788208, "learning_rate": 1.3396037155568978e-05, "loss": 2.0321, "step": 1990 }, { "epoch": 0.3402228459641065, "grad_norm": 4.139034748077393, "learning_rate": 1.3395130717775306e-05, "loss": 2.1542, "step": 2000 }, { "epoch": 0.341923960193927, "grad_norm": 4.642544746398926, "learning_rate": 1.3394216177318792e-05, "loss": 2.0085, "step": 2010 }, { "epoch": 0.34362507442374757, "grad_norm": 4.030486583709717, "learning_rate": 1.3393293535310068e-05, "loss": 2.1126, "step": 2020 }, { "epoch": 0.34532618865356807, "grad_norm": 4.032608985900879, "learning_rate": 1.3392362792869597e-05, "loss": 2.0684, "step": 2030 }, { "epoch": 0.3470273028833886, "grad_norm": 4.151008605957031, "learning_rate": 1.3391423951127687e-05, "loss": 2.0391, "step": 2040 }, { "epoch": 0.3487284171132092, "grad_norm": 4.639594554901123, "learning_rate": 1.3390477011224483e-05, "loss": 2.0073, "step": 2050 }, { "epoch": 0.3504295313430297, "grad_norm": 4.20526123046875, "learning_rate": 1.3389521974309951e-05, "loss": 2.0737, "step": 2060 }, { "epoch": 0.35213064557285023, "grad_norm": 3.4363558292388916, "learning_rate": 1.3388558841543909e-05, "loss": 2.0779, "step": 2070 }, { "epoch": 0.35383175980267073, "grad_norm": 3.939847946166992, "learning_rate": 1.3387587614095993e-05, "loss": 2.0052, "step": 2080 }, { "epoch": 0.3555328740324913, "grad_norm": 3.8116230964660645, "learning_rate": 1.3386608293145675e-05, "loss": 2.0368, "step": 2090 }, { "epoch": 0.35723398826231184, "grad_norm": 3.318373680114746, "learning_rate": 1.3385620879882251e-05, "loss": 2.083, "step": 2100 }, { "epoch": 0.35893510249213234, "grad_norm": 3.8586812019348145, "learning_rate": 1.3384625375504853e-05, "loss": 2.0442, "step": 2110 }, { "epoch": 0.3606362167219529, "grad_norm": 3.684936046600342, "learning_rate": 1.3383621781222432e-05, "loss": 2.0002, "step": 2120 }, { "epoch": 0.3623373309517734, "grad_norm": 4.2225213050842285, "learning_rate": 1.3382610098253764e-05, "loss": 1.9229, "step": 2130 }, { "epoch": 0.36403844518159395, "grad_norm": 4.452572345733643, "learning_rate": 1.3381590327827451e-05, "loss": 2.0504, "step": 2140 }, { "epoch": 0.36573955941141445, "grad_norm": 4.429842472076416, "learning_rate": 1.3380562471181914e-05, "loss": 2.0126, "step": 2150 }, { "epoch": 0.367440673641235, "grad_norm": 4.385965347290039, "learning_rate": 1.3379526529565396e-05, "loss": 2.0278, "step": 2160 }, { "epoch": 0.36914178787105556, "grad_norm": 3.4593615531921387, "learning_rate": 1.3378482504235955e-05, "loss": 1.9504, "step": 2170 }, { "epoch": 0.37084290210087606, "grad_norm": 3.8733391761779785, "learning_rate": 1.3377430396461471e-05, "loss": 2.0475, "step": 2180 }, { "epoch": 0.3725440163306966, "grad_norm": 3.7339272499084473, "learning_rate": 1.3376370207519636e-05, "loss": 1.9485, "step": 2190 }, { "epoch": 0.3742451305605171, "grad_norm": 3.994168519973755, "learning_rate": 1.3375301938697957e-05, "loss": 1.9755, "step": 2200 }, { "epoch": 0.37594624479033767, "grad_norm": 3.77097749710083, "learning_rate": 1.3374225591293751e-05, "loss": 1.9572, "step": 2210 }, { "epoch": 0.3776473590201582, "grad_norm": 3.773251533508301, "learning_rate": 1.3373141166614147e-05, "loss": 2.1328, "step": 2220 }, { "epoch": 0.3793484732499787, "grad_norm": 4.05300235748291, "learning_rate": 1.3372048665976088e-05, "loss": 2.0099, "step": 2230 }, { "epoch": 0.3810495874797993, "grad_norm": 4.33394193649292, "learning_rate": 1.3370948090706318e-05, "loss": 2.0302, "step": 2240 }, { "epoch": 0.3827507017096198, "grad_norm": 3.647555351257324, "learning_rate": 1.3369839442141388e-05, "loss": 2.0583, "step": 2250 }, { "epoch": 0.38445181593944033, "grad_norm": 3.822615385055542, "learning_rate": 1.3368722721627658e-05, "loss": 2.0677, "step": 2260 }, { "epoch": 0.3861529301692609, "grad_norm": 3.414644956588745, "learning_rate": 1.336759793052128e-05, "loss": 1.9975, "step": 2270 }, { "epoch": 0.3878540443990814, "grad_norm": 3.7595229148864746, "learning_rate": 1.3366465070188226e-05, "loss": 1.9567, "step": 2280 }, { "epoch": 0.38955515862890194, "grad_norm": 4.140371799468994, "learning_rate": 1.3365324142004245e-05, "loss": 1.971, "step": 2290 }, { "epoch": 0.39125627285872244, "grad_norm": 3.429995059967041, "learning_rate": 1.33641751473549e-05, "loss": 2.0038, "step": 2300 }, { "epoch": 0.392957387088543, "grad_norm": 3.883551836013794, "learning_rate": 1.3363018087635543e-05, "loss": 2.0446, "step": 2310 }, { "epoch": 0.39465850131836355, "grad_norm": 3.6643548011779785, "learning_rate": 1.3361852964251321e-05, "loss": 2.1035, "step": 2320 }, { "epoch": 0.39635961554818405, "grad_norm": 4.243268966674805, "learning_rate": 1.3360679778617173e-05, "loss": 2.0463, "step": 2330 }, { "epoch": 0.3980607297780046, "grad_norm": 4.2195868492126465, "learning_rate": 1.3359498532157835e-05, "loss": 1.9562, "step": 2340 }, { "epoch": 0.3997618440078251, "grad_norm": 3.8276352882385254, "learning_rate": 1.3358309226307824e-05, "loss": 1.9243, "step": 2350 }, { "epoch": 0.40146295823764566, "grad_norm": 4.186319828033447, "learning_rate": 1.3357111862511447e-05, "loss": 2.041, "step": 2360 }, { "epoch": 0.4031640724674662, "grad_norm": 3.921633005142212, "learning_rate": 1.3355906442222798e-05, "loss": 2.0424, "step": 2370 }, { "epoch": 0.4048651866972867, "grad_norm": 3.9087321758270264, "learning_rate": 1.3354692966905756e-05, "loss": 2.0559, "step": 2380 }, { "epoch": 0.40656630092710727, "grad_norm": 4.365992069244385, "learning_rate": 1.335347143803398e-05, "loss": 2.0072, "step": 2390 }, { "epoch": 0.40826741515692777, "grad_norm": 4.397064208984375, "learning_rate": 1.335224185709091e-05, "loss": 2.0557, "step": 2400 }, { "epoch": 0.4099685293867483, "grad_norm": 3.6686463356018066, "learning_rate": 1.3351004225569762e-05, "loss": 1.9476, "step": 2410 }, { "epoch": 0.4116696436165688, "grad_norm": 4.134527683258057, "learning_rate": 1.3349758544973535e-05, "loss": 2.0976, "step": 2420 }, { "epoch": 0.4133707578463894, "grad_norm": 4.738413333892822, "learning_rate": 1.3348504816814996e-05, "loss": 2.0496, "step": 2430 }, { "epoch": 0.41507187207620994, "grad_norm": 3.523717164993286, "learning_rate": 1.3347243042616687e-05, "loss": 2.0754, "step": 2440 }, { "epoch": 0.41677298630603044, "grad_norm": 3.527294874191284, "learning_rate": 1.3345973223910927e-05, "loss": 2.0003, "step": 2450 }, { "epoch": 0.418474100535851, "grad_norm": 3.812926769256592, "learning_rate": 1.3344695362239794e-05, "loss": 2.0657, "step": 2460 }, { "epoch": 0.4201752147656715, "grad_norm": 4.237487316131592, "learning_rate": 1.3343409459155144e-05, "loss": 2.0629, "step": 2470 }, { "epoch": 0.42187632899549204, "grad_norm": 3.8257393836975098, "learning_rate": 1.3342115516218592e-05, "loss": 2.0618, "step": 2480 }, { "epoch": 0.4235774432253126, "grad_norm": 4.0557861328125, "learning_rate": 1.3340813535001515e-05, "loss": 1.9491, "step": 2490 }, { "epoch": 0.4252785574551331, "grad_norm": 3.6852915287017822, "learning_rate": 1.333950351708506e-05, "loss": 2.1553, "step": 2500 }, { "epoch": 0.42697967168495365, "grad_norm": 5.004347801208496, "learning_rate": 1.3338185464060127e-05, "loss": 1.9624, "step": 2510 }, { "epoch": 0.42868078591477415, "grad_norm": 4.5740647315979, "learning_rate": 1.3336859377527376e-05, "loss": 2.0589, "step": 2520 }, { "epoch": 0.4303819001445947, "grad_norm": 4.380794048309326, "learning_rate": 1.3335525259097222e-05, "loss": 2.018, "step": 2530 }, { "epoch": 0.43208301437441526, "grad_norm": 4.0294599533081055, "learning_rate": 1.3334183110389837e-05, "loss": 1.9966, "step": 2540 }, { "epoch": 0.43378412860423576, "grad_norm": 4.073001861572266, "learning_rate": 1.3332832933035143e-05, "loss": 2.0794, "step": 2550 }, { "epoch": 0.4354852428340563, "grad_norm": 3.7871289253234863, "learning_rate": 1.333147472867281e-05, "loss": 2.0538, "step": 2560 }, { "epoch": 0.4371863570638768, "grad_norm": 4.166225433349609, "learning_rate": 1.3330108498952259e-05, "loss": 2.0601, "step": 2570 }, { "epoch": 0.4388874712936974, "grad_norm": 4.1680169105529785, "learning_rate": 1.3328734245532657e-05, "loss": 2.0239, "step": 2580 }, { "epoch": 0.44058858552351793, "grad_norm": 4.646623611450195, "learning_rate": 1.3327351970082916e-05, "loss": 2.0568, "step": 2590 }, { "epoch": 0.44228969975333843, "grad_norm": 4.190902233123779, "learning_rate": 1.3325961674281686e-05, "loss": 1.964, "step": 2600 }, { "epoch": 0.443990813983159, "grad_norm": 3.622678756713867, "learning_rate": 1.332456335981736e-05, "loss": 2.0088, "step": 2610 }, { "epoch": 0.4456919282129795, "grad_norm": 4.435421943664551, "learning_rate": 1.3323157028388067e-05, "loss": 1.9484, "step": 2620 }, { "epoch": 0.44739304244280004, "grad_norm": 4.374332904815674, "learning_rate": 1.3321742681701679e-05, "loss": 2.0214, "step": 2630 }, { "epoch": 0.4490941566726206, "grad_norm": 4.046358585357666, "learning_rate": 1.3320320321475788e-05, "loss": 2.1021, "step": 2640 }, { "epoch": 0.4507952709024411, "grad_norm": 4.689764499664307, "learning_rate": 1.3318889949437733e-05, "loss": 2.036, "step": 2650 }, { "epoch": 0.45249638513226165, "grad_norm": 4.060450553894043, "learning_rate": 1.3317451567324572e-05, "loss": 1.9854, "step": 2660 }, { "epoch": 0.45419749936208215, "grad_norm": 3.979417562484741, "learning_rate": 1.3316005176883095e-05, "loss": 2.0304, "step": 2670 }, { "epoch": 0.4558986135919027, "grad_norm": 3.943890333175659, "learning_rate": 1.3314550779869815e-05, "loss": 2.0657, "step": 2680 }, { "epoch": 0.4575997278217232, "grad_norm": 4.07530403137207, "learning_rate": 1.3313088378050974e-05, "loss": 2.0117, "step": 2690 }, { "epoch": 0.45930084205154376, "grad_norm": 4.679013729095459, "learning_rate": 1.3311617973202526e-05, "loss": 2.0441, "step": 2700 }, { "epoch": 0.4610019562813643, "grad_norm": 4.465762615203857, "learning_rate": 1.331013956711015e-05, "loss": 2.0589, "step": 2710 }, { "epoch": 0.4627030705111848, "grad_norm": 4.215399265289307, "learning_rate": 1.3308653161569244e-05, "loss": 2.0401, "step": 2720 }, { "epoch": 0.46440418474100537, "grad_norm": 4.507813930511475, "learning_rate": 1.3307158758384914e-05, "loss": 2.0423, "step": 2730 }, { "epoch": 0.46610529897082587, "grad_norm": 4.728775978088379, "learning_rate": 1.3305656359371983e-05, "loss": 2.0764, "step": 2740 }, { "epoch": 0.4678064132006464, "grad_norm": 3.851613998413086, "learning_rate": 1.3304145966354982e-05, "loss": 2.0171, "step": 2750 }, { "epoch": 0.469507527430467, "grad_norm": 4.450353622436523, "learning_rate": 1.3302627581168152e-05, "loss": 1.9752, "step": 2760 }, { "epoch": 0.4712086416602875, "grad_norm": 4.43014669418335, "learning_rate": 1.3301101205655442e-05, "loss": 1.9331, "step": 2770 }, { "epoch": 0.47290975589010803, "grad_norm": 4.224061012268066, "learning_rate": 1.3299566841670496e-05, "loss": 2.0289, "step": 2780 }, { "epoch": 0.47461087011992853, "grad_norm": 4.00301456451416, "learning_rate": 1.3298024491076665e-05, "loss": 1.9951, "step": 2790 }, { "epoch": 0.4763119843497491, "grad_norm": 3.680476427078247, "learning_rate": 1.3296474155747003e-05, "loss": 2.0388, "step": 2800 }, { "epoch": 0.47801309857956964, "grad_norm": 3.9796175956726074, "learning_rate": 1.3294915837564254e-05, "loss": 1.9321, "step": 2810 }, { "epoch": 0.47971421280939014, "grad_norm": 3.5634584426879883, "learning_rate": 1.3293349538420858e-05, "loss": 1.9956, "step": 2820 }, { "epoch": 0.4814153270392107, "grad_norm": 4.366597652435303, "learning_rate": 1.329177526021895e-05, "loss": 2.0251, "step": 2830 }, { "epoch": 0.4831164412690312, "grad_norm": 7.455503463745117, "learning_rate": 1.329019300487035e-05, "loss": 2.0371, "step": 2840 }, { "epoch": 0.48481755549885175, "grad_norm": 4.278938293457031, "learning_rate": 1.3288602774296575e-05, "loss": 2.0568, "step": 2850 }, { "epoch": 0.4865186697286723, "grad_norm": 4.015135765075684, "learning_rate": 1.3287004570428812e-05, "loss": 1.8454, "step": 2860 }, { "epoch": 0.4882197839584928, "grad_norm": 3.6956396102905273, "learning_rate": 1.3285398395207948e-05, "loss": 2.037, "step": 2870 }, { "epoch": 0.48992089818831336, "grad_norm": 4.163562297821045, "learning_rate": 1.3283784250584536e-05, "loss": 1.9196, "step": 2880 }, { "epoch": 0.49162201241813386, "grad_norm": 3.465672016143799, "learning_rate": 1.3282162138518814e-05, "loss": 2.0516, "step": 2890 }, { "epoch": 0.4933231266479544, "grad_norm": 4.972282409667969, "learning_rate": 1.3280532060980699e-05, "loss": 2.0446, "step": 2900 }, { "epoch": 0.49502424087777497, "grad_norm": 3.4876644611358643, "learning_rate": 1.3278894019949775e-05, "loss": 1.8955, "step": 2910 }, { "epoch": 0.49672535510759547, "grad_norm": 4.416055202484131, "learning_rate": 1.3277248017415297e-05, "loss": 1.9769, "step": 2920 }, { "epoch": 0.498426469337416, "grad_norm": 4.977348327636719, "learning_rate": 1.3275594055376196e-05, "loss": 1.9478, "step": 2930 }, { "epoch": 0.5001275835672365, "grad_norm": 3.413250207901001, "learning_rate": 1.327393213584106e-05, "loss": 1.9492, "step": 2940 }, { "epoch": 0.5018286977970571, "grad_norm": 3.2921433448791504, "learning_rate": 1.3272262260828147e-05, "loss": 1.9549, "step": 2950 }, { "epoch": 0.5035298120268776, "grad_norm": 4.159584999084473, "learning_rate": 1.3270584432365373e-05, "loss": 2.01, "step": 2960 }, { "epoch": 0.5052309262566982, "grad_norm": 3.793626546859741, "learning_rate": 1.3268898652490313e-05, "loss": 2.0069, "step": 2970 }, { "epoch": 0.5069320404865186, "grad_norm": 4.399741172790527, "learning_rate": 1.32672049232502e-05, "loss": 1.9689, "step": 2980 }, { "epoch": 0.5086331547163392, "grad_norm": 3.734663248062134, "learning_rate": 1.326550324670192e-05, "loss": 2.0126, "step": 2990 }, { "epoch": 0.5103342689461597, "grad_norm": 4.259927272796631, "learning_rate": 1.3263793624912009e-05, "loss": 1.9307, "step": 3000 }, { "epoch": 0.5120353831759803, "grad_norm": 4.000924587249756, "learning_rate": 1.3262076059956654e-05, "loss": 1.9663, "step": 3010 }, { "epoch": 0.5137364974058009, "grad_norm": 4.104271411895752, "learning_rate": 1.3260350553921688e-05, "loss": 1.9565, "step": 3020 }, { "epoch": 0.5154376116356213, "grad_norm": 4.1479878425598145, "learning_rate": 1.3258617108902584e-05, "loss": 2.0009, "step": 3030 }, { "epoch": 0.5171387258654419, "grad_norm": 3.7437946796417236, "learning_rate": 1.3256875727004462e-05, "loss": 2.0229, "step": 3040 }, { "epoch": 0.5188398400952624, "grad_norm": 4.00507926940918, "learning_rate": 1.3255126410342076e-05, "loss": 2.0789, "step": 3050 }, { "epoch": 0.520540954325083, "grad_norm": 4.296029567718506, "learning_rate": 1.3253369161039821e-05, "loss": 1.9893, "step": 3060 }, { "epoch": 0.5222420685549035, "grad_norm": 3.632911205291748, "learning_rate": 1.3251603981231718e-05, "loss": 2.0216, "step": 3070 }, { "epoch": 0.523943182784724, "grad_norm": 3.6041970252990723, "learning_rate": 1.3249830873061426e-05, "loss": 2.049, "step": 3080 }, { "epoch": 0.5256442970145445, "grad_norm": 3.852734327316284, "learning_rate": 1.3248049838682229e-05, "loss": 1.9443, "step": 3090 }, { "epoch": 0.5273454112443651, "grad_norm": 3.8711137771606445, "learning_rate": 1.3246260880257038e-05, "loss": 2.0248, "step": 3100 }, { "epoch": 0.5290465254741856, "grad_norm": 4.121218204498291, "learning_rate": 1.3244463999958385e-05, "loss": 2.0191, "step": 3110 }, { "epoch": 0.5307476397040061, "grad_norm": 4.025257587432861, "learning_rate": 1.3242659199968425e-05, "loss": 2.0551, "step": 3120 }, { "epoch": 0.5324487539338266, "grad_norm": 3.772130012512207, "learning_rate": 1.3240846482478928e-05, "loss": 2.0279, "step": 3130 }, { "epoch": 0.5341498681636472, "grad_norm": 3.7531166076660156, "learning_rate": 1.323902584969128e-05, "loss": 1.995, "step": 3140 }, { "epoch": 0.5358509823934677, "grad_norm": 4.111819267272949, "learning_rate": 1.3237197303816486e-05, "loss": 2.1142, "step": 3150 }, { "epoch": 0.5375520966232883, "grad_norm": 3.1626720428466797, "learning_rate": 1.3235360847075147e-05, "loss": 1.9886, "step": 3160 }, { "epoch": 0.5392532108531087, "grad_norm": 3.5329902172088623, "learning_rate": 1.3233516481697481e-05, "loss": 2.0167, "step": 3170 }, { "epoch": 0.5409543250829293, "grad_norm": 3.377159595489502, "learning_rate": 1.3231664209923313e-05, "loss": 2.1049, "step": 3180 }, { "epoch": 0.5426554393127498, "grad_norm": 3.3523881435394287, "learning_rate": 1.322980403400206e-05, "loss": 2.0344, "step": 3190 }, { "epoch": 0.5443565535425704, "grad_norm": 4.31252908706665, "learning_rate": 1.3227935956192744e-05, "loss": 1.9801, "step": 3200 }, { "epoch": 0.546057667772391, "grad_norm": 4.030631065368652, "learning_rate": 1.3226059978763984e-05, "loss": 1.948, "step": 3210 }, { "epoch": 0.5477587820022114, "grad_norm": 3.5859789848327637, "learning_rate": 1.3224176103993986e-05, "loss": 2.0254, "step": 3220 }, { "epoch": 0.549459896232032, "grad_norm": 4.439004421234131, "learning_rate": 1.3222284334170552e-05, "loss": 1.9773, "step": 3230 }, { "epoch": 0.5511610104618525, "grad_norm": 3.986593008041382, "learning_rate": 1.3220384671591074e-05, "loss": 2.0235, "step": 3240 }, { "epoch": 0.5528621246916731, "grad_norm": 4.354572296142578, "learning_rate": 1.3218477118562525e-05, "loss": 1.8921, "step": 3250 }, { "epoch": 0.5545632389214936, "grad_norm": 4.235722064971924, "learning_rate": 1.3216561677401458e-05, "loss": 2.0174, "step": 3260 }, { "epoch": 0.5562643531513141, "grad_norm": 3.979583263397217, "learning_rate": 1.3214638350434013e-05, "loss": 1.9176, "step": 3270 }, { "epoch": 0.5579654673811346, "grad_norm": 4.8564324378967285, "learning_rate": 1.3212707139995897e-05, "loss": 1.9592, "step": 3280 }, { "epoch": 0.5596665816109552, "grad_norm": 3.7206497192382812, "learning_rate": 1.3210768048432398e-05, "loss": 1.95, "step": 3290 }, { "epoch": 0.5613676958407757, "grad_norm": 4.199899196624756, "learning_rate": 1.3208821078098376e-05, "loss": 2.0345, "step": 3300 }, { "epoch": 0.5630688100705963, "grad_norm": 3.7845380306243896, "learning_rate": 1.3206866231358253e-05, "loss": 2.0071, "step": 3310 }, { "epoch": 0.5647699243004167, "grad_norm": 3.7112693786621094, "learning_rate": 1.3204903510586017e-05, "loss": 1.9788, "step": 3320 }, { "epoch": 0.5664710385302373, "grad_norm": 4.377164363861084, "learning_rate": 1.3202932918165227e-05, "loss": 2.0004, "step": 3330 }, { "epoch": 0.5681721527600578, "grad_norm": 4.244388103485107, "learning_rate": 1.320095445648899e-05, "loss": 1.9677, "step": 3340 }, { "epoch": 0.5698732669898784, "grad_norm": 4.318045139312744, "learning_rate": 1.3198968127959974e-05, "loss": 2.0189, "step": 3350 }, { "epoch": 0.571574381219699, "grad_norm": 4.372837066650391, "learning_rate": 1.3196973934990402e-05, "loss": 2.043, "step": 3360 }, { "epoch": 0.5732754954495194, "grad_norm": 4.486396312713623, "learning_rate": 1.3194971880002048e-05, "loss": 1.9951, "step": 3370 }, { "epoch": 0.57497660967934, "grad_norm": 4.997833251953125, "learning_rate": 1.3192961965426233e-05, "loss": 1.8461, "step": 3380 }, { "epoch": 0.5766777239091605, "grad_norm": 3.630509853363037, "learning_rate": 1.319094419370382e-05, "loss": 2.0038, "step": 3390 }, { "epoch": 0.578378838138981, "grad_norm": 3.9311442375183105, "learning_rate": 1.3188918567285216e-05, "loss": 1.9245, "step": 3400 }, { "epoch": 0.5800799523688016, "grad_norm": 3.9913883209228516, "learning_rate": 1.318688508863037e-05, "loss": 1.9845, "step": 3410 }, { "epoch": 0.581781066598622, "grad_norm": 3.962445020675659, "learning_rate": 1.318484376020876e-05, "loss": 1.9759, "step": 3420 }, { "epoch": 0.5834821808284426, "grad_norm": 3.8628203868865967, "learning_rate": 1.3182794584499402e-05, "loss": 2.0435, "step": 3430 }, { "epoch": 0.5851832950582632, "grad_norm": 3.9601385593414307, "learning_rate": 1.3180737563990842e-05, "loss": 1.8831, "step": 3440 }, { "epoch": 0.5868844092880837, "grad_norm": 3.4271185398101807, "learning_rate": 1.317867270118115e-05, "loss": 2.0022, "step": 3450 }, { "epoch": 0.5885855235179043, "grad_norm": 3.7709715366363525, "learning_rate": 1.3176599998577918e-05, "loss": 1.8817, "step": 3460 }, { "epoch": 0.5902866377477247, "grad_norm": 3.9962878227233887, "learning_rate": 1.3174519458698268e-05, "loss": 2.003, "step": 3470 }, { "epoch": 0.5919877519775453, "grad_norm": 4.124835014343262, "learning_rate": 1.3172431084068829e-05, "loss": 1.9846, "step": 3480 }, { "epoch": 0.5936888662073658, "grad_norm": 3.7828152179718018, "learning_rate": 1.317033487722575e-05, "loss": 2.0059, "step": 3490 }, { "epoch": 0.5953899804371864, "grad_norm": 4.043171405792236, "learning_rate": 1.316823084071469e-05, "loss": 1.9832, "step": 3500 }, { "epoch": 0.5970910946670069, "grad_norm": 5.648746013641357, "learning_rate": 1.316611897709082e-05, "loss": 2.0174, "step": 3510 }, { "epoch": 0.5987922088968274, "grad_norm": 3.3299238681793213, "learning_rate": 1.316399928891881e-05, "loss": 1.8811, "step": 3520 }, { "epoch": 0.6004933231266479, "grad_norm": 3.889491319656372, "learning_rate": 1.3161871778772836e-05, "loss": 1.9641, "step": 3530 }, { "epoch": 0.6021944373564685, "grad_norm": 4.209897041320801, "learning_rate": 1.3159736449236577e-05, "loss": 2.0733, "step": 3540 }, { "epoch": 0.603895551586289, "grad_norm": 3.638066291809082, "learning_rate": 1.3157593302903199e-05, "loss": 1.9662, "step": 3550 }, { "epoch": 0.6055966658161096, "grad_norm": 3.562626838684082, "learning_rate": 1.3155442342375365e-05, "loss": 1.9735, "step": 3560 }, { "epoch": 0.60729778004593, "grad_norm": 3.843376636505127, "learning_rate": 1.3153283570265233e-05, "loss": 1.9661, "step": 3570 }, { "epoch": 0.6089988942757506, "grad_norm": 3.4872562885284424, "learning_rate": 1.3151116989194438e-05, "loss": 2.0091, "step": 3580 }, { "epoch": 0.6107000085055712, "grad_norm": 4.058065414428711, "learning_rate": 1.3148942601794106e-05, "loss": 2.0173, "step": 3590 }, { "epoch": 0.6124011227353917, "grad_norm": 3.877347230911255, "learning_rate": 1.314676041070484e-05, "loss": 2.0794, "step": 3600 }, { "epoch": 0.6141022369652123, "grad_norm": 3.453751564025879, "learning_rate": 1.3144570418576719e-05, "loss": 1.9352, "step": 3610 }, { "epoch": 0.6158033511950327, "grad_norm": 3.791684627532959, "learning_rate": 1.3142372628069294e-05, "loss": 1.9737, "step": 3620 }, { "epoch": 0.6175044654248533, "grad_norm": 4.093663215637207, "learning_rate": 1.3140167041851593e-05, "loss": 1.9208, "step": 3630 }, { "epoch": 0.6192055796546738, "grad_norm": 3.8233466148376465, "learning_rate": 1.3137953662602104e-05, "loss": 1.9972, "step": 3640 }, { "epoch": 0.6209066938844944, "grad_norm": 3.34663987159729, "learning_rate": 1.3135732493008786e-05, "loss": 2.0431, "step": 3650 }, { "epoch": 0.6226078081143149, "grad_norm": 3.8648672103881836, "learning_rate": 1.3133503535769055e-05, "loss": 2.1321, "step": 3660 }, { "epoch": 0.6243089223441354, "grad_norm": 4.013655185699463, "learning_rate": 1.313126679358978e-05, "loss": 1.9735, "step": 3670 }, { "epoch": 0.6260100365739559, "grad_norm": 3.838092088699341, "learning_rate": 1.3129022269187292e-05, "loss": 1.9773, "step": 3680 }, { "epoch": 0.6277111508037765, "grad_norm": 3.74652099609375, "learning_rate": 1.3126769965287367e-05, "loss": 1.9456, "step": 3690 }, { "epoch": 0.629412265033597, "grad_norm": 4.200870990753174, "learning_rate": 1.3124509884625234e-05, "loss": 2.0082, "step": 3700 }, { "epoch": 0.6311133792634175, "grad_norm": 3.9270575046539307, "learning_rate": 1.312224202994556e-05, "loss": 1.9668, "step": 3710 }, { "epoch": 0.632814493493238, "grad_norm": 3.9572863578796387, "learning_rate": 1.3119966404002458e-05, "loss": 1.9819, "step": 3720 }, { "epoch": 0.6345156077230586, "grad_norm": 4.089076042175293, "learning_rate": 1.3117683009559475e-05, "loss": 2.1096, "step": 3730 }, { "epoch": 0.6362167219528791, "grad_norm": 3.985917568206787, "learning_rate": 1.3115391849389593e-05, "loss": 1.9507, "step": 3740 }, { "epoch": 0.6379178361826997, "grad_norm": 3.494417190551758, "learning_rate": 1.3113092926275229e-05, "loss": 2.0412, "step": 3750 }, { "epoch": 0.6396189504125201, "grad_norm": 3.491520404815674, "learning_rate": 1.3110786243008218e-05, "loss": 2.0301, "step": 3760 }, { "epoch": 0.6413200646423407, "grad_norm": 3.2409474849700928, "learning_rate": 1.310847180238983e-05, "loss": 1.8752, "step": 3770 }, { "epoch": 0.6430211788721613, "grad_norm": 3.796698808670044, "learning_rate": 1.3106149607230751e-05, "loss": 1.9809, "step": 3780 }, { "epoch": 0.6447222931019818, "grad_norm": 4.545765399932861, "learning_rate": 1.3103819660351079e-05, "loss": 1.8971, "step": 3790 }, { "epoch": 0.6464234073318024, "grad_norm": 3.602032423019409, "learning_rate": 1.310148196458033e-05, "loss": 1.9571, "step": 3800 }, { "epoch": 0.6481245215616228, "grad_norm": 3.979278564453125, "learning_rate": 1.3099136522757435e-05, "loss": 1.9982, "step": 3810 }, { "epoch": 0.6498256357914434, "grad_norm": 3.5593981742858887, "learning_rate": 1.3096783337730727e-05, "loss": 2.0194, "step": 3820 }, { "epoch": 0.6515267500212639, "grad_norm": 3.7880349159240723, "learning_rate": 1.309442241235794e-05, "loss": 1.9879, "step": 3830 }, { "epoch": 0.6532278642510845, "grad_norm": 3.7969162464141846, "learning_rate": 1.3092053749506217e-05, "loss": 2.0842, "step": 3840 }, { "epoch": 0.654928978480905, "grad_norm": 3.9793834686279297, "learning_rate": 1.3089677352052087e-05, "loss": 1.9374, "step": 3850 }, { "epoch": 0.6566300927107255, "grad_norm": 3.4517974853515625, "learning_rate": 1.3087293222881475e-05, "loss": 1.9882, "step": 3860 }, { "epoch": 0.658331206940546, "grad_norm": 3.781954288482666, "learning_rate": 1.3084901364889702e-05, "loss": 1.9982, "step": 3870 }, { "epoch": 0.6600323211703666, "grad_norm": 3.9006168842315674, "learning_rate": 1.3082501780981465e-05, "loss": 1.9795, "step": 3880 }, { "epoch": 0.6617334354001871, "grad_norm": 3.539034605026245, "learning_rate": 1.3080094474070855e-05, "loss": 1.9624, "step": 3890 }, { "epoch": 0.6634345496300077, "grad_norm": 3.632161855697632, "learning_rate": 1.3077679447081328e-05, "loss": 1.9449, "step": 3900 }, { "epoch": 0.6651356638598281, "grad_norm": 4.065733909606934, "learning_rate": 1.3075256702945727e-05, "loss": 2.0045, "step": 3910 }, { "epoch": 0.6668367780896487, "grad_norm": 3.7506015300750732, "learning_rate": 1.3072826244606263e-05, "loss": 2.0023, "step": 3920 }, { "epoch": 0.6685378923194693, "grad_norm": 3.198028087615967, "learning_rate": 1.3070388075014513e-05, "loss": 1.9508, "step": 3930 }, { "epoch": 0.6702390065492898, "grad_norm": 3.5299782752990723, "learning_rate": 1.3067942197131422e-05, "loss": 1.942, "step": 3940 }, { "epoch": 0.6719401207791104, "grad_norm": 4.142813682556152, "learning_rate": 1.306548861392729e-05, "loss": 1.9504, "step": 3950 }, { "epoch": 0.6736412350089308, "grad_norm": 3.373514175415039, "learning_rate": 1.3063027328381788e-05, "loss": 1.9089, "step": 3960 }, { "epoch": 0.6753423492387514, "grad_norm": 3.3843135833740234, "learning_rate": 1.3060558343483922e-05, "loss": 1.8664, "step": 3970 }, { "epoch": 0.6770434634685719, "grad_norm": 4.3059587478637695, "learning_rate": 1.3058081662232065e-05, "loss": 2.0297, "step": 3980 }, { "epoch": 0.6787445776983925, "grad_norm": 4.434045791625977, "learning_rate": 1.3055597287633925e-05, "loss": 2.0207, "step": 3990 }, { "epoch": 0.680445691928213, "grad_norm": 4.15225076675415, "learning_rate": 1.305310522270656e-05, "loss": 1.9678, "step": 4000 }, { "epoch": 0.6821468061580335, "grad_norm": 4.175527572631836, "learning_rate": 1.3050605470476363e-05, "loss": 2.0039, "step": 4010 }, { "epoch": 0.683847920387854, "grad_norm": 3.8997533321380615, "learning_rate": 1.3048098033979066e-05, "loss": 1.8911, "step": 4020 }, { "epoch": 0.6855490346176746, "grad_norm": 4.1734089851379395, "learning_rate": 1.3045582916259729e-05, "loss": 1.9374, "step": 4030 }, { "epoch": 0.6872501488474951, "grad_norm": 4.575035095214844, "learning_rate": 1.3043060120372744e-05, "loss": 1.9354, "step": 4040 }, { "epoch": 0.6889512630773157, "grad_norm": 3.9317026138305664, "learning_rate": 1.3040529649381823e-05, "loss": 1.9613, "step": 4050 }, { "epoch": 0.6906523773071361, "grad_norm": 4.242161750793457, "learning_rate": 1.3037991506360002e-05, "loss": 1.9097, "step": 4060 }, { "epoch": 0.6923534915369567, "grad_norm": 4.004007816314697, "learning_rate": 1.3035445694389638e-05, "loss": 1.9673, "step": 4070 }, { "epoch": 0.6940546057667772, "grad_norm": 4.778632164001465, "learning_rate": 1.303289221656239e-05, "loss": 1.9565, "step": 4080 }, { "epoch": 0.6957557199965978, "grad_norm": 3.2379519939422607, "learning_rate": 1.3030331075979241e-05, "loss": 1.9305, "step": 4090 }, { "epoch": 0.6974568342264184, "grad_norm": 4.127084255218506, "learning_rate": 1.3027762275750465e-05, "loss": 1.9676, "step": 4100 }, { "epoch": 0.6991579484562388, "grad_norm": 3.654214859008789, "learning_rate": 1.3025185818995646e-05, "loss": 1.9687, "step": 4110 }, { "epoch": 0.7008590626860594, "grad_norm": 4.186607837677002, "learning_rate": 1.3022601708843666e-05, "loss": 1.9978, "step": 4120 }, { "epoch": 0.7025601769158799, "grad_norm": 3.398437976837158, "learning_rate": 1.3020009948432703e-05, "loss": 2.0208, "step": 4130 }, { "epoch": 0.7042612911457005, "grad_norm": 4.260915279388428, "learning_rate": 1.3017410540910221e-05, "loss": 1.9923, "step": 4140 }, { "epoch": 0.705962405375521, "grad_norm": 3.935940742492676, "learning_rate": 1.301480348943297e-05, "loss": 2.0397, "step": 4150 }, { "epoch": 0.7076635196053415, "grad_norm": 4.820540904998779, "learning_rate": 1.3012188797166992e-05, "loss": 1.9785, "step": 4160 }, { "epoch": 0.709364633835162, "grad_norm": 3.5808472633361816, "learning_rate": 1.3009566467287596e-05, "loss": 1.8898, "step": 4170 }, { "epoch": 0.7110657480649826, "grad_norm": 3.319737672805786, "learning_rate": 1.3006936502979375e-05, "loss": 2.0429, "step": 4180 }, { "epoch": 0.7127668622948031, "grad_norm": 3.511857032775879, "learning_rate": 1.3004298907436188e-05, "loss": 1.9019, "step": 4190 }, { "epoch": 0.7144679765246237, "grad_norm": 3.2292962074279785, "learning_rate": 1.3001653683861167e-05, "loss": 1.9096, "step": 4200 }, { "epoch": 0.7161690907544441, "grad_norm": 3.7671141624450684, "learning_rate": 1.2999000835466701e-05, "loss": 1.9454, "step": 4210 }, { "epoch": 0.7178702049842647, "grad_norm": 4.096886157989502, "learning_rate": 1.2996340365474445e-05, "loss": 1.9327, "step": 4220 }, { "epoch": 0.7195713192140852, "grad_norm": 3.9096286296844482, "learning_rate": 1.2993672277115305e-05, "loss": 2.0614, "step": 4230 }, { "epoch": 0.7212724334439058, "grad_norm": 3.94098162651062, "learning_rate": 1.299099657362944e-05, "loss": 2.004, "step": 4240 }, { "epoch": 0.7229735476737262, "grad_norm": 3.444897174835205, "learning_rate": 1.298831325826626e-05, "loss": 1.9583, "step": 4250 }, { "epoch": 0.7246746619035468, "grad_norm": 3.791532516479492, "learning_rate": 1.2985622334284414e-05, "loss": 1.9095, "step": 4260 }, { "epoch": 0.7263757761333673, "grad_norm": 3.3483974933624268, "learning_rate": 1.2982923804951797e-05, "loss": 1.8663, "step": 4270 }, { "epoch": 0.7280768903631879, "grad_norm": 3.745339870452881, "learning_rate": 1.2980217673545534e-05, "loss": 1.9655, "step": 4280 }, { "epoch": 0.7297780045930085, "grad_norm": 3.8242061138153076, "learning_rate": 1.2977503943351981e-05, "loss": 1.9921, "step": 4290 }, { "epoch": 0.7314791188228289, "grad_norm": 3.353945732116699, "learning_rate": 1.2974782617666734e-05, "loss": 1.9331, "step": 4300 }, { "epoch": 0.7331802330526495, "grad_norm": 3.296675682067871, "learning_rate": 1.2972053699794598e-05, "loss": 2.0209, "step": 4310 }, { "epoch": 0.73488134728247, "grad_norm": 3.7973153591156006, "learning_rate": 1.2969317193049608e-05, "loss": 2.0261, "step": 4320 }, { "epoch": 0.7365824615122906, "grad_norm": 3.46197772026062, "learning_rate": 1.296657310075501e-05, "loss": 1.9906, "step": 4330 }, { "epoch": 0.7382835757421111, "grad_norm": 4.166983127593994, "learning_rate": 1.2963821426243264e-05, "loss": 1.9852, "step": 4340 }, { "epoch": 0.7399846899719316, "grad_norm": 3.487180709838867, "learning_rate": 1.296106217285604e-05, "loss": 1.944, "step": 4350 }, { "epoch": 0.7416858042017521, "grad_norm": 3.9465606212615967, "learning_rate": 1.2958295343944207e-05, "loss": 2.0371, "step": 4360 }, { "epoch": 0.7433869184315727, "grad_norm": 3.864844560623169, "learning_rate": 1.2955520942867837e-05, "loss": 1.979, "step": 4370 }, { "epoch": 0.7450880326613932, "grad_norm": 3.770357370376587, "learning_rate": 1.2952738972996197e-05, "loss": 1.9993, "step": 4380 }, { "epoch": 0.7467891468912138, "grad_norm": 3.781005859375, "learning_rate": 1.2949949437707747e-05, "loss": 1.9191, "step": 4390 }, { "epoch": 0.7484902611210342, "grad_norm": 3.8930251598358154, "learning_rate": 1.2947152340390131e-05, "loss": 1.9467, "step": 4400 }, { "epoch": 0.7501913753508548, "grad_norm": 3.6845479011535645, "learning_rate": 1.2944347684440183e-05, "loss": 1.9453, "step": 4410 }, { "epoch": 0.7518924895806753, "grad_norm": 3.2310779094696045, "learning_rate": 1.2941535473263907e-05, "loss": 1.9638, "step": 4420 }, { "epoch": 0.7535936038104959, "grad_norm": 3.4538753032684326, "learning_rate": 1.2938715710276491e-05, "loss": 1.9097, "step": 4430 }, { "epoch": 0.7552947180403164, "grad_norm": 4.4561872482299805, "learning_rate": 1.2935888398902285e-05, "loss": 1.9183, "step": 4440 }, { "epoch": 0.7569958322701369, "grad_norm": 3.7806403636932373, "learning_rate": 1.2933053542574818e-05, "loss": 2.0133, "step": 4450 }, { "epoch": 0.7586969464999574, "grad_norm": 3.7306880950927734, "learning_rate": 1.2930211144736772e-05, "loss": 1.913, "step": 4460 }, { "epoch": 0.760398060729778, "grad_norm": 3.4570231437683105, "learning_rate": 1.2927361208839987e-05, "loss": 2.0017, "step": 4470 }, { "epoch": 0.7620991749595986, "grad_norm": 4.0018534660339355, "learning_rate": 1.2924503738345465e-05, "loss": 1.8458, "step": 4480 }, { "epoch": 0.7638002891894191, "grad_norm": 3.6628971099853516, "learning_rate": 1.2921638736723351e-05, "loss": 1.9024, "step": 4490 }, { "epoch": 0.7655014034192396, "grad_norm": 4.748337745666504, "learning_rate": 1.291876620745294e-05, "loss": 1.9229, "step": 4500 }, { "epoch": 0.7672025176490601, "grad_norm": 3.4937639236450195, "learning_rate": 1.2915886154022668e-05, "loss": 1.9887, "step": 4510 }, { "epoch": 0.7689036318788807, "grad_norm": 3.3699965476989746, "learning_rate": 1.2912998579930104e-05, "loss": 1.9189, "step": 4520 }, { "epoch": 0.7706047461087012, "grad_norm": 4.4775872230529785, "learning_rate": 1.2910103488681956e-05, "loss": 1.9302, "step": 4530 }, { "epoch": 0.7723058603385218, "grad_norm": 3.719393491744995, "learning_rate": 1.2907200883794061e-05, "loss": 1.9872, "step": 4540 }, { "epoch": 0.7740069745683422, "grad_norm": 3.6321024894714355, "learning_rate": 1.2904290768791376e-05, "loss": 1.8501, "step": 4550 }, { "epoch": 0.7757080887981628, "grad_norm": 4.0052809715271, "learning_rate": 1.2901373147207981e-05, "loss": 1.9772, "step": 4560 }, { "epoch": 0.7774092030279833, "grad_norm": 3.715613842010498, "learning_rate": 1.289844802258707e-05, "loss": 1.8721, "step": 4570 }, { "epoch": 0.7791103172578039, "grad_norm": 3.5955617427825928, "learning_rate": 1.2895515398480956e-05, "loss": 1.911, "step": 4580 }, { "epoch": 0.7808114314876244, "grad_norm": 3.7583234310150146, "learning_rate": 1.2892575278451049e-05, "loss": 1.9743, "step": 4590 }, { "epoch": 0.7825125457174449, "grad_norm": 4.226505756378174, "learning_rate": 1.288962766606787e-05, "loss": 1.915, "step": 4600 }, { "epoch": 0.7842136599472654, "grad_norm": 3.244330406188965, "learning_rate": 1.2886672564911035e-05, "loss": 1.9117, "step": 4610 }, { "epoch": 0.785914774177086, "grad_norm": 3.8930232524871826, "learning_rate": 1.2883709978569256e-05, "loss": 1.947, "step": 4620 }, { "epoch": 0.7876158884069066, "grad_norm": 4.156840801239014, "learning_rate": 1.2880739910640333e-05, "loss": 2.0105, "step": 4630 }, { "epoch": 0.7893170026367271, "grad_norm": 4.2512688636779785, "learning_rate": 1.2877762364731159e-05, "loss": 1.8665, "step": 4640 }, { "epoch": 0.7910181168665475, "grad_norm": 3.931245803833008, "learning_rate": 1.2874777344457694e-05, "loss": 1.85, "step": 4650 }, { "epoch": 0.7927192310963681, "grad_norm": 3.273142099380493, "learning_rate": 1.287178485344499e-05, "loss": 1.8956, "step": 4660 }, { "epoch": 0.7944203453261887, "grad_norm": 3.791325092315674, "learning_rate": 1.2868784895327165e-05, "loss": 1.9721, "step": 4670 }, { "epoch": 0.7961214595560092, "grad_norm": 3.9774279594421387, "learning_rate": 1.2865777473747403e-05, "loss": 1.8861, "step": 4680 }, { "epoch": 0.7978225737858298, "grad_norm": 4.466744899749756, "learning_rate": 1.2862762592357954e-05, "loss": 1.9802, "step": 4690 }, { "epoch": 0.7995236880156502, "grad_norm": 4.0846710205078125, "learning_rate": 1.285974025482013e-05, "loss": 2.0306, "step": 4700 }, { "epoch": 0.8012248022454708, "grad_norm": 3.1373989582061768, "learning_rate": 1.2856710464804294e-05, "loss": 1.9023, "step": 4710 }, { "epoch": 0.8029259164752913, "grad_norm": 4.3124518394470215, "learning_rate": 1.2853673225989859e-05, "loss": 1.8912, "step": 4720 }, { "epoch": 0.8046270307051119, "grad_norm": 3.9370830059051514, "learning_rate": 1.2850628542065289e-05, "loss": 1.8607, "step": 4730 }, { "epoch": 0.8063281449349324, "grad_norm": 3.7955822944641113, "learning_rate": 1.2847576416728082e-05, "loss": 2.0242, "step": 4740 }, { "epoch": 0.8080292591647529, "grad_norm": 3.9595768451690674, "learning_rate": 1.284451685368478e-05, "loss": 2.0156, "step": 4750 }, { "epoch": 0.8097303733945734, "grad_norm": 3.8596062660217285, "learning_rate": 1.2841449856650952e-05, "loss": 1.9646, "step": 4760 }, { "epoch": 0.811431487624394, "grad_norm": 4.593785285949707, "learning_rate": 1.2838375429351201e-05, "loss": 1.956, "step": 4770 }, { "epoch": 0.8131326018542145, "grad_norm": 3.4249627590179443, "learning_rate": 1.2835293575519146e-05, "loss": 1.9888, "step": 4780 }, { "epoch": 0.8148337160840351, "grad_norm": 3.3835091590881348, "learning_rate": 1.283220429889743e-05, "loss": 2.0539, "step": 4790 }, { "epoch": 0.8165348303138555, "grad_norm": 3.9617295265197754, "learning_rate": 1.2829107603237708e-05, "loss": 1.9249, "step": 4800 }, { "epoch": 0.8182359445436761, "grad_norm": 3.755145311355591, "learning_rate": 1.2826003492300647e-05, "loss": 1.9182, "step": 4810 }, { "epoch": 0.8199370587734967, "grad_norm": 3.9545507431030273, "learning_rate": 1.2822891969855917e-05, "loss": 1.9943, "step": 4820 }, { "epoch": 0.8216381730033172, "grad_norm": 3.2209560871124268, "learning_rate": 1.281977303968219e-05, "loss": 1.9436, "step": 4830 }, { "epoch": 0.8233392872331377, "grad_norm": 5.035518169403076, "learning_rate": 1.2816646705567135e-05, "loss": 1.8816, "step": 4840 }, { "epoch": 0.8250404014629582, "grad_norm": 4.079342365264893, "learning_rate": 1.2813512971307409e-05, "loss": 1.9416, "step": 4850 }, { "epoch": 0.8267415156927788, "grad_norm": 4.157555103302002, "learning_rate": 1.2810371840708656e-05, "loss": 1.9485, "step": 4860 }, { "epoch": 0.8284426299225993, "grad_norm": 3.232848644256592, "learning_rate": 1.2807223317585508e-05, "loss": 1.8885, "step": 4870 }, { "epoch": 0.8301437441524199, "grad_norm": 4.313023090362549, "learning_rate": 1.2804067405761567e-05, "loss": 1.9958, "step": 4880 }, { "epoch": 0.8318448583822403, "grad_norm": 4.823918342590332, "learning_rate": 1.2800904109069414e-05, "loss": 1.9411, "step": 4890 }, { "epoch": 0.8335459726120609, "grad_norm": 3.9723315238952637, "learning_rate": 1.2797733431350596e-05, "loss": 2.0031, "step": 4900 }, { "epoch": 0.8352470868418814, "grad_norm": 4.099934101104736, "learning_rate": 1.2794555376455623e-05, "loss": 1.888, "step": 4910 }, { "epoch": 0.836948201071702, "grad_norm": 3.663517475128174, "learning_rate": 1.2791369948243961e-05, "loss": 1.909, "step": 4920 }, { "epoch": 0.8386493153015225, "grad_norm": 3.9390037059783936, "learning_rate": 1.278817715058404e-05, "loss": 1.9576, "step": 4930 }, { "epoch": 0.840350429531343, "grad_norm": 4.8486127853393555, "learning_rate": 1.2784976987353228e-05, "loss": 1.9344, "step": 4940 }, { "epoch": 0.8420515437611635, "grad_norm": 4.624791145324707, "learning_rate": 1.2781769462437846e-05, "loss": 1.9987, "step": 4950 }, { "epoch": 0.8437526579909841, "grad_norm": 3.784917116165161, "learning_rate": 1.277855457973315e-05, "loss": 1.9721, "step": 4960 }, { "epoch": 0.8454537722208046, "grad_norm": 3.5814690589904785, "learning_rate": 1.2775332343143332e-05, "loss": 1.9869, "step": 4970 }, { "epoch": 0.8471548864506252, "grad_norm": 4.098245143890381, "learning_rate": 1.277210275658152e-05, "loss": 1.9373, "step": 4980 }, { "epoch": 0.8488560006804456, "grad_norm": 3.2476980686187744, "learning_rate": 1.2768865823969763e-05, "loss": 2.0476, "step": 4990 }, { "epoch": 0.8505571149102662, "grad_norm": 4.2028374671936035, "learning_rate": 1.276562154923903e-05, "loss": 1.9071, "step": 5000 }, { "epoch": 0.8522582291400868, "grad_norm": 2.8667080402374268, "learning_rate": 1.2762369936329212e-05, "loss": 1.8819, "step": 5010 }, { "epoch": 0.8539593433699073, "grad_norm": 4.183213710784912, "learning_rate": 1.2759110989189105e-05, "loss": 1.9156, "step": 5020 }, { "epoch": 0.8556604575997279, "grad_norm": 3.4557831287384033, "learning_rate": 1.2755844711776415e-05, "loss": 1.9098, "step": 5030 }, { "epoch": 0.8573615718295483, "grad_norm": 3.910086154937744, "learning_rate": 1.2752571108057754e-05, "loss": 1.9913, "step": 5040 }, { "epoch": 0.8590626860593689, "grad_norm": 4.714244842529297, "learning_rate": 1.2749290182008621e-05, "loss": 2.0118, "step": 5050 }, { "epoch": 0.8607638002891894, "grad_norm": 2.8815157413482666, "learning_rate": 1.274600193761342e-05, "loss": 1.996, "step": 5060 }, { "epoch": 0.86246491451901, "grad_norm": 4.021540641784668, "learning_rate": 1.2742706378865429e-05, "loss": 1.9917, "step": 5070 }, { "epoch": 0.8641660287488305, "grad_norm": 4.05567741394043, "learning_rate": 1.273940350976682e-05, "loss": 1.9756, "step": 5080 }, { "epoch": 0.865867142978651, "grad_norm": 3.598092555999756, "learning_rate": 1.2736093334328635e-05, "loss": 1.9183, "step": 5090 }, { "epoch": 0.8675682572084715, "grad_norm": 4.150853157043457, "learning_rate": 1.2732775856570795e-05, "loss": 1.9923, "step": 5100 }, { "epoch": 0.8692693714382921, "grad_norm": 3.2984402179718018, "learning_rate": 1.2729451080522086e-05, "loss": 1.9023, "step": 5110 }, { "epoch": 0.8709704856681126, "grad_norm": 3.5412111282348633, "learning_rate": 1.2726119010220156e-05, "loss": 1.9325, "step": 5120 }, { "epoch": 0.8726715998979332, "grad_norm": 3.7908759117126465, "learning_rate": 1.2722779649711515e-05, "loss": 1.965, "step": 5130 }, { "epoch": 0.8743727141277536, "grad_norm": 4.017210483551025, "learning_rate": 1.271943300305152e-05, "loss": 1.9754, "step": 5140 }, { "epoch": 0.8760738283575742, "grad_norm": 3.5406274795532227, "learning_rate": 1.2716079074304384e-05, "loss": 1.9246, "step": 5150 }, { "epoch": 0.8777749425873947, "grad_norm": 3.591987133026123, "learning_rate": 1.2712717867543158e-05, "loss": 1.9146, "step": 5160 }, { "epoch": 0.8794760568172153, "grad_norm": 3.8221333026885986, "learning_rate": 1.2709349386849731e-05, "loss": 1.988, "step": 5170 }, { "epoch": 0.8811771710470359, "grad_norm": 3.9325971603393555, "learning_rate": 1.2705973636314834e-05, "loss": 1.8172, "step": 5180 }, { "epoch": 0.8828782852768563, "grad_norm": 4.338099479675293, "learning_rate": 1.2702590620038014e-05, "loss": 1.9972, "step": 5190 }, { "epoch": 0.8845793995066769, "grad_norm": 3.458946704864502, "learning_rate": 1.269920034212765e-05, "loss": 1.9436, "step": 5200 }, { "epoch": 0.8862805137364974, "grad_norm": 3.817826986312866, "learning_rate": 1.269580280670094e-05, "loss": 1.9126, "step": 5210 }, { "epoch": 0.887981627966318, "grad_norm": 3.9971837997436523, "learning_rate": 1.269239801788389e-05, "loss": 1.904, "step": 5220 }, { "epoch": 0.8896827421961385, "grad_norm": 3.336728096008301, "learning_rate": 1.2688985979811319e-05, "loss": 1.9538, "step": 5230 }, { "epoch": 0.891383856425959, "grad_norm": 3.4137091636657715, "learning_rate": 1.268556669662685e-05, "loss": 1.9659, "step": 5240 }, { "epoch": 0.8930849706557795, "grad_norm": 3.567591905593872, "learning_rate": 1.26821401724829e-05, "loss": 2.0012, "step": 5250 }, { "epoch": 0.8947860848856001, "grad_norm": 3.2225394248962402, "learning_rate": 1.2678706411540686e-05, "loss": 1.882, "step": 5260 }, { "epoch": 0.8964871991154206, "grad_norm": 3.936091899871826, "learning_rate": 1.2675265417970207e-05, "loss": 1.8807, "step": 5270 }, { "epoch": 0.8981883133452412, "grad_norm": 3.4630918502807617, "learning_rate": 1.2672162343313195e-05, "loss": 2.0266, "step": 5280 }, { "epoch": 0.8998894275750616, "grad_norm": 3.2364659309387207, "learning_rate": 1.2668707619268824e-05, "loss": 1.9086, "step": 5290 }, { "epoch": 0.9015905418048822, "grad_norm": 3.93023419380188, "learning_rate": 1.2665245674738846e-05, "loss": 1.905, "step": 5300 }, { "epoch": 0.9032916560347027, "grad_norm": 3.954702138900757, "learning_rate": 1.2661776513927489e-05, "loss": 1.9401, "step": 5310 }, { "epoch": 0.9049927702645233, "grad_norm": 4.499449729919434, "learning_rate": 1.2658300141047747e-05, "loss": 1.98, "step": 5320 }, { "epoch": 0.9066938844943438, "grad_norm": 4.818963527679443, "learning_rate": 1.2654816560321371e-05, "loss": 2.0562, "step": 5330 }, { "epoch": 0.9083949987241643, "grad_norm": 5.28843355178833, "learning_rate": 1.265132577597886e-05, "loss": 2.0204, "step": 5340 }, { "epoch": 0.9100961129539848, "grad_norm": 3.337747097015381, "learning_rate": 1.2647827792259472e-05, "loss": 1.8886, "step": 5350 }, { "epoch": 0.9117972271838054, "grad_norm": 3.8496439456939697, "learning_rate": 1.2644322613411198e-05, "loss": 1.8986, "step": 5360 }, { "epoch": 0.913498341413626, "grad_norm": 4.1146559715271, "learning_rate": 1.2640810243690772e-05, "loss": 1.9446, "step": 5370 }, { "epoch": 0.9151994556434464, "grad_norm": 3.5451676845550537, "learning_rate": 1.2637290687363658e-05, "loss": 1.8395, "step": 5380 }, { "epoch": 0.916900569873267, "grad_norm": 3.9882071018218994, "learning_rate": 1.2633763948704051e-05, "loss": 1.9069, "step": 5390 }, { "epoch": 0.9186016841030875, "grad_norm": 4.388647556304932, "learning_rate": 1.2630230031994866e-05, "loss": 1.8664, "step": 5400 }, { "epoch": 0.9203027983329081, "grad_norm": 3.316866636276245, "learning_rate": 1.262668894152773e-05, "loss": 1.8976, "step": 5410 }, { "epoch": 0.9220039125627286, "grad_norm": 3.828775405883789, "learning_rate": 1.2623140681602997e-05, "loss": 2.0227, "step": 5420 }, { "epoch": 0.9237050267925491, "grad_norm": 3.898134231567383, "learning_rate": 1.2619585256529709e-05, "loss": 1.9358, "step": 5430 }, { "epoch": 0.9254061410223696, "grad_norm": 3.237833023071289, "learning_rate": 1.2616022670625625e-05, "loss": 1.9884, "step": 5440 }, { "epoch": 0.9271072552521902, "grad_norm": 4.014885425567627, "learning_rate": 1.2612452928217191e-05, "loss": 1.9748, "step": 5450 }, { "epoch": 0.9288083694820107, "grad_norm": 3.4019582271575928, "learning_rate": 1.2608876033639544e-05, "loss": 1.8849, "step": 5460 }, { "epoch": 0.9305094837118313, "grad_norm": 4.390929222106934, "learning_rate": 1.2605291991236512e-05, "loss": 1.8787, "step": 5470 }, { "epoch": 0.9322105979416517, "grad_norm": 3.5821480751037598, "learning_rate": 1.2601700805360602e-05, "loss": 1.9351, "step": 5480 }, { "epoch": 0.9339117121714723, "grad_norm": 4.156138896942139, "learning_rate": 1.259810248037299e-05, "loss": 2.0119, "step": 5490 }, { "epoch": 0.9356128264012928, "grad_norm": 4.29572868347168, "learning_rate": 1.259449702064353e-05, "loss": 2.0583, "step": 5500 }, { "epoch": 0.9373139406311134, "grad_norm": 3.802596092224121, "learning_rate": 1.2590884430550738e-05, "loss": 1.837, "step": 5510 }, { "epoch": 0.939015054860934, "grad_norm": 3.691622734069824, "learning_rate": 1.2587264714481787e-05, "loss": 1.9308, "step": 5520 }, { "epoch": 0.9407161690907544, "grad_norm": 3.693000078201294, "learning_rate": 1.2583637876832502e-05, "loss": 1.8829, "step": 5530 }, { "epoch": 0.942417283320575, "grad_norm": 4.015108108520508, "learning_rate": 1.2580003922007365e-05, "loss": 1.9215, "step": 5540 }, { "epoch": 0.9441183975503955, "grad_norm": 3.6444931030273438, "learning_rate": 1.2576362854419494e-05, "loss": 1.8794, "step": 5550 }, { "epoch": 0.9458195117802161, "grad_norm": 4.046820640563965, "learning_rate": 1.2572714678490646e-05, "loss": 1.9707, "step": 5560 }, { "epoch": 0.9475206260100366, "grad_norm": 4.2986297607421875, "learning_rate": 1.256905939865121e-05, "loss": 1.8715, "step": 5570 }, { "epoch": 0.9492217402398571, "grad_norm": 3.7403786182403564, "learning_rate": 1.2565397019340203e-05, "loss": 1.9072, "step": 5580 }, { "epoch": 0.9509228544696776, "grad_norm": 3.4848575592041016, "learning_rate": 1.256172754500527e-05, "loss": 1.9457, "step": 5590 }, { "epoch": 0.9526239686994982, "grad_norm": 4.1095991134643555, "learning_rate": 1.255805098010266e-05, "loss": 1.8761, "step": 5600 }, { "epoch": 0.9543250829293187, "grad_norm": 4.046541690826416, "learning_rate": 1.255436732909724e-05, "loss": 1.9785, "step": 5610 }, { "epoch": 0.9560261971591393, "grad_norm": 4.484745979309082, "learning_rate": 1.2550676596462483e-05, "loss": 1.9656, "step": 5620 }, { "epoch": 0.9577273113889597, "grad_norm": 4.468336582183838, "learning_rate": 1.2546978786680459e-05, "loss": 1.8788, "step": 5630 }, { "epoch": 0.9594284256187803, "grad_norm": 3.724666118621826, "learning_rate": 1.2543273904241836e-05, "loss": 1.882, "step": 5640 }, { "epoch": 0.9611295398486008, "grad_norm": 4.388457775115967, "learning_rate": 1.2539561953645868e-05, "loss": 1.9252, "step": 5650 }, { "epoch": 0.9628306540784214, "grad_norm": 4.207907199859619, "learning_rate": 1.2535842939400395e-05, "loss": 1.9167, "step": 5660 }, { "epoch": 0.9645317683082419, "grad_norm": 3.7429306507110596, "learning_rate": 1.2532116866021834e-05, "loss": 1.8324, "step": 5670 }, { "epoch": 0.9662328825380624, "grad_norm": 4.0732622146606445, "learning_rate": 1.2528383738035172e-05, "loss": 1.9824, "step": 5680 }, { "epoch": 0.9679339967678829, "grad_norm": 3.736802339553833, "learning_rate": 1.252464355997397e-05, "loss": 1.8791, "step": 5690 }, { "epoch": 0.9696351109977035, "grad_norm": 3.765545606613159, "learning_rate": 1.2520896336380344e-05, "loss": 1.9353, "step": 5700 }, { "epoch": 0.971336225227524, "grad_norm": 3.725818634033203, "learning_rate": 1.2517142071804969e-05, "loss": 1.9819, "step": 5710 }, { "epoch": 0.9730373394573446, "grad_norm": 3.6932244300842285, "learning_rate": 1.2513380770807073e-05, "loss": 1.9578, "step": 5720 }, { "epoch": 0.974738453687165, "grad_norm": 5.006459712982178, "learning_rate": 1.2509612437954425e-05, "loss": 1.8818, "step": 5730 }, { "epoch": 0.9764395679169856, "grad_norm": 3.6801838874816895, "learning_rate": 1.2505837077823335e-05, "loss": 1.9557, "step": 5740 }, { "epoch": 0.9781406821468062, "grad_norm": 3.696099042892456, "learning_rate": 1.2502054694998645e-05, "loss": 1.9777, "step": 5750 }, { "epoch": 0.9798417963766267, "grad_norm": 4.071863651275635, "learning_rate": 1.2498265294073733e-05, "loss": 1.9201, "step": 5760 }, { "epoch": 0.9815429106064473, "grad_norm": 3.497555732727051, "learning_rate": 1.249446887965049e-05, "loss": 1.8312, "step": 5770 }, { "epoch": 0.9832440248362677, "grad_norm": 4.3368916511535645, "learning_rate": 1.2490665456339332e-05, "loss": 1.9367, "step": 5780 }, { "epoch": 0.9849451390660883, "grad_norm": 3.4796338081359863, "learning_rate": 1.2486855028759182e-05, "loss": 1.956, "step": 5790 }, { "epoch": 0.9866462532959088, "grad_norm": 4.069372177124023, "learning_rate": 1.248303760153747e-05, "loss": 2.043, "step": 5800 }, { "epoch": 0.9883473675257294, "grad_norm": 4.666215419769287, "learning_rate": 1.2479213179310127e-05, "loss": 1.9051, "step": 5810 }, { "epoch": 0.9900484817555499, "grad_norm": 3.515803575515747, "learning_rate": 1.247538176672158e-05, "loss": 1.9167, "step": 5820 }, { "epoch": 0.9917495959853704, "grad_norm": 3.4563639163970947, "learning_rate": 1.2471543368424744e-05, "loss": 1.8534, "step": 5830 }, { "epoch": 0.9934507102151909, "grad_norm": 3.588573932647705, "learning_rate": 1.2467697989081017e-05, "loss": 1.9681, "step": 5840 }, { "epoch": 0.9951518244450115, "grad_norm": 4.674989223480225, "learning_rate": 1.2463845633360276e-05, "loss": 1.9613, "step": 5850 }, { "epoch": 0.996852938674832, "grad_norm": 3.8413889408111572, "learning_rate": 1.2459986305940868e-05, "loss": 1.9165, "step": 5860 }, { "epoch": 0.9985540529046526, "grad_norm": 3.9926917552948, "learning_rate": 1.2456120011509611e-05, "loss": 1.8678, "step": 5870 }, { "epoch": 1.000255167134473, "grad_norm": 3.9802398681640625, "learning_rate": 1.2452246754761779e-05, "loss": 1.805, "step": 5880 }, { "epoch": 1.0019562813642937, "grad_norm": 3.552992105484009, "learning_rate": 1.2448366540401103e-05, "loss": 1.9088, "step": 5890 }, { "epoch": 1.0036573955941142, "grad_norm": 3.8864123821258545, "learning_rate": 1.2444479373139763e-05, "loss": 1.8298, "step": 5900 }, { "epoch": 1.0053585098239346, "grad_norm": 3.5456321239471436, "learning_rate": 1.2440585257698385e-05, "loss": 2.0187, "step": 5910 }, { "epoch": 1.0070596240537553, "grad_norm": 3.786015748977661, "learning_rate": 1.2436684198806031e-05, "loss": 1.8857, "step": 5920 }, { "epoch": 1.0087607382835757, "grad_norm": 3.7994706630706787, "learning_rate": 1.2432776201200195e-05, "loss": 1.9403, "step": 5930 }, { "epoch": 1.0104618525133964, "grad_norm": 4.07720947265625, "learning_rate": 1.2428861269626798e-05, "loss": 1.8519, "step": 5940 }, { "epoch": 1.0121629667432168, "grad_norm": 4.268309116363525, "learning_rate": 1.242493940884018e-05, "loss": 1.9267, "step": 5950 }, { "epoch": 1.0138640809730373, "grad_norm": 3.8725132942199707, "learning_rate": 1.2421010623603099e-05, "loss": 1.9198, "step": 5960 }, { "epoch": 1.015565195202858, "grad_norm": 4.3192901611328125, "learning_rate": 1.241707491868672e-05, "loss": 1.9027, "step": 5970 }, { "epoch": 1.0172663094326784, "grad_norm": 4.3986430168151855, "learning_rate": 1.2413132298870612e-05, "loss": 1.82, "step": 5980 }, { "epoch": 1.018967423662499, "grad_norm": 2.96518874168396, "learning_rate": 1.2409182768942742e-05, "loss": 1.8239, "step": 5990 }, { "epoch": 1.0206685378923195, "grad_norm": 4.35980224609375, "learning_rate": 1.2405226333699469e-05, "loss": 1.8164, "step": 6000 }, { "epoch": 1.02236965212214, "grad_norm": 3.609809637069702, "learning_rate": 1.2401262997945535e-05, "loss": 1.9054, "step": 6010 }, { "epoch": 1.0240707663519606, "grad_norm": 4.375704288482666, "learning_rate": 1.2397292766494063e-05, "loss": 1.7685, "step": 6020 }, { "epoch": 1.025771880581781, "grad_norm": 3.8709499835968018, "learning_rate": 1.2393315644166555e-05, "loss": 1.9408, "step": 6030 }, { "epoch": 1.0274729948116017, "grad_norm": 4.003682613372803, "learning_rate": 1.2389331635792878e-05, "loss": 1.8039, "step": 6040 }, { "epoch": 1.0291741090414221, "grad_norm": 3.9275400638580322, "learning_rate": 1.2385340746211259e-05, "loss": 1.8981, "step": 6050 }, { "epoch": 1.0308752232712426, "grad_norm": 3.491661787033081, "learning_rate": 1.2381342980268283e-05, "loss": 1.8998, "step": 6060 }, { "epoch": 1.0325763375010633, "grad_norm": 4.1040568351745605, "learning_rate": 1.237733834281889e-05, "loss": 1.8652, "step": 6070 }, { "epoch": 1.0342774517308837, "grad_norm": 4.30794620513916, "learning_rate": 1.237332683872636e-05, "loss": 1.8184, "step": 6080 }, { "epoch": 1.0359785659607044, "grad_norm": 4.098321437835693, "learning_rate": 1.236930847286231e-05, "loss": 1.783, "step": 6090 }, { "epoch": 1.0376796801905248, "grad_norm": 4.051021575927734, "learning_rate": 1.2365283250106698e-05, "loss": 1.832, "step": 6100 }, { "epoch": 1.0393807944203453, "grad_norm": 3.617978572845459, "learning_rate": 1.2361251175347803e-05, "loss": 1.84, "step": 6110 }, { "epoch": 1.041081908650166, "grad_norm": 3.803922176361084, "learning_rate": 1.2357212253482226e-05, "loss": 1.8226, "step": 6120 }, { "epoch": 1.0427830228799864, "grad_norm": 3.8119354248046875, "learning_rate": 1.2353166489414886e-05, "loss": 1.8064, "step": 6130 }, { "epoch": 1.0444841371098068, "grad_norm": 4.282057285308838, "learning_rate": 1.2349113888059004e-05, "loss": 1.85, "step": 6140 }, { "epoch": 1.0461852513396275, "grad_norm": 4.06541633605957, "learning_rate": 1.2345054454336117e-05, "loss": 1.8458, "step": 6150 }, { "epoch": 1.047886365569448, "grad_norm": 4.3509392738342285, "learning_rate": 1.2340988193176048e-05, "loss": 1.8692, "step": 6160 }, { "epoch": 1.0495874797992686, "grad_norm": 3.179201364517212, "learning_rate": 1.2336915109516911e-05, "loss": 1.8236, "step": 6170 }, { "epoch": 1.051288594029089, "grad_norm": 3.8430192470550537, "learning_rate": 1.2332835208305116e-05, "loss": 1.8379, "step": 6180 }, { "epoch": 1.0529897082589095, "grad_norm": 3.632685661315918, "learning_rate": 1.2328748494495342e-05, "loss": 1.8193, "step": 6190 }, { "epoch": 1.0546908224887301, "grad_norm": 4.009765625, "learning_rate": 1.2324654973050547e-05, "loss": 1.7988, "step": 6200 }, { "epoch": 1.0563919367185506, "grad_norm": 3.634101390838623, "learning_rate": 1.2320554648941952e-05, "loss": 1.8799, "step": 6210 }, { "epoch": 1.0580930509483712, "grad_norm": 4.166500568389893, "learning_rate": 1.2316447527149044e-05, "loss": 1.9124, "step": 6220 }, { "epoch": 1.0597941651781917, "grad_norm": 3.7749826908111572, "learning_rate": 1.231233361265956e-05, "loss": 1.7627, "step": 6230 }, { "epoch": 1.0614952794080121, "grad_norm": 3.855471611022949, "learning_rate": 1.230821291046949e-05, "loss": 2.018, "step": 6240 }, { "epoch": 1.0631963936378328, "grad_norm": 3.7861082553863525, "learning_rate": 1.2304085425583068e-05, "loss": 1.8683, "step": 6250 }, { "epoch": 1.0648975078676532, "grad_norm": 4.207472801208496, "learning_rate": 1.2299951163012763e-05, "loss": 1.9023, "step": 6260 }, { "epoch": 1.066598622097474, "grad_norm": 4.255404472351074, "learning_rate": 1.2295810127779271e-05, "loss": 1.9143, "step": 6270 }, { "epoch": 1.0682997363272944, "grad_norm": 4.092896461486816, "learning_rate": 1.2291662324911517e-05, "loss": 1.796, "step": 6280 }, { "epoch": 1.0700008505571148, "grad_norm": 4.437941074371338, "learning_rate": 1.2287507759446648e-05, "loss": 1.8307, "step": 6290 }, { "epoch": 1.0717019647869355, "grad_norm": 3.610339641571045, "learning_rate": 1.2283346436430017e-05, "loss": 1.82, "step": 6300 }, { "epoch": 1.073403079016756, "grad_norm": 3.712244987487793, "learning_rate": 1.2279178360915188e-05, "loss": 1.9487, "step": 6310 }, { "epoch": 1.0751041932465766, "grad_norm": 4.214893341064453, "learning_rate": 1.2275003537963923e-05, "loss": 1.825, "step": 6320 }, { "epoch": 1.076805307476397, "grad_norm": 3.998854875564575, "learning_rate": 1.2270821972646179e-05, "loss": 1.8145, "step": 6330 }, { "epoch": 1.0785064217062175, "grad_norm": 3.4029698371887207, "learning_rate": 1.2266633670040101e-05, "loss": 1.8831, "step": 6340 }, { "epoch": 1.0802075359360381, "grad_norm": 4.011364936828613, "learning_rate": 1.2262438635232015e-05, "loss": 1.7939, "step": 6350 }, { "epoch": 1.0819086501658586, "grad_norm": 4.054690837860107, "learning_rate": 1.2258236873316424e-05, "loss": 1.784, "step": 6360 }, { "epoch": 1.0836097643956792, "grad_norm": 4.267333984375, "learning_rate": 1.2254028389396e-05, "loss": 1.837, "step": 6370 }, { "epoch": 1.0853108786254997, "grad_norm": 4.028444290161133, "learning_rate": 1.2249813188581575e-05, "loss": 1.7764, "step": 6380 }, { "epoch": 1.0870119928553201, "grad_norm": 4.8640031814575195, "learning_rate": 1.2245591275992147e-05, "loss": 1.8812, "step": 6390 }, { "epoch": 1.0887131070851408, "grad_norm": 4.347374439239502, "learning_rate": 1.2241362656754855e-05, "loss": 1.8798, "step": 6400 }, { "epoch": 1.0904142213149612, "grad_norm": 3.810614824295044, "learning_rate": 1.223712733600499e-05, "loss": 1.9044, "step": 6410 }, { "epoch": 1.092115335544782, "grad_norm": 4.5110602378845215, "learning_rate": 1.2232885318885973e-05, "loss": 1.8203, "step": 6420 }, { "epoch": 1.0938164497746024, "grad_norm": 3.834718942642212, "learning_rate": 1.2228636610549368e-05, "loss": 1.939, "step": 6430 }, { "epoch": 1.0955175640044228, "grad_norm": 3.7686891555786133, "learning_rate": 1.2224381216154855e-05, "loss": 1.8401, "step": 6440 }, { "epoch": 1.0972186782342435, "grad_norm": 4.520566940307617, "learning_rate": 1.222011914087024e-05, "loss": 1.8598, "step": 6450 }, { "epoch": 1.098919792464064, "grad_norm": 4.536760330200195, "learning_rate": 1.2215850389871438e-05, "loss": 1.834, "step": 6460 }, { "epoch": 1.1006209066938846, "grad_norm": 4.597507476806641, "learning_rate": 1.2211574968342477e-05, "loss": 1.8079, "step": 6470 }, { "epoch": 1.102322020923705, "grad_norm": 3.8545501232147217, "learning_rate": 1.2207292881475478e-05, "loss": 1.9277, "step": 6480 }, { "epoch": 1.1040231351535255, "grad_norm": 4.181375026702881, "learning_rate": 1.2203004134470664e-05, "loss": 1.821, "step": 6490 }, { "epoch": 1.1057242493833461, "grad_norm": 3.9119484424591064, "learning_rate": 1.2198708732536338e-05, "loss": 1.8269, "step": 6500 }, { "epoch": 1.1074253636131666, "grad_norm": 3.5906999111175537, "learning_rate": 1.2194406680888894e-05, "loss": 1.8119, "step": 6510 }, { "epoch": 1.1091264778429872, "grad_norm": 3.4192230701446533, "learning_rate": 1.2190097984752797e-05, "loss": 1.7266, "step": 6520 }, { "epoch": 1.1108275920728077, "grad_norm": 3.0418713092803955, "learning_rate": 1.2185782649360577e-05, "loss": 1.9245, "step": 6530 }, { "epoch": 1.1125287063026281, "grad_norm": 3.510636568069458, "learning_rate": 1.2181460679952836e-05, "loss": 1.8278, "step": 6540 }, { "epoch": 1.1142298205324488, "grad_norm": 3.8723108768463135, "learning_rate": 1.2177132081778222e-05, "loss": 1.8933, "step": 6550 }, { "epoch": 1.1159309347622692, "grad_norm": 3.8740153312683105, "learning_rate": 1.217279686009344e-05, "loss": 1.8602, "step": 6560 }, { "epoch": 1.11763204899209, "grad_norm": 3.662519931793213, "learning_rate": 1.2168455020163242e-05, "loss": 1.8353, "step": 6570 }, { "epoch": 1.1193331632219103, "grad_norm": 3.3864052295684814, "learning_rate": 1.2164106567260405e-05, "loss": 1.9044, "step": 6580 }, { "epoch": 1.1210342774517308, "grad_norm": 3.839643955230713, "learning_rate": 1.215975150666575e-05, "loss": 1.8148, "step": 6590 }, { "epoch": 1.1227353916815515, "grad_norm": 4.567275524139404, "learning_rate": 1.2155389843668114e-05, "loss": 1.8031, "step": 6600 }, { "epoch": 1.124436505911372, "grad_norm": 3.8312809467315674, "learning_rate": 1.2151021583564355e-05, "loss": 1.8066, "step": 6610 }, { "epoch": 1.1261376201411926, "grad_norm": 4.2484211921691895, "learning_rate": 1.2146646731659344e-05, "loss": 1.849, "step": 6620 }, { "epoch": 1.127838734371013, "grad_norm": 3.935384750366211, "learning_rate": 1.2142265293265952e-05, "loss": 1.8554, "step": 6630 }, { "epoch": 1.1295398486008335, "grad_norm": 2.8967413902282715, "learning_rate": 1.2137877273705054e-05, "loss": 1.8206, "step": 6640 }, { "epoch": 1.1312409628306541, "grad_norm": 3.9680368900299072, "learning_rate": 1.2133482678305516e-05, "loss": 1.8569, "step": 6650 }, { "epoch": 1.1329420770604746, "grad_norm": 3.64766788482666, "learning_rate": 1.212908151240419e-05, "loss": 1.8013, "step": 6660 }, { "epoch": 1.1346431912902952, "grad_norm": 4.119669437408447, "learning_rate": 1.2124673781345903e-05, "loss": 1.8953, "step": 6670 }, { "epoch": 1.1363443055201157, "grad_norm": 4.643278121948242, "learning_rate": 1.2120259490483464e-05, "loss": 1.7907, "step": 6680 }, { "epoch": 1.1380454197499361, "grad_norm": 4.240262031555176, "learning_rate": 1.211583864517764e-05, "loss": 1.8067, "step": 6690 }, { "epoch": 1.1397465339797568, "grad_norm": 4.9946489334106445, "learning_rate": 1.2111411250797156e-05, "loss": 1.8233, "step": 6700 }, { "epoch": 1.1414476482095772, "grad_norm": 4.387408256530762, "learning_rate": 1.2106977312718699e-05, "loss": 1.86, "step": 6710 }, { "epoch": 1.143148762439398, "grad_norm": 3.9364635944366455, "learning_rate": 1.2102536836326901e-05, "loss": 1.8694, "step": 6720 }, { "epoch": 1.1448498766692183, "grad_norm": 4.609739780426025, "learning_rate": 1.2098089827014327e-05, "loss": 1.8502, "step": 6730 }, { "epoch": 1.1465509908990388, "grad_norm": 4.438980579376221, "learning_rate": 1.2093636290181483e-05, "loss": 1.851, "step": 6740 }, { "epoch": 1.1482521051288594, "grad_norm": 4.564301490783691, "learning_rate": 1.2089176231236798e-05, "loss": 1.886, "step": 6750 }, { "epoch": 1.14995321935868, "grad_norm": 4.274376392364502, "learning_rate": 1.2084709655596623e-05, "loss": 1.8449, "step": 6760 }, { "epoch": 1.1516543335885006, "grad_norm": 5.069614887237549, "learning_rate": 1.2080236568685226e-05, "loss": 1.8416, "step": 6770 }, { "epoch": 1.153355447818321, "grad_norm": 3.986492156982422, "learning_rate": 1.2075756975934775e-05, "loss": 1.8218, "step": 6780 }, { "epoch": 1.1550565620481414, "grad_norm": 4.155921936035156, "learning_rate": 1.2071270882785346e-05, "loss": 1.8001, "step": 6790 }, { "epoch": 1.156757676277962, "grad_norm": 4.066920757293701, "learning_rate": 1.2066778294684905e-05, "loss": 1.8378, "step": 6800 }, { "epoch": 1.1584587905077826, "grad_norm": 4.247832775115967, "learning_rate": 1.2062279217089307e-05, "loss": 1.8766, "step": 6810 }, { "epoch": 1.1601599047376032, "grad_norm": 3.875669002532959, "learning_rate": 1.2057773655462288e-05, "loss": 1.8497, "step": 6820 }, { "epoch": 1.1618610189674237, "grad_norm": 4.125594139099121, "learning_rate": 1.2053261615275455e-05, "loss": 1.9058, "step": 6830 }, { "epoch": 1.163562133197244, "grad_norm": 4.312350273132324, "learning_rate": 1.2048743102008293e-05, "loss": 1.9242, "step": 6840 }, { "epoch": 1.1652632474270648, "grad_norm": 3.9462075233459473, "learning_rate": 1.2044218121148132e-05, "loss": 1.9029, "step": 6850 }, { "epoch": 1.1669643616568852, "grad_norm": 4.291337013244629, "learning_rate": 1.2039686678190173e-05, "loss": 1.8806, "step": 6860 }, { "epoch": 1.1686654758867059, "grad_norm": 3.9357290267944336, "learning_rate": 1.2035148778637448e-05, "loss": 1.8882, "step": 6870 }, { "epoch": 1.1703665901165263, "grad_norm": 3.846447467803955, "learning_rate": 1.2030604428000846e-05, "loss": 1.906, "step": 6880 }, { "epoch": 1.1720677043463468, "grad_norm": 4.311671257019043, "learning_rate": 1.202605363179908e-05, "loss": 1.8491, "step": 6890 }, { "epoch": 1.1737688185761674, "grad_norm": 3.54050350189209, "learning_rate": 1.2021496395558695e-05, "loss": 1.9521, "step": 6900 }, { "epoch": 1.1754699328059879, "grad_norm": 4.3173651695251465, "learning_rate": 1.2016932724814051e-05, "loss": 1.764, "step": 6910 }, { "epoch": 1.1771710470358085, "grad_norm": 3.853074550628662, "learning_rate": 1.2012362625107332e-05, "loss": 1.7911, "step": 6920 }, { "epoch": 1.178872161265629, "grad_norm": 3.8800690174102783, "learning_rate": 1.2007786101988517e-05, "loss": 1.8936, "step": 6930 }, { "epoch": 1.1805732754954494, "grad_norm": 4.200761795043945, "learning_rate": 1.20032031610154e-05, "loss": 1.8044, "step": 6940 }, { "epoch": 1.18227438972527, "grad_norm": 4.506488800048828, "learning_rate": 1.1998613807753559e-05, "loss": 1.777, "step": 6950 }, { "epoch": 1.1839755039550905, "grad_norm": 3.8466339111328125, "learning_rate": 1.199401804777636e-05, "loss": 1.8042, "step": 6960 }, { "epoch": 1.1856766181849112, "grad_norm": 3.482567310333252, "learning_rate": 1.198941588666495e-05, "loss": 1.872, "step": 6970 }, { "epoch": 1.1873777324147317, "grad_norm": 3.968972682952881, "learning_rate": 1.1984807330008253e-05, "loss": 1.8401, "step": 6980 }, { "epoch": 1.189078846644552, "grad_norm": 4.146262168884277, "learning_rate": 1.1980192383402957e-05, "loss": 1.8257, "step": 6990 }, { "epoch": 1.1907799608743728, "grad_norm": 4.3229079246521, "learning_rate": 1.197557105245351e-05, "loss": 1.9198, "step": 7000 }, { "epoch": 1.1924810751041932, "grad_norm": 4.2531914710998535, "learning_rate": 1.1970943342772116e-05, "loss": 1.8715, "step": 7010 }, { "epoch": 1.1941821893340139, "grad_norm": 3.7825818061828613, "learning_rate": 1.1966309259978719e-05, "loss": 1.8092, "step": 7020 }, { "epoch": 1.1958833035638343, "grad_norm": 4.886743545532227, "learning_rate": 1.196166880970101e-05, "loss": 1.8566, "step": 7030 }, { "epoch": 1.1975844177936548, "grad_norm": 4.063226222991943, "learning_rate": 1.1957021997574409e-05, "loss": 1.909, "step": 7040 }, { "epoch": 1.1992855320234754, "grad_norm": 4.031005382537842, "learning_rate": 1.195236882924206e-05, "loss": 1.8579, "step": 7050 }, { "epoch": 1.2009866462532959, "grad_norm": 4.141384601593018, "learning_rate": 1.1947709310354832e-05, "loss": 1.9419, "step": 7060 }, { "epoch": 1.2026877604831165, "grad_norm": 4.252750396728516, "learning_rate": 1.19430434465713e-05, "loss": 1.9514, "step": 7070 }, { "epoch": 1.204388874712937, "grad_norm": 4.281816482543945, "learning_rate": 1.1938371243557747e-05, "loss": 1.9094, "step": 7080 }, { "epoch": 1.2060899889427574, "grad_norm": 4.234149932861328, "learning_rate": 1.1933692706988156e-05, "loss": 1.8436, "step": 7090 }, { "epoch": 1.207791103172578, "grad_norm": 4.110588073730469, "learning_rate": 1.1929007842544197e-05, "loss": 1.8141, "step": 7100 }, { "epoch": 1.2094922174023985, "grad_norm": 4.462185382843018, "learning_rate": 1.1924316655915232e-05, "loss": 1.8112, "step": 7110 }, { "epoch": 1.2111933316322192, "grad_norm": 3.660940408706665, "learning_rate": 1.1919619152798293e-05, "loss": 1.8147, "step": 7120 }, { "epoch": 1.2128944458620396, "grad_norm": 3.804466724395752, "learning_rate": 1.191491533889809e-05, "loss": 1.8511, "step": 7130 }, { "epoch": 1.21459556009186, "grad_norm": 4.5483293533325195, "learning_rate": 1.1910205219926986e-05, "loss": 1.8949, "step": 7140 }, { "epoch": 1.2162966743216808, "grad_norm": 4.025954246520996, "learning_rate": 1.1905488801605014e-05, "loss": 1.9193, "step": 7150 }, { "epoch": 1.2179977885515012, "grad_norm": 4.330733299255371, "learning_rate": 1.1900766089659847e-05, "loss": 1.8404, "step": 7160 }, { "epoch": 1.2196989027813219, "grad_norm": 3.604673385620117, "learning_rate": 1.189603708982681e-05, "loss": 1.9742, "step": 7170 }, { "epoch": 1.2214000170111423, "grad_norm": 4.058786869049072, "learning_rate": 1.1891301807848854e-05, "loss": 1.8803, "step": 7180 }, { "epoch": 1.2231011312409628, "grad_norm": 4.989222526550293, "learning_rate": 1.1886560249476568e-05, "loss": 1.7776, "step": 7190 }, { "epoch": 1.2248022454707834, "grad_norm": 4.440055847167969, "learning_rate": 1.1881812420468158e-05, "loss": 1.8804, "step": 7200 }, { "epoch": 1.2265033597006039, "grad_norm": 4.92519998550415, "learning_rate": 1.1877058326589447e-05, "loss": 1.7766, "step": 7210 }, { "epoch": 1.2282044739304245, "grad_norm": 3.1428630352020264, "learning_rate": 1.1872297973613864e-05, "loss": 1.9083, "step": 7220 }, { "epoch": 1.229905588160245, "grad_norm": 4.2585225105285645, "learning_rate": 1.1867531367322442e-05, "loss": 1.8742, "step": 7230 }, { "epoch": 1.2316067023900654, "grad_norm": 3.5493762493133545, "learning_rate": 1.1862758513503807e-05, "loss": 1.8458, "step": 7240 }, { "epoch": 1.233307816619886, "grad_norm": 4.4121880531311035, "learning_rate": 1.185797941795417e-05, "loss": 1.8574, "step": 7250 }, { "epoch": 1.2350089308497065, "grad_norm": 3.277613401412964, "learning_rate": 1.185319408647733e-05, "loss": 1.8732, "step": 7260 }, { "epoch": 1.2367100450795272, "grad_norm": 3.6618874073028564, "learning_rate": 1.1848402524884646e-05, "loss": 1.8563, "step": 7270 }, { "epoch": 1.2384111593093476, "grad_norm": 4.217512607574463, "learning_rate": 1.1843604738995052e-05, "loss": 1.8422, "step": 7280 }, { "epoch": 1.240112273539168, "grad_norm": 4.140598773956299, "learning_rate": 1.183880073463504e-05, "loss": 1.8516, "step": 7290 }, { "epoch": 1.2418133877689888, "grad_norm": 3.781187057495117, "learning_rate": 1.1833990517638654e-05, "loss": 1.8753, "step": 7300 }, { "epoch": 1.2435145019988092, "grad_norm": 3.7208242416381836, "learning_rate": 1.1829174093847479e-05, "loss": 1.793, "step": 7310 }, { "epoch": 1.2452156162286299, "grad_norm": 3.963973045349121, "learning_rate": 1.1824351469110637e-05, "loss": 1.8121, "step": 7320 }, { "epoch": 1.2469167304584503, "grad_norm": 4.138283729553223, "learning_rate": 1.181952264928479e-05, "loss": 1.8787, "step": 7330 }, { "epoch": 1.2486178446882708, "grad_norm": 4.180245876312256, "learning_rate": 1.1814687640234112e-05, "loss": 1.8196, "step": 7340 }, { "epoch": 1.2503189589180914, "grad_norm": 3.703684091567993, "learning_rate": 1.1809846447830301e-05, "loss": 1.865, "step": 7350 }, { "epoch": 1.2520200731479119, "grad_norm": 4.624788761138916, "learning_rate": 1.1804999077952558e-05, "loss": 1.827, "step": 7360 }, { "epoch": 1.2537211873777325, "grad_norm": 4.256951332092285, "learning_rate": 1.1800145536487591e-05, "loss": 1.8149, "step": 7370 }, { "epoch": 1.255422301607553, "grad_norm": 4.586760520935059, "learning_rate": 1.1795285829329602e-05, "loss": 1.7933, "step": 7380 }, { "epoch": 1.2571234158373734, "grad_norm": 3.419952869415283, "learning_rate": 1.1790419962380278e-05, "loss": 1.8373, "step": 7390 }, { "epoch": 1.258824530067194, "grad_norm": 3.8896842002868652, "learning_rate": 1.178554794154879e-05, "loss": 1.8466, "step": 7400 }, { "epoch": 1.2605256442970145, "grad_norm": 3.539841651916504, "learning_rate": 1.1780669772751775e-05, "loss": 1.9326, "step": 7410 }, { "epoch": 1.2622267585268352, "grad_norm": 3.7069525718688965, "learning_rate": 1.1775785461913352e-05, "loss": 1.8622, "step": 7420 }, { "epoch": 1.2639278727566556, "grad_norm": 3.6968042850494385, "learning_rate": 1.177089501496508e-05, "loss": 1.8145, "step": 7430 }, { "epoch": 1.265628986986476, "grad_norm": 3.9641242027282715, "learning_rate": 1.1765998437845982e-05, "loss": 1.8396, "step": 7440 }, { "epoch": 1.2673301012162967, "grad_norm": 3.476146697998047, "learning_rate": 1.1761095736502525e-05, "loss": 1.8247, "step": 7450 }, { "epoch": 1.2690312154461172, "grad_norm": 4.397093772888184, "learning_rate": 1.1756186916888604e-05, "loss": 1.7688, "step": 7460 }, { "epoch": 1.2707323296759379, "grad_norm": 3.8471662998199463, "learning_rate": 1.1751271984965555e-05, "loss": 1.9181, "step": 7470 }, { "epoch": 1.2724334439057583, "grad_norm": 4.207676887512207, "learning_rate": 1.1746350946702128e-05, "loss": 1.7721, "step": 7480 }, { "epoch": 1.2741345581355787, "grad_norm": 4.509179592132568, "learning_rate": 1.1741423808074499e-05, "loss": 1.8171, "step": 7490 }, { "epoch": 1.2758356723653994, "grad_norm": 3.6446003913879395, "learning_rate": 1.1736490575066243e-05, "loss": 1.8396, "step": 7500 }, { "epoch": 1.2775367865952199, "grad_norm": 4.312423229217529, "learning_rate": 1.173155125366834e-05, "loss": 1.7823, "step": 7510 }, { "epoch": 1.2792379008250405, "grad_norm": 3.0327653884887695, "learning_rate": 1.1726605849879162e-05, "loss": 1.8165, "step": 7520 }, { "epoch": 1.280939015054861, "grad_norm": 3.464770793914795, "learning_rate": 1.1721654369704471e-05, "loss": 1.8435, "step": 7530 }, { "epoch": 1.2826401292846814, "grad_norm": 3.883833885192871, "learning_rate": 1.1716696819157403e-05, "loss": 1.9098, "step": 7540 }, { "epoch": 1.284341243514502, "grad_norm": 3.4528632164001465, "learning_rate": 1.1711733204258472e-05, "loss": 1.8664, "step": 7550 }, { "epoch": 1.2860423577443225, "grad_norm": 4.106072902679443, "learning_rate": 1.1706763531035554e-05, "loss": 1.8568, "step": 7560 }, { "epoch": 1.2877434719741432, "grad_norm": 4.352649211883545, "learning_rate": 1.1701787805523879e-05, "loss": 1.8838, "step": 7570 }, { "epoch": 1.2894445862039636, "grad_norm": 4.048183441162109, "learning_rate": 1.1696806033766031e-05, "loss": 1.7881, "step": 7580 }, { "epoch": 1.291145700433784, "grad_norm": 3.615002155303955, "learning_rate": 1.1691818221811937e-05, "loss": 1.7633, "step": 7590 }, { "epoch": 1.2928468146636047, "grad_norm": 3.5091981887817383, "learning_rate": 1.1686824375718855e-05, "loss": 1.8125, "step": 7600 }, { "epoch": 1.2945479288934252, "grad_norm": 3.6418280601501465, "learning_rate": 1.1681824501551377e-05, "loss": 1.752, "step": 7610 }, { "epoch": 1.2962490431232458, "grad_norm": 3.4704437255859375, "learning_rate": 1.1676818605381409e-05, "loss": 1.8887, "step": 7620 }, { "epoch": 1.2979501573530663, "grad_norm": 3.6811389923095703, "learning_rate": 1.1671806693288177e-05, "loss": 1.8914, "step": 7630 }, { "epoch": 1.2996512715828867, "grad_norm": 3.922544240951538, "learning_rate": 1.1666788771358206e-05, "loss": 1.8412, "step": 7640 }, { "epoch": 1.3013523858127074, "grad_norm": 3.4494495391845703, "learning_rate": 1.1661764845685325e-05, "loss": 1.842, "step": 7650 }, { "epoch": 1.3030535000425278, "grad_norm": 4.570243835449219, "learning_rate": 1.165673492237065e-05, "loss": 1.7121, "step": 7660 }, { "epoch": 1.3047546142723485, "grad_norm": 3.663918972015381, "learning_rate": 1.1651699007522584e-05, "loss": 1.7647, "step": 7670 }, { "epoch": 1.306455728502169, "grad_norm": 4.224978446960449, "learning_rate": 1.1646657107256803e-05, "loss": 1.86, "step": 7680 }, { "epoch": 1.3081568427319894, "grad_norm": 3.920491933822632, "learning_rate": 1.1641609227696256e-05, "loss": 1.8903, "step": 7690 }, { "epoch": 1.30985795696181, "grad_norm": 3.300448179244995, "learning_rate": 1.1636555374971147e-05, "loss": 1.9144, "step": 7700 }, { "epoch": 1.3115590711916305, "grad_norm": 4.223546981811523, "learning_rate": 1.163149555521894e-05, "loss": 1.8116, "step": 7710 }, { "epoch": 1.3132601854214512, "grad_norm": 4.763315677642822, "learning_rate": 1.1626429774584343e-05, "loss": 1.8878, "step": 7720 }, { "epoch": 1.3149612996512716, "grad_norm": 3.610171318054199, "learning_rate": 1.1621358039219301e-05, "loss": 1.8383, "step": 7730 }, { "epoch": 1.316662413881092, "grad_norm": 3.96856427192688, "learning_rate": 1.1616280355282994e-05, "loss": 1.8283, "step": 7740 }, { "epoch": 1.3183635281109127, "grad_norm": 4.092318058013916, "learning_rate": 1.1611196728941822e-05, "loss": 1.8411, "step": 7750 }, { "epoch": 1.3200646423407332, "grad_norm": 3.7782583236694336, "learning_rate": 1.1606107166369409e-05, "loss": 1.8832, "step": 7760 }, { "epoch": 1.3217657565705538, "grad_norm": 3.8647406101226807, "learning_rate": 1.1601011673746579e-05, "loss": 1.8914, "step": 7770 }, { "epoch": 1.3234668708003743, "grad_norm": 4.192235469818115, "learning_rate": 1.1595910257261363e-05, "loss": 1.8367, "step": 7780 }, { "epoch": 1.3251679850301947, "grad_norm": 4.087222099304199, "learning_rate": 1.1590802923108984e-05, "loss": 1.7934, "step": 7790 }, { "epoch": 1.3268690992600152, "grad_norm": 4.581682205200195, "learning_rate": 1.1585689677491854e-05, "loss": 1.8093, "step": 7800 }, { "epoch": 1.3285702134898358, "grad_norm": 4.077419281005859, "learning_rate": 1.1580570526619559e-05, "loss": 1.8714, "step": 7810 }, { "epoch": 1.3302713277196565, "grad_norm": 3.7243072986602783, "learning_rate": 1.157544547670886e-05, "loss": 1.8082, "step": 7820 }, { "epoch": 1.331972441949477, "grad_norm": 4.393478870391846, "learning_rate": 1.1570314533983683e-05, "loss": 1.911, "step": 7830 }, { "epoch": 1.3336735561792974, "grad_norm": 3.8908112049102783, "learning_rate": 1.1565177704675106e-05, "loss": 1.8287, "step": 7840 }, { "epoch": 1.3353746704091178, "grad_norm": 3.665543794631958, "learning_rate": 1.1560034995021359e-05, "loss": 1.8854, "step": 7850 }, { "epoch": 1.3370757846389385, "grad_norm": 3.881185531616211, "learning_rate": 1.1554886411267817e-05, "loss": 1.8453, "step": 7860 }, { "epoch": 1.3387768988687592, "grad_norm": 4.123805046081543, "learning_rate": 1.1549731959666976e-05, "loss": 1.8287, "step": 7870 }, { "epoch": 1.3404780130985796, "grad_norm": 3.782844066619873, "learning_rate": 1.1544571646478473e-05, "loss": 1.824, "step": 7880 }, { "epoch": 1.3421791273284, "grad_norm": 3.8619236946105957, "learning_rate": 1.1539405477969054e-05, "loss": 1.8937, "step": 7890 }, { "epoch": 1.3438802415582205, "grad_norm": 4.254144668579102, "learning_rate": 1.1534233460412579e-05, "loss": 1.8273, "step": 7900 }, { "epoch": 1.3455813557880412, "grad_norm": 4.115326881408691, "learning_rate": 1.1529055600090007e-05, "loss": 1.8292, "step": 7910 }, { "epoch": 1.3472824700178618, "grad_norm": 4.696479797363281, "learning_rate": 1.15238719032894e-05, "loss": 1.8741, "step": 7920 }, { "epoch": 1.3489835842476823, "grad_norm": 3.319958448410034, "learning_rate": 1.15186823763059e-05, "loss": 2.0001, "step": 7930 }, { "epoch": 1.3506846984775027, "grad_norm": 4.459231376647949, "learning_rate": 1.1513487025441738e-05, "loss": 1.7909, "step": 7940 }, { "epoch": 1.3523858127073232, "grad_norm": 3.6977946758270264, "learning_rate": 1.150828585700621e-05, "loss": 1.9375, "step": 7950 }, { "epoch": 1.3540869269371438, "grad_norm": 3.963927745819092, "learning_rate": 1.1503078877315678e-05, "loss": 1.8812, "step": 7960 }, { "epoch": 1.3557880411669645, "grad_norm": 3.8123812675476074, "learning_rate": 1.1497866092693564e-05, "loss": 1.8044, "step": 7970 }, { "epoch": 1.357489155396785, "grad_norm": 4.007399559020996, "learning_rate": 1.149264750947034e-05, "loss": 1.9753, "step": 7980 }, { "epoch": 1.3591902696266054, "grad_norm": 4.110389709472656, "learning_rate": 1.1487423133983515e-05, "loss": 1.8695, "step": 7990 }, { "epoch": 1.3608913838564258, "grad_norm": 4.561108589172363, "learning_rate": 1.148219297257764e-05, "loss": 1.8648, "step": 8000 }, { "epoch": 1.3625924980862465, "grad_norm": 4.409726619720459, "learning_rate": 1.1476957031604283e-05, "loss": 1.8565, "step": 8010 }, { "epoch": 1.3642936123160672, "grad_norm": 3.8639228343963623, "learning_rate": 1.147171531742204e-05, "loss": 1.889, "step": 8020 }, { "epoch": 1.3659947265458876, "grad_norm": 4.365349769592285, "learning_rate": 1.146646783639651e-05, "loss": 1.8925, "step": 8030 }, { "epoch": 1.367695840775708, "grad_norm": 4.182748794555664, "learning_rate": 1.1461214594900302e-05, "loss": 1.7287, "step": 8040 }, { "epoch": 1.3693969550055285, "grad_norm": 3.912654161453247, "learning_rate": 1.1455955599313017e-05, "loss": 1.7366, "step": 8050 }, { "epoch": 1.3710980692353492, "grad_norm": 4.697111129760742, "learning_rate": 1.1450690856021244e-05, "loss": 1.8811, "step": 8060 }, { "epoch": 1.3727991834651696, "grad_norm": 4.002067565917969, "learning_rate": 1.1445420371418552e-05, "loss": 1.8407, "step": 8070 }, { "epoch": 1.3745002976949903, "grad_norm": 4.642768859863281, "learning_rate": 1.1440144151905489e-05, "loss": 1.8951, "step": 8080 }, { "epoch": 1.3762014119248107, "grad_norm": 4.091320514678955, "learning_rate": 1.1434862203889552e-05, "loss": 1.7457, "step": 8090 }, { "epoch": 1.3779025261546312, "grad_norm": 4.746745586395264, "learning_rate": 1.142957453378521e-05, "loss": 1.7577, "step": 8100 }, { "epoch": 1.3796036403844518, "grad_norm": 3.4975032806396484, "learning_rate": 1.1424281148013873e-05, "loss": 1.8887, "step": 8110 }, { "epoch": 1.3813047546142723, "grad_norm": 4.448775291442871, "learning_rate": 1.1418982053003897e-05, "loss": 1.8713, "step": 8120 }, { "epoch": 1.383005868844093, "grad_norm": 3.9427380561828613, "learning_rate": 1.1413677255190569e-05, "loss": 1.8355, "step": 8130 }, { "epoch": 1.3847069830739134, "grad_norm": 4.403369426727295, "learning_rate": 1.1408366761016097e-05, "loss": 1.9192, "step": 8140 }, { "epoch": 1.3864080973037338, "grad_norm": 4.291594982147217, "learning_rate": 1.1403050576929615e-05, "loss": 1.833, "step": 8150 }, { "epoch": 1.3881092115335545, "grad_norm": 4.179330825805664, "learning_rate": 1.139772870938716e-05, "loss": 1.787, "step": 8160 }, { "epoch": 1.389810325763375, "grad_norm": 4.093100070953369, "learning_rate": 1.1392401164851679e-05, "loss": 1.8275, "step": 8170 }, { "epoch": 1.3915114399931956, "grad_norm": 4.226815700531006, "learning_rate": 1.1387067949793004e-05, "loss": 1.8355, "step": 8180 }, { "epoch": 1.393212554223016, "grad_norm": 4.04533052444458, "learning_rate": 1.138172907068786e-05, "loss": 1.7927, "step": 8190 }, { "epoch": 1.3949136684528365, "grad_norm": 3.832703113555908, "learning_rate": 1.1376384534019843e-05, "loss": 1.8264, "step": 8200 }, { "epoch": 1.3966147826826572, "grad_norm": 4.171768665313721, "learning_rate": 1.1371034346279432e-05, "loss": 1.7574, "step": 8210 }, { "epoch": 1.3983158969124776, "grad_norm": 3.803027868270874, "learning_rate": 1.1365678513963956e-05, "loss": 1.8445, "step": 8220 }, { "epoch": 1.4000170111422983, "grad_norm": 3.384702444076538, "learning_rate": 1.1360317043577609e-05, "loss": 1.8349, "step": 8230 }, { "epoch": 1.4017181253721187, "grad_norm": 4.899093151092529, "learning_rate": 1.1354949941631423e-05, "loss": 1.8671, "step": 8240 }, { "epoch": 1.4034192396019392, "grad_norm": 4.261693477630615, "learning_rate": 1.1349577214643276e-05, "loss": 1.8051, "step": 8250 }, { "epoch": 1.4051203538317598, "grad_norm": 4.3631062507629395, "learning_rate": 1.1344198869137871e-05, "loss": 1.8807, "step": 8260 }, { "epoch": 1.4068214680615803, "grad_norm": 4.09499454498291, "learning_rate": 1.1338814911646742e-05, "loss": 1.7799, "step": 8270 }, { "epoch": 1.408522582291401, "grad_norm": 3.800813913345337, "learning_rate": 1.133342534870823e-05, "loss": 1.9407, "step": 8280 }, { "epoch": 1.4102236965212214, "grad_norm": 3.953648328781128, "learning_rate": 1.132803018686749e-05, "loss": 1.7741, "step": 8290 }, { "epoch": 1.4119248107510418, "grad_norm": 3.5527772903442383, "learning_rate": 1.1322629432676473e-05, "loss": 1.9239, "step": 8300 }, { "epoch": 1.4136259249808625, "grad_norm": 3.6812546253204346, "learning_rate": 1.1317223092693921e-05, "loss": 1.865, "step": 8310 }, { "epoch": 1.415327039210683, "grad_norm": 3.842613935470581, "learning_rate": 1.131181117348536e-05, "loss": 1.8384, "step": 8320 }, { "epoch": 1.4170281534405036, "grad_norm": 4.840838432312012, "learning_rate": 1.1306393681623095e-05, "loss": 1.7868, "step": 8330 }, { "epoch": 1.418729267670324, "grad_norm": 3.6300811767578125, "learning_rate": 1.1300970623686192e-05, "loss": 1.7641, "step": 8340 }, { "epoch": 1.4204303819001445, "grad_norm": 4.901582717895508, "learning_rate": 1.1295542006260482e-05, "loss": 1.8725, "step": 8350 }, { "epoch": 1.4221314961299651, "grad_norm": 3.8441402912139893, "learning_rate": 1.1290107835938543e-05, "loss": 1.8115, "step": 8360 }, { "epoch": 1.4238326103597856, "grad_norm": 4.257385730743408, "learning_rate": 1.1284668119319701e-05, "loss": 1.6997, "step": 8370 }, { "epoch": 1.4255337245896063, "grad_norm": 3.5949318408966064, "learning_rate": 1.1279222863010015e-05, "loss": 1.9023, "step": 8380 }, { "epoch": 1.4272348388194267, "grad_norm": 3.525285005569458, "learning_rate": 1.1273772073622273e-05, "loss": 1.7829, "step": 8390 }, { "epoch": 1.4289359530492471, "grad_norm": 4.029623508453369, "learning_rate": 1.126831575777598e-05, "loss": 1.7838, "step": 8400 }, { "epoch": 1.4306370672790678, "grad_norm": 4.243871212005615, "learning_rate": 1.1262853922097353e-05, "loss": 1.8764, "step": 8410 }, { "epoch": 1.4323381815088883, "grad_norm": 3.630593776702881, "learning_rate": 1.1257386573219315e-05, "loss": 1.8042, "step": 8420 }, { "epoch": 1.434039295738709, "grad_norm": 3.6100449562072754, "learning_rate": 1.1251913717781481e-05, "loss": 1.792, "step": 8430 }, { "epoch": 1.4357404099685294, "grad_norm": 3.461958885192871, "learning_rate": 1.1246435362430155e-05, "loss": 1.8373, "step": 8440 }, { "epoch": 1.4374415241983498, "grad_norm": 3.8997554779052734, "learning_rate": 1.1240951513818321e-05, "loss": 1.8916, "step": 8450 }, { "epoch": 1.4391426384281705, "grad_norm": 5.204623699188232, "learning_rate": 1.123546217860563e-05, "loss": 1.9023, "step": 8460 }, { "epoch": 1.440843752657991, "grad_norm": 3.4482414722442627, "learning_rate": 1.1229967363458403e-05, "loss": 1.7964, "step": 8470 }, { "epoch": 1.4425448668878116, "grad_norm": 3.914092540740967, "learning_rate": 1.122446707504961e-05, "loss": 1.7912, "step": 8480 }, { "epoch": 1.444245981117632, "grad_norm": 3.4906105995178223, "learning_rate": 1.1218961320058867e-05, "loss": 1.7525, "step": 8490 }, { "epoch": 1.4459470953474525, "grad_norm": 3.8349711894989014, "learning_rate": 1.1213450105172432e-05, "loss": 1.8479, "step": 8500 }, { "epoch": 1.4476482095772731, "grad_norm": 4.6149115562438965, "learning_rate": 1.1207933437083197e-05, "loss": 1.793, "step": 8510 }, { "epoch": 1.4493493238070936, "grad_norm": 4.550126552581787, "learning_rate": 1.1202411322490667e-05, "loss": 1.7767, "step": 8520 }, { "epoch": 1.4510504380369142, "grad_norm": 4.194046974182129, "learning_rate": 1.1196883768100972e-05, "loss": 1.882, "step": 8530 }, { "epoch": 1.4527515522667347, "grad_norm": 4.48024320602417, "learning_rate": 1.1191350780626837e-05, "loss": 1.8334, "step": 8540 }, { "epoch": 1.4544526664965551, "grad_norm": 4.147039890289307, "learning_rate": 1.1185812366787594e-05, "loss": 1.8671, "step": 8550 }, { "epoch": 1.4561537807263758, "grad_norm": 4.3436455726623535, "learning_rate": 1.1180268533309163e-05, "loss": 1.8078, "step": 8560 }, { "epoch": 1.4578548949561962, "grad_norm": 3.813438892364502, "learning_rate": 1.1174719286924037e-05, "loss": 1.9086, "step": 8570 }, { "epoch": 1.459556009186017, "grad_norm": 4.152981758117676, "learning_rate": 1.11691646343713e-05, "loss": 1.8482, "step": 8580 }, { "epoch": 1.4612571234158374, "grad_norm": 4.7066731452941895, "learning_rate": 1.1163604582396585e-05, "loss": 1.7849, "step": 8590 }, { "epoch": 1.4629582376456578, "grad_norm": 4.566705703735352, "learning_rate": 1.1158039137752086e-05, "loss": 1.8481, "step": 8600 }, { "epoch": 1.4646593518754785, "grad_norm": 4.4183244705200195, "learning_rate": 1.1152468307196556e-05, "loss": 1.8281, "step": 8610 }, { "epoch": 1.466360466105299, "grad_norm": 3.972365617752075, "learning_rate": 1.1146892097495276e-05, "loss": 1.7882, "step": 8620 }, { "epoch": 1.4680615803351196, "grad_norm": 5.084153175354004, "learning_rate": 1.114131051542006e-05, "loss": 1.7916, "step": 8630 }, { "epoch": 1.46976269456494, "grad_norm": 4.221405029296875, "learning_rate": 1.1135723567749258e-05, "loss": 1.8082, "step": 8640 }, { "epoch": 1.4714638087947605, "grad_norm": 5.822017669677734, "learning_rate": 1.1130131261267726e-05, "loss": 1.8063, "step": 8650 }, { "epoch": 1.4731649230245811, "grad_norm": 4.446184158325195, "learning_rate": 1.112453360276683e-05, "loss": 1.8443, "step": 8660 }, { "epoch": 1.4748660372544016, "grad_norm": 4.830113887786865, "learning_rate": 1.1118930599044436e-05, "loss": 1.8257, "step": 8670 }, { "epoch": 1.4765671514842222, "grad_norm": 3.6857476234436035, "learning_rate": 1.11133222569049e-05, "loss": 1.7696, "step": 8680 }, { "epoch": 1.4782682657140427, "grad_norm": 4.067925930023193, "learning_rate": 1.1107708583159068e-05, "loss": 1.9156, "step": 8690 }, { "epoch": 1.4799693799438631, "grad_norm": 4.013896465301514, "learning_rate": 1.1102089584624247e-05, "loss": 1.9133, "step": 8700 }, { "epoch": 1.4816704941736838, "grad_norm": 4.576723098754883, "learning_rate": 1.1096465268124223e-05, "loss": 1.8425, "step": 8710 }, { "epoch": 1.4833716084035042, "grad_norm": 3.8686013221740723, "learning_rate": 1.1090835640489238e-05, "loss": 1.8831, "step": 8720 }, { "epoch": 1.485072722633325, "grad_norm": 5.166773319244385, "learning_rate": 1.1085200708555978e-05, "loss": 1.8464, "step": 8730 }, { "epoch": 1.4867738368631453, "grad_norm": 3.826002597808838, "learning_rate": 1.1079560479167575e-05, "loss": 1.7631, "step": 8740 }, { "epoch": 1.4884749510929658, "grad_norm": 3.7657535076141357, "learning_rate": 1.1073914959173593e-05, "loss": 1.7616, "step": 8750 }, { "epoch": 1.4901760653227865, "grad_norm": 3.9099855422973633, "learning_rate": 1.1068264155430026e-05, "loss": 1.8288, "step": 8760 }, { "epoch": 1.491877179552607, "grad_norm": 3.8278870582580566, "learning_rate": 1.1062608074799276e-05, "loss": 1.8276, "step": 8770 }, { "epoch": 1.4935782937824276, "grad_norm": 4.208233833312988, "learning_rate": 1.1056946724150159e-05, "loss": 1.9022, "step": 8780 }, { "epoch": 1.495279408012248, "grad_norm": 4.208041667938232, "learning_rate": 1.1051280110357887e-05, "loss": 1.8226, "step": 8790 }, { "epoch": 1.4969805222420685, "grad_norm": 3.7516579627990723, "learning_rate": 1.1045608240304072e-05, "loss": 1.7655, "step": 8800 }, { "epoch": 1.4986816364718891, "grad_norm": 4.082713603973389, "learning_rate": 1.1039931120876698e-05, "loss": 1.8776, "step": 8810 }, { "epoch": 1.5003827507017096, "grad_norm": 4.539659023284912, "learning_rate": 1.1034248758970132e-05, "loss": 1.7614, "step": 8820 }, { "epoch": 1.5020838649315302, "grad_norm": 4.518299579620361, "learning_rate": 1.1028561161485101e-05, "loss": 1.9139, "step": 8830 }, { "epoch": 1.5037849791613507, "grad_norm": 6.363071918487549, "learning_rate": 1.10228683353287e-05, "loss": 1.8792, "step": 8840 }, { "epoch": 1.5054860933911711, "grad_norm": 4.739774703979492, "learning_rate": 1.1017170287414366e-05, "loss": 1.8488, "step": 8850 }, { "epoch": 1.5071872076209918, "grad_norm": 4.243860244750977, "learning_rate": 1.1011467024661875e-05, "loss": 1.8552, "step": 8860 }, { "epoch": 1.5088883218508122, "grad_norm": 4.369020938873291, "learning_rate": 1.1005758553997344e-05, "loss": 1.8134, "step": 8870 }, { "epoch": 1.510589436080633, "grad_norm": 3.736544370651245, "learning_rate": 1.1000044882353212e-05, "loss": 1.8532, "step": 8880 }, { "epoch": 1.5122905503104533, "grad_norm": 4.25405216217041, "learning_rate": 1.099432601666823e-05, "loss": 1.8365, "step": 8890 }, { "epoch": 1.5139916645402738, "grad_norm": 4.405406951904297, "learning_rate": 1.098860196388746e-05, "loss": 1.7931, "step": 8900 }, { "epoch": 1.5156927787700945, "grad_norm": 4.439251899719238, "learning_rate": 1.0982872730962264e-05, "loss": 1.8344, "step": 8910 }, { "epoch": 1.517393892999915, "grad_norm": 4.5193376541137695, "learning_rate": 1.0977138324850292e-05, "loss": 1.8816, "step": 8920 }, { "epoch": 1.5190950072297356, "grad_norm": 4.007116317749023, "learning_rate": 1.0971398752515479e-05, "loss": 1.8915, "step": 8930 }, { "epoch": 1.520796121459556, "grad_norm": 4.432260513305664, "learning_rate": 1.0965654020928033e-05, "loss": 1.8104, "step": 8940 }, { "epoch": 1.5224972356893764, "grad_norm": 4.290425777435303, "learning_rate": 1.0959904137064426e-05, "loss": 1.7932, "step": 8950 }, { "epoch": 1.5241983499191971, "grad_norm": 3.5299010276794434, "learning_rate": 1.0954149107907391e-05, "loss": 1.9034, "step": 8960 }, { "epoch": 1.5258994641490176, "grad_norm": 4.343108654022217, "learning_rate": 1.0948388940445903e-05, "loss": 1.79, "step": 8970 }, { "epoch": 1.5276005783788382, "grad_norm": 3.799375057220459, "learning_rate": 1.0942623641675187e-05, "loss": 1.7986, "step": 8980 }, { "epoch": 1.5293016926086587, "grad_norm": 4.593927383422852, "learning_rate": 1.0936853218596688e-05, "loss": 1.8083, "step": 8990 }, { "epoch": 1.5310028068384791, "grad_norm": 3.587238073348999, "learning_rate": 1.0931077678218081e-05, "loss": 1.8337, "step": 9000 }, { "epoch": 1.5327039210682998, "grad_norm": 4.195778846740723, "learning_rate": 1.0925297027553255e-05, "loss": 1.7882, "step": 9010 }, { "epoch": 1.5344050352981202, "grad_norm": 4.813584804534912, "learning_rate": 1.0920090078462173e-05, "loss": 1.8808, "step": 9020 }, { "epoch": 1.536106149527941, "grad_norm": 5.184726715087891, "learning_rate": 1.0914299737599012e-05, "loss": 1.7869, "step": 9030 }, { "epoch": 1.5378072637577613, "grad_norm": 3.95269775390625, "learning_rate": 1.0908504306824966e-05, "loss": 1.7439, "step": 9040 }, { "epoch": 1.5395083779875818, "grad_norm": 3.7676851749420166, "learning_rate": 1.0902703793178087e-05, "loss": 1.8334, "step": 9050 }, { "epoch": 1.5412094922174024, "grad_norm": 4.78963565826416, "learning_rate": 1.0896898203702586e-05, "loss": 1.8415, "step": 9060 }, { "epoch": 1.5429106064472229, "grad_norm": 5.882055282592773, "learning_rate": 1.0891087545448844e-05, "loss": 1.7718, "step": 9070 }, { "epoch": 1.5446117206770436, "grad_norm": 4.677974224090576, "learning_rate": 1.0885271825473395e-05, "loss": 1.8712, "step": 9080 }, { "epoch": 1.546312834906864, "grad_norm": 4.491163730621338, "learning_rate": 1.0879451050838925e-05, "loss": 1.9664, "step": 9090 }, { "epoch": 1.5480139491366844, "grad_norm": 3.6754069328308105, "learning_rate": 1.0873625228614252e-05, "loss": 1.8512, "step": 9100 }, { "epoch": 1.549715063366505, "grad_norm": 4.288093090057373, "learning_rate": 1.0867794365874328e-05, "loss": 1.799, "step": 9110 }, { "epoch": 1.5514161775963256, "grad_norm": 5.016831874847412, "learning_rate": 1.0861958469700224e-05, "loss": 1.8035, "step": 9120 }, { "epoch": 1.5531172918261462, "grad_norm": 4.571620464324951, "learning_rate": 1.0856117547179126e-05, "loss": 1.8682, "step": 9130 }, { "epoch": 1.5548184060559667, "grad_norm": 4.138873100280762, "learning_rate": 1.0850271605404325e-05, "loss": 1.9147, "step": 9140 }, { "epoch": 1.556519520285787, "grad_norm": 3.770691156387329, "learning_rate": 1.0844420651475199e-05, "loss": 1.8129, "step": 9150 }, { "epoch": 1.5582206345156078, "grad_norm": 4.242394924163818, "learning_rate": 1.0838564692497224e-05, "loss": 1.8318, "step": 9160 }, { "epoch": 1.5599217487454282, "grad_norm": 4.3046956062316895, "learning_rate": 1.083270373558195e-05, "loss": 1.7672, "step": 9170 }, { "epoch": 1.5616228629752489, "grad_norm": 4.118201732635498, "learning_rate": 1.0826837787846995e-05, "loss": 1.8444, "step": 9180 }, { "epoch": 1.5633239772050693, "grad_norm": 4.4052252769470215, "learning_rate": 1.0820966856416037e-05, "loss": 1.8891, "step": 9190 }, { "epoch": 1.5650250914348898, "grad_norm": 4.588507175445557, "learning_rate": 1.081509094841881e-05, "loss": 1.8004, "step": 9200 }, { "epoch": 1.5667262056647104, "grad_norm": 4.094803810119629, "learning_rate": 1.0809210070991085e-05, "loss": 1.8421, "step": 9210 }, { "epoch": 1.5684273198945309, "grad_norm": 4.139965057373047, "learning_rate": 1.0803324231274682e-05, "loss": 1.6933, "step": 9220 }, { "epoch": 1.5701284341243515, "grad_norm": 3.955552816390991, "learning_rate": 1.0797433436417429e-05, "loss": 1.8102, "step": 9230 }, { "epoch": 1.571829548354172, "grad_norm": 3.6447393894195557, "learning_rate": 1.0791537693573182e-05, "loss": 1.6936, "step": 9240 }, { "epoch": 1.5735306625839924, "grad_norm": 4.680001258850098, "learning_rate": 1.078563700990181e-05, "loss": 1.8554, "step": 9250 }, { "epoch": 1.575231776813813, "grad_norm": 4.303575038909912, "learning_rate": 1.0779731392569172e-05, "loss": 1.8058, "step": 9260 }, { "epoch": 1.5769328910436335, "grad_norm": 4.5315752029418945, "learning_rate": 1.0773820848747122e-05, "loss": 1.7302, "step": 9270 }, { "epoch": 1.5786340052734542, "grad_norm": 4.147940158843994, "learning_rate": 1.0767905385613499e-05, "loss": 1.835, "step": 9280 }, { "epoch": 1.5803351195032747, "grad_norm": 4.4117279052734375, "learning_rate": 1.0761985010352117e-05, "loss": 1.8636, "step": 9290 }, { "epoch": 1.582036233733095, "grad_norm": 4.065666198730469, "learning_rate": 1.0756059730152753e-05, "loss": 1.7914, "step": 9300 }, { "epoch": 1.5837373479629158, "grad_norm": 4.648740291595459, "learning_rate": 1.0750129552211138e-05, "loss": 1.8272, "step": 9310 }, { "epoch": 1.5854384621927362, "grad_norm": 4.147993564605713, "learning_rate": 1.0744194483728956e-05, "loss": 1.7533, "step": 9320 }, { "epoch": 1.5871395764225569, "grad_norm": 3.6206252574920654, "learning_rate": 1.0738254531913827e-05, "loss": 1.6779, "step": 9330 }, { "epoch": 1.5888406906523773, "grad_norm": 4.248616695404053, "learning_rate": 1.0732309703979303e-05, "loss": 1.9106, "step": 9340 }, { "epoch": 1.5905418048821978, "grad_norm": 4.507028102874756, "learning_rate": 1.0726360007144855e-05, "loss": 1.7764, "step": 9350 }, { "epoch": 1.5922429191120182, "grad_norm": 7.24201774597168, "learning_rate": 1.0720405448635871e-05, "loss": 1.7711, "step": 9360 }, { "epoch": 1.5939440333418389, "grad_norm": 4.546375751495361, "learning_rate": 1.071444603568364e-05, "loss": 1.874, "step": 9370 }, { "epoch": 1.5956451475716595, "grad_norm": 4.4695024490356445, "learning_rate": 1.0708481775525347e-05, "loss": 1.8446, "step": 9380 }, { "epoch": 1.59734626180148, "grad_norm": 4.036654472351074, "learning_rate": 1.0702512675404066e-05, "loss": 1.8142, "step": 9390 }, { "epoch": 1.5990473760313004, "grad_norm": 3.3620481491088867, "learning_rate": 1.0696538742568742e-05, "loss": 1.8514, "step": 9400 }, { "epoch": 1.6007484902611209, "grad_norm": 4.128815174102783, "learning_rate": 1.0690559984274196e-05, "loss": 1.8632, "step": 9410 }, { "epoch": 1.6024496044909415, "grad_norm": 4.229797840118408, "learning_rate": 1.0684576407781106e-05, "loss": 1.7916, "step": 9420 }, { "epoch": 1.6041507187207622, "grad_norm": 4.053248405456543, "learning_rate": 1.0678588020356e-05, "loss": 1.8409, "step": 9430 }, { "epoch": 1.6058518329505826, "grad_norm": 3.9605884552001953, "learning_rate": 1.0672594829271251e-05, "loss": 1.7641, "step": 9440 }, { "epoch": 1.607552947180403, "grad_norm": 4.531416893005371, "learning_rate": 1.0666596841805066e-05, "loss": 1.8203, "step": 9450 }, { "epoch": 1.6092540614102235, "grad_norm": 4.545661926269531, "learning_rate": 1.0660594065241477e-05, "loss": 1.8606, "step": 9460 }, { "epoch": 1.6109551756400442, "grad_norm": 4.635908603668213, "learning_rate": 1.0654586506870324e-05, "loss": 1.8076, "step": 9470 }, { "epoch": 1.6126562898698649, "grad_norm": 4.799504280090332, "learning_rate": 1.0648574173987266e-05, "loss": 1.7702, "step": 9480 }, { "epoch": 1.6143574040996853, "grad_norm": 3.9335310459136963, "learning_rate": 1.0642557073893754e-05, "loss": 1.7715, "step": 9490 }, { "epoch": 1.6160585183295058, "grad_norm": 4.012069225311279, "learning_rate": 1.0636535213897029e-05, "loss": 1.7753, "step": 9500 }, { "epoch": 1.6177596325593262, "grad_norm": 4.231178283691406, "learning_rate": 1.0630508601310112e-05, "loss": 1.8751, "step": 9510 }, { "epoch": 1.6194607467891469, "grad_norm": 4.730849266052246, "learning_rate": 1.0624477243451794e-05, "loss": 1.8002, "step": 9520 }, { "epoch": 1.6211618610189675, "grad_norm": 4.264655590057373, "learning_rate": 1.0618441147646635e-05, "loss": 1.8493, "step": 9530 }, { "epoch": 1.622862975248788, "grad_norm": 4.410914897918701, "learning_rate": 1.0612400321224943e-05, "loss": 1.8096, "step": 9540 }, { "epoch": 1.6245640894786084, "grad_norm": 4.016700744628906, "learning_rate": 1.0606354771522772e-05, "loss": 1.7711, "step": 9550 }, { "epoch": 1.6262652037084289, "grad_norm": 4.219663143157959, "learning_rate": 1.0600304505881915e-05, "loss": 1.7203, "step": 9560 }, { "epoch": 1.6279663179382495, "grad_norm": 4.0092034339904785, "learning_rate": 1.0594249531649886e-05, "loss": 1.7203, "step": 9570 }, { "epoch": 1.6296674321680702, "grad_norm": 4.708493709564209, "learning_rate": 1.0588189856179922e-05, "loss": 1.8393, "step": 9580 }, { "epoch": 1.6313685463978906, "grad_norm": 4.2801923751831055, "learning_rate": 1.0582125486830968e-05, "loss": 1.7938, "step": 9590 }, { "epoch": 1.633069660627711, "grad_norm": 3.3154683113098145, "learning_rate": 1.0576056430967673e-05, "loss": 1.7587, "step": 9600 }, { "epoch": 1.6347707748575315, "grad_norm": 4.226798057556152, "learning_rate": 1.056998269596037e-05, "loss": 1.7655, "step": 9610 }, { "epoch": 1.6364718890873522, "grad_norm": 3.5839290618896484, "learning_rate": 1.0563904289185081e-05, "loss": 1.8516, "step": 9620 }, { "epoch": 1.6381730033171729, "grad_norm": 4.124845027923584, "learning_rate": 1.0557821218023497e-05, "loss": 1.8054, "step": 9630 }, { "epoch": 1.6398741175469933, "grad_norm": 4.484249114990234, "learning_rate": 1.0551733489862973e-05, "loss": 1.8195, "step": 9640 }, { "epoch": 1.6415752317768137, "grad_norm": 4.2265625, "learning_rate": 1.0545641112096527e-05, "loss": 1.8027, "step": 9650 }, { "epoch": 1.6432763460066342, "grad_norm": 3.7763261795043945, "learning_rate": 1.0539544092122815e-05, "loss": 1.7931, "step": 9660 }, { "epoch": 1.6449774602364549, "grad_norm": 4.304544925689697, "learning_rate": 1.0533442437346133e-05, "loss": 1.8094, "step": 9670 }, { "epoch": 1.6466785744662755, "grad_norm": 3.8868472576141357, "learning_rate": 1.0527336155176407e-05, "loss": 1.813, "step": 9680 }, { "epoch": 1.648379688696096, "grad_norm": 4.530086517333984, "learning_rate": 1.0521225253029186e-05, "loss": 1.732, "step": 9690 }, { "epoch": 1.6500808029259164, "grad_norm": 3.6333813667297363, "learning_rate": 1.0515109738325618e-05, "loss": 1.7679, "step": 9700 }, { "epoch": 1.6517819171557369, "grad_norm": 3.765293836593628, "learning_rate": 1.0508989618492466e-05, "loss": 1.8888, "step": 9710 }, { "epoch": 1.6534830313855575, "grad_norm": 3.8620569705963135, "learning_rate": 1.0502864900962075e-05, "loss": 1.7507, "step": 9720 }, { "epoch": 1.6551841456153782, "grad_norm": 3.9069511890411377, "learning_rate": 1.0496735593172384e-05, "loss": 1.7659, "step": 9730 }, { "epoch": 1.6568852598451986, "grad_norm": 4.23579216003418, "learning_rate": 1.0490601702566898e-05, "loss": 1.8153, "step": 9740 }, { "epoch": 1.658586374075019, "grad_norm": 4.105676651000977, "learning_rate": 1.0484463236594686e-05, "loss": 1.8104, "step": 9750 }, { "epoch": 1.6602874883048395, "grad_norm": 4.492506504058838, "learning_rate": 1.0478320202710382e-05, "loss": 1.8233, "step": 9760 }, { "epoch": 1.6619886025346602, "grad_norm": 4.099576473236084, "learning_rate": 1.047217260837416e-05, "loss": 1.8074, "step": 9770 }, { "epoch": 1.6636897167644809, "grad_norm": 3.619990825653076, "learning_rate": 1.0466020461051735e-05, "loss": 1.8891, "step": 9780 }, { "epoch": 1.6653908309943013, "grad_norm": 4.893577575683594, "learning_rate": 1.0459863768214355e-05, "loss": 1.8991, "step": 9790 }, { "epoch": 1.6670919452241217, "grad_norm": 4.113924980163574, "learning_rate": 1.0453702537338779e-05, "loss": 1.8974, "step": 9800 }, { "epoch": 1.6687930594539422, "grad_norm": 3.6121366024017334, "learning_rate": 1.0447536775907285e-05, "loss": 1.8565, "step": 9810 }, { "epoch": 1.6704941736837629, "grad_norm": 4.138027667999268, "learning_rate": 1.044136649140765e-05, "loss": 1.8812, "step": 9820 }, { "epoch": 1.6721952879135835, "grad_norm": 4.8973388671875, "learning_rate": 1.0435191691333147e-05, "loss": 1.8696, "step": 9830 }, { "epoch": 1.673896402143404, "grad_norm": 3.971330404281616, "learning_rate": 1.0429012383182522e-05, "loss": 1.8755, "step": 9840 }, { "epoch": 1.6755975163732244, "grad_norm": 3.5776214599609375, "learning_rate": 1.0422828574460013e-05, "loss": 1.7165, "step": 9850 }, { "epoch": 1.6772986306030448, "grad_norm": 4.539066791534424, "learning_rate": 1.0416640272675307e-05, "loss": 1.7711, "step": 9860 }, { "epoch": 1.6789997448328655, "grad_norm": 3.9174132347106934, "learning_rate": 1.041044748534356e-05, "loss": 1.7762, "step": 9870 }, { "epoch": 1.6807008590626862, "grad_norm": 4.058426380157471, "learning_rate": 1.0404250219985366e-05, "loss": 1.717, "step": 9880 }, { "epoch": 1.6824019732925066, "grad_norm": 3.7579991817474365, "learning_rate": 1.039804848412676e-05, "loss": 1.7561, "step": 9890 }, { "epoch": 1.684103087522327, "grad_norm": 3.993664503097534, "learning_rate": 1.0391842285299212e-05, "loss": 1.8824, "step": 9900 }, { "epoch": 1.6858042017521475, "grad_norm": 4.473487854003906, "learning_rate": 1.0385631631039606e-05, "loss": 1.8299, "step": 9910 }, { "epoch": 1.6875053159819682, "grad_norm": 4.511467933654785, "learning_rate": 1.0379416528890233e-05, "loss": 1.7641, "step": 9920 }, { "epoch": 1.6892064302117888, "grad_norm": 3.6925597190856934, "learning_rate": 1.0373196986398793e-05, "loss": 1.7787, "step": 9930 }, { "epoch": 1.6909075444416093, "grad_norm": 3.6119349002838135, "learning_rate": 1.0366973011118377e-05, "loss": 1.8198, "step": 9940 }, { "epoch": 1.6926086586714297, "grad_norm": 4.093749046325684, "learning_rate": 1.0360744610607455e-05, "loss": 1.7895, "step": 9950 }, { "epoch": 1.6943097729012502, "grad_norm": 3.8011345863342285, "learning_rate": 1.0354511792429879e-05, "loss": 1.814, "step": 9960 }, { "epoch": 1.6960108871310708, "grad_norm": 4.387421131134033, "learning_rate": 1.0348274564154857e-05, "loss": 1.7612, "step": 9970 }, { "epoch": 1.6977120013608915, "grad_norm": 4.6559576988220215, "learning_rate": 1.0342032933356953e-05, "loss": 1.9255, "step": 9980 }, { "epoch": 1.699413115590712, "grad_norm": 3.9810171127319336, "learning_rate": 1.0335786907616088e-05, "loss": 1.8769, "step": 9990 }, { "epoch": 1.7011142298205324, "grad_norm": 4.540745735168457, "learning_rate": 1.032953649451751e-05, "loss": 1.7372, "step": 10000 }, { "epoch": 1.7028153440503528, "grad_norm": 4.483917236328125, "learning_rate": 1.03232817016518e-05, "loss": 1.8221, "step": 10010 }, { "epoch": 1.7045164582801735, "grad_norm": 4.2136616706848145, "learning_rate": 1.0317022536614854e-05, "loss": 1.7018, "step": 10020 }, { "epoch": 1.7062175725099942, "grad_norm": 4.191181659698486, "learning_rate": 1.031075900700788e-05, "loss": 1.7263, "step": 10030 }, { "epoch": 1.7079186867398146, "grad_norm": 4.057601451873779, "learning_rate": 1.0304491120437385e-05, "loss": 1.8144, "step": 10040 }, { "epoch": 1.709619800969635, "grad_norm": 3.8585596084594727, "learning_rate": 1.0298218884515172e-05, "loss": 1.828, "step": 10050 }, { "epoch": 1.7113209151994555, "grad_norm": 3.869547128677368, "learning_rate": 1.0291942306858321e-05, "loss": 1.8319, "step": 10060 }, { "epoch": 1.7130220294292762, "grad_norm": 4.130283355712891, "learning_rate": 1.0285661395089184e-05, "loss": 1.7499, "step": 10070 }, { "epoch": 1.7147231436590968, "grad_norm": 5.252286434173584, "learning_rate": 1.027937615683538e-05, "loss": 1.847, "step": 10080 }, { "epoch": 1.7164242578889173, "grad_norm": 4.278568744659424, "learning_rate": 1.027308659972978e-05, "loss": 1.8668, "step": 10090 }, { "epoch": 1.7181253721187377, "grad_norm": 4.391490459442139, "learning_rate": 1.02667927314105e-05, "loss": 1.7655, "step": 10100 }, { "epoch": 1.7198264863485582, "grad_norm": 3.2366139888763428, "learning_rate": 1.0260494559520891e-05, "loss": 1.8268, "step": 10110 }, { "epoch": 1.7215276005783788, "grad_norm": 4.020900726318359, "learning_rate": 1.0254192091709532e-05, "loss": 1.7843, "step": 10120 }, { "epoch": 1.7232287148081995, "grad_norm": 4.717282295227051, "learning_rate": 1.0247885335630218e-05, "loss": 1.9005, "step": 10130 }, { "epoch": 1.72492982903802, "grad_norm": 4.262220859527588, "learning_rate": 1.0241574298941955e-05, "loss": 1.7488, "step": 10140 }, { "epoch": 1.7266309432678404, "grad_norm": 3.7055883407592773, "learning_rate": 1.023525898930894e-05, "loss": 1.8134, "step": 10150 }, { "epoch": 1.7283320574976608, "grad_norm": 4.13065767288208, "learning_rate": 1.0228939414400564e-05, "loss": 1.8133, "step": 10160 }, { "epoch": 1.7300331717274815, "grad_norm": 3.898186206817627, "learning_rate": 1.0222615581891401e-05, "loss": 1.6895, "step": 10170 }, { "epoch": 1.7317342859573022, "grad_norm": 3.9497716426849365, "learning_rate": 1.0216287499461187e-05, "loss": 1.801, "step": 10180 }, { "epoch": 1.7334354001871226, "grad_norm": 4.006579399108887, "learning_rate": 1.0209955174794828e-05, "loss": 1.8844, "step": 10190 }, { "epoch": 1.735136514416943, "grad_norm": 4.037294387817383, "learning_rate": 1.0203618615582373e-05, "loss": 1.8429, "step": 10200 }, { "epoch": 1.7368376286467635, "grad_norm": 4.3926100730896, "learning_rate": 1.0197277829519023e-05, "loss": 1.8029, "step": 10210 }, { "epoch": 1.7385387428765842, "grad_norm": 4.085505485534668, "learning_rate": 1.0190932824305105e-05, "loss": 1.8537, "step": 10220 }, { "epoch": 1.7402398571064048, "grad_norm": 4.870039463043213, "learning_rate": 1.018458360764607e-05, "loss": 1.8234, "step": 10230 }, { "epoch": 1.7419409713362253, "grad_norm": 3.6122336387634277, "learning_rate": 1.0178230187252492e-05, "loss": 1.7095, "step": 10240 }, { "epoch": 1.7436420855660457, "grad_norm": 4.699728488922119, "learning_rate": 1.017187257084004e-05, "loss": 1.8254, "step": 10250 }, { "epoch": 1.7453431997958662, "grad_norm": 4.258387565612793, "learning_rate": 1.0165510766129481e-05, "loss": 1.795, "step": 10260 }, { "epoch": 1.7470443140256868, "grad_norm": 3.91975736618042, "learning_rate": 1.015914478084667e-05, "loss": 1.8109, "step": 10270 }, { "epoch": 1.7487454282555075, "grad_norm": 3.489849328994751, "learning_rate": 1.015277462272254e-05, "loss": 1.8538, "step": 10280 }, { "epoch": 1.750446542485328, "grad_norm": 3.8970468044281006, "learning_rate": 1.014640029949309e-05, "loss": 1.7765, "step": 10290 }, { "epoch": 1.7521476567151484, "grad_norm": 3.8178770542144775, "learning_rate": 1.0140021818899377e-05, "loss": 1.7456, "step": 10300 }, { "epoch": 1.7538487709449688, "grad_norm": 4.668042182922363, "learning_rate": 1.0133639188687506e-05, "loss": 1.8154, "step": 10310 }, { "epoch": 1.7555498851747895, "grad_norm": 3.638029098510742, "learning_rate": 1.0127252416608624e-05, "loss": 1.857, "step": 10320 }, { "epoch": 1.7572509994046102, "grad_norm": 4.546308994293213, "learning_rate": 1.0120861510418905e-05, "loss": 1.8085, "step": 10330 }, { "epoch": 1.7589521136344306, "grad_norm": 4.162155628204346, "learning_rate": 1.0114466477879548e-05, "loss": 1.8464, "step": 10340 }, { "epoch": 1.760653227864251, "grad_norm": 4.644067764282227, "learning_rate": 1.0108067326756755e-05, "loss": 1.8394, "step": 10350 }, { "epoch": 1.7623543420940715, "grad_norm": 4.256865501403809, "learning_rate": 1.0101664064821739e-05, "loss": 1.9286, "step": 10360 }, { "epoch": 1.7640554563238922, "grad_norm": 4.50824499130249, "learning_rate": 1.0095256699850698e-05, "loss": 1.7065, "step": 10370 }, { "epoch": 1.7657565705537128, "grad_norm": 3.926837205886841, "learning_rate": 1.0088845239624815e-05, "loss": 1.7754, "step": 10380 }, { "epoch": 1.7674576847835333, "grad_norm": 4.212017059326172, "learning_rate": 1.0082429691930252e-05, "loss": 1.7532, "step": 10390 }, { "epoch": 1.7691587990133537, "grad_norm": 4.2193498611450195, "learning_rate": 1.0076010064558126e-05, "loss": 1.7152, "step": 10400 }, { "epoch": 1.7708599132431742, "grad_norm": 4.437827110290527, "learning_rate": 1.006958636530451e-05, "loss": 1.7723, "step": 10410 }, { "epoch": 1.7725610274729948, "grad_norm": 4.158639907836914, "learning_rate": 1.006315860197043e-05, "loss": 1.8109, "step": 10420 }, { "epoch": 1.7742621417028155, "grad_norm": 3.403130054473877, "learning_rate": 1.0056726782361835e-05, "loss": 1.7922, "step": 10430 }, { "epoch": 1.775963255932636, "grad_norm": 3.8050906658172607, "learning_rate": 1.0050290914289612e-05, "loss": 1.8183, "step": 10440 }, { "epoch": 1.7776643701624564, "grad_norm": 3.5411856174468994, "learning_rate": 1.004385100556956e-05, "loss": 1.7803, "step": 10450 }, { "epoch": 1.7793654843922768, "grad_norm": 4.4496307373046875, "learning_rate": 1.0037407064022376e-05, "loss": 1.7554, "step": 10460 }, { "epoch": 1.7810665986220975, "grad_norm": 4.679849147796631, "learning_rate": 1.0030959097473672e-05, "loss": 1.7382, "step": 10470 }, { "epoch": 1.7827677128519182, "grad_norm": 5.210113048553467, "learning_rate": 1.0024507113753934e-05, "loss": 1.8625, "step": 10480 }, { "epoch": 1.7844688270817386, "grad_norm": 4.474411964416504, "learning_rate": 1.001805112069853e-05, "loss": 1.8229, "step": 10490 }, { "epoch": 1.786169941311559, "grad_norm": 4.553360462188721, "learning_rate": 1.0011591126147702e-05, "loss": 1.8339, "step": 10500 }, { "epoch": 1.7878710555413795, "grad_norm": 4.378016948699951, "learning_rate": 1.0005127137946546e-05, "loss": 1.8492, "step": 10510 }, { "epoch": 1.7895721697712001, "grad_norm": 3.943833827972412, "learning_rate": 9.998659163945008e-06, "loss": 1.8541, "step": 10520 }, { "epoch": 1.7912732840010208, "grad_norm": 4.326706409454346, "learning_rate": 9.992187211997876e-06, "loss": 1.8652, "step": 10530 }, { "epoch": 1.7929743982308413, "grad_norm": 4.4171671867370605, "learning_rate": 9.985711289964774e-06, "loss": 1.8608, "step": 10540 }, { "epoch": 1.7946755124606617, "grad_norm": 4.190195083618164, "learning_rate": 9.979231405710136e-06, "loss": 1.7901, "step": 10550 }, { "epoch": 1.7963766266904821, "grad_norm": 4.556994438171387, "learning_rate": 9.972747567103217e-06, "loss": 1.7475, "step": 10560 }, { "epoch": 1.7980777409203028, "grad_norm": 4.397325038909912, "learning_rate": 9.96625978201807e-06, "loss": 1.7118, "step": 10570 }, { "epoch": 1.7997788551501235, "grad_norm": 3.940673828125, "learning_rate": 9.959768058333541e-06, "loss": 1.8119, "step": 10580 }, { "epoch": 1.801479969379944, "grad_norm": 5.351619720458984, "learning_rate": 9.953272403933263e-06, "loss": 1.8465, "step": 10590 }, { "epoch": 1.8031810836097644, "grad_norm": 4.466691970825195, "learning_rate": 9.946772826705638e-06, "loss": 1.7923, "step": 10600 }, { "epoch": 1.8048821978395848, "grad_norm": 4.102950096130371, "learning_rate": 9.94026933454383e-06, "loss": 1.8181, "step": 10610 }, { "epoch": 1.8065833120694055, "grad_norm": 3.4681036472320557, "learning_rate": 9.933761935345766e-06, "loss": 1.785, "step": 10620 }, { "epoch": 1.8082844262992261, "grad_norm": 4.120108604431152, "learning_rate": 9.927250637014107e-06, "loss": 1.7911, "step": 10630 }, { "epoch": 1.8099855405290466, "grad_norm": 4.320941925048828, "learning_rate": 9.920735447456261e-06, "loss": 1.8297, "step": 10640 }, { "epoch": 1.811686654758867, "grad_norm": 4.682049751281738, "learning_rate": 9.914216374584352e-06, "loss": 1.7805, "step": 10650 }, { "epoch": 1.8133877689886875, "grad_norm": 4.224926471710205, "learning_rate": 9.907693426315219e-06, "loss": 1.7751, "step": 10660 }, { "epoch": 1.8150888832185081, "grad_norm": 4.522299289703369, "learning_rate": 9.901166610570417e-06, "loss": 1.7741, "step": 10670 }, { "epoch": 1.8167899974483288, "grad_norm": 4.843745708465576, "learning_rate": 9.89463593527619e-06, "loss": 1.837, "step": 10680 }, { "epoch": 1.8184911116781493, "grad_norm": 4.759020805358887, "learning_rate": 9.888101408363473e-06, "loss": 1.7156, "step": 10690 }, { "epoch": 1.8201922259079697, "grad_norm": 4.42807674407959, "learning_rate": 9.881563037767872e-06, "loss": 1.821, "step": 10700 }, { "epoch": 1.8218933401377901, "grad_norm": 4.293888568878174, "learning_rate": 9.875020831429671e-06, "loss": 1.7818, "step": 10710 }, { "epoch": 1.8235944543676108, "grad_norm": 3.7048070430755615, "learning_rate": 9.868474797293803e-06, "loss": 1.8526, "step": 10720 }, { "epoch": 1.8252955685974315, "grad_norm": 3.907238006591797, "learning_rate": 9.861924943309855e-06, "loss": 1.7242, "step": 10730 }, { "epoch": 1.826996682827252, "grad_norm": 4.688352584838867, "learning_rate": 9.855371277432052e-06, "loss": 1.7701, "step": 10740 }, { "epoch": 1.8286977970570724, "grad_norm": 4.016199588775635, "learning_rate": 9.848813807619242e-06, "loss": 1.8497, "step": 10750 }, { "epoch": 1.8303989112868928, "grad_norm": 4.666214466094971, "learning_rate": 9.842252541834904e-06, "loss": 1.8062, "step": 10760 }, { "epoch": 1.8321000255167135, "grad_norm": 4.1961188316345215, "learning_rate": 9.835687488047119e-06, "loss": 1.8466, "step": 10770 }, { "epoch": 1.8338011397465341, "grad_norm": 4.104489326477051, "learning_rate": 9.829118654228565e-06, "loss": 1.8977, "step": 10780 }, { "epoch": 1.8355022539763546, "grad_norm": 3.4118106365203857, "learning_rate": 9.822546048356524e-06, "loss": 1.951, "step": 10790 }, { "epoch": 1.837203368206175, "grad_norm": 4.667579650878906, "learning_rate": 9.815969678412844e-06, "loss": 1.7684, "step": 10800 }, { "epoch": 1.8389044824359955, "grad_norm": 3.9106857776641846, "learning_rate": 9.80938955238395e-06, "loss": 1.8193, "step": 10810 }, { "epoch": 1.8406055966658161, "grad_norm": 4.125003337860107, "learning_rate": 9.802805678260835e-06, "loss": 1.8235, "step": 10820 }, { "epoch": 1.8423067108956368, "grad_norm": 4.059302806854248, "learning_rate": 9.79621806403903e-06, "loss": 1.7107, "step": 10830 }, { "epoch": 1.8440078251254572, "grad_norm": 3.3380849361419678, "learning_rate": 9.789626717718622e-06, "loss": 1.832, "step": 10840 }, { "epoch": 1.8457089393552777, "grad_norm": 3.950030565261841, "learning_rate": 9.783031647304217e-06, "loss": 1.811, "step": 10850 }, { "epoch": 1.8474100535850981, "grad_norm": 4.345433235168457, "learning_rate": 9.776432860804957e-06, "loss": 1.7453, "step": 10860 }, { "epoch": 1.8491111678149188, "grad_norm": 4.332964897155762, "learning_rate": 9.769830366234486e-06, "loss": 1.7936, "step": 10870 }, { "epoch": 1.8508122820447395, "grad_norm": 4.119027614593506, "learning_rate": 9.763224171610958e-06, "loss": 1.7703, "step": 10880 }, { "epoch": 1.85251339627456, "grad_norm": 3.9850196838378906, "learning_rate": 9.756614284957016e-06, "loss": 1.7415, "step": 10890 }, { "epoch": 1.8542145105043804, "grad_norm": 4.185554027557373, "learning_rate": 9.75000071429979e-06, "loss": 1.8353, "step": 10900 }, { "epoch": 1.8559156247342008, "grad_norm": 4.098738193511963, "learning_rate": 9.74338346767088e-06, "loss": 1.8058, "step": 10910 }, { "epoch": 1.8576167389640215, "grad_norm": 5.299314975738525, "learning_rate": 9.736762553106356e-06, "loss": 1.7866, "step": 10920 }, { "epoch": 1.8593178531938421, "grad_norm": 4.44684362411499, "learning_rate": 9.730137978646737e-06, "loss": 1.8705, "step": 10930 }, { "epoch": 1.8610189674236626, "grad_norm": 4.626757621765137, "learning_rate": 9.723509752336989e-06, "loss": 1.8671, "step": 10940 }, { "epoch": 1.862720081653483, "grad_norm": 3.2464823722839355, "learning_rate": 9.716877882226508e-06, "loss": 1.7845, "step": 10950 }, { "epoch": 1.8644211958833035, "grad_norm": 4.471595764160156, "learning_rate": 9.710242376369126e-06, "loss": 1.7647, "step": 10960 }, { "epoch": 1.8661223101131241, "grad_norm": 4.0545878410339355, "learning_rate": 9.703603242823082e-06, "loss": 1.761, "step": 10970 }, { "epoch": 1.8678234243429448, "grad_norm": 4.138380527496338, "learning_rate": 9.696960489651018e-06, "loss": 1.8144, "step": 10980 }, { "epoch": 1.8695245385727652, "grad_norm": 4.559357643127441, "learning_rate": 9.690314124919985e-06, "loss": 1.7863, "step": 10990 }, { "epoch": 1.8712256528025857, "grad_norm": 3.6744070053100586, "learning_rate": 9.6836641567014e-06, "loss": 1.8297, "step": 11000 }, { "epoch": 1.8729267670324061, "grad_norm": 4.699102878570557, "learning_rate": 9.677010593071073e-06, "loss": 1.7677, "step": 11010 }, { "epoch": 1.8746278812622268, "grad_norm": 3.9466359615325928, "learning_rate": 9.670353442109175e-06, "loss": 1.7409, "step": 11020 }, { "epoch": 1.8763289954920472, "grad_norm": 4.065673351287842, "learning_rate": 9.663692711900232e-06, "loss": 1.772, "step": 11030 }, { "epoch": 1.878030109721868, "grad_norm": 3.4951467514038086, "learning_rate": 9.657028410533114e-06, "loss": 1.8675, "step": 11040 }, { "epoch": 1.8797312239516883, "grad_norm": 4.127050876617432, "learning_rate": 9.650360546101037e-06, "loss": 1.7917, "step": 11050 }, { "epoch": 1.8814323381815088, "grad_norm": 3.586827278137207, "learning_rate": 9.643689126701533e-06, "loss": 1.7772, "step": 11060 }, { "epoch": 1.8831334524113295, "grad_norm": 4.5430073738098145, "learning_rate": 9.63701416043646e-06, "loss": 1.8817, "step": 11070 }, { "epoch": 1.88483456664115, "grad_norm": 4.422996520996094, "learning_rate": 9.630335655411983e-06, "loss": 1.7157, "step": 11080 }, { "epoch": 1.8865356808709706, "grad_norm": 4.226709365844727, "learning_rate": 9.623653619738555e-06, "loss": 1.7476, "step": 11090 }, { "epoch": 1.888236795100791, "grad_norm": 3.803516387939453, "learning_rate": 9.616968061530925e-06, "loss": 1.7751, "step": 11100 }, { "epoch": 1.8899379093306115, "grad_norm": 4.143571376800537, "learning_rate": 9.61027898890812e-06, "loss": 1.7535, "step": 11110 }, { "epoch": 1.8916390235604321, "grad_norm": 4.062926769256592, "learning_rate": 9.603586409993429e-06, "loss": 1.7253, "step": 11120 }, { "epoch": 1.8933401377902526, "grad_norm": 4.166606426239014, "learning_rate": 9.59689033291441e-06, "loss": 1.7783, "step": 11130 }, { "epoch": 1.8950412520200732, "grad_norm": 4.350265026092529, "learning_rate": 9.590190765802856e-06, "loss": 1.7658, "step": 11140 }, { "epoch": 1.8967423662498937, "grad_norm": 3.63614559173584, "learning_rate": 9.583487716794805e-06, "loss": 1.7519, "step": 11150 }, { "epoch": 1.8984434804797141, "grad_norm": 5.263920307159424, "learning_rate": 9.576781194030524e-06, "loss": 1.7858, "step": 11160 }, { "epoch": 1.9001445947095348, "grad_norm": 4.241003513336182, "learning_rate": 9.5700712056545e-06, "loss": 1.7522, "step": 11170 }, { "epoch": 1.9018457089393552, "grad_norm": 4.151254177093506, "learning_rate": 9.563357759815423e-06, "loss": 1.8353, "step": 11180 }, { "epoch": 1.903546823169176, "grad_norm": 4.62730073928833, "learning_rate": 9.556640864666185e-06, "loss": 1.7649, "step": 11190 }, { "epoch": 1.9052479373989963, "grad_norm": 4.749996185302734, "learning_rate": 9.549920528363868e-06, "loss": 1.797, "step": 11200 }, { "epoch": 1.9069490516288168, "grad_norm": 4.428854942321777, "learning_rate": 9.543196759069734e-06, "loss": 1.9067, "step": 11210 }, { "epoch": 1.9086501658586374, "grad_norm": 4.450839996337891, "learning_rate": 9.53646956494921e-06, "loss": 1.7731, "step": 11220 }, { "epoch": 1.910351280088458, "grad_norm": 4.343857765197754, "learning_rate": 9.529738954171882e-06, "loss": 1.7605, "step": 11230 }, { "epoch": 1.9120523943182786, "grad_norm": 4.726666450500488, "learning_rate": 9.523004934911492e-06, "loss": 1.8331, "step": 11240 }, { "epoch": 1.913753508548099, "grad_norm": 4.01237678527832, "learning_rate": 9.516267515345911e-06, "loss": 1.8047, "step": 11250 }, { "epoch": 1.9154546227779194, "grad_norm": 4.32087516784668, "learning_rate": 9.50952670365715e-06, "loss": 1.8528, "step": 11260 }, { "epoch": 1.9171557370077401, "grad_norm": 4.309276580810547, "learning_rate": 9.502782508031331e-06, "loss": 1.8709, "step": 11270 }, { "epoch": 1.9188568512375606, "grad_norm": 3.589980363845825, "learning_rate": 9.496034936658694e-06, "loss": 1.781, "step": 11280 }, { "epoch": 1.9205579654673812, "grad_norm": 3.661651134490967, "learning_rate": 9.489283997733564e-06, "loss": 1.7792, "step": 11290 }, { "epoch": 1.9222590796972017, "grad_norm": 4.655760288238525, "learning_rate": 9.482529699454375e-06, "loss": 1.8161, "step": 11300 }, { "epoch": 1.923960193927022, "grad_norm": 4.194929599761963, "learning_rate": 9.475772050023626e-06, "loss": 1.835, "step": 11310 }, { "epoch": 1.9256613081568428, "grad_norm": 4.503241539001465, "learning_rate": 9.469011057647892e-06, "loss": 1.8862, "step": 11320 }, { "epoch": 1.9273624223866632, "grad_norm": 4.981051921844482, "learning_rate": 9.462246730537804e-06, "loss": 1.749, "step": 11330 }, { "epoch": 1.9290635366164839, "grad_norm": 3.3752570152282715, "learning_rate": 9.455479076908046e-06, "loss": 1.8074, "step": 11340 }, { "epoch": 1.9307646508463043, "grad_norm": 4.697122097015381, "learning_rate": 9.448708104977342e-06, "loss": 1.7902, "step": 11350 }, { "epoch": 1.9324657650761248, "grad_norm": 4.077208995819092, "learning_rate": 9.441933822968444e-06, "loss": 1.7781, "step": 11360 }, { "epoch": 1.9341668793059454, "grad_norm": 4.550171852111816, "learning_rate": 9.435156239108121e-06, "loss": 1.7047, "step": 11370 }, { "epoch": 1.9358679935357659, "grad_norm": 4.2797393798828125, "learning_rate": 9.428375361627159e-06, "loss": 1.703, "step": 11380 }, { "epoch": 1.9375691077655866, "grad_norm": 3.6947295665740967, "learning_rate": 9.421591198760338e-06, "loss": 1.8047, "step": 11390 }, { "epoch": 1.939270221995407, "grad_norm": 4.601194381713867, "learning_rate": 9.414803758746429e-06, "loss": 1.7655, "step": 11400 }, { "epoch": 1.9409713362252274, "grad_norm": 3.7636353969573975, "learning_rate": 9.408013049828185e-06, "loss": 1.794, "step": 11410 }, { "epoch": 1.942672450455048, "grad_norm": 3.624544143676758, "learning_rate": 9.40121908025233e-06, "loss": 1.7938, "step": 11420 }, { "epoch": 1.9443735646848685, "grad_norm": 4.272632122039795, "learning_rate": 9.394421858269537e-06, "loss": 1.6927, "step": 11430 }, { "epoch": 1.9460746789146892, "grad_norm": 4.589675426483154, "learning_rate": 9.387621392134446e-06, "loss": 1.8256, "step": 11440 }, { "epoch": 1.9477757931445097, "grad_norm": 4.706130027770996, "learning_rate": 9.38081769010562e-06, "loss": 1.6867, "step": 11450 }, { "epoch": 1.94947690737433, "grad_norm": 4.569252967834473, "learning_rate": 9.374010760445563e-06, "loss": 1.765, "step": 11460 }, { "epoch": 1.9511780216041508, "grad_norm": 4.408115386962891, "learning_rate": 9.367200611420695e-06, "loss": 1.8893, "step": 11470 }, { "epoch": 1.9528791358339712, "grad_norm": 4.092146873474121, "learning_rate": 9.360387251301345e-06, "loss": 1.8494, "step": 11480 }, { "epoch": 1.9545802500637919, "grad_norm": 4.814793586730957, "learning_rate": 9.35357068836174e-06, "loss": 1.6643, "step": 11490 }, { "epoch": 1.9562813642936123, "grad_norm": 5.637572765350342, "learning_rate": 9.346750930880005e-06, "loss": 1.7359, "step": 11500 }, { "epoch": 1.9579824785234328, "grad_norm": 5.127346515655518, "learning_rate": 9.339927987138132e-06, "loss": 1.8141, "step": 11510 }, { "epoch": 1.9596835927532534, "grad_norm": 4.358199596405029, "learning_rate": 9.33310186542199e-06, "loss": 1.8143, "step": 11520 }, { "epoch": 1.9613847069830739, "grad_norm": 5.185015678405762, "learning_rate": 9.326272574021307e-06, "loss": 1.7924, "step": 11530 }, { "epoch": 1.9630858212128945, "grad_norm": 3.737765073776245, "learning_rate": 9.31944012122966e-06, "loss": 1.7952, "step": 11540 }, { "epoch": 1.964786935442715, "grad_norm": 4.091814041137695, "learning_rate": 9.312604515344463e-06, "loss": 1.7418, "step": 11550 }, { "epoch": 1.9664880496725354, "grad_norm": 3.7944629192352295, "learning_rate": 9.30576576466696e-06, "loss": 1.7573, "step": 11560 }, { "epoch": 1.968189163902356, "grad_norm": 4.183420658111572, "learning_rate": 9.298923877502218e-06, "loss": 1.8055, "step": 11570 }, { "epoch": 1.9698902781321765, "grad_norm": 4.519730091094971, "learning_rate": 9.292078862159108e-06, "loss": 1.7887, "step": 11580 }, { "epoch": 1.9715913923619972, "grad_norm": 3.6484477519989014, "learning_rate": 9.2852307269503e-06, "loss": 1.7559, "step": 11590 }, { "epoch": 1.9732925065918177, "grad_norm": 4.221194267272949, "learning_rate": 9.278379480192255e-06, "loss": 1.7274, "step": 11600 }, { "epoch": 1.974993620821638, "grad_norm": 4.922400951385498, "learning_rate": 9.271525130205218e-06, "loss": 1.8043, "step": 11610 }, { "epoch": 1.9766947350514585, "grad_norm": 4.591314315795898, "learning_rate": 9.264667685313188e-06, "loss": 1.8736, "step": 11620 }, { "epoch": 1.9783958492812792, "grad_norm": 4.475770950317383, "learning_rate": 9.257807153843933e-06, "loss": 1.7494, "step": 11630 }, { "epoch": 1.9800969635110999, "grad_norm": 4.4012131690979, "learning_rate": 9.250943544128974e-06, "loss": 1.7984, "step": 11640 }, { "epoch": 1.9817980777409203, "grad_norm": 4.212027549743652, "learning_rate": 9.244076864503559e-06, "loss": 1.7879, "step": 11650 }, { "epoch": 1.9834991919707408, "grad_norm": 3.849550247192383, "learning_rate": 9.23720712330667e-06, "loss": 1.8438, "step": 11660 }, { "epoch": 1.9852003062005612, "grad_norm": 3.509598731994629, "learning_rate": 9.230334328881004e-06, "loss": 1.779, "step": 11670 }, { "epoch": 1.9869014204303819, "grad_norm": 3.4126853942871094, "learning_rate": 9.22345848957297e-06, "loss": 1.8188, "step": 11680 }, { "epoch": 1.9886025346602025, "grad_norm": 4.051961898803711, "learning_rate": 9.216579613732674e-06, "loss": 1.7996, "step": 11690 }, { "epoch": 1.990303648890023, "grad_norm": 3.7018580436706543, "learning_rate": 9.209697709713905e-06, "loss": 1.6788, "step": 11700 }, { "epoch": 1.9920047631198434, "grad_norm": 4.118253707885742, "learning_rate": 9.202812785874137e-06, "loss": 1.8283, "step": 11710 }, { "epoch": 1.9937058773496639, "grad_norm": 4.72760534286499, "learning_rate": 9.195924850574502e-06, "loss": 1.763, "step": 11720 }, { "epoch": 1.9954069915794845, "grad_norm": 5.105101108551025, "learning_rate": 9.189033912179798e-06, "loss": 1.7422, "step": 11730 }, { "epoch": 1.9971081058093052, "grad_norm": 4.302199840545654, "learning_rate": 9.182139979058463e-06, "loss": 1.7793, "step": 11740 }, { "epoch": 1.9988092200391256, "grad_norm": 4.676185131072998, "learning_rate": 9.175243059582579e-06, "loss": 1.7238, "step": 11750 }, { "epoch": 2.000510334268946, "grad_norm": 4.118522644042969, "learning_rate": 9.168343162127848e-06, "loss": 1.7101, "step": 11760 }, { "epoch": 2.0022114484987665, "grad_norm": 4.459059238433838, "learning_rate": 9.161440295073591e-06, "loss": 1.642, "step": 11770 }, { "epoch": 2.0039125627285874, "grad_norm": 5.093273639678955, "learning_rate": 9.154534466802737e-06, "loss": 1.6551, "step": 11780 }, { "epoch": 2.005613676958408, "grad_norm": 4.468721866607666, "learning_rate": 9.14762568570181e-06, "loss": 1.6835, "step": 11790 }, { "epoch": 2.0073147911882283, "grad_norm": 4.6995649337768555, "learning_rate": 9.140713960160917e-06, "loss": 1.7078, "step": 11800 }, { "epoch": 2.0090159054180488, "grad_norm": 4.6736907958984375, "learning_rate": 9.133799298573745e-06, "loss": 1.7109, "step": 11810 }, { "epoch": 2.010717019647869, "grad_norm": 4.717840671539307, "learning_rate": 9.126881709337545e-06, "loss": 1.7939, "step": 11820 }, { "epoch": 2.01241813387769, "grad_norm": 4.908730983734131, "learning_rate": 9.119961200853122e-06, "loss": 1.6915, "step": 11830 }, { "epoch": 2.0141192481075105, "grad_norm": 5.266244888305664, "learning_rate": 9.113037781524829e-06, "loss": 1.7138, "step": 11840 }, { "epoch": 2.015820362337331, "grad_norm": 5.060489177703857, "learning_rate": 9.106111459760554e-06, "loss": 1.7255, "step": 11850 }, { "epoch": 2.0175214765671514, "grad_norm": 4.0937089920043945, "learning_rate": 9.099182243971704e-06, "loss": 1.6528, "step": 11860 }, { "epoch": 2.019222590796972, "grad_norm": 5.246833801269531, "learning_rate": 9.09225014257321e-06, "loss": 1.7111, "step": 11870 }, { "epoch": 2.0209237050267927, "grad_norm": 4.744830131530762, "learning_rate": 9.085315163983497e-06, "loss": 1.766, "step": 11880 }, { "epoch": 2.022624819256613, "grad_norm": 4.4739203453063965, "learning_rate": 9.078377316624488e-06, "loss": 1.6996, "step": 11890 }, { "epoch": 2.0243259334864336, "grad_norm": 4.242108345031738, "learning_rate": 9.071436608921599e-06, "loss": 1.7228, "step": 11900 }, { "epoch": 2.026027047716254, "grad_norm": 5.044703960418701, "learning_rate": 9.064493049303706e-06, "loss": 1.6989, "step": 11910 }, { "epoch": 2.0277281619460745, "grad_norm": 4.1773786544799805, "learning_rate": 9.057546646203152e-06, "loss": 1.6513, "step": 11920 }, { "epoch": 2.0294292761758954, "grad_norm": 4.137922286987305, "learning_rate": 9.05059740805574e-06, "loss": 1.6933, "step": 11930 }, { "epoch": 2.031130390405716, "grad_norm": 4.424137115478516, "learning_rate": 9.04364534330071e-06, "loss": 1.6533, "step": 11940 }, { "epoch": 2.0328315046355363, "grad_norm": 4.5705885887146, "learning_rate": 9.036690460380735e-06, "loss": 1.6856, "step": 11950 }, { "epoch": 2.0345326188653567, "grad_norm": 6.594974994659424, "learning_rate": 9.02973276774191e-06, "loss": 1.7271, "step": 11960 }, { "epoch": 2.036233733095177, "grad_norm": 5.3873491287231445, "learning_rate": 9.022772273833745e-06, "loss": 1.7247, "step": 11970 }, { "epoch": 2.037934847324998, "grad_norm": 4.3244500160217285, "learning_rate": 9.01580898710915e-06, "loss": 1.7503, "step": 11980 }, { "epoch": 2.0396359615548185, "grad_norm": 4.935193061828613, "learning_rate": 9.008842916024427e-06, "loss": 1.7229, "step": 11990 }, { "epoch": 2.041337075784639, "grad_norm": 4.176199436187744, "learning_rate": 9.00187406903926e-06, "loss": 1.7348, "step": 12000 }, { "epoch": 2.0430381900144594, "grad_norm": 5.202916622161865, "learning_rate": 8.9949024546167e-06, "loss": 1.5918, "step": 12010 }, { "epoch": 2.04473930424428, "grad_norm": 4.1330437660217285, "learning_rate": 8.987928081223167e-06, "loss": 1.7322, "step": 12020 }, { "epoch": 2.0464404184741007, "grad_norm": 4.667124271392822, "learning_rate": 8.980950957328423e-06, "loss": 1.6891, "step": 12030 }, { "epoch": 2.048141532703921, "grad_norm": 5.26122522354126, "learning_rate": 8.973971091405575e-06, "loss": 1.7367, "step": 12040 }, { "epoch": 2.0498426469337416, "grad_norm": 4.209774494171143, "learning_rate": 8.966988491931062e-06, "loss": 1.7748, "step": 12050 }, { "epoch": 2.051543761163562, "grad_norm": 4.397939682006836, "learning_rate": 8.960003167384633e-06, "loss": 1.6671, "step": 12060 }, { "epoch": 2.0532448753933825, "grad_norm": 4.774211883544922, "learning_rate": 8.953015126249358e-06, "loss": 1.7462, "step": 12070 }, { "epoch": 2.0549459896232034, "grad_norm": 4.361632823944092, "learning_rate": 8.9460243770116e-06, "loss": 1.6999, "step": 12080 }, { "epoch": 2.056647103853024, "grad_norm": 4.739696979522705, "learning_rate": 8.939030928161008e-06, "loss": 1.7113, "step": 12090 }, { "epoch": 2.0583482180828443, "grad_norm": 4.217962265014648, "learning_rate": 8.932034788190522e-06, "loss": 1.7864, "step": 12100 }, { "epoch": 2.0600493323126647, "grad_norm": 4.49211311340332, "learning_rate": 8.925035965596332e-06, "loss": 1.7689, "step": 12110 }, { "epoch": 2.061750446542485, "grad_norm": 4.063029766082764, "learning_rate": 8.9180344688779e-06, "loss": 1.7162, "step": 12120 }, { "epoch": 2.063451560772306, "grad_norm": 4.042270183563232, "learning_rate": 8.911030306537929e-06, "loss": 1.7093, "step": 12130 }, { "epoch": 2.0651526750021265, "grad_norm": 4.807375907897949, "learning_rate": 8.90402348708236e-06, "loss": 1.5763, "step": 12140 }, { "epoch": 2.066853789231947, "grad_norm": 4.455898284912109, "learning_rate": 8.897014019020363e-06, "loss": 1.6977, "step": 12150 }, { "epoch": 2.0685549034617674, "grad_norm": 4.151392459869385, "learning_rate": 8.890001910864324e-06, "loss": 1.8438, "step": 12160 }, { "epoch": 2.070256017691588, "grad_norm": 5.351758003234863, "learning_rate": 8.882987171129831e-06, "loss": 1.6591, "step": 12170 }, { "epoch": 2.0719571319214087, "grad_norm": 4.933199405670166, "learning_rate": 8.875969808335677e-06, "loss": 1.6332, "step": 12180 }, { "epoch": 2.073658246151229, "grad_norm": 4.691521167755127, "learning_rate": 8.86894983100383e-06, "loss": 1.6974, "step": 12190 }, { "epoch": 2.0753593603810496, "grad_norm": 5.424036502838135, "learning_rate": 8.861927247659436e-06, "loss": 1.7456, "step": 12200 }, { "epoch": 2.07706047461087, "grad_norm": 3.9998979568481445, "learning_rate": 8.85490206683081e-06, "loss": 1.6898, "step": 12210 }, { "epoch": 2.0787615888406905, "grad_norm": 4.3668718338012695, "learning_rate": 8.84787429704942e-06, "loss": 1.6834, "step": 12220 }, { "epoch": 2.0804627030705114, "grad_norm": 4.986365795135498, "learning_rate": 8.840843946849874e-06, "loss": 1.8197, "step": 12230 }, { "epoch": 2.082163817300332, "grad_norm": 4.653778553009033, "learning_rate": 8.83381102476992e-06, "loss": 1.7306, "step": 12240 }, { "epoch": 2.0838649315301523, "grad_norm": 3.908618450164795, "learning_rate": 8.826775539350426e-06, "loss": 1.6604, "step": 12250 }, { "epoch": 2.0855660457599727, "grad_norm": 4.5211687088012695, "learning_rate": 8.819737499135368e-06, "loss": 1.7098, "step": 12260 }, { "epoch": 2.087267159989793, "grad_norm": 5.266591548919678, "learning_rate": 8.812696912671833e-06, "loss": 1.7872, "step": 12270 }, { "epoch": 2.0889682742196136, "grad_norm": 6.0518879890441895, "learning_rate": 8.805653788509997e-06, "loss": 1.6938, "step": 12280 }, { "epoch": 2.0906693884494345, "grad_norm": 5.185415744781494, "learning_rate": 8.798608135203115e-06, "loss": 1.7457, "step": 12290 }, { "epoch": 2.092370502679255, "grad_norm": 4.733633995056152, "learning_rate": 8.79155996130752e-06, "loss": 1.729, "step": 12300 }, { "epoch": 2.0940716169090754, "grad_norm": 3.8256046772003174, "learning_rate": 8.784509275382598e-06, "loss": 1.7013, "step": 12310 }, { "epoch": 2.095772731138896, "grad_norm": 4.688540935516357, "learning_rate": 8.777456085990791e-06, "loss": 1.6962, "step": 12320 }, { "epoch": 2.0974738453687163, "grad_norm": 5.134762287139893, "learning_rate": 8.77040040169758e-06, "loss": 1.7053, "step": 12330 }, { "epoch": 2.099174959598537, "grad_norm": 4.646895885467529, "learning_rate": 8.763342231071477e-06, "loss": 1.7464, "step": 12340 }, { "epoch": 2.1008760738283576, "grad_norm": 4.749012470245361, "learning_rate": 8.756281582684011e-06, "loss": 1.7256, "step": 12350 }, { "epoch": 2.102577188058178, "grad_norm": 3.6838204860687256, "learning_rate": 8.74921846510972e-06, "loss": 1.6454, "step": 12360 }, { "epoch": 2.1042783022879985, "grad_norm": 4.912604331970215, "learning_rate": 8.74215288692614e-06, "loss": 1.7182, "step": 12370 }, { "epoch": 2.105979416517819, "grad_norm": 5.738154411315918, "learning_rate": 8.735084856713802e-06, "loss": 1.7614, "step": 12380 }, { "epoch": 2.10768053074764, "grad_norm": 5.089351654052734, "learning_rate": 8.72801438305621e-06, "loss": 1.6985, "step": 12390 }, { "epoch": 2.1093816449774603, "grad_norm": 4.910715103149414, "learning_rate": 8.720941474539826e-06, "loss": 1.722, "step": 12400 }, { "epoch": 2.1110827592072807, "grad_norm": 4.526627540588379, "learning_rate": 8.713866139754089e-06, "loss": 1.7323, "step": 12410 }, { "epoch": 2.112783873437101, "grad_norm": 4.7195611000061035, "learning_rate": 8.706788387291366e-06, "loss": 1.7134, "step": 12420 }, { "epoch": 2.1144849876669216, "grad_norm": 5.276111602783203, "learning_rate": 8.69970822574697e-06, "loss": 1.6738, "step": 12430 }, { "epoch": 2.1161861018967425, "grad_norm": 4.766108989715576, "learning_rate": 8.69262566371914e-06, "loss": 1.6695, "step": 12440 }, { "epoch": 2.117887216126563, "grad_norm": 5.434549331665039, "learning_rate": 8.685540709809023e-06, "loss": 1.689, "step": 12450 }, { "epoch": 2.1195883303563834, "grad_norm": 4.173770904541016, "learning_rate": 8.678453372620676e-06, "loss": 1.7214, "step": 12460 }, { "epoch": 2.121289444586204, "grad_norm": 4.431570053100586, "learning_rate": 8.671363660761051e-06, "loss": 1.7108, "step": 12470 }, { "epoch": 2.1229905588160243, "grad_norm": 5.044260025024414, "learning_rate": 8.66427158283998e-06, "loss": 1.7559, "step": 12480 }, { "epoch": 2.124691673045845, "grad_norm": 4.083590507507324, "learning_rate": 8.657177147470172e-06, "loss": 1.7039, "step": 12490 }, { "epoch": 2.1263927872756656, "grad_norm": 4.259870529174805, "learning_rate": 8.650080363267197e-06, "loss": 1.7676, "step": 12500 }, { "epoch": 2.128093901505486, "grad_norm": 5.370848178863525, "learning_rate": 8.642981238849477e-06, "loss": 1.6801, "step": 12510 }, { "epoch": 2.1297950157353065, "grad_norm": 5.005211353302002, "learning_rate": 8.635879782838281e-06, "loss": 1.6762, "step": 12520 }, { "epoch": 2.131496129965127, "grad_norm": 4.411081314086914, "learning_rate": 8.6287760038577e-06, "loss": 1.6965, "step": 12530 }, { "epoch": 2.133197244194948, "grad_norm": 4.327058792114258, "learning_rate": 8.621669910534654e-06, "loss": 1.7941, "step": 12540 }, { "epoch": 2.1348983584247683, "grad_norm": 4.803872585296631, "learning_rate": 8.61456151149887e-06, "loss": 1.7329, "step": 12550 }, { "epoch": 2.1365994726545887, "grad_norm": 5.848106861114502, "learning_rate": 8.60745081538288e-06, "loss": 1.7452, "step": 12560 }, { "epoch": 2.138300586884409, "grad_norm": 4.075498104095459, "learning_rate": 8.600337830821995e-06, "loss": 1.7577, "step": 12570 }, { "epoch": 2.1400017011142296, "grad_norm": 3.8453147411346436, "learning_rate": 8.593222566454318e-06, "loss": 1.7492, "step": 12580 }, { "epoch": 2.1417028153440505, "grad_norm": 4.6681227684021, "learning_rate": 8.58610503092071e-06, "loss": 1.6385, "step": 12590 }, { "epoch": 2.143403929573871, "grad_norm": 5.454570770263672, "learning_rate": 8.578985232864796e-06, "loss": 1.7272, "step": 12600 }, { "epoch": 2.1451050438036914, "grad_norm": 5.396411418914795, "learning_rate": 8.571863180932948e-06, "loss": 1.7069, "step": 12610 }, { "epoch": 2.146806158033512, "grad_norm": 5.200651168823242, "learning_rate": 8.564738883774275e-06, "loss": 1.5999, "step": 12620 }, { "epoch": 2.1485072722633323, "grad_norm": 4.8601813316345215, "learning_rate": 8.557612350040611e-06, "loss": 1.6065, "step": 12630 }, { "epoch": 2.150208386493153, "grad_norm": 4.7667317390441895, "learning_rate": 8.550483588386504e-06, "loss": 1.5607, "step": 12640 }, { "epoch": 2.1519095007229736, "grad_norm": 5.358502388000488, "learning_rate": 8.543352607469216e-06, "loss": 1.7666, "step": 12650 }, { "epoch": 2.153610614952794, "grad_norm": 4.752895355224609, "learning_rate": 8.536219415948695e-06, "loss": 1.6798, "step": 12660 }, { "epoch": 2.1553117291826145, "grad_norm": 3.580336570739746, "learning_rate": 8.529084022487582e-06, "loss": 1.7687, "step": 12670 }, { "epoch": 2.157012843412435, "grad_norm": 4.459094524383545, "learning_rate": 8.521946435751182e-06, "loss": 1.6895, "step": 12680 }, { "epoch": 2.158713957642256, "grad_norm": 6.012520790100098, "learning_rate": 8.514806664407472e-06, "loss": 1.7698, "step": 12690 }, { "epoch": 2.1604150718720763, "grad_norm": 4.733858108520508, "learning_rate": 8.507664717127077e-06, "loss": 1.6648, "step": 12700 }, { "epoch": 2.1621161861018967, "grad_norm": 4.045647621154785, "learning_rate": 8.500520602583268e-06, "loss": 1.66, "step": 12710 }, { "epoch": 2.163817300331717, "grad_norm": 4.753208637237549, "learning_rate": 8.493374329451948e-06, "loss": 1.6367, "step": 12720 }, { "epoch": 2.1655184145615376, "grad_norm": 5.455912113189697, "learning_rate": 8.48622590641164e-06, "loss": 1.7819, "step": 12730 }, { "epoch": 2.1672195287913585, "grad_norm": 4.091403484344482, "learning_rate": 8.479075342143472e-06, "loss": 1.7988, "step": 12740 }, { "epoch": 2.168920643021179, "grad_norm": 5.065116882324219, "learning_rate": 8.471922645331183e-06, "loss": 1.6525, "step": 12750 }, { "epoch": 2.1706217572509994, "grad_norm": 4.216938018798828, "learning_rate": 8.464767824661097e-06, "loss": 1.7405, "step": 12760 }, { "epoch": 2.17232287148082, "grad_norm": 4.321226596832275, "learning_rate": 8.457610888822113e-06, "loss": 1.7527, "step": 12770 }, { "epoch": 2.1740239857106403, "grad_norm": 4.348523139953613, "learning_rate": 8.45045184650571e-06, "loss": 1.7419, "step": 12780 }, { "epoch": 2.175725099940461, "grad_norm": 4.465217113494873, "learning_rate": 8.443290706405914e-06, "loss": 1.716, "step": 12790 }, { "epoch": 2.1774262141702816, "grad_norm": 3.363562822341919, "learning_rate": 8.436127477219299e-06, "loss": 1.6208, "step": 12800 }, { "epoch": 2.179127328400102, "grad_norm": 4.242889881134033, "learning_rate": 8.428962167644982e-06, "loss": 1.7619, "step": 12810 }, { "epoch": 2.1808284426299225, "grad_norm": 4.917881488800049, "learning_rate": 8.421794786384607e-06, "loss": 1.6682, "step": 12820 }, { "epoch": 2.182529556859743, "grad_norm": 3.955270528793335, "learning_rate": 8.41462534214233e-06, "loss": 1.6528, "step": 12830 }, { "epoch": 2.184230671089564, "grad_norm": 4.5821309089660645, "learning_rate": 8.407453843624807e-06, "loss": 1.677, "step": 12840 }, { "epoch": 2.1859317853193843, "grad_norm": 5.448122978210449, "learning_rate": 8.400280299541205e-06, "loss": 1.6756, "step": 12850 }, { "epoch": 2.1876328995492047, "grad_norm": 3.5626063346862793, "learning_rate": 8.393104718603154e-06, "loss": 1.656, "step": 12860 }, { "epoch": 2.189334013779025, "grad_norm": 4.248167037963867, "learning_rate": 8.385927109524776e-06, "loss": 1.6074, "step": 12870 }, { "epoch": 2.1910351280088456, "grad_norm": 5.133649826049805, "learning_rate": 8.378747481022648e-06, "loss": 1.6894, "step": 12880 }, { "epoch": 2.1927362422386665, "grad_norm": 4.838881015777588, "learning_rate": 8.3715658418158e-06, "loss": 1.7187, "step": 12890 }, { "epoch": 2.194437356468487, "grad_norm": 4.620151519775391, "learning_rate": 8.364382200625701e-06, "loss": 1.6652, "step": 12900 }, { "epoch": 2.1961384706983074, "grad_norm": 4.420077800750732, "learning_rate": 8.357196566176256e-06, "loss": 1.6278, "step": 12910 }, { "epoch": 2.197839584928128, "grad_norm": 4.555145263671875, "learning_rate": 8.350008947193792e-06, "loss": 1.7234, "step": 12920 }, { "epoch": 2.1995406991579483, "grad_norm": 5.071643829345703, "learning_rate": 8.342819352407036e-06, "loss": 1.7096, "step": 12930 }, { "epoch": 2.201241813387769, "grad_norm": 5.000056743621826, "learning_rate": 8.335627790547125e-06, "loss": 1.7824, "step": 12940 }, { "epoch": 2.2029429276175896, "grad_norm": 4.919875144958496, "learning_rate": 8.32843427034758e-06, "loss": 1.713, "step": 12950 }, { "epoch": 2.20464404184741, "grad_norm": 5.44525671005249, "learning_rate": 8.321238800544301e-06, "loss": 1.6816, "step": 12960 }, { "epoch": 2.2063451560772305, "grad_norm": 5.025317668914795, "learning_rate": 8.31404138987556e-06, "loss": 1.7378, "step": 12970 }, { "epoch": 2.208046270307051, "grad_norm": 4.864163398742676, "learning_rate": 8.306842047081977e-06, "loss": 1.7188, "step": 12980 }, { "epoch": 2.209747384536872, "grad_norm": 5.568412780761719, "learning_rate": 8.299640780906527e-06, "loss": 1.7523, "step": 12990 }, { "epoch": 2.2114484987666922, "grad_norm": 4.593928813934326, "learning_rate": 8.292437600094513e-06, "loss": 1.652, "step": 13000 }, { "epoch": 2.2131496129965127, "grad_norm": 4.015298366546631, "learning_rate": 8.28523251339357e-06, "loss": 1.6344, "step": 13010 }, { "epoch": 2.214850727226333, "grad_norm": 5.201580047607422, "learning_rate": 8.27874631305947e-06, "loss": 1.8141, "step": 13020 }, { "epoch": 2.2165518414561536, "grad_norm": 4.677765369415283, "learning_rate": 8.27153762927757e-06, "loss": 1.8078, "step": 13030 }, { "epoch": 2.2182529556859745, "grad_norm": 4.313126564025879, "learning_rate": 8.264327064987924e-06, "loss": 1.6792, "step": 13040 }, { "epoch": 2.219954069915795, "grad_norm": 4.24152946472168, "learning_rate": 8.257114628947131e-06, "loss": 1.6695, "step": 13050 }, { "epoch": 2.2216551841456154, "grad_norm": 4.828244209289551, "learning_rate": 8.249900329914065e-06, "loss": 1.7602, "step": 13060 }, { "epoch": 2.223356298375436, "grad_norm": 4.852785110473633, "learning_rate": 8.242684176649857e-06, "loss": 1.7807, "step": 13070 }, { "epoch": 2.2250574126052562, "grad_norm": 5.04357385635376, "learning_rate": 8.235466177917896e-06, "loss": 1.703, "step": 13080 }, { "epoch": 2.226758526835077, "grad_norm": 5.173859119415283, "learning_rate": 8.228246342483809e-06, "loss": 1.666, "step": 13090 }, { "epoch": 2.2284596410648976, "grad_norm": 4.799098014831543, "learning_rate": 8.221024679115452e-06, "loss": 1.6277, "step": 13100 }, { "epoch": 2.230160755294718, "grad_norm": 4.921494960784912, "learning_rate": 8.213801196582905e-06, "loss": 1.5804, "step": 13110 }, { "epoch": 2.2318618695245385, "grad_norm": 4.846426486968994, "learning_rate": 8.206575903658452e-06, "loss": 1.7394, "step": 13120 }, { "epoch": 2.233562983754359, "grad_norm": 4.785889148712158, "learning_rate": 8.19934880911658e-06, "loss": 1.6805, "step": 13130 }, { "epoch": 2.23526409798418, "grad_norm": 5.29194974899292, "learning_rate": 8.192119921733966e-06, "loss": 1.6522, "step": 13140 }, { "epoch": 2.2369652122140002, "grad_norm": 5.888778209686279, "learning_rate": 8.184889250289456e-06, "loss": 1.7498, "step": 13150 }, { "epoch": 2.2386663264438207, "grad_norm": 3.804825782775879, "learning_rate": 8.177656803564068e-06, "loss": 1.7379, "step": 13160 }, { "epoch": 2.240367440673641, "grad_norm": 4.515087127685547, "learning_rate": 8.170422590340977e-06, "loss": 1.7091, "step": 13170 }, { "epoch": 2.2420685549034616, "grad_norm": 5.151566505432129, "learning_rate": 8.163186619405497e-06, "loss": 1.6192, "step": 13180 }, { "epoch": 2.2437696691332825, "grad_norm": 7.327603816986084, "learning_rate": 8.155948899545087e-06, "loss": 1.6528, "step": 13190 }, { "epoch": 2.245470783363103, "grad_norm": 4.373720169067383, "learning_rate": 8.148709439549322e-06, "loss": 1.7527, "step": 13200 }, { "epoch": 2.2471718975929234, "grad_norm": 4.045927047729492, "learning_rate": 8.141468248209889e-06, "loss": 1.7512, "step": 13210 }, { "epoch": 2.248873011822744, "grad_norm": 4.1613616943359375, "learning_rate": 8.134225334320583e-06, "loss": 1.7419, "step": 13220 }, { "epoch": 2.2505741260525642, "grad_norm": 5.061476230621338, "learning_rate": 8.12770524630982e-06, "loss": 1.7468, "step": 13230 }, { "epoch": 2.252275240282385, "grad_norm": 4.439516067504883, "learning_rate": 8.12045908381014e-06, "loss": 1.7932, "step": 13240 }, { "epoch": 2.2539763545122056, "grad_norm": 5.531265735626221, "learning_rate": 8.113211224274374e-06, "loss": 1.7472, "step": 13250 }, { "epoch": 2.255677468742026, "grad_norm": 4.822131156921387, "learning_rate": 8.105961676504417e-06, "loss": 1.788, "step": 13260 }, { "epoch": 2.2573785829718465, "grad_norm": 4.480312824249268, "learning_rate": 8.098710449304209e-06, "loss": 1.7329, "step": 13270 }, { "epoch": 2.259079697201667, "grad_norm": 4.88050651550293, "learning_rate": 8.09145755147973e-06, "loss": 1.688, "step": 13280 }, { "epoch": 2.260780811431488, "grad_norm": 5.95066499710083, "learning_rate": 8.084202991838988e-06, "loss": 1.7034, "step": 13290 }, { "epoch": 2.2624819256613082, "grad_norm": 3.742136001586914, "learning_rate": 8.076946779192013e-06, "loss": 1.7597, "step": 13300 }, { "epoch": 2.2641830398911287, "grad_norm": 4.50217342376709, "learning_rate": 8.069688922350838e-06, "loss": 1.7729, "step": 13310 }, { "epoch": 2.265884154120949, "grad_norm": 5.608749866485596, "learning_rate": 8.062429430129496e-06, "loss": 1.7258, "step": 13320 }, { "epoch": 2.2675852683507696, "grad_norm": 4.774039268493652, "learning_rate": 8.055168311344003e-06, "loss": 1.6989, "step": 13330 }, { "epoch": 2.2692863825805905, "grad_norm": 4.012284755706787, "learning_rate": 8.04790557481235e-06, "loss": 1.6824, "step": 13340 }, { "epoch": 2.270987496810411, "grad_norm": 4.178895950317383, "learning_rate": 8.040641229354499e-06, "loss": 1.6828, "step": 13350 }, { "epoch": 2.2726886110402313, "grad_norm": 4.772857189178467, "learning_rate": 8.033375283792356e-06, "loss": 1.6993, "step": 13360 }, { "epoch": 2.274389725270052, "grad_norm": 4.545144557952881, "learning_rate": 8.026107746949784e-06, "loss": 1.6041, "step": 13370 }, { "epoch": 2.2760908394998722, "grad_norm": 4.623827934265137, "learning_rate": 8.018838627652563e-06, "loss": 1.6094, "step": 13380 }, { "epoch": 2.277791953729693, "grad_norm": 4.452998161315918, "learning_rate": 8.0115679347284e-06, "loss": 1.7036, "step": 13390 }, { "epoch": 2.2794930679595136, "grad_norm": 4.04271936416626, "learning_rate": 8.004295677006923e-06, "loss": 1.6462, "step": 13400 }, { "epoch": 2.281194182189334, "grad_norm": 4.8894524574279785, "learning_rate": 7.997021863319647e-06, "loss": 1.782, "step": 13410 }, { "epoch": 2.2828952964191545, "grad_norm": 5.355381965637207, "learning_rate": 7.989746502499978e-06, "loss": 1.6844, "step": 13420 }, { "epoch": 2.284596410648975, "grad_norm": 4.542727947235107, "learning_rate": 7.982469603383212e-06, "loss": 1.6117, "step": 13430 }, { "epoch": 2.286297524878796, "grad_norm": 4.602875232696533, "learning_rate": 7.975191174806502e-06, "loss": 1.65, "step": 13440 }, { "epoch": 2.2879986391086162, "grad_norm": 4.5872955322265625, "learning_rate": 7.967911225608862e-06, "loss": 1.6256, "step": 13450 }, { "epoch": 2.2896997533384367, "grad_norm": 4.036448955535889, "learning_rate": 7.960629764631154e-06, "loss": 1.7718, "step": 13460 }, { "epoch": 2.291400867568257, "grad_norm": 5.055094242095947, "learning_rate": 7.953346800716076e-06, "loss": 1.7826, "step": 13470 }, { "epoch": 2.2931019817980776, "grad_norm": 4.7970380783081055, "learning_rate": 7.946062342708144e-06, "loss": 1.5622, "step": 13480 }, { "epoch": 2.2948030960278984, "grad_norm": 5.095872402191162, "learning_rate": 7.9387763994537e-06, "loss": 1.7061, "step": 13490 }, { "epoch": 2.296504210257719, "grad_norm": 4.219940185546875, "learning_rate": 7.93148897980088e-06, "loss": 1.7782, "step": 13500 }, { "epoch": 2.2982053244875393, "grad_norm": 4.889625549316406, "learning_rate": 7.924200092599618e-06, "loss": 1.7454, "step": 13510 }, { "epoch": 2.29990643871736, "grad_norm": 4.371610641479492, "learning_rate": 7.916909746701637e-06, "loss": 1.7023, "step": 13520 }, { "epoch": 2.3016075529471802, "grad_norm": 5.004741191864014, "learning_rate": 7.909617950960411e-06, "loss": 1.7414, "step": 13530 }, { "epoch": 2.303308667177001, "grad_norm": 4.704267501831055, "learning_rate": 7.902324714231194e-06, "loss": 1.7434, "step": 13540 }, { "epoch": 2.3050097814068216, "grad_norm": 5.118117809295654, "learning_rate": 7.895030045370983e-06, "loss": 1.7567, "step": 13550 }, { "epoch": 2.306710895636642, "grad_norm": 4.657116889953613, "learning_rate": 7.887733953238513e-06, "loss": 1.6377, "step": 13560 }, { "epoch": 2.3084120098664624, "grad_norm": 5.6785783767700195, "learning_rate": 7.880436446694251e-06, "loss": 1.6896, "step": 13570 }, { "epoch": 2.310113124096283, "grad_norm": 4.47097110748291, "learning_rate": 7.87313753460038e-06, "loss": 1.6696, "step": 13580 }, { "epoch": 2.3118142383261038, "grad_norm": 3.680044174194336, "learning_rate": 7.865837225820784e-06, "loss": 1.6833, "step": 13590 }, { "epoch": 2.313515352555924, "grad_norm": 4.680473804473877, "learning_rate": 7.858535529221054e-06, "loss": 1.7197, "step": 13600 }, { "epoch": 2.3152164667857447, "grad_norm": 4.563645839691162, "learning_rate": 7.85123245366846e-06, "loss": 1.6869, "step": 13610 }, { "epoch": 2.316917581015565, "grad_norm": 4.670734405517578, "learning_rate": 7.843928008031949e-06, "loss": 1.7179, "step": 13620 }, { "epoch": 2.3186186952453856, "grad_norm": 3.7702691555023193, "learning_rate": 7.836622201182127e-06, "loss": 1.6822, "step": 13630 }, { "epoch": 2.3203198094752064, "grad_norm": 4.605519771575928, "learning_rate": 7.82931504199126e-06, "loss": 1.678, "step": 13640 }, { "epoch": 2.322020923705027, "grad_norm": 4.155745983123779, "learning_rate": 7.822006539333249e-06, "loss": 1.7425, "step": 13650 }, { "epoch": 2.3237220379348473, "grad_norm": 4.561372756958008, "learning_rate": 7.814696702083637e-06, "loss": 1.6521, "step": 13660 }, { "epoch": 2.3254231521646678, "grad_norm": 4.688206672668457, "learning_rate": 7.807385539119572e-06, "loss": 1.6336, "step": 13670 }, { "epoch": 2.327124266394488, "grad_norm": 4.2422966957092285, "learning_rate": 7.800073059319828e-06, "loss": 1.7293, "step": 13680 }, { "epoch": 2.328825380624309, "grad_norm": 5.442010402679443, "learning_rate": 7.792759271564769e-06, "loss": 1.6455, "step": 13690 }, { "epoch": 2.3305264948541295, "grad_norm": 4.347396373748779, "learning_rate": 7.785444184736351e-06, "loss": 1.6886, "step": 13700 }, { "epoch": 2.33222760908395, "grad_norm": 4.186034202575684, "learning_rate": 7.778127807718106e-06, "loss": 1.6293, "step": 13710 }, { "epoch": 2.3339287233137704, "grad_norm": 4.553366184234619, "learning_rate": 7.770810149395132e-06, "loss": 1.6962, "step": 13720 }, { "epoch": 2.335629837543591, "grad_norm": 5.827213764190674, "learning_rate": 7.763491218654083e-06, "loss": 1.769, "step": 13730 }, { "epoch": 2.3373309517734118, "grad_norm": 5.1207051277160645, "learning_rate": 7.756171024383162e-06, "loss": 1.6669, "step": 13740 }, { "epoch": 2.339032066003232, "grad_norm": 5.226886749267578, "learning_rate": 7.748849575472106e-06, "loss": 1.7029, "step": 13750 }, { "epoch": 2.3407331802330527, "grad_norm": 4.730441093444824, "learning_rate": 7.741526880812167e-06, "loss": 1.6802, "step": 13760 }, { "epoch": 2.342434294462873, "grad_norm": 5.613226890563965, "learning_rate": 7.734202949296123e-06, "loss": 1.6901, "step": 13770 }, { "epoch": 2.3441354086926935, "grad_norm": 4.034677028656006, "learning_rate": 7.726877789818241e-06, "loss": 1.6834, "step": 13780 }, { "epoch": 2.3458365229225144, "grad_norm": 4.615573883056641, "learning_rate": 7.719551411274289e-06, "loss": 1.6692, "step": 13790 }, { "epoch": 2.347537637152335, "grad_norm": 4.723323345184326, "learning_rate": 7.712223822561515e-06, "loss": 1.7574, "step": 13800 }, { "epoch": 2.3492387513821553, "grad_norm": 4.764580249786377, "learning_rate": 7.704895032578631e-06, "loss": 1.7047, "step": 13810 }, { "epoch": 2.3509398656119758, "grad_norm": 7.666451454162598, "learning_rate": 7.697565050225806e-06, "loss": 1.7018, "step": 13820 }, { "epoch": 2.352640979841796, "grad_norm": 5.401124477386475, "learning_rate": 7.69023388440467e-06, "loss": 1.7899, "step": 13830 }, { "epoch": 2.354342094071617, "grad_norm": 5.429254055023193, "learning_rate": 7.682901544018278e-06, "loss": 1.6748, "step": 13840 }, { "epoch": 2.3560432083014375, "grad_norm": 4.640020370483398, "learning_rate": 7.675568037971112e-06, "loss": 1.7833, "step": 13850 }, { "epoch": 2.357744322531258, "grad_norm": 5.1837873458862305, "learning_rate": 7.668233375169079e-06, "loss": 1.6224, "step": 13860 }, { "epoch": 2.3594454367610784, "grad_norm": 4.908547401428223, "learning_rate": 7.66089756451948e-06, "loss": 1.7572, "step": 13870 }, { "epoch": 2.361146550990899, "grad_norm": 4.419002056121826, "learning_rate": 7.653560614931015e-06, "loss": 1.6856, "step": 13880 }, { "epoch": 2.3628476652207198, "grad_norm": 5.0171217918396, "learning_rate": 7.64622253531377e-06, "loss": 1.7392, "step": 13890 }, { "epoch": 2.36454877945054, "grad_norm": 4.358807563781738, "learning_rate": 7.6388833345792e-06, "loss": 1.6246, "step": 13900 }, { "epoch": 2.3662498936803606, "grad_norm": 5.187094211578369, "learning_rate": 7.631543021640117e-06, "loss": 1.7443, "step": 13910 }, { "epoch": 2.367951007910181, "grad_norm": 4.929495334625244, "learning_rate": 7.624201605410692e-06, "loss": 1.7132, "step": 13920 }, { "epoch": 2.3696521221400015, "grad_norm": 4.808412075042725, "learning_rate": 7.616859094806427e-06, "loss": 1.8151, "step": 13930 }, { "epoch": 2.3713532363698224, "grad_norm": 4.2984161376953125, "learning_rate": 7.609515498744166e-06, "loss": 1.7249, "step": 13940 }, { "epoch": 2.373054350599643, "grad_norm": 6.034128189086914, "learning_rate": 7.602170826142058e-06, "loss": 1.7053, "step": 13950 }, { "epoch": 2.3747554648294633, "grad_norm": 3.8828516006469727, "learning_rate": 7.594825085919566e-06, "loss": 1.6311, "step": 13960 }, { "epoch": 2.3764565790592838, "grad_norm": 5.292279243469238, "learning_rate": 7.587478286997449e-06, "loss": 1.7548, "step": 13970 }, { "epoch": 2.378157693289104, "grad_norm": 4.623950958251953, "learning_rate": 7.58013043829775e-06, "loss": 1.7034, "step": 13980 }, { "epoch": 2.379858807518925, "grad_norm": 4.3811798095703125, "learning_rate": 7.572781548743785e-06, "loss": 1.6608, "step": 13990 }, { "epoch": 2.3815599217487455, "grad_norm": 4.47433614730835, "learning_rate": 7.5654316272601425e-06, "loss": 1.7934, "step": 14000 }, { "epoch": 2.383261035978566, "grad_norm": 4.668087005615234, "learning_rate": 7.558080682772654e-06, "loss": 1.6634, "step": 14010 }, { "epoch": 2.3849621502083864, "grad_norm": 4.859496593475342, "learning_rate": 7.550728724208397e-06, "loss": 1.6277, "step": 14020 }, { "epoch": 2.386663264438207, "grad_norm": 5.9143500328063965, "learning_rate": 7.543375760495687e-06, "loss": 1.7545, "step": 14030 }, { "epoch": 2.3883643786680278, "grad_norm": 4.088611602783203, "learning_rate": 7.536021800564049e-06, "loss": 1.6934, "step": 14040 }, { "epoch": 2.390065492897848, "grad_norm": 4.978387355804443, "learning_rate": 7.528666853344227e-06, "loss": 1.6947, "step": 14050 }, { "epoch": 2.3917666071276686, "grad_norm": 4.899922847747803, "learning_rate": 7.521310927768158e-06, "loss": 1.681, "step": 14060 }, { "epoch": 2.393467721357489, "grad_norm": 5.146270751953125, "learning_rate": 7.51395403276897e-06, "loss": 1.6137, "step": 14070 }, { "epoch": 2.3951688355873095, "grad_norm": 5.114542484283447, "learning_rate": 7.506596177280966e-06, "loss": 1.7901, "step": 14080 }, { "epoch": 2.3968699498171304, "grad_norm": 5.798167705535889, "learning_rate": 7.499237370239621e-06, "loss": 1.7634, "step": 14090 }, { "epoch": 2.398571064046951, "grad_norm": 4.79186487197876, "learning_rate": 7.491877620581558e-06, "loss": 1.7108, "step": 14100 }, { "epoch": 2.4002721782767713, "grad_norm": 3.603715419769287, "learning_rate": 7.4845169372445494e-06, "loss": 1.7623, "step": 14110 }, { "epoch": 2.4019732925065918, "grad_norm": 4.47135591506958, "learning_rate": 7.4771553291675035e-06, "loss": 1.7133, "step": 14120 }, { "epoch": 2.403674406736412, "grad_norm": 4.575337886810303, "learning_rate": 7.469792805290443e-06, "loss": 1.6831, "step": 14130 }, { "epoch": 2.405375520966233, "grad_norm": 5.56679630279541, "learning_rate": 7.4624293745545135e-06, "loss": 1.7401, "step": 14140 }, { "epoch": 2.4070766351960535, "grad_norm": 5.321033954620361, "learning_rate": 7.455065045901955e-06, "loss": 1.6929, "step": 14150 }, { "epoch": 2.408777749425874, "grad_norm": 4.899209499359131, "learning_rate": 7.447699828276098e-06, "loss": 1.6492, "step": 14160 }, { "epoch": 2.4104788636556944, "grad_norm": 4.6105194091796875, "learning_rate": 7.4403337306213545e-06, "loss": 1.7318, "step": 14170 }, { "epoch": 2.412179977885515, "grad_norm": 5.365864276885986, "learning_rate": 7.4329667618832096e-06, "loss": 1.8249, "step": 14180 }, { "epoch": 2.4138810921153357, "grad_norm": 4.625761985778809, "learning_rate": 7.425598931008195e-06, "loss": 1.6837, "step": 14190 }, { "epoch": 2.415582206345156, "grad_norm": 5.167141914367676, "learning_rate": 7.4182302469439015e-06, "loss": 1.6767, "step": 14200 }, { "epoch": 2.4172833205749766, "grad_norm": 5.74930477142334, "learning_rate": 7.410860718638946e-06, "loss": 1.7934, "step": 14210 }, { "epoch": 2.418984434804797, "grad_norm": 5.915599346160889, "learning_rate": 7.403490355042979e-06, "loss": 1.683, "step": 14220 }, { "epoch": 2.4206855490346175, "grad_norm": 4.36338472366333, "learning_rate": 7.396119165106659e-06, "loss": 1.6917, "step": 14230 }, { "epoch": 2.4223866632644384, "grad_norm": 4.530436992645264, "learning_rate": 7.388747157781655e-06, "loss": 1.6475, "step": 14240 }, { "epoch": 2.424087777494259, "grad_norm": 3.656707525253296, "learning_rate": 7.381374342020617e-06, "loss": 1.7279, "step": 14250 }, { "epoch": 2.4257888917240793, "grad_norm": 4.894069194793701, "learning_rate": 7.374000726777191e-06, "loss": 1.6692, "step": 14260 }, { "epoch": 2.4274900059538997, "grad_norm": 4.92869758605957, "learning_rate": 7.366626321005984e-06, "loss": 1.7126, "step": 14270 }, { "epoch": 2.42919112018372, "grad_norm": 5.302714824676514, "learning_rate": 7.359251133662563e-06, "loss": 1.6418, "step": 14280 }, { "epoch": 2.430892234413541, "grad_norm": 4.040475845336914, "learning_rate": 7.3518751737034536e-06, "loss": 1.7015, "step": 14290 }, { "epoch": 2.4325933486433615, "grad_norm": 3.626659631729126, "learning_rate": 7.3444984500861075e-06, "loss": 1.7544, "step": 14300 }, { "epoch": 2.434294462873182, "grad_norm": 4.41288423538208, "learning_rate": 7.337120971768914e-06, "loss": 1.7394, "step": 14310 }, { "epoch": 2.4359955771030024, "grad_norm": 4.458189964294434, "learning_rate": 7.329742747711171e-06, "loss": 1.8029, "step": 14320 }, { "epoch": 2.437696691332823, "grad_norm": 4.584317207336426, "learning_rate": 7.322363786873085e-06, "loss": 1.7365, "step": 14330 }, { "epoch": 2.4393978055626437, "grad_norm": 5.787274360656738, "learning_rate": 7.314984098215762e-06, "loss": 1.7295, "step": 14340 }, { "epoch": 2.441098919792464, "grad_norm": 4.288479328155518, "learning_rate": 7.307603690701184e-06, "loss": 1.749, "step": 14350 }, { "epoch": 2.4428000340222846, "grad_norm": 5.697612762451172, "learning_rate": 7.30022257329221e-06, "loss": 1.683, "step": 14360 }, { "epoch": 2.444501148252105, "grad_norm": 5.400142669677734, "learning_rate": 7.292840754952561e-06, "loss": 1.6881, "step": 14370 }, { "epoch": 2.4462022624819255, "grad_norm": 4.963900566101074, "learning_rate": 7.28545824464681e-06, "loss": 1.6204, "step": 14380 }, { "epoch": 2.4479033767117464, "grad_norm": 3.567223310470581, "learning_rate": 7.278075051340368e-06, "loss": 1.6629, "step": 14390 }, { "epoch": 2.449604490941567, "grad_norm": 4.4511494636535645, "learning_rate": 7.2706911839994764e-06, "loss": 1.7245, "step": 14400 }, { "epoch": 2.4513056051713873, "grad_norm": 5.4482622146606445, "learning_rate": 7.263306651591197e-06, "loss": 1.612, "step": 14410 }, { "epoch": 2.4530067194012077, "grad_norm": 4.920886516571045, "learning_rate": 7.2559214630833945e-06, "loss": 1.6784, "step": 14420 }, { "epoch": 2.454707833631028, "grad_norm": 4.329588890075684, "learning_rate": 7.248535627444738e-06, "loss": 1.8066, "step": 14430 }, { "epoch": 2.456408947860849, "grad_norm": 5.034507751464844, "learning_rate": 7.241149153644675e-06, "loss": 1.7325, "step": 14440 }, { "epoch": 2.4581100620906695, "grad_norm": 4.867656230926514, "learning_rate": 7.233762050653434e-06, "loss": 1.6242, "step": 14450 }, { "epoch": 2.45981117632049, "grad_norm": 4.606437683105469, "learning_rate": 7.226374327442e-06, "loss": 1.6645, "step": 14460 }, { "epoch": 2.4615122905503104, "grad_norm": 4.166538715362549, "learning_rate": 7.2189859929821184e-06, "loss": 1.7196, "step": 14470 }, { "epoch": 2.463213404780131, "grad_norm": 5.104836940765381, "learning_rate": 7.211597056246274e-06, "loss": 1.7106, "step": 14480 }, { "epoch": 2.4649145190099517, "grad_norm": 4.810766696929932, "learning_rate": 7.2042075262076865e-06, "loss": 1.7215, "step": 14490 }, { "epoch": 2.466615633239772, "grad_norm": 4.952357292175293, "learning_rate": 7.196817411840287e-06, "loss": 1.7115, "step": 14500 }, { "epoch": 2.4683167474695926, "grad_norm": 4.354887008666992, "learning_rate": 7.189426722118726e-06, "loss": 1.6942, "step": 14510 }, { "epoch": 2.470017861699413, "grad_norm": 4.45330810546875, "learning_rate": 7.18203546601835e-06, "loss": 1.6784, "step": 14520 }, { "epoch": 2.4717189759292335, "grad_norm": 4.8778300285339355, "learning_rate": 7.1746436525151886e-06, "loss": 1.6435, "step": 14530 }, { "epoch": 2.4734200901590544, "grad_norm": 5.075737476348877, "learning_rate": 7.167251290585956e-06, "loss": 1.7026, "step": 14540 }, { "epoch": 2.475121204388875, "grad_norm": 3.924260377883911, "learning_rate": 7.159858389208025e-06, "loss": 1.6816, "step": 14550 }, { "epoch": 2.4768223186186953, "grad_norm": 5.6758012771606445, "learning_rate": 7.152464957359426e-06, "loss": 1.7027, "step": 14560 }, { "epoch": 2.4785234328485157, "grad_norm": 4.747528553009033, "learning_rate": 7.145071004018839e-06, "loss": 1.7251, "step": 14570 }, { "epoch": 2.480224547078336, "grad_norm": 5.045699596405029, "learning_rate": 7.137676538165569e-06, "loss": 1.5223, "step": 14580 }, { "epoch": 2.481925661308157, "grad_norm": 5.234375476837158, "learning_rate": 7.13028156877955e-06, "loss": 1.7245, "step": 14590 }, { "epoch": 2.4836267755379775, "grad_norm": 4.646345138549805, "learning_rate": 7.122886104841322e-06, "loss": 1.7356, "step": 14600 }, { "epoch": 2.485327889767798, "grad_norm": 5.123286247253418, "learning_rate": 7.115490155332031e-06, "loss": 1.7379, "step": 14610 }, { "epoch": 2.4870290039976184, "grad_norm": 4.82652473449707, "learning_rate": 7.108093729233406e-06, "loss": 1.6862, "step": 14620 }, { "epoch": 2.488730118227439, "grad_norm": 4.396784782409668, "learning_rate": 7.100696835527762e-06, "loss": 1.7695, "step": 14630 }, { "epoch": 2.4904312324572597, "grad_norm": 4.29108190536499, "learning_rate": 7.093299483197979e-06, "loss": 1.6293, "step": 14640 }, { "epoch": 2.49213234668708, "grad_norm": 5.77024507522583, "learning_rate": 7.085901681227492e-06, "loss": 1.6403, "step": 14650 }, { "epoch": 2.4938334609169006, "grad_norm": 5.397991180419922, "learning_rate": 7.078503438600283e-06, "loss": 1.7527, "step": 14660 }, { "epoch": 2.495534575146721, "grad_norm": 4.551114559173584, "learning_rate": 7.07110476430087e-06, "loss": 1.7193, "step": 14670 }, { "epoch": 2.4972356893765415, "grad_norm": 4.737240791320801, "learning_rate": 7.0637056673142985e-06, "loss": 1.6529, "step": 14680 }, { "epoch": 2.4989368036063624, "grad_norm": 5.69704008102417, "learning_rate": 7.05630615662612e-06, "loss": 1.6794, "step": 14690 }, { "epoch": 2.500637917836183, "grad_norm": 4.747729778289795, "learning_rate": 7.048906241222392e-06, "loss": 1.7111, "step": 14700 }, { "epoch": 2.5023390320660033, "grad_norm": 5.632922649383545, "learning_rate": 7.0415059300896664e-06, "loss": 1.744, "step": 14710 }, { "epoch": 2.5040401462958237, "grad_norm": 5.497499465942383, "learning_rate": 7.03410523221497e-06, "loss": 1.7957, "step": 14720 }, { "epoch": 2.505741260525644, "grad_norm": 4.994222164154053, "learning_rate": 7.026704156585805e-06, "loss": 1.655, "step": 14730 }, { "epoch": 2.507442374755465, "grad_norm": 5.27976131439209, "learning_rate": 7.019302712190128e-06, "loss": 1.6606, "step": 14740 }, { "epoch": 2.5091434889852855, "grad_norm": 5.364691734313965, "learning_rate": 7.011900908016344e-06, "loss": 1.7161, "step": 14750 }, { "epoch": 2.510844603215106, "grad_norm": 5.166101455688477, "learning_rate": 7.004498753053299e-06, "loss": 1.7393, "step": 14760 }, { "epoch": 2.5125457174449264, "grad_norm": 4.633950710296631, "learning_rate": 6.99709625629026e-06, "loss": 1.6672, "step": 14770 }, { "epoch": 2.514246831674747, "grad_norm": 4.659962177276611, "learning_rate": 6.9896934267169124e-06, "loss": 1.7071, "step": 14780 }, { "epoch": 2.5159479459045677, "grad_norm": 3.876272201538086, "learning_rate": 6.982290273323339e-06, "loss": 1.703, "step": 14790 }, { "epoch": 2.517649060134388, "grad_norm": 5.550055503845215, "learning_rate": 6.974886805100029e-06, "loss": 1.7049, "step": 14800 }, { "epoch": 2.5193501743642086, "grad_norm": 5.260471820831299, "learning_rate": 6.967483031037843e-06, "loss": 1.7017, "step": 14810 }, { "epoch": 2.521051288594029, "grad_norm": 4.32659387588501, "learning_rate": 6.960078960128013e-06, "loss": 1.6116, "step": 14820 }, { "epoch": 2.5227524028238495, "grad_norm": 6.853899002075195, "learning_rate": 6.952674601362141e-06, "loss": 1.6228, "step": 14830 }, { "epoch": 2.5244535170536704, "grad_norm": 4.765081405639648, "learning_rate": 6.945269963732167e-06, "loss": 1.684, "step": 14840 }, { "epoch": 2.526154631283491, "grad_norm": 3.684476375579834, "learning_rate": 6.937865056230376e-06, "loss": 1.7092, "step": 14850 }, { "epoch": 2.5278557455133113, "grad_norm": 6.573903560638428, "learning_rate": 6.930459887849382e-06, "loss": 1.6655, "step": 14860 }, { "epoch": 2.5295568597431317, "grad_norm": 4.969848155975342, "learning_rate": 6.923054467582111e-06, "loss": 1.6628, "step": 14870 }, { "epoch": 2.531257973972952, "grad_norm": 4.645257949829102, "learning_rate": 6.915648804421799e-06, "loss": 1.6335, "step": 14880 }, { "epoch": 2.532959088202773, "grad_norm": 5.171104907989502, "learning_rate": 6.908242907361974e-06, "loss": 1.6676, "step": 14890 }, { "epoch": 2.5346602024325935, "grad_norm": 5.349738121032715, "learning_rate": 6.900836785396449e-06, "loss": 1.6593, "step": 14900 }, { "epoch": 2.536361316662414, "grad_norm": 5.259891033172607, "learning_rate": 6.893430447519312e-06, "loss": 1.6858, "step": 14910 }, { "epoch": 2.5380624308922344, "grad_norm": 4.558155059814453, "learning_rate": 6.8860239027249114e-06, "loss": 1.6901, "step": 14920 }, { "epoch": 2.539763545122055, "grad_norm": 5.312442779541016, "learning_rate": 6.878617160007848e-06, "loss": 1.6492, "step": 14930 }, { "epoch": 2.5414646593518757, "grad_norm": 5.157334327697754, "learning_rate": 6.871210228362961e-06, "loss": 1.6853, "step": 14940 }, { "epoch": 2.543165773581696, "grad_norm": 4.31597375869751, "learning_rate": 6.86380311678532e-06, "loss": 1.687, "step": 14950 }, { "epoch": 2.5448668878115166, "grad_norm": 4.309648036956787, "learning_rate": 6.856395834270214e-06, "loss": 1.6711, "step": 14960 }, { "epoch": 2.546568002041337, "grad_norm": 5.050992012023926, "learning_rate": 6.848988389813137e-06, "loss": 1.6623, "step": 14970 }, { "epoch": 2.5482691162711575, "grad_norm": 4.8922319412231445, "learning_rate": 6.841580792409787e-06, "loss": 1.6639, "step": 14980 }, { "epoch": 2.5499702305009784, "grad_norm": 4.5712995529174805, "learning_rate": 6.834173051056034e-06, "loss": 1.6761, "step": 14990 }, { "epoch": 2.551671344730799, "grad_norm": 4.379843235015869, "learning_rate": 6.826765174747939e-06, "loss": 1.6514, "step": 15000 }, { "epoch": 2.5533724589606193, "grad_norm": 4.673671722412109, "learning_rate": 6.819357172481715e-06, "loss": 1.7389, "step": 15010 }, { "epoch": 2.5550735731904397, "grad_norm": 5.1559367179870605, "learning_rate": 6.811949053253731e-06, "loss": 1.5973, "step": 15020 }, { "epoch": 2.55677468742026, "grad_norm": 4.359671592712402, "learning_rate": 6.804540826060503e-06, "loss": 1.7217, "step": 15030 }, { "epoch": 2.558475801650081, "grad_norm": 5.706950664520264, "learning_rate": 6.7971324998986706e-06, "loss": 1.625, "step": 15040 }, { "epoch": 2.5601769158799015, "grad_norm": 4.20098876953125, "learning_rate": 6.789724083764997e-06, "loss": 1.7012, "step": 15050 }, { "epoch": 2.561878030109722, "grad_norm": 5.58827543258667, "learning_rate": 6.782315586656356e-06, "loss": 1.6924, "step": 15060 }, { "epoch": 2.5635791443395424, "grad_norm": 4.752255439758301, "learning_rate": 6.774907017569722e-06, "loss": 1.6697, "step": 15070 }, { "epoch": 2.565280258569363, "grad_norm": 5.409000873565674, "learning_rate": 6.767498385502147e-06, "loss": 1.7054, "step": 15080 }, { "epoch": 2.5669813727991837, "grad_norm": 4.958235263824463, "learning_rate": 6.76008969945077e-06, "loss": 1.7299, "step": 15090 }, { "epoch": 2.568682487029004, "grad_norm": 3.6053428649902344, "learning_rate": 6.752680968412789e-06, "loss": 1.611, "step": 15100 }, { "epoch": 2.5703836012588246, "grad_norm": 4.998643398284912, "learning_rate": 6.745272201385462e-06, "loss": 1.7689, "step": 15110 }, { "epoch": 2.572084715488645, "grad_norm": 4.558544635772705, "learning_rate": 6.737863407366085e-06, "loss": 1.6389, "step": 15120 }, { "epoch": 2.5737858297184655, "grad_norm": 4.5095109939575195, "learning_rate": 6.7304545953519895e-06, "loss": 1.7371, "step": 15130 }, { "epoch": 2.5754869439482864, "grad_norm": 4.6984148025512695, "learning_rate": 6.72304577434053e-06, "loss": 1.622, "step": 15140 }, { "epoch": 2.577188058178107, "grad_norm": 5.407587051391602, "learning_rate": 6.715636953329072e-06, "loss": 1.7879, "step": 15150 }, { "epoch": 2.5788891724079273, "grad_norm": 5.009973049163818, "learning_rate": 6.7082281413149776e-06, "loss": 1.7139, "step": 15160 }, { "epoch": 2.5805902866377477, "grad_norm": 5.410165309906006, "learning_rate": 6.700819347295599e-06, "loss": 1.6792, "step": 15170 }, { "epoch": 2.582291400867568, "grad_norm": 5.430539131164551, "learning_rate": 6.693410580268271e-06, "loss": 1.7042, "step": 15180 }, { "epoch": 2.583992515097389, "grad_norm": 5.337528705596924, "learning_rate": 6.686001849230292e-06, "loss": 1.669, "step": 15190 }, { "epoch": 2.5856936293272095, "grad_norm": 4.983978748321533, "learning_rate": 6.6785931631789134e-06, "loss": 1.733, "step": 15200 }, { "epoch": 2.58739474355703, "grad_norm": 4.920320510864258, "learning_rate": 6.6711845311113404e-06, "loss": 1.6872, "step": 15210 }, { "epoch": 2.5890958577868504, "grad_norm": 4.505009174346924, "learning_rate": 6.663775962024704e-06, "loss": 1.6061, "step": 15220 }, { "epoch": 2.590796972016671, "grad_norm": 4.6945905685424805, "learning_rate": 6.656367464916064e-06, "loss": 1.7347, "step": 15230 }, { "epoch": 2.5924980862464917, "grad_norm": 4.4238433837890625, "learning_rate": 6.648959048782391e-06, "loss": 1.7022, "step": 15240 }, { "epoch": 2.594199200476312, "grad_norm": 5.2501044273376465, "learning_rate": 6.641550722620559e-06, "loss": 1.6146, "step": 15250 }, { "epoch": 2.5959003147061326, "grad_norm": 6.0587239265441895, "learning_rate": 6.63414249542733e-06, "loss": 1.5737, "step": 15260 }, { "epoch": 2.597601428935953, "grad_norm": 4.9692254066467285, "learning_rate": 6.626734376199349e-06, "loss": 1.736, "step": 15270 }, { "epoch": 2.5993025431657735, "grad_norm": 6.877859115600586, "learning_rate": 6.619326373933124e-06, "loss": 1.671, "step": 15280 }, { "epoch": 2.6010036573955944, "grad_norm": 5.190441131591797, "learning_rate": 6.6119184976250254e-06, "loss": 1.6989, "step": 15290 }, { "epoch": 2.602704771625415, "grad_norm": 5.681378364562988, "learning_rate": 6.604510756271275e-06, "loss": 1.7095, "step": 15300 }, { "epoch": 2.6044058858552352, "grad_norm": 5.812134265899658, "learning_rate": 6.597103158867924e-06, "loss": 1.7042, "step": 15310 }, { "epoch": 2.6061070000850557, "grad_norm": 5.831223011016846, "learning_rate": 6.589695714410848e-06, "loss": 1.6901, "step": 15320 }, { "epoch": 2.607808114314876, "grad_norm": 5.0628228187561035, "learning_rate": 6.582288431895742e-06, "loss": 1.745, "step": 15330 }, { "epoch": 2.609509228544697, "grad_norm": 4.264681339263916, "learning_rate": 6.5748813203180985e-06, "loss": 1.6821, "step": 15340 }, { "epoch": 2.6112103427745175, "grad_norm": 5.896673202514648, "learning_rate": 6.567474388673212e-06, "loss": 1.7215, "step": 15350 }, { "epoch": 2.612911457004338, "grad_norm": 4.254663944244385, "learning_rate": 6.560067645956149e-06, "loss": 1.6865, "step": 15360 }, { "epoch": 2.6146125712341584, "grad_norm": 5.108974933624268, "learning_rate": 6.552661101161749e-06, "loss": 1.6408, "step": 15370 }, { "epoch": 2.616313685463979, "grad_norm": 4.906005859375, "learning_rate": 6.545254763284613e-06, "loss": 1.6504, "step": 15380 }, { "epoch": 2.6180147996937997, "grad_norm": 5.226259708404541, "learning_rate": 6.537848641319088e-06, "loss": 1.6527, "step": 15390 }, { "epoch": 2.61971591392362, "grad_norm": 4.990200042724609, "learning_rate": 6.530442744259263e-06, "loss": 1.704, "step": 15400 }, { "epoch": 2.6214170281534406, "grad_norm": 5.2751874923706055, "learning_rate": 6.52303708109895e-06, "loss": 1.6663, "step": 15410 }, { "epoch": 2.623118142383261, "grad_norm": 5.105523109436035, "learning_rate": 6.51563166083168e-06, "loss": 1.6694, "step": 15420 }, { "epoch": 2.6248192566130815, "grad_norm": 5.40707540512085, "learning_rate": 6.508226492450685e-06, "loss": 1.6771, "step": 15430 }, { "epoch": 2.6265203708429024, "grad_norm": 4.876675128936768, "learning_rate": 6.500821584948895e-06, "loss": 1.6224, "step": 15440 }, { "epoch": 2.628221485072723, "grad_norm": 6.009464263916016, "learning_rate": 6.4941573986814035e-06, "loss": 1.7189, "step": 15450 }, { "epoch": 2.6299225993025432, "grad_norm": 5.135744571685791, "learning_rate": 6.486753011624476e-06, "loss": 1.6572, "step": 15460 }, { "epoch": 2.6316237135323637, "grad_norm": 4.236342906951904, "learning_rate": 6.479348911524416e-06, "loss": 1.7756, "step": 15470 }, { "epoch": 2.633324827762184, "grad_norm": 5.710334300994873, "learning_rate": 6.471945107372854e-06, "loss": 1.6689, "step": 15480 }, { "epoch": 2.635025941992005, "grad_norm": 4.605166435241699, "learning_rate": 6.464541608161061e-06, "loss": 1.7447, "step": 15490 }, { "epoch": 2.6367270562218255, "grad_norm": 4.071094512939453, "learning_rate": 6.457138422879936e-06, "loss": 1.7051, "step": 15500 }, { "epoch": 2.638428170451646, "grad_norm": 4.874546527862549, "learning_rate": 6.449735560520004e-06, "loss": 1.6924, "step": 15510 }, { "epoch": 2.6401292846814663, "grad_norm": 6.157898426055908, "learning_rate": 6.442333030071392e-06, "loss": 1.7024, "step": 15520 }, { "epoch": 2.641830398911287, "grad_norm": 6.2554931640625, "learning_rate": 6.434930840523823e-06, "loss": 1.6483, "step": 15530 }, { "epoch": 2.6435315131411077, "grad_norm": 4.651872634887695, "learning_rate": 6.427529000866609e-06, "loss": 1.6866, "step": 15540 }, { "epoch": 2.6452326273709277, "grad_norm": 4.448643684387207, "learning_rate": 6.420127520088638e-06, "loss": 1.7347, "step": 15550 }, { "epoch": 2.6469337416007486, "grad_norm": 5.3721137046813965, "learning_rate": 6.412726407178352e-06, "loss": 1.738, "step": 15560 }, { "epoch": 2.648634855830569, "grad_norm": 4.654369831085205, "learning_rate": 6.405325671123765e-06, "loss": 1.608, "step": 15570 }, { "epoch": 2.6503359700603895, "grad_norm": 4.932215690612793, "learning_rate": 6.397925320912418e-06, "loss": 1.7595, "step": 15580 }, { "epoch": 2.6520370842902103, "grad_norm": 4.843132972717285, "learning_rate": 6.39052536553139e-06, "loss": 1.6511, "step": 15590 }, { "epoch": 2.6537381985200303, "grad_norm": 4.947632789611816, "learning_rate": 6.3831258139672755e-06, "loss": 1.6544, "step": 15600 }, { "epoch": 2.6554393127498512, "grad_norm": 4.492228031158447, "learning_rate": 6.375726675206181e-06, "loss": 1.6936, "step": 15610 }, { "epoch": 2.6571404269796717, "grad_norm": 4.530820846557617, "learning_rate": 6.368327958233718e-06, "loss": 1.7186, "step": 15620 }, { "epoch": 2.658841541209492, "grad_norm": 5.303435325622559, "learning_rate": 6.360929672034979e-06, "loss": 1.7346, "step": 15630 }, { "epoch": 2.660542655439313, "grad_norm": 4.329684734344482, "learning_rate": 6.3535318255945315e-06, "loss": 1.6931, "step": 15640 }, { "epoch": 2.662243769669133, "grad_norm": 4.740088939666748, "learning_rate": 6.346134427896415e-06, "loss": 1.7554, "step": 15650 }, { "epoch": 2.663944883898954, "grad_norm": 4.414624214172363, "learning_rate": 6.338737487924118e-06, "loss": 1.6209, "step": 15660 }, { "epoch": 2.6656459981287743, "grad_norm": 5.261606693267822, "learning_rate": 6.331341014660575e-06, "loss": 1.6882, "step": 15670 }, { "epoch": 2.667347112358595, "grad_norm": 4.723742961883545, "learning_rate": 6.323945017088165e-06, "loss": 1.6296, "step": 15680 }, { "epoch": 2.6690482265884157, "grad_norm": 4.809913158416748, "learning_rate": 6.316549504188669e-06, "loss": 1.7024, "step": 15690 }, { "epoch": 2.6707493408182357, "grad_norm": 5.183937072753906, "learning_rate": 6.309154484943295e-06, "loss": 1.7459, "step": 15700 }, { "epoch": 2.6724504550480566, "grad_norm": 4.400472164154053, "learning_rate": 6.301759968332643e-06, "loss": 1.659, "step": 15710 }, { "epoch": 2.674151569277877, "grad_norm": 4.746430397033691, "learning_rate": 6.294365963336708e-06, "loss": 1.7923, "step": 15720 }, { "epoch": 2.6758526835076974, "grad_norm": 5.050037860870361, "learning_rate": 6.2869724789348605e-06, "loss": 1.591, "step": 15730 }, { "epoch": 2.6775537977375183, "grad_norm": 4.194572448730469, "learning_rate": 6.279579524105841e-06, "loss": 1.7503, "step": 15740 }, { "epoch": 2.6792549119673383, "grad_norm": 3.9525299072265625, "learning_rate": 6.272187107827744e-06, "loss": 1.6488, "step": 15750 }, { "epoch": 2.6809560261971592, "grad_norm": 4.082591533660889, "learning_rate": 6.264795239078013e-06, "loss": 1.6269, "step": 15760 }, { "epoch": 2.6826571404269797, "grad_norm": 4.713684558868408, "learning_rate": 6.257403926833425e-06, "loss": 1.6432, "step": 15770 }, { "epoch": 2.6843582546568, "grad_norm": 4.3438262939453125, "learning_rate": 6.2500131800700795e-06, "loss": 1.6673, "step": 15780 }, { "epoch": 2.686059368886621, "grad_norm": 4.460534572601318, "learning_rate": 6.242623007763395e-06, "loss": 1.663, "step": 15790 }, { "epoch": 2.687760483116441, "grad_norm": 5.520261287689209, "learning_rate": 6.235233418888085e-06, "loss": 1.7469, "step": 15800 }, { "epoch": 2.689461597346262, "grad_norm": 4.027762413024902, "learning_rate": 6.227844422418161e-06, "loss": 1.6448, "step": 15810 }, { "epoch": 2.6911627115760823, "grad_norm": 6.53219747543335, "learning_rate": 6.22045602732691e-06, "loss": 1.6534, "step": 15820 }, { "epoch": 2.6928638258059028, "grad_norm": 4.947280406951904, "learning_rate": 6.213068242586891e-06, "loss": 1.766, "step": 15830 }, { "epoch": 2.6945649400357237, "grad_norm": 4.839651584625244, "learning_rate": 6.20568107716992e-06, "loss": 1.6713, "step": 15840 }, { "epoch": 2.6962660542655437, "grad_norm": 4.531035900115967, "learning_rate": 6.198294540047068e-06, "loss": 1.7435, "step": 15850 }, { "epoch": 2.6979671684953646, "grad_norm": 5.5740461349487305, "learning_rate": 6.190908640188632e-06, "loss": 1.5887, "step": 15860 }, { "epoch": 2.699668282725185, "grad_norm": 5.6536664962768555, "learning_rate": 6.183523386564143e-06, "loss": 1.685, "step": 15870 }, { "epoch": 2.7013693969550054, "grad_norm": 4.010603427886963, "learning_rate": 6.176138788142345e-06, "loss": 1.6643, "step": 15880 }, { "epoch": 2.7030705111848263, "grad_norm": 5.687734603881836, "learning_rate": 6.168754853891184e-06, "loss": 1.6053, "step": 15890 }, { "epoch": 2.7047716254146463, "grad_norm": 5.492772579193115, "learning_rate": 6.1613715927778e-06, "loss": 1.6483, "step": 15900 }, { "epoch": 2.706472739644467, "grad_norm": 4.573555946350098, "learning_rate": 6.153989013768522e-06, "loss": 1.7613, "step": 15910 }, { "epoch": 2.7081738538742877, "grad_norm": 4.435306072235107, "learning_rate": 6.146607125828844e-06, "loss": 1.5846, "step": 15920 }, { "epoch": 2.709874968104108, "grad_norm": 5.207588195800781, "learning_rate": 6.139225937923423e-06, "loss": 1.5914, "step": 15930 }, { "epoch": 2.711576082333929, "grad_norm": 5.92121696472168, "learning_rate": 6.1318454590160624e-06, "loss": 1.7785, "step": 15940 }, { "epoch": 2.713277196563749, "grad_norm": 6.585207939147949, "learning_rate": 6.124465698069706e-06, "loss": 1.652, "step": 15950 }, { "epoch": 2.71497831079357, "grad_norm": 5.72299337387085, "learning_rate": 6.1170866640464345e-06, "loss": 1.7755, "step": 15960 }, { "epoch": 2.7166794250233903, "grad_norm": 4.594733715057373, "learning_rate": 6.109708365907434e-06, "loss": 1.7994, "step": 15970 }, { "epoch": 2.7183805392532108, "grad_norm": 4.8487443923950195, "learning_rate": 6.102330812613001e-06, "loss": 1.6315, "step": 15980 }, { "epoch": 2.7200816534830317, "grad_norm": 5.4819746017456055, "learning_rate": 6.0949540131225284e-06, "loss": 1.7589, "step": 15990 }, { "epoch": 2.7217827677128517, "grad_norm": 6.045997619628906, "learning_rate": 6.087577976394493e-06, "loss": 1.6223, "step": 16000 }, { "epoch": 2.7234838819426725, "grad_norm": 5.426220417022705, "learning_rate": 6.080202711386444e-06, "loss": 1.6925, "step": 16010 }, { "epoch": 2.725184996172493, "grad_norm": 5.517510890960693, "learning_rate": 6.072828227055e-06, "loss": 1.7678, "step": 16020 }, { "epoch": 2.7268861104023134, "grad_norm": 4.463784217834473, "learning_rate": 6.065454532355822e-06, "loss": 1.7528, "step": 16030 }, { "epoch": 2.7285872246321343, "grad_norm": 4.352672100067139, "learning_rate": 6.058081636243617e-06, "loss": 1.7728, "step": 16040 }, { "epoch": 2.7302883388619543, "grad_norm": 4.805075168609619, "learning_rate": 6.050709547672122e-06, "loss": 1.6959, "step": 16050 }, { "epoch": 2.731989453091775, "grad_norm": 5.226137161254883, "learning_rate": 6.043338275594094e-06, "loss": 1.5511, "step": 16060 }, { "epoch": 2.7336905673215957, "grad_norm": 5.238788604736328, "learning_rate": 6.0359678289612935e-06, "loss": 1.7664, "step": 16070 }, { "epoch": 2.735391681551416, "grad_norm": 5.0616960525512695, "learning_rate": 6.028598216724487e-06, "loss": 1.6906, "step": 16080 }, { "epoch": 2.737092795781237, "grad_norm": 4.657918453216553, "learning_rate": 6.021229447833422e-06, "loss": 1.588, "step": 16090 }, { "epoch": 2.738793910011057, "grad_norm": 4.7162322998046875, "learning_rate": 6.0138615312368216e-06, "loss": 1.7453, "step": 16100 }, { "epoch": 2.740495024240878, "grad_norm": 5.221200466156006, "learning_rate": 6.006494475882377e-06, "loss": 1.8455, "step": 16110 }, { "epoch": 2.7421961384706983, "grad_norm": 5.481096267700195, "learning_rate": 5.999128290716731e-06, "loss": 1.6014, "step": 16120 }, { "epoch": 2.7438972527005188, "grad_norm": 5.632100582122803, "learning_rate": 5.991762984685468e-06, "loss": 1.6951, "step": 16130 }, { "epoch": 2.745598366930339, "grad_norm": 4.87919807434082, "learning_rate": 5.984398566733111e-06, "loss": 1.7195, "step": 16140 }, { "epoch": 2.7472994811601597, "grad_norm": 5.315734386444092, "learning_rate": 5.9770350458030995e-06, "loss": 1.7032, "step": 16150 }, { "epoch": 2.7490005953899805, "grad_norm": 5.583526611328125, "learning_rate": 5.969672430837784e-06, "loss": 1.628, "step": 16160 }, { "epoch": 2.750701709619801, "grad_norm": 4.761498928070068, "learning_rate": 5.962310730778415e-06, "loss": 1.7235, "step": 16170 }, { "epoch": 2.7524028238496214, "grad_norm": 5.210669040679932, "learning_rate": 5.954949954565134e-06, "loss": 1.7194, "step": 16180 }, { "epoch": 2.754103938079442, "grad_norm": 5.470809459686279, "learning_rate": 5.947590111136958e-06, "loss": 1.6384, "step": 16190 }, { "epoch": 2.7558050523092623, "grad_norm": 4.662988662719727, "learning_rate": 5.940231209431773e-06, "loss": 1.5649, "step": 16200 }, { "epoch": 2.757506166539083, "grad_norm": 4.8013763427734375, "learning_rate": 5.932873258386321e-06, "loss": 1.6258, "step": 16210 }, { "epoch": 2.7592072807689036, "grad_norm": 4.362658977508545, "learning_rate": 5.9255162669361906e-06, "loss": 1.7916, "step": 16220 }, { "epoch": 2.760908394998724, "grad_norm": 4.981400489807129, "learning_rate": 5.918160244015799e-06, "loss": 1.6733, "step": 16230 }, { "epoch": 2.7626095092285445, "grad_norm": 5.8743577003479, "learning_rate": 5.910805198558391e-06, "loss": 1.6994, "step": 16240 }, { "epoch": 2.764310623458365, "grad_norm": 5.567012310028076, "learning_rate": 5.903451139496033e-06, "loss": 1.6526, "step": 16250 }, { "epoch": 2.766011737688186, "grad_norm": 5.757974147796631, "learning_rate": 5.89609807575958e-06, "loss": 1.7313, "step": 16260 }, { "epoch": 2.7677128519180063, "grad_norm": 4.79555606842041, "learning_rate": 5.8887460162786875e-06, "loss": 1.7318, "step": 16270 }, { "epoch": 2.7694139661478268, "grad_norm": 5.295034408569336, "learning_rate": 5.881394969981784e-06, "loss": 1.6734, "step": 16280 }, { "epoch": 2.771115080377647, "grad_norm": 4.794110298156738, "learning_rate": 5.874044945796072e-06, "loss": 1.6553, "step": 16290 }, { "epoch": 2.7728161946074676, "grad_norm": 5.3395915031433105, "learning_rate": 5.866695952647512e-06, "loss": 1.6836, "step": 16300 }, { "epoch": 2.7745173088372885, "grad_norm": 5.3286261558532715, "learning_rate": 5.8593479994608165e-06, "loss": 1.714, "step": 16310 }, { "epoch": 2.776218423067109, "grad_norm": 4.2219648361206055, "learning_rate": 5.852001095159426e-06, "loss": 1.7005, "step": 16320 }, { "epoch": 2.7779195372969294, "grad_norm": 5.95657205581665, "learning_rate": 5.844655248665518e-06, "loss": 1.7092, "step": 16330 }, { "epoch": 2.77962065152675, "grad_norm": 5.675844192504883, "learning_rate": 5.837310468899974e-06, "loss": 1.5973, "step": 16340 }, { "epoch": 2.7813217657565703, "grad_norm": 4.944215774536133, "learning_rate": 5.829966764782388e-06, "loss": 1.7103, "step": 16350 }, { "epoch": 2.783022879986391, "grad_norm": 4.9561686515808105, "learning_rate": 5.822624145231046e-06, "loss": 1.606, "step": 16360 }, { "epoch": 2.7847239942162116, "grad_norm": 4.788738250732422, "learning_rate": 5.815282619162917e-06, "loss": 1.6993, "step": 16370 }, { "epoch": 2.786425108446032, "grad_norm": 4.6601481437683105, "learning_rate": 5.80794219549364e-06, "loss": 1.6983, "step": 16380 }, { "epoch": 2.7881262226758525, "grad_norm": 5.334542274475098, "learning_rate": 5.800602883137519e-06, "loss": 1.6107, "step": 16390 }, { "epoch": 2.789827336905673, "grad_norm": 5.247471809387207, "learning_rate": 5.793264691007503e-06, "loss": 1.7659, "step": 16400 }, { "epoch": 2.791528451135494, "grad_norm": 5.605321884155273, "learning_rate": 5.785927628015184e-06, "loss": 1.6061, "step": 16410 }, { "epoch": 2.7932295653653143, "grad_norm": 5.843311786651611, "learning_rate": 5.778591703070784e-06, "loss": 1.7109, "step": 16420 }, { "epoch": 2.7949306795951347, "grad_norm": 5.994198322296143, "learning_rate": 5.771256925083143e-06, "loss": 1.7198, "step": 16430 }, { "epoch": 2.796631793824955, "grad_norm": 4.986034393310547, "learning_rate": 5.763923302959703e-06, "loss": 1.6503, "step": 16440 }, { "epoch": 2.7983329080547756, "grad_norm": 5.742405891418457, "learning_rate": 5.7565908456065076e-06, "loss": 1.6704, "step": 16450 }, { "epoch": 2.8000340222845965, "grad_norm": 5.289145469665527, "learning_rate": 5.749259561928184e-06, "loss": 1.6697, "step": 16460 }, { "epoch": 2.801735136514417, "grad_norm": 5.038757801055908, "learning_rate": 5.741929460827932e-06, "loss": 1.6896, "step": 16470 }, { "epoch": 2.8034362507442374, "grad_norm": 4.987347602844238, "learning_rate": 5.7346005512075215e-06, "loss": 1.6926, "step": 16480 }, { "epoch": 2.805137364974058, "grad_norm": 5.577208518981934, "learning_rate": 5.727272841967269e-06, "loss": 1.6673, "step": 16490 }, { "epoch": 2.8068384792038783, "grad_norm": 5.131875038146973, "learning_rate": 5.719946342006035e-06, "loss": 1.6913, "step": 16500 }, { "epoch": 2.808539593433699, "grad_norm": 4.246416091918945, "learning_rate": 5.7126210602212136e-06, "loss": 1.6361, "step": 16510 }, { "epoch": 2.8102407076635196, "grad_norm": 5.458721160888672, "learning_rate": 5.7052970055087156e-06, "loss": 1.6904, "step": 16520 }, { "epoch": 2.81194182189334, "grad_norm": 5.210801601409912, "learning_rate": 5.697974186762968e-06, "loss": 1.6842, "step": 16530 }, { "epoch": 2.8136429361231605, "grad_norm": 5.769560813903809, "learning_rate": 5.69065261287689e-06, "loss": 1.7072, "step": 16540 }, { "epoch": 2.815344050352981, "grad_norm": 5.030094623565674, "learning_rate": 5.683332292741893e-06, "loss": 1.7412, "step": 16550 }, { "epoch": 2.817045164582802, "grad_norm": 4.414101600646973, "learning_rate": 5.676013235247866e-06, "loss": 1.6901, "step": 16560 }, { "epoch": 2.8187462788126223, "grad_norm": 3.788679361343384, "learning_rate": 5.668695449283159e-06, "loss": 1.6915, "step": 16570 }, { "epoch": 2.8204473930424427, "grad_norm": 5.092972278594971, "learning_rate": 5.661378943734583e-06, "loss": 1.7052, "step": 16580 }, { "epoch": 2.822148507272263, "grad_norm": 5.105255603790283, "learning_rate": 5.654063727487396e-06, "loss": 1.7155, "step": 16590 }, { "epoch": 2.8238496215020836, "grad_norm": 4.697558879852295, "learning_rate": 5.646749809425289e-06, "loss": 1.7101, "step": 16600 }, { "epoch": 2.8255507357319045, "grad_norm": 4.967411518096924, "learning_rate": 5.639437198430372e-06, "loss": 1.711, "step": 16610 }, { "epoch": 2.827251849961725, "grad_norm": 4.828697204589844, "learning_rate": 5.63212590338317e-06, "loss": 1.7244, "step": 16620 }, { "epoch": 2.8289529641915454, "grad_norm": 6.0997395515441895, "learning_rate": 5.6248159331626125e-06, "loss": 1.6242, "step": 16630 }, { "epoch": 2.830654078421366, "grad_norm": 5.063726425170898, "learning_rate": 5.617507296646014e-06, "loss": 1.6804, "step": 16640 }, { "epoch": 2.8323551926511863, "grad_norm": 5.528225898742676, "learning_rate": 5.610200002709081e-06, "loss": 1.6623, "step": 16650 }, { "epoch": 2.834056306881007, "grad_norm": 5.0858049392700195, "learning_rate": 5.602894060225876e-06, "loss": 1.6641, "step": 16660 }, { "epoch": 2.8357574211108276, "grad_norm": 4.781316757202148, "learning_rate": 5.595589478068828e-06, "loss": 1.7048, "step": 16670 }, { "epoch": 2.837458535340648, "grad_norm": 4.909644603729248, "learning_rate": 5.5882862651087116e-06, "loss": 1.7676, "step": 16680 }, { "epoch": 2.8391596495704685, "grad_norm": 6.090540885925293, "learning_rate": 5.580984430214642e-06, "loss": 1.7098, "step": 16690 }, { "epoch": 2.840860763800289, "grad_norm": 5.437629222869873, "learning_rate": 5.573683982254053e-06, "loss": 1.7233, "step": 16700 }, { "epoch": 2.84256187803011, "grad_norm": 5.543697357177734, "learning_rate": 5.566384930092706e-06, "loss": 1.658, "step": 16710 }, { "epoch": 2.8442629922599303, "grad_norm": 4.917662143707275, "learning_rate": 5.559087282594656e-06, "loss": 1.6179, "step": 16720 }, { "epoch": 2.8459641064897507, "grad_norm": 5.373143196105957, "learning_rate": 5.551791048622258e-06, "loss": 1.69, "step": 16730 }, { "epoch": 2.847665220719571, "grad_norm": 4.551077842712402, "learning_rate": 5.5444962370361495e-06, "loss": 1.6458, "step": 16740 }, { "epoch": 2.8493663349493916, "grad_norm": 4.875065326690674, "learning_rate": 5.537202856695239e-06, "loss": 1.7397, "step": 16750 }, { "epoch": 2.8510674491792125, "grad_norm": 5.468201637268066, "learning_rate": 5.5299109164567e-06, "loss": 1.579, "step": 16760 }, { "epoch": 2.852768563409033, "grad_norm": 4.847186088562012, "learning_rate": 5.522620425175955e-06, "loss": 1.7088, "step": 16770 }, { "epoch": 2.8544696776388534, "grad_norm": 5.318556785583496, "learning_rate": 5.5153313917066665e-06, "loss": 1.6794, "step": 16780 }, { "epoch": 2.856170791868674, "grad_norm": 5.668161869049072, "learning_rate": 5.508043824900728e-06, "loss": 1.7609, "step": 16790 }, { "epoch": 2.8578719060984943, "grad_norm": 4.532445907592773, "learning_rate": 5.50075773360825e-06, "loss": 1.854, "step": 16800 }, { "epoch": 2.859573020328315, "grad_norm": 4.807154655456543, "learning_rate": 5.493473126677551e-06, "loss": 1.7322, "step": 16810 }, { "epoch": 2.8612741345581356, "grad_norm": 4.542354106903076, "learning_rate": 5.486190012955153e-06, "loss": 1.646, "step": 16820 }, { "epoch": 2.862975248787956, "grad_norm": 5.106041431427002, "learning_rate": 5.4789084012857555e-06, "loss": 1.6552, "step": 16830 }, { "epoch": 2.8646763630177765, "grad_norm": 4.498376846313477, "learning_rate": 5.47162830051224e-06, "loss": 1.6193, "step": 16840 }, { "epoch": 2.866377477247597, "grad_norm": 5.249197483062744, "learning_rate": 5.464349719475651e-06, "loss": 1.6655, "step": 16850 }, { "epoch": 2.868078591477418, "grad_norm": 4.739222049713135, "learning_rate": 5.457072667015189e-06, "loss": 1.715, "step": 16860 }, { "epoch": 2.8697797057072383, "grad_norm": 4.548635959625244, "learning_rate": 5.449797151968192e-06, "loss": 1.6876, "step": 16870 }, { "epoch": 2.8714808199370587, "grad_norm": 4.852926731109619, "learning_rate": 5.44252318317014e-06, "loss": 1.619, "step": 16880 }, { "epoch": 2.873181934166879, "grad_norm": 5.815784454345703, "learning_rate": 5.435250769454633e-06, "loss": 1.63, "step": 16890 }, { "epoch": 2.8748830483966996, "grad_norm": 5.472360134124756, "learning_rate": 5.4279799196533776e-06, "loss": 1.6145, "step": 16900 }, { "epoch": 2.8765841626265205, "grad_norm": 5.125363349914551, "learning_rate": 5.420710642596184e-06, "loss": 1.7248, "step": 16910 }, { "epoch": 2.878285276856341, "grad_norm": 5.4498610496521, "learning_rate": 5.41344294711095e-06, "loss": 1.6834, "step": 16920 }, { "epoch": 2.8799863910861614, "grad_norm": 5.653110980987549, "learning_rate": 5.406176842023661e-06, "loss": 1.709, "step": 16930 }, { "epoch": 2.881687505315982, "grad_norm": 5.308363437652588, "learning_rate": 5.398912336158364e-06, "loss": 1.7208, "step": 16940 }, { "epoch": 2.8833886195458023, "grad_norm": 5.207526206970215, "learning_rate": 5.3916494383371625e-06, "loss": 1.7317, "step": 16950 }, { "epoch": 2.885089733775623, "grad_norm": 5.076615810394287, "learning_rate": 5.384388157380212e-06, "loss": 1.6813, "step": 16960 }, { "epoch": 2.8867908480054436, "grad_norm": 5.327056884765625, "learning_rate": 5.3771285021057e-06, "loss": 1.8338, "step": 16970 }, { "epoch": 2.888491962235264, "grad_norm": 4.913582801818848, "learning_rate": 5.3698704813298406e-06, "loss": 1.585, "step": 16980 }, { "epoch": 2.8901930764650845, "grad_norm": 5.4310407638549805, "learning_rate": 5.362614103866873e-06, "loss": 1.788, "step": 16990 }, { "epoch": 2.891894190694905, "grad_norm": 5.223809719085693, "learning_rate": 5.355359378529024e-06, "loss": 1.6812, "step": 17000 }, { "epoch": 2.893595304924726, "grad_norm": 4.831777572631836, "learning_rate": 5.348106314126525e-06, "loss": 1.6825, "step": 17010 }, { "epoch": 2.8952964191545463, "grad_norm": 4.534534931182861, "learning_rate": 5.340854919467587e-06, "loss": 1.6786, "step": 17020 }, { "epoch": 2.8969975333843667, "grad_norm": 4.957123756408691, "learning_rate": 5.333605203358394e-06, "loss": 1.631, "step": 17030 }, { "epoch": 2.898698647614187, "grad_norm": 4.873937606811523, "learning_rate": 5.326357174603089e-06, "loss": 1.7522, "step": 17040 }, { "epoch": 2.9003997618440076, "grad_norm": 5.077538967132568, "learning_rate": 5.319110842003773e-06, "loss": 1.7145, "step": 17050 }, { "epoch": 2.9021008760738285, "grad_norm": 4.669430255889893, "learning_rate": 5.31186621436048e-06, "loss": 1.6077, "step": 17060 }, { "epoch": 2.903801990303649, "grad_norm": 5.177587509155273, "learning_rate": 5.304623300471174e-06, "loss": 1.7528, "step": 17070 }, { "epoch": 2.9055031045334694, "grad_norm": 5.614473819732666, "learning_rate": 5.297382109131741e-06, "loss": 1.6914, "step": 17080 }, { "epoch": 2.90720421876329, "grad_norm": 4.8400163650512695, "learning_rate": 5.290142649135974e-06, "loss": 1.705, "step": 17090 }, { "epoch": 2.9089053329931103, "grad_norm": 5.290616035461426, "learning_rate": 5.282904929275563e-06, "loss": 1.7426, "step": 17100 }, { "epoch": 2.910606447222931, "grad_norm": 4.153530597686768, "learning_rate": 5.275668958340085e-06, "loss": 1.7851, "step": 17110 }, { "epoch": 2.9123075614527516, "grad_norm": 6.381970405578613, "learning_rate": 5.2684347451169934e-06, "loss": 1.7088, "step": 17120 }, { "epoch": 2.914008675682572, "grad_norm": 6.792290210723877, "learning_rate": 5.261202298391605e-06, "loss": 1.6729, "step": 17130 }, { "epoch": 2.9157097899123925, "grad_norm": 5.274122714996338, "learning_rate": 5.253971626947096e-06, "loss": 1.6674, "step": 17140 }, { "epoch": 2.917410904142213, "grad_norm": 5.607309341430664, "learning_rate": 5.246742739564478e-06, "loss": 1.7321, "step": 17150 }, { "epoch": 2.919112018372034, "grad_norm": 6.518828868865967, "learning_rate": 5.239515645022608e-06, "loss": 1.5473, "step": 17160 }, { "epoch": 2.9208131326018543, "grad_norm": 5.3464484214782715, "learning_rate": 5.232290352098156e-06, "loss": 1.673, "step": 17170 }, { "epoch": 2.9225142468316747, "grad_norm": 5.311427116394043, "learning_rate": 5.225066869565609e-06, "loss": 1.7325, "step": 17180 }, { "epoch": 2.924215361061495, "grad_norm": 5.174399375915527, "learning_rate": 5.217845206197253e-06, "loss": 1.7709, "step": 17190 }, { "epoch": 2.9259164752913156, "grad_norm": 5.062442302703857, "learning_rate": 5.210625370763167e-06, "loss": 1.73, "step": 17200 }, { "epoch": 2.9276175895211365, "grad_norm": 4.9413838386535645, "learning_rate": 5.203407372031203e-06, "loss": 1.6064, "step": 17210 }, { "epoch": 2.929318703750957, "grad_norm": 5.229119300842285, "learning_rate": 5.196191218766996e-06, "loss": 1.6858, "step": 17220 }, { "epoch": 2.9310198179807774, "grad_norm": 4.790507793426514, "learning_rate": 5.188976919733929e-06, "loss": 1.6615, "step": 17230 }, { "epoch": 2.932720932210598, "grad_norm": 4.71579647064209, "learning_rate": 5.181764483693139e-06, "loss": 1.702, "step": 17240 }, { "epoch": 2.9344220464404183, "grad_norm": 6.398921966552734, "learning_rate": 5.174553919403493e-06, "loss": 1.5692, "step": 17250 }, { "epoch": 2.936123160670239, "grad_norm": 5.193541049957275, "learning_rate": 5.167345235621592e-06, "loss": 1.6498, "step": 17260 }, { "epoch": 2.9378242749000596, "grad_norm": 5.615979194641113, "learning_rate": 5.160138441101749e-06, "loss": 1.6564, "step": 17270 }, { "epoch": 2.93952538912988, "grad_norm": 5.156586647033691, "learning_rate": 5.152933544595992e-06, "loss": 1.6679, "step": 17280 }, { "epoch": 2.9412265033597005, "grad_norm": 5.580843925476074, "learning_rate": 5.14573055485403e-06, "loss": 1.6675, "step": 17290 }, { "epoch": 2.942927617589521, "grad_norm": 4.808172702789307, "learning_rate": 5.138529480623265e-06, "loss": 1.6527, "step": 17300 }, { "epoch": 2.944628731819342, "grad_norm": 4.427377223968506, "learning_rate": 5.131330330648771e-06, "loss": 1.7089, "step": 17310 }, { "epoch": 2.9463298460491623, "grad_norm": 5.212540149688721, "learning_rate": 5.124133113673284e-06, "loss": 1.6657, "step": 17320 }, { "epoch": 2.9480309602789827, "grad_norm": 5.242393970489502, "learning_rate": 5.116937838437198e-06, "loss": 1.6288, "step": 17330 }, { "epoch": 2.949732074508803, "grad_norm": 4.68244743347168, "learning_rate": 5.109744513678541e-06, "loss": 1.6502, "step": 17340 }, { "epoch": 2.9514331887386236, "grad_norm": 6.034001350402832, "learning_rate": 5.1025531481329765e-06, "loss": 1.7018, "step": 17350 }, { "epoch": 2.9531343029684445, "grad_norm": 5.154334545135498, "learning_rate": 5.095363750533789e-06, "loss": 1.7219, "step": 17360 }, { "epoch": 2.954835417198265, "grad_norm": 5.9419684410095215, "learning_rate": 5.088176329611871e-06, "loss": 1.6474, "step": 17370 }, { "epoch": 2.9565365314280854, "grad_norm": 5.031863689422607, "learning_rate": 5.080990894095715e-06, "loss": 1.6742, "step": 17380 }, { "epoch": 2.958237645657906, "grad_norm": 5.029059886932373, "learning_rate": 5.073807452711406e-06, "loss": 1.6828, "step": 17390 }, { "epoch": 2.9599387598877263, "grad_norm": 5.34487771987915, "learning_rate": 5.066626014182603e-06, "loss": 1.7054, "step": 17400 }, { "epoch": 2.961639874117547, "grad_norm": 4.280765533447266, "learning_rate": 5.059446587230534e-06, "loss": 1.6666, "step": 17410 }, { "epoch": 2.9633409883473676, "grad_norm": 5.261526107788086, "learning_rate": 5.0522691805739854e-06, "loss": 1.5722, "step": 17420 }, { "epoch": 2.965042102577188, "grad_norm": 4.829394817352295, "learning_rate": 5.045093802929287e-06, "loss": 1.6807, "step": 17430 }, { "epoch": 2.9667432168070085, "grad_norm": 5.677450180053711, "learning_rate": 5.0379204630103066e-06, "loss": 1.6928, "step": 17440 }, { "epoch": 2.968444331036829, "grad_norm": 6.83398962020874, "learning_rate": 5.030749169528439e-06, "loss": 1.6617, "step": 17450 }, { "epoch": 2.97014544526665, "grad_norm": 5.460479259490967, "learning_rate": 5.023579931192591e-06, "loss": 1.6503, "step": 17460 }, { "epoch": 2.9718465594964703, "grad_norm": 5.797318458557129, "learning_rate": 5.0164127567091775e-06, "loss": 1.6275, "step": 17470 }, { "epoch": 2.9735476737262907, "grad_norm": 5.9739203453063965, "learning_rate": 5.009247654782099e-06, "loss": 1.7129, "step": 17480 }, { "epoch": 2.975248787956111, "grad_norm": 5.020544052124023, "learning_rate": 5.002084634112748e-06, "loss": 1.67, "step": 17490 }, { "epoch": 2.9769499021859316, "grad_norm": 4.675488471984863, "learning_rate": 4.994923703399985e-06, "loss": 1.682, "step": 17500 }, { "epoch": 2.9786510164157525, "grad_norm": 4.7809295654296875, "learning_rate": 4.987764871340134e-06, "loss": 1.6577, "step": 17510 }, { "epoch": 2.980352130645573, "grad_norm": 5.958826065063477, "learning_rate": 4.980608146626967e-06, "loss": 1.7438, "step": 17520 }, { "epoch": 2.9820532448753934, "grad_norm": 4.796045303344727, "learning_rate": 4.9734535379517044e-06, "loss": 1.6495, "step": 17530 }, { "epoch": 2.983754359105214, "grad_norm": 5.485558032989502, "learning_rate": 4.966301054002987e-06, "loss": 1.7123, "step": 17540 }, { "epoch": 2.9854554733350342, "grad_norm": 4.91352653503418, "learning_rate": 4.959150703466877e-06, "loss": 1.6351, "step": 17550 }, { "epoch": 2.987156587564855, "grad_norm": 5.352145195007324, "learning_rate": 4.952002495026858e-06, "loss": 1.6597, "step": 17560 }, { "epoch": 2.9888577017946756, "grad_norm": 5.881899833679199, "learning_rate": 4.944856437363798e-06, "loss": 1.6898, "step": 17570 }, { "epoch": 2.990558816024496, "grad_norm": 5.65572452545166, "learning_rate": 4.937712539155961e-06, "loss": 1.7213, "step": 17580 }, { "epoch": 2.9922599302543165, "grad_norm": 6.259247779846191, "learning_rate": 4.930570809078983e-06, "loss": 1.6738, "step": 17590 }, { "epoch": 2.993961044484137, "grad_norm": 5.623510360717773, "learning_rate": 4.923431255805869e-06, "loss": 1.6972, "step": 17600 }, { "epoch": 2.995662158713958, "grad_norm": 6.0123724937438965, "learning_rate": 4.916293888006981e-06, "loss": 1.6268, "step": 17610 }, { "epoch": 2.9973632729437782, "grad_norm": 4.6384358406066895, "learning_rate": 4.909158714350032e-06, "loss": 1.6977, "step": 17620 }, { "epoch": 2.9990643871735987, "grad_norm": 4.532982349395752, "learning_rate": 4.902025743500061e-06, "loss": 1.7146, "step": 17630 }, { "epoch": 3.000765501403419, "grad_norm": 4.014255523681641, "learning_rate": 4.8948949841194375e-06, "loss": 1.6955, "step": 17640 }, { "epoch": 3.0024666156332396, "grad_norm": 4.36602258682251, "learning_rate": 4.887766444867844e-06, "loss": 1.6495, "step": 17650 }, { "epoch": 3.0041677298630605, "grad_norm": 5.667288303375244, "learning_rate": 4.8806401344022666e-06, "loss": 1.5879, "step": 17660 }, { "epoch": 3.005868844092881, "grad_norm": 5.284454345703125, "learning_rate": 4.873516061376989e-06, "loss": 1.7207, "step": 17670 }, { "epoch": 3.0075699583227014, "grad_norm": 5.314521312713623, "learning_rate": 4.86639423444357e-06, "loss": 1.6461, "step": 17680 }, { "epoch": 3.009271072552522, "grad_norm": 5.508584976196289, "learning_rate": 4.859274662250845e-06, "loss": 1.6006, "step": 17690 }, { "epoch": 3.0109721867823422, "grad_norm": 6.3213396072387695, "learning_rate": 4.852157353444914e-06, "loss": 1.6444, "step": 17700 }, { "epoch": 3.012673301012163, "grad_norm": 5.8377909660339355, "learning_rate": 4.845042316669122e-06, "loss": 1.5934, "step": 17710 }, { "epoch": 3.0143744152419836, "grad_norm": 6.235638618469238, "learning_rate": 4.837929560564058e-06, "loss": 1.5883, "step": 17720 }, { "epoch": 3.016075529471804, "grad_norm": 6.320291042327881, "learning_rate": 4.830819093767543e-06, "loss": 1.724, "step": 17730 }, { "epoch": 3.0177766437016245, "grad_norm": 5.157838344573975, "learning_rate": 4.823710924914616e-06, "loss": 1.5502, "step": 17740 }, { "epoch": 3.019477757931445, "grad_norm": 5.896531105041504, "learning_rate": 4.816605062637525e-06, "loss": 1.6807, "step": 17750 }, { "epoch": 3.021178872161266, "grad_norm": 6.018843173980713, "learning_rate": 4.809501515565719e-06, "loss": 1.5876, "step": 17760 }, { "epoch": 3.0228799863910862, "grad_norm": 5.043145656585693, "learning_rate": 4.802400292325831e-06, "loss": 1.6391, "step": 17770 }, { "epoch": 3.0245811006209067, "grad_norm": 6.310497760772705, "learning_rate": 4.795301401541675e-06, "loss": 1.4978, "step": 17780 }, { "epoch": 3.026282214850727, "grad_norm": 5.880179405212402, "learning_rate": 4.788204851834236e-06, "loss": 1.458, "step": 17790 }, { "epoch": 3.0279833290805476, "grad_norm": 5.39325475692749, "learning_rate": 4.781110651821649e-06, "loss": 1.7641, "step": 17800 }, { "epoch": 3.0296844433103685, "grad_norm": 6.003304958343506, "learning_rate": 4.7740188101191985e-06, "loss": 1.6129, "step": 17810 }, { "epoch": 3.031385557540189, "grad_norm": 5.247802257537842, "learning_rate": 4.766929335339306e-06, "loss": 1.559, "step": 17820 }, { "epoch": 3.0330866717700093, "grad_norm": 5.565807819366455, "learning_rate": 4.7598422360915186e-06, "loss": 1.6142, "step": 17830 }, { "epoch": 3.03478778599983, "grad_norm": 4.938119411468506, "learning_rate": 4.7527575209824935e-06, "loss": 1.6264, "step": 17840 }, { "epoch": 3.0364889002296502, "grad_norm": 6.364495277404785, "learning_rate": 4.745675198616001e-06, "loss": 1.5744, "step": 17850 }, { "epoch": 3.038190014459471, "grad_norm": 5.86942195892334, "learning_rate": 4.738595277592901e-06, "loss": 1.5696, "step": 17860 }, { "epoch": 3.0398911286892916, "grad_norm": 5.963891506195068, "learning_rate": 4.7315177665111384e-06, "loss": 1.6403, "step": 17870 }, { "epoch": 3.041592242919112, "grad_norm": 4.875551223754883, "learning_rate": 4.724442673965728e-06, "loss": 1.6111, "step": 17880 }, { "epoch": 3.0432933571489325, "grad_norm": 5.340967178344727, "learning_rate": 4.717370008548747e-06, "loss": 1.6849, "step": 17890 }, { "epoch": 3.044994471378753, "grad_norm": 5.945544719696045, "learning_rate": 4.710299778849335e-06, "loss": 1.604, "step": 17900 }, { "epoch": 3.046695585608574, "grad_norm": 5.568148612976074, "learning_rate": 4.7032319934536645e-06, "loss": 1.6131, "step": 17910 }, { "epoch": 3.0483966998383942, "grad_norm": 5.178503036499023, "learning_rate": 4.696166660944943e-06, "loss": 1.6011, "step": 17920 }, { "epoch": 3.0500978140682147, "grad_norm": 5.598849773406982, "learning_rate": 4.689103789903394e-06, "loss": 1.572, "step": 17930 }, { "epoch": 3.051798928298035, "grad_norm": 5.570218086242676, "learning_rate": 4.68204338890626e-06, "loss": 1.6541, "step": 17940 }, { "epoch": 3.0535000425278556, "grad_norm": 5.031559467315674, "learning_rate": 4.674985466527773e-06, "loss": 1.5255, "step": 17950 }, { "epoch": 3.0552011567576765, "grad_norm": 5.859495639801025, "learning_rate": 4.6679300313391715e-06, "loss": 1.5468, "step": 17960 }, { "epoch": 3.056902270987497, "grad_norm": 5.605506420135498, "learning_rate": 4.660877091908658e-06, "loss": 1.6024, "step": 17970 }, { "epoch": 3.0586033852173173, "grad_norm": 6.32056999206543, "learning_rate": 4.653826656801409e-06, "loss": 1.6519, "step": 17980 }, { "epoch": 3.060304499447138, "grad_norm": 5.6299028396606445, "learning_rate": 4.646778734579562e-06, "loss": 1.6598, "step": 17990 }, { "epoch": 3.0620056136769582, "grad_norm": 5.1421942710876465, "learning_rate": 4.639733333802202e-06, "loss": 1.6494, "step": 18000 }, { "epoch": 3.063706727906779, "grad_norm": 4.699141025543213, "learning_rate": 4.632690463025349e-06, "loss": 1.6086, "step": 18010 }, { "epoch": 3.0654078421365996, "grad_norm": 5.812696933746338, "learning_rate": 4.625650130801955e-06, "loss": 1.6311, "step": 18020 }, { "epoch": 3.06710895636642, "grad_norm": 5.819453716278076, "learning_rate": 4.618612345681886e-06, "loss": 1.653, "step": 18030 }, { "epoch": 3.0688100705962404, "grad_norm": 5.791920185089111, "learning_rate": 4.611577116211917e-06, "loss": 1.5801, "step": 18040 }, { "epoch": 3.070511184826061, "grad_norm": 6.71321439743042, "learning_rate": 4.604544450935716e-06, "loss": 1.5094, "step": 18050 }, { "epoch": 3.072212299055882, "grad_norm": 4.962828636169434, "learning_rate": 4.597514358393839e-06, "loss": 1.5627, "step": 18060 }, { "epoch": 3.0739134132857022, "grad_norm": 5.420648097991943, "learning_rate": 4.590486847123719e-06, "loss": 1.5667, "step": 18070 }, { "epoch": 3.0756145275155227, "grad_norm": 4.635862827301025, "learning_rate": 4.583461925659653e-06, "loss": 1.5673, "step": 18080 }, { "epoch": 3.077315641745343, "grad_norm": 3.965139865875244, "learning_rate": 4.576439602532793e-06, "loss": 1.6144, "step": 18090 }, { "epoch": 3.0790167559751636, "grad_norm": 5.953652858734131, "learning_rate": 4.569419886271135e-06, "loss": 1.6531, "step": 18100 }, { "epoch": 3.0807178702049844, "grad_norm": 6.302707672119141, "learning_rate": 4.562402785399509e-06, "loss": 1.6322, "step": 18110 }, { "epoch": 3.082418984434805, "grad_norm": 5.7404375076293945, "learning_rate": 4.555388308439569e-06, "loss": 1.7066, "step": 18120 }, { "epoch": 3.0841200986646253, "grad_norm": 6.028812408447266, "learning_rate": 4.548376463909786e-06, "loss": 1.695, "step": 18130 }, { "epoch": 3.0858212128944458, "grad_norm": 5.067834854125977, "learning_rate": 4.541367260325428e-06, "loss": 1.6536, "step": 18140 }, { "epoch": 3.087522327124266, "grad_norm": 4.90779447555542, "learning_rate": 4.53436070619856e-06, "loss": 1.655, "step": 18150 }, { "epoch": 3.089223441354087, "grad_norm": 5.944270610809326, "learning_rate": 4.52735681003803e-06, "loss": 1.4941, "step": 18160 }, { "epoch": 3.0909245555839076, "grad_norm": 5.422323226928711, "learning_rate": 4.520355580349455e-06, "loss": 1.5717, "step": 18170 }, { "epoch": 3.092625669813728, "grad_norm": 4.691229343414307, "learning_rate": 4.513357025635211e-06, "loss": 1.6177, "step": 18180 }, { "epoch": 3.0943267840435484, "grad_norm": 5.544463157653809, "learning_rate": 4.506361154394438e-06, "loss": 1.674, "step": 18190 }, { "epoch": 3.096027898273369, "grad_norm": 5.867314338684082, "learning_rate": 4.499367975123003e-06, "loss": 1.6024, "step": 18200 }, { "epoch": 3.0977290125031898, "grad_norm": 6.396510124206543, "learning_rate": 4.492377496313514e-06, "loss": 1.6325, "step": 18210 }, { "epoch": 3.09943012673301, "grad_norm": 4.796721458435059, "learning_rate": 4.4853897264552926e-06, "loss": 1.6043, "step": 18220 }, { "epoch": 3.1011312409628307, "grad_norm": 5.387908458709717, "learning_rate": 4.478404674034369e-06, "loss": 1.6566, "step": 18230 }, { "epoch": 3.102832355192651, "grad_norm": 4.891656875610352, "learning_rate": 4.471422347533487e-06, "loss": 1.5837, "step": 18240 }, { "epoch": 3.1045334694224715, "grad_norm": 5.820724010467529, "learning_rate": 4.464442755432066e-06, "loss": 1.5957, "step": 18250 }, { "epoch": 3.1062345836522924, "grad_norm": 5.723635196685791, "learning_rate": 4.45746590620621e-06, "loss": 1.6345, "step": 18260 }, { "epoch": 3.107935697882113, "grad_norm": 4.311638832092285, "learning_rate": 4.450491808328689e-06, "loss": 1.6473, "step": 18270 }, { "epoch": 3.1096368121119333, "grad_norm": 5.072981357574463, "learning_rate": 4.443520470268936e-06, "loss": 1.6255, "step": 18280 }, { "epoch": 3.1113379263417538, "grad_norm": 5.348658084869385, "learning_rate": 4.436551900493029e-06, "loss": 1.6239, "step": 18290 }, { "epoch": 3.113039040571574, "grad_norm": 4.85001802444458, "learning_rate": 4.42958610746369e-06, "loss": 1.6204, "step": 18300 }, { "epoch": 3.114740154801395, "grad_norm": 5.611588001251221, "learning_rate": 4.422623099640261e-06, "loss": 1.5433, "step": 18310 }, { "epoch": 3.1164412690312155, "grad_norm": 6.364828586578369, "learning_rate": 4.415662885478706e-06, "loss": 1.6021, "step": 18320 }, { "epoch": 3.118142383261036, "grad_norm": 5.450611114501953, "learning_rate": 4.408705473431594e-06, "loss": 1.6387, "step": 18330 }, { "epoch": 3.1198434974908564, "grad_norm": 5.470499038696289, "learning_rate": 4.401750871948095e-06, "loss": 1.6206, "step": 18340 }, { "epoch": 3.121544611720677, "grad_norm": 5.396094799041748, "learning_rate": 4.394799089473962e-06, "loss": 1.6674, "step": 18350 }, { "epoch": 3.1232457259504978, "grad_norm": 4.812134742736816, "learning_rate": 4.387850134451527e-06, "loss": 1.5964, "step": 18360 }, { "epoch": 3.124946840180318, "grad_norm": 5.111337184906006, "learning_rate": 4.380904015319687e-06, "loss": 1.629, "step": 18370 }, { "epoch": 3.1266479544101387, "grad_norm": 5.7565741539001465, "learning_rate": 4.373960740513894e-06, "loss": 1.5306, "step": 18380 }, { "epoch": 3.128349068639959, "grad_norm": 6.662924289703369, "learning_rate": 4.367020318466149e-06, "loss": 1.6761, "step": 18390 }, { "epoch": 3.1300501828697795, "grad_norm": 5.767753601074219, "learning_rate": 4.360082757604986e-06, "loss": 1.6244, "step": 18400 }, { "epoch": 3.1317512970996004, "grad_norm": 4.988311767578125, "learning_rate": 4.353148066355466e-06, "loss": 1.5582, "step": 18410 }, { "epoch": 3.133452411329421, "grad_norm": 5.014961242675781, "learning_rate": 4.346216253139164e-06, "loss": 1.652, "step": 18420 }, { "epoch": 3.1351535255592413, "grad_norm": 6.216855525970459, "learning_rate": 4.339287326374158e-06, "loss": 1.6092, "step": 18430 }, { "epoch": 3.1368546397890618, "grad_norm": 6.390921592712402, "learning_rate": 4.332361294475024e-06, "loss": 1.5669, "step": 18440 }, { "epoch": 3.138555754018882, "grad_norm": 5.6126556396484375, "learning_rate": 4.325438165852823e-06, "loss": 1.6298, "step": 18450 }, { "epoch": 3.140256868248703, "grad_norm": 6.1353583335876465, "learning_rate": 4.318517948915084e-06, "loss": 1.6227, "step": 18460 }, { "epoch": 3.1419579824785235, "grad_norm": 5.302263259887695, "learning_rate": 4.311600652065809e-06, "loss": 1.6988, "step": 18470 }, { "epoch": 3.143659096708344, "grad_norm": 5.06173038482666, "learning_rate": 4.304686283705449e-06, "loss": 1.6542, "step": 18480 }, { "epoch": 3.1453602109381644, "grad_norm": 5.5619730949401855, "learning_rate": 4.297774852230895e-06, "loss": 1.5825, "step": 18490 }, { "epoch": 3.147061325167985, "grad_norm": 6.599198818206787, "learning_rate": 4.290866366035479e-06, "loss": 1.5114, "step": 18500 }, { "epoch": 3.1487624393978058, "grad_norm": 4.429544448852539, "learning_rate": 4.283960833508952e-06, "loss": 1.5709, "step": 18510 }, { "epoch": 3.150463553627626, "grad_norm": 5.5696492195129395, "learning_rate": 4.277058263037474e-06, "loss": 1.5798, "step": 18520 }, { "epoch": 3.1521646678574466, "grad_norm": 5.115567207336426, "learning_rate": 4.270158663003617e-06, "loss": 1.6599, "step": 18530 }, { "epoch": 3.153865782087267, "grad_norm": 5.814573764801025, "learning_rate": 4.263262041786341e-06, "loss": 1.5718, "step": 18540 }, { "epoch": 3.1555668963170875, "grad_norm": 5.1354899406433105, "learning_rate": 4.256368407760988e-06, "loss": 1.61, "step": 18550 }, { "epoch": 3.1572680105469084, "grad_norm": 6.129085063934326, "learning_rate": 4.249477769299271e-06, "loss": 1.5248, "step": 18560 }, { "epoch": 3.158969124776729, "grad_norm": 6.191493511199951, "learning_rate": 4.242590134769268e-06, "loss": 1.6448, "step": 18570 }, { "epoch": 3.1606702390065493, "grad_norm": 6.188729763031006, "learning_rate": 4.235705512535406e-06, "loss": 1.5009, "step": 18580 }, { "epoch": 3.1623713532363698, "grad_norm": 6.1626691818237305, "learning_rate": 4.22882391095846e-06, "loss": 1.5537, "step": 18590 }, { "epoch": 3.16407246746619, "grad_norm": 6.382812023162842, "learning_rate": 4.221945338395531e-06, "loss": 1.6413, "step": 18600 }, { "epoch": 3.165773581696011, "grad_norm": 4.171438694000244, "learning_rate": 4.215069803200041e-06, "loss": 1.6249, "step": 18610 }, { "epoch": 3.1674746959258315, "grad_norm": 5.829582691192627, "learning_rate": 4.208197313721726e-06, "loss": 1.5484, "step": 18620 }, { "epoch": 3.169175810155652, "grad_norm": 5.257534027099609, "learning_rate": 4.2013278783066204e-06, "loss": 1.6919, "step": 18630 }, { "epoch": 3.1708769243854724, "grad_norm": 6.117254734039307, "learning_rate": 4.194461505297059e-06, "loss": 1.6177, "step": 18640 }, { "epoch": 3.172578038615293, "grad_norm": 6.516603946685791, "learning_rate": 4.187598203031646e-06, "loss": 1.708, "step": 18650 }, { "epoch": 3.1742791528451137, "grad_norm": 6.003725051879883, "learning_rate": 4.1807379798452595e-06, "loss": 1.5948, "step": 18660 }, { "epoch": 3.175980267074934, "grad_norm": 5.0657057762146, "learning_rate": 4.173880844069041e-06, "loss": 1.6061, "step": 18670 }, { "epoch": 3.1776813813047546, "grad_norm": 5.401756763458252, "learning_rate": 4.167026804030383e-06, "loss": 1.6046, "step": 18680 }, { "epoch": 3.179382495534575, "grad_norm": 5.948600769042969, "learning_rate": 4.160175868052914e-06, "loss": 1.6005, "step": 18690 }, { "epoch": 3.1810836097643955, "grad_norm": 6.31746244430542, "learning_rate": 4.153328044456498e-06, "loss": 1.6076, "step": 18700 }, { "epoch": 3.1827847239942164, "grad_norm": 5.688670635223389, "learning_rate": 4.146483341557217e-06, "loss": 1.7147, "step": 18710 }, { "epoch": 3.184485838224037, "grad_norm": 5.909487724304199, "learning_rate": 4.139641767667361e-06, "loss": 1.6519, "step": 18720 }, { "epoch": 3.1861869524538573, "grad_norm": 5.354527950286865, "learning_rate": 4.132803331095425e-06, "loss": 1.5028, "step": 18730 }, { "epoch": 3.1878880666836777, "grad_norm": 6.217424392700195, "learning_rate": 4.12596804014609e-06, "loss": 1.5504, "step": 18740 }, { "epoch": 3.189589180913498, "grad_norm": 6.052481174468994, "learning_rate": 4.119135903120217e-06, "loss": 1.5665, "step": 18750 }, { "epoch": 3.191290295143319, "grad_norm": 4.938386917114258, "learning_rate": 4.1123069283148414e-06, "loss": 1.7223, "step": 18760 }, { "epoch": 3.1929914093731395, "grad_norm": 5.718237400054932, "learning_rate": 4.105481124023152e-06, "loss": 1.5895, "step": 18770 }, { "epoch": 3.19469252360296, "grad_norm": 5.294105052947998, "learning_rate": 4.098658498534494e-06, "loss": 1.6055, "step": 18780 }, { "epoch": 3.1963936378327804, "grad_norm": 4.52695894241333, "learning_rate": 4.091839060134344e-06, "loss": 1.6437, "step": 18790 }, { "epoch": 3.198094752062601, "grad_norm": 6.001376152038574, "learning_rate": 4.085022817104314e-06, "loss": 1.6244, "step": 18800 }, { "epoch": 3.1997958662924217, "grad_norm": 5.516130447387695, "learning_rate": 4.078209777722137e-06, "loss": 1.6684, "step": 18810 }, { "epoch": 3.201496980522242, "grad_norm": 5.8052215576171875, "learning_rate": 4.071399950261648e-06, "loss": 1.5872, "step": 18820 }, { "epoch": 3.2031980947520626, "grad_norm": 6.434172630310059, "learning_rate": 4.064593342992791e-06, "loss": 1.628, "step": 18830 }, { "epoch": 3.204899208981883, "grad_norm": 5.731072425842285, "learning_rate": 4.057789964181592e-06, "loss": 1.6371, "step": 18840 }, { "epoch": 3.2066003232117035, "grad_norm": 5.7734761238098145, "learning_rate": 4.0509898220901594e-06, "loss": 1.5914, "step": 18850 }, { "epoch": 3.2083014374415244, "grad_norm": 5.650068283081055, "learning_rate": 4.044192924976667e-06, "loss": 1.6572, "step": 18860 }, { "epoch": 3.210002551671345, "grad_norm": 5.855218410491943, "learning_rate": 4.037399281095355e-06, "loss": 1.6194, "step": 18870 }, { "epoch": 3.2117036659011653, "grad_norm": 6.3672871589660645, "learning_rate": 4.030608898696508e-06, "loss": 1.5446, "step": 18880 }, { "epoch": 3.2134047801309857, "grad_norm": 5.921889305114746, "learning_rate": 4.023821786026452e-06, "loss": 1.5996, "step": 18890 }, { "epoch": 3.215105894360806, "grad_norm": 5.2454352378845215, "learning_rate": 4.017037951327539e-06, "loss": 1.5815, "step": 18900 }, { "epoch": 3.216807008590627, "grad_norm": 6.049569606781006, "learning_rate": 4.0102574028381414e-06, "loss": 1.6231, "step": 18910 }, { "epoch": 3.2185081228204475, "grad_norm": 4.872544288635254, "learning_rate": 4.00348014879264e-06, "loss": 1.6495, "step": 18920 }, { "epoch": 3.220209237050268, "grad_norm": 4.858197212219238, "learning_rate": 3.996706197421423e-06, "loss": 1.5588, "step": 18930 }, { "epoch": 3.2219103512800884, "grad_norm": 5.183925151824951, "learning_rate": 3.989935556950857e-06, "loss": 1.6679, "step": 18940 }, { "epoch": 3.223611465509909, "grad_norm": 6.936694622039795, "learning_rate": 3.98316823560329e-06, "loss": 1.6339, "step": 18950 }, { "epoch": 3.2253125797397297, "grad_norm": 5.0147175788879395, "learning_rate": 3.976404241597044e-06, "loss": 1.7504, "step": 18960 }, { "epoch": 3.22701369396955, "grad_norm": 5.347295761108398, "learning_rate": 3.969643583146394e-06, "loss": 1.6694, "step": 18970 }, { "epoch": 3.2287148081993706, "grad_norm": 5.799068450927734, "learning_rate": 3.962886268461574e-06, "loss": 1.5398, "step": 18980 }, { "epoch": 3.230415922429191, "grad_norm": 5.399052143096924, "learning_rate": 3.956132305748745e-06, "loss": 1.6176, "step": 18990 }, { "epoch": 3.2321170366590115, "grad_norm": 5.699862003326416, "learning_rate": 3.949381703210006e-06, "loss": 1.6679, "step": 19000 }, { "epoch": 3.2338181508888324, "grad_norm": 6.689784526824951, "learning_rate": 3.94263446904337e-06, "loss": 1.5748, "step": 19010 }, { "epoch": 3.235519265118653, "grad_norm": 5.182729721069336, "learning_rate": 3.935890611442765e-06, "loss": 1.528, "step": 19020 }, { "epoch": 3.2372203793484733, "grad_norm": 6.1299052238464355, "learning_rate": 3.92982403333515e-06, "loss": 1.6028, "step": 19030 }, { "epoch": 3.2389214935782937, "grad_norm": 5.4542646408081055, "learning_rate": 3.923086613769571e-06, "loss": 1.5886, "step": 19040 }, { "epoch": 3.240622607808114, "grad_norm": 5.992674827575684, "learning_rate": 3.9163525945091795e-06, "loss": 1.7332, "step": 19050 }, { "epoch": 3.242323722037935, "grad_norm": 5.830533027648926, "learning_rate": 3.909621983731854e-06, "loss": 1.518, "step": 19060 }, { "epoch": 3.2440248362677555, "grad_norm": 5.197500228881836, "learning_rate": 3.902894789611326e-06, "loss": 1.5895, "step": 19070 }, { "epoch": 3.245725950497576, "grad_norm": 5.365091323852539, "learning_rate": 3.896171020317191e-06, "loss": 1.5947, "step": 19080 }, { "epoch": 3.2474270647273964, "grad_norm": 6.121037483215332, "learning_rate": 3.889450684014876e-06, "loss": 1.6893, "step": 19090 }, { "epoch": 3.249128178957217, "grad_norm": 4.278195858001709, "learning_rate": 3.882733788865639e-06, "loss": 1.5762, "step": 19100 }, { "epoch": 3.2508292931870377, "grad_norm": 6.356879234313965, "learning_rate": 3.8760203430265626e-06, "loss": 1.5187, "step": 19110 }, { "epoch": 3.252530407416858, "grad_norm": 6.2268548011779785, "learning_rate": 3.869310354650538e-06, "loss": 1.6316, "step": 19120 }, { "epoch": 3.2542315216466786, "grad_norm": 6.183928966522217, "learning_rate": 3.8626038318862575e-06, "loss": 1.5908, "step": 19130 }, { "epoch": 3.255932635876499, "grad_norm": 5.49514102935791, "learning_rate": 3.8559007828782074e-06, "loss": 1.6499, "step": 19140 }, { "epoch": 3.2576337501063195, "grad_norm": 5.05054235458374, "learning_rate": 3.849201215766653e-06, "loss": 1.5706, "step": 19150 }, { "epoch": 3.2593348643361404, "grad_norm": 6.043948650360107, "learning_rate": 3.842505138687631e-06, "loss": 1.5639, "step": 19160 }, { "epoch": 3.261035978565961, "grad_norm": 4.8844733238220215, "learning_rate": 3.835812559772943e-06, "loss": 1.5719, "step": 19170 }, { "epoch": 3.2627370927957813, "grad_norm": 6.491847991943359, "learning_rate": 3.829123487150138e-06, "loss": 1.5799, "step": 19180 }, { "epoch": 3.2644382070256017, "grad_norm": 4.930334091186523, "learning_rate": 3.822437928942505e-06, "loss": 1.5475, "step": 19190 }, { "epoch": 3.266139321255422, "grad_norm": 6.247923374176025, "learning_rate": 3.815755893269078e-06, "loss": 1.6433, "step": 19200 }, { "epoch": 3.267840435485243, "grad_norm": 5.70054292678833, "learning_rate": 3.8090773882445975e-06, "loss": 1.5678, "step": 19210 }, { "epoch": 3.2695415497150635, "grad_norm": 4.861880779266357, "learning_rate": 3.802402421979526e-06, "loss": 1.5403, "step": 19220 }, { "epoch": 3.271242663944884, "grad_norm": 5.6805620193481445, "learning_rate": 3.795731002580024e-06, "loss": 1.6332, "step": 19230 }, { "epoch": 3.2729437781747044, "grad_norm": 5.739677906036377, "learning_rate": 3.789063138147946e-06, "loss": 1.6061, "step": 19240 }, { "epoch": 3.274644892404525, "grad_norm": 5.972851276397705, "learning_rate": 3.78239883678083e-06, "loss": 1.5477, "step": 19250 }, { "epoch": 3.2763460066343457, "grad_norm": 4.61767578125, "learning_rate": 3.7757381065718862e-06, "loss": 1.6034, "step": 19260 }, { "epoch": 3.278047120864166, "grad_norm": 5.919322967529297, "learning_rate": 3.7690809556099875e-06, "loss": 1.6656, "step": 19270 }, { "epoch": 3.2797482350939866, "grad_norm": 5.884451389312744, "learning_rate": 3.762427391979662e-06, "loss": 1.5455, "step": 19280 }, { "epoch": 3.281449349323807, "grad_norm": 5.5694475173950195, "learning_rate": 3.755777423761079e-06, "loss": 1.6901, "step": 19290 }, { "epoch": 3.2831504635536275, "grad_norm": 4.459254741668701, "learning_rate": 3.74913105903004e-06, "loss": 1.5733, "step": 19300 }, { "epoch": 3.2848515777834484, "grad_norm": 5.569211959838867, "learning_rate": 3.742488305857978e-06, "loss": 1.6079, "step": 19310 }, { "epoch": 3.286552692013269, "grad_norm": 4.4077348709106445, "learning_rate": 3.735849172311934e-06, "loss": 1.629, "step": 19320 }, { "epoch": 3.2882538062430893, "grad_norm": 5.164341926574707, "learning_rate": 3.7292136664545548e-06, "loss": 1.5199, "step": 19330 }, { "epoch": 3.2899549204729097, "grad_norm": 5.6606645584106445, "learning_rate": 3.722581796344076e-06, "loss": 1.5918, "step": 19340 }, { "epoch": 3.29165603470273, "grad_norm": 6.065959930419922, "learning_rate": 3.7159535700343273e-06, "loss": 1.5498, "step": 19350 }, { "epoch": 3.293357148932551, "grad_norm": 6.435763359069824, "learning_rate": 3.7093289955747043e-06, "loss": 1.6532, "step": 19360 }, { "epoch": 3.2950582631623715, "grad_norm": 4.562117576599121, "learning_rate": 3.7027080810101798e-06, "loss": 1.6412, "step": 19370 }, { "epoch": 3.296759377392192, "grad_norm": 5.498716354370117, "learning_rate": 3.696090834381271e-06, "loss": 1.536, "step": 19380 }, { "epoch": 3.2984604916220124, "grad_norm": 6.3962483406066895, "learning_rate": 3.689477263724045e-06, "loss": 1.6545, "step": 19390 }, { "epoch": 3.300161605851833, "grad_norm": 5.0929341316223145, "learning_rate": 3.682867377070103e-06, "loss": 1.6379, "step": 19400 }, { "epoch": 3.3018627200816537, "grad_norm": 5.5942912101745605, "learning_rate": 3.6762611824465726e-06, "loss": 1.6307, "step": 19410 }, { "epoch": 3.303563834311474, "grad_norm": 5.138519287109375, "learning_rate": 3.6696586878761048e-06, "loss": 1.5445, "step": 19420 }, { "epoch": 3.3052649485412946, "grad_norm": 5.532662868499756, "learning_rate": 3.6630599013768444e-06, "loss": 1.6385, "step": 19430 }, { "epoch": 3.306966062771115, "grad_norm": 6.1705002784729, "learning_rate": 3.656464830962441e-06, "loss": 1.5671, "step": 19440 }, { "epoch": 3.3086671770009355, "grad_norm": 6.107243537902832, "learning_rate": 3.6498734846420314e-06, "loss": 1.5554, "step": 19450 }, { "epoch": 3.3103682912307564, "grad_norm": 5.152639389038086, "learning_rate": 3.6432858704202276e-06, "loss": 1.6207, "step": 19460 }, { "epoch": 3.312069405460577, "grad_norm": 6.471705436706543, "learning_rate": 3.6367019962971085e-06, "loss": 1.6673, "step": 19470 }, { "epoch": 3.3137705196903973, "grad_norm": 4.922879695892334, "learning_rate": 3.630121870268217e-06, "loss": 1.6099, "step": 19480 }, { "epoch": 3.3154716339202177, "grad_norm": 4.955195426940918, "learning_rate": 3.623545500324537e-06, "loss": 1.6964, "step": 19490 }, { "epoch": 3.317172748150038, "grad_norm": 4.401156902313232, "learning_rate": 3.616972894452494e-06, "loss": 1.6287, "step": 19500 }, { "epoch": 3.318873862379859, "grad_norm": 6.410326957702637, "learning_rate": 3.6104040606339437e-06, "loss": 1.5844, "step": 19510 }, { "epoch": 3.3205749766096795, "grad_norm": 5.521576881408691, "learning_rate": 3.603839006846158e-06, "loss": 1.5821, "step": 19520 }, { "epoch": 3.3222760908395, "grad_norm": 8.332356452941895, "learning_rate": 3.5972777410618193e-06, "loss": 1.5981, "step": 19530 }, { "epoch": 3.3239772050693204, "grad_norm": 5.562382221221924, "learning_rate": 3.5907202712490113e-06, "loss": 1.5004, "step": 19540 }, { "epoch": 3.325678319299141, "grad_norm": 5.359004020690918, "learning_rate": 3.584166605371207e-06, "loss": 1.6241, "step": 19550 }, { "epoch": 3.3273794335289617, "grad_norm": 5.369149208068848, "learning_rate": 3.5776167513872585e-06, "loss": 1.4857, "step": 19560 }, { "epoch": 3.329080547758782, "grad_norm": 6.441427707672119, "learning_rate": 3.5710707172513913e-06, "loss": 1.5662, "step": 19570 }, { "epoch": 3.3307816619886026, "grad_norm": 4.339804649353027, "learning_rate": 3.5645285109131873e-06, "loss": 1.6146, "step": 19580 }, { "epoch": 3.332482776218423, "grad_norm": 6.252863883972168, "learning_rate": 3.557990140317588e-06, "loss": 1.5885, "step": 19590 }, { "epoch": 3.3341838904482435, "grad_norm": 6.938775062561035, "learning_rate": 3.55145561340487e-06, "loss": 1.587, "step": 19600 }, { "epoch": 3.3358850046780644, "grad_norm": 6.152193069458008, "learning_rate": 3.544924938110643e-06, "loss": 1.6359, "step": 19610 }, { "epoch": 3.337586118907885, "grad_norm": 4.870347499847412, "learning_rate": 3.538398122365841e-06, "loss": 1.5164, "step": 19620 }, { "epoch": 3.3392872331377053, "grad_norm": 5.487436294555664, "learning_rate": 3.5318751740967133e-06, "loss": 1.6175, "step": 19630 }, { "epoch": 3.3409883473675257, "grad_norm": 6.347774982452393, "learning_rate": 3.5253561012247996e-06, "loss": 1.5868, "step": 19640 }, { "epoch": 3.342689461597346, "grad_norm": 5.935305595397949, "learning_rate": 3.518840911666952e-06, "loss": 1.718, "step": 19650 }, { "epoch": 3.344390575827167, "grad_norm": 5.687722206115723, "learning_rate": 3.5123296133352954e-06, "loss": 1.5982, "step": 19660 }, { "epoch": 3.3460916900569875, "grad_norm": 6.090416431427002, "learning_rate": 3.50582221413723e-06, "loss": 1.5469, "step": 19670 }, { "epoch": 3.347792804286808, "grad_norm": 5.880667686462402, "learning_rate": 3.499318721975424e-06, "loss": 1.7217, "step": 19680 }, { "epoch": 3.3494939185166284, "grad_norm": 5.706654071807861, "learning_rate": 3.492819144747798e-06, "loss": 1.6737, "step": 19690 }, { "epoch": 3.351195032746449, "grad_norm": 4.656063556671143, "learning_rate": 3.4863234903475176e-06, "loss": 1.5404, "step": 19700 }, { "epoch": 3.3528961469762697, "grad_norm": 5.1501240730285645, "learning_rate": 3.479831766662989e-06, "loss": 1.5463, "step": 19710 }, { "epoch": 3.35459726120609, "grad_norm": 6.567721843719482, "learning_rate": 3.473343981577845e-06, "loss": 1.654, "step": 19720 }, { "epoch": 3.3562983754359106, "grad_norm": 5.558073997497559, "learning_rate": 3.4668601429709258e-06, "loss": 1.6144, "step": 19730 }, { "epoch": 3.357999489665731, "grad_norm": 5.354240894317627, "learning_rate": 3.460380258716289e-06, "loss": 1.6214, "step": 19740 }, { "epoch": 3.3597006038955515, "grad_norm": 6.7254319190979, "learning_rate": 3.453904336683185e-06, "loss": 1.587, "step": 19750 }, { "epoch": 3.3614017181253724, "grad_norm": 5.278835773468018, "learning_rate": 3.447432384736053e-06, "loss": 1.6102, "step": 19760 }, { "epoch": 3.363102832355193, "grad_norm": 6.793532848358154, "learning_rate": 3.440964410734515e-06, "loss": 1.5772, "step": 19770 }, { "epoch": 3.3648039465850133, "grad_norm": 6.4684367179870605, "learning_rate": 3.4345004225333584e-06, "loss": 1.516, "step": 19780 }, { "epoch": 3.3665050608148337, "grad_norm": 5.317403316497803, "learning_rate": 3.4280404279825305e-06, "loss": 1.6725, "step": 19790 }, { "epoch": 3.368206175044654, "grad_norm": 6.5871806144714355, "learning_rate": 3.4215844349271284e-06, "loss": 1.5603, "step": 19800 }, { "epoch": 3.369907289274475, "grad_norm": 4.981198787689209, "learning_rate": 3.41513245120739e-06, "loss": 1.6096, "step": 19810 }, { "epoch": 3.3716084035042955, "grad_norm": 5.90072774887085, "learning_rate": 3.4086844846586843e-06, "loss": 1.5599, "step": 19820 }, { "epoch": 3.373309517734116, "grad_norm": 6.637577533721924, "learning_rate": 3.402240543111504e-06, "loss": 1.6088, "step": 19830 }, { "epoch": 3.3750106319639364, "grad_norm": 6.211846828460693, "learning_rate": 3.3958006343914492e-06, "loss": 1.5779, "step": 19840 }, { "epoch": 3.376711746193757, "grad_norm": 6.740238666534424, "learning_rate": 3.3893647663192264e-06, "loss": 1.5889, "step": 19850 }, { "epoch": 3.3784128604235777, "grad_norm": 6.2450785636901855, "learning_rate": 3.3829329467106337e-06, "loss": 1.6165, "step": 19860 }, { "epoch": 3.380113974653398, "grad_norm": 5.701789379119873, "learning_rate": 3.3765051833765502e-06, "loss": 1.6581, "step": 19870 }, { "epoch": 3.3818150888832186, "grad_norm": 6.127169609069824, "learning_rate": 3.3700814841229353e-06, "loss": 1.5565, "step": 19880 }, { "epoch": 3.383516203113039, "grad_norm": 5.727521896362305, "learning_rate": 3.3636618567508085e-06, "loss": 1.6647, "step": 19890 }, { "epoch": 3.3852173173428595, "grad_norm": 7.1242756843566895, "learning_rate": 3.3572463090562442e-06, "loss": 1.5775, "step": 19900 }, { "epoch": 3.3869184315726804, "grad_norm": 5.4397735595703125, "learning_rate": 3.3508348488303637e-06, "loss": 1.6395, "step": 19910 }, { "epoch": 3.388619545802501, "grad_norm": 5.371628761291504, "learning_rate": 3.344427483859325e-06, "loss": 1.5023, "step": 19920 }, { "epoch": 3.3903206600323212, "grad_norm": 5.891162395477295, "learning_rate": 3.3380242219243062e-06, "loss": 1.538, "step": 19930 }, { "epoch": 3.3920217742621417, "grad_norm": 7.087254524230957, "learning_rate": 3.3316250708015143e-06, "loss": 1.5604, "step": 19940 }, { "epoch": 3.393722888491962, "grad_norm": 5.757351398468018, "learning_rate": 3.3252300382621557e-06, "loss": 1.6928, "step": 19950 }, { "epoch": 3.3954240027217826, "grad_norm": 6.146236419677734, "learning_rate": 3.3188391320724374e-06, "loss": 1.6554, "step": 19960 }, { "epoch": 3.3971251169516035, "grad_norm": 6.456681728363037, "learning_rate": 3.3124523599935554e-06, "loss": 1.6398, "step": 19970 }, { "epoch": 3.398826231181424, "grad_norm": 6.639796733856201, "learning_rate": 3.306069729781683e-06, "loss": 1.5743, "step": 19980 }, { "epoch": 3.4005273454112444, "grad_norm": 5.578784465789795, "learning_rate": 3.2996912491879696e-06, "loss": 1.5616, "step": 19990 }, { "epoch": 3.402228459641065, "grad_norm": 5.376632213592529, "learning_rate": 3.2933169259585196e-06, "loss": 1.6228, "step": 20000 }, { "epoch": 3.4039295738708852, "grad_norm": 5.6772236824035645, "learning_rate": 3.286946767834392e-06, "loss": 1.5185, "step": 20010 }, { "epoch": 3.405630688100706, "grad_norm": 6.941185474395752, "learning_rate": 3.2805807825515823e-06, "loss": 1.597, "step": 20020 }, { "epoch": 3.4073318023305266, "grad_norm": 7.1472487449646, "learning_rate": 3.274218977841023e-06, "loss": 1.5752, "step": 20030 }, { "epoch": 3.409032916560347, "grad_norm": 5.509366989135742, "learning_rate": 3.267861361428567e-06, "loss": 1.6204, "step": 20040 }, { "epoch": 3.4107340307901675, "grad_norm": 6.332919120788574, "learning_rate": 3.261507941034988e-06, "loss": 1.6334, "step": 20050 }, { "epoch": 3.412435145019988, "grad_norm": 5.64229154586792, "learning_rate": 3.255158724375956e-06, "loss": 1.5794, "step": 20060 }, { "epoch": 3.414136259249809, "grad_norm": 6.086897373199463, "learning_rate": 3.248813719162038e-06, "loss": 1.722, "step": 20070 }, { "epoch": 3.4158373734796292, "grad_norm": 5.616125583648682, "learning_rate": 3.2424729330986885e-06, "loss": 1.5349, "step": 20080 }, { "epoch": 3.4175384877094497, "grad_norm": 5.05100679397583, "learning_rate": 3.2361363738862344e-06, "loss": 1.5484, "step": 20090 }, { "epoch": 3.41923960193927, "grad_norm": 5.591383457183838, "learning_rate": 3.2298040492198747e-06, "loss": 1.5953, "step": 20100 }, { "epoch": 3.4209407161690906, "grad_norm": 6.765773773193359, "learning_rate": 3.223475966789662e-06, "loss": 1.5554, "step": 20110 }, { "epoch": 3.4226418303989115, "grad_norm": 6.567031383514404, "learning_rate": 3.217152134280497e-06, "loss": 1.5934, "step": 20120 }, { "epoch": 3.424342944628732, "grad_norm": 5.841222763061523, "learning_rate": 3.210832559372123e-06, "loss": 1.5859, "step": 20130 }, { "epoch": 3.4260440588585523, "grad_norm": 5.731585502624512, "learning_rate": 3.2045172497391076e-06, "loss": 1.5609, "step": 20140 }, { "epoch": 3.427745173088373, "grad_norm": 5.261969089508057, "learning_rate": 3.198206213050841e-06, "loss": 1.7027, "step": 20150 }, { "epoch": 3.4294462873181932, "grad_norm": 6.334646701812744, "learning_rate": 3.1918994569715286e-06, "loss": 1.5391, "step": 20160 }, { "epoch": 3.431147401548014, "grad_norm": 6.424821376800537, "learning_rate": 3.1855969891601706e-06, "loss": 1.6238, "step": 20170 }, { "epoch": 3.4328485157778346, "grad_norm": 4.80640983581543, "learning_rate": 3.1792988172705623e-06, "loss": 1.6282, "step": 20180 }, { "epoch": 3.434549630007655, "grad_norm": 5.788819789886475, "learning_rate": 3.1730049489512816e-06, "loss": 1.5954, "step": 20190 }, { "epoch": 3.4362507442374755, "grad_norm": 5.655074596405029, "learning_rate": 3.1667153918456812e-06, "loss": 1.6188, "step": 20200 }, { "epoch": 3.437951858467296, "grad_norm": 6.133515357971191, "learning_rate": 3.160430153591877e-06, "loss": 1.6272, "step": 20210 }, { "epoch": 3.439652972697117, "grad_norm": 5.728498458862305, "learning_rate": 3.1541492418227407e-06, "loss": 1.5631, "step": 20220 }, { "epoch": 3.4413540869269372, "grad_norm": 6.747180938720703, "learning_rate": 3.1478726641658888e-06, "loss": 1.6548, "step": 20230 }, { "epoch": 3.4430552011567577, "grad_norm": 4.901608943939209, "learning_rate": 3.1416004282436757e-06, "loss": 1.6217, "step": 20240 }, { "epoch": 3.444756315386578, "grad_norm": 5.332956314086914, "learning_rate": 3.1353325416731825e-06, "loss": 1.7339, "step": 20250 }, { "epoch": 3.4464574296163986, "grad_norm": 5.859671115875244, "learning_rate": 3.1290690120662092e-06, "loss": 1.618, "step": 20260 }, { "epoch": 3.4481585438462194, "grad_norm": 5.185967445373535, "learning_rate": 3.1228098470292597e-06, "loss": 1.5618, "step": 20270 }, { "epoch": 3.44985965807604, "grad_norm": 5.8756890296936035, "learning_rate": 3.1165550541635487e-06, "loss": 1.4586, "step": 20280 }, { "epoch": 3.4515607723058603, "grad_norm": 6.972698211669922, "learning_rate": 3.110304641064971e-06, "loss": 1.5828, "step": 20290 }, { "epoch": 3.453261886535681, "grad_norm": 5.784275054931641, "learning_rate": 3.1040586153241065e-06, "loss": 1.7556, "step": 20300 }, { "epoch": 3.4549630007655012, "grad_norm": 5.015942096710205, "learning_rate": 3.097816984526208e-06, "loss": 1.6751, "step": 20310 }, { "epoch": 3.456664114995322, "grad_norm": 5.505063056945801, "learning_rate": 3.091579756251185e-06, "loss": 1.5959, "step": 20320 }, { "epoch": 3.4583652292251426, "grad_norm": 6.521902084350586, "learning_rate": 3.0853469380736048e-06, "loss": 1.6657, "step": 20330 }, { "epoch": 3.460066343454963, "grad_norm": 4.743898391723633, "learning_rate": 3.079118537562684e-06, "loss": 1.5205, "step": 20340 }, { "epoch": 3.4617674576847834, "grad_norm": 5.524914264678955, "learning_rate": 3.072894562282268e-06, "loss": 1.6363, "step": 20350 }, { "epoch": 3.463468571914604, "grad_norm": 5.382754325866699, "learning_rate": 3.066675019790829e-06, "loss": 1.5969, "step": 20360 }, { "epoch": 3.4651696861444248, "grad_norm": 4.380696773529053, "learning_rate": 3.0604599176414567e-06, "loss": 1.6067, "step": 20370 }, { "epoch": 3.466870800374245, "grad_norm": 5.742018222808838, "learning_rate": 3.054249263381846e-06, "loss": 1.5962, "step": 20380 }, { "epoch": 3.4685719146040657, "grad_norm": 6.359322547912598, "learning_rate": 3.0480430645543003e-06, "loss": 1.6064, "step": 20390 }, { "epoch": 3.470273028833886, "grad_norm": 5.794898986816406, "learning_rate": 3.041841328695696e-06, "loss": 1.6039, "step": 20400 }, { "epoch": 3.4719741430637066, "grad_norm": 5.615640163421631, "learning_rate": 3.035644063337502e-06, "loss": 1.5731, "step": 20410 }, { "epoch": 3.4736752572935274, "grad_norm": 4.714178562164307, "learning_rate": 3.0294512760057537e-06, "loss": 1.5701, "step": 20420 }, { "epoch": 3.475376371523348, "grad_norm": 5.054441452026367, "learning_rate": 3.0232629742210494e-06, "loss": 1.5568, "step": 20430 }, { "epoch": 3.4770774857531683, "grad_norm": 5.176323890686035, "learning_rate": 3.017079165498537e-06, "loss": 1.6077, "step": 20440 }, { "epoch": 3.4787785999829888, "grad_norm": 5.393445014953613, "learning_rate": 3.010899857347915e-06, "loss": 1.7021, "step": 20450 }, { "epoch": 3.480479714212809, "grad_norm": 5.865242958068848, "learning_rate": 3.0047250572734095e-06, "loss": 1.5678, "step": 20460 }, { "epoch": 3.48218082844263, "grad_norm": 6.027548313140869, "learning_rate": 2.998554772773775e-06, "loss": 1.6008, "step": 20470 }, { "epoch": 3.4838819426724505, "grad_norm": 6.511929035186768, "learning_rate": 2.9923890113422818e-06, "loss": 1.6372, "step": 20480 }, { "epoch": 3.485583056902271, "grad_norm": 5.237430095672607, "learning_rate": 2.986227780466707e-06, "loss": 1.518, "step": 20490 }, { "epoch": 3.4872841711320914, "grad_norm": 6.152370929718018, "learning_rate": 2.9800710876293255e-06, "loss": 1.6589, "step": 20500 }, { "epoch": 3.488985285361912, "grad_norm": 6.018396854400635, "learning_rate": 2.9739189403069026e-06, "loss": 1.6461, "step": 20510 }, { "epoch": 3.4906863995917328, "grad_norm": 5.236014366149902, "learning_rate": 2.9677713459706806e-06, "loss": 1.6335, "step": 20520 }, { "epoch": 3.492387513821553, "grad_norm": 6.262957572937012, "learning_rate": 2.961628312086376e-06, "loss": 1.6554, "step": 20530 }, { "epoch": 3.4940886280513737, "grad_norm": 5.400660037994385, "learning_rate": 2.9554898461141658e-06, "loss": 1.5663, "step": 20540 }, { "epoch": 3.495789742281194, "grad_norm": 6.142693996429443, "learning_rate": 2.9493559555086747e-06, "loss": 1.5755, "step": 20550 }, { "epoch": 3.4974908565110145, "grad_norm": 4.737947463989258, "learning_rate": 2.9432266477189826e-06, "loss": 1.6199, "step": 20560 }, { "epoch": 3.4991919707408354, "grad_norm": 5.962085247039795, "learning_rate": 2.9371019301885942e-06, "loss": 1.6135, "step": 20570 }, { "epoch": 3.500893084970656, "grad_norm": 6.262288570404053, "learning_rate": 2.9309818103554416e-06, "loss": 1.6169, "step": 20580 }, { "epoch": 3.5025941992004763, "grad_norm": 6.423060417175293, "learning_rate": 2.9248662956518753e-06, "loss": 1.6157, "step": 20590 }, { "epoch": 3.5042953134302968, "grad_norm": 5.860107898712158, "learning_rate": 2.918755393504655e-06, "loss": 1.5585, "step": 20600 }, { "epoch": 3.5059964276601177, "grad_norm": 5.546974182128906, "learning_rate": 2.9126491113349277e-06, "loss": 1.5031, "step": 20610 }, { "epoch": 3.5076975418899377, "grad_norm": 5.556706428527832, "learning_rate": 2.906547456558246e-06, "loss": 1.6586, "step": 20620 }, { "epoch": 3.5093986561197585, "grad_norm": 5.941084384918213, "learning_rate": 2.9004504365845343e-06, "loss": 1.5983, "step": 20630 }, { "epoch": 3.511099770349579, "grad_norm": 6.159066677093506, "learning_rate": 2.894358058818087e-06, "loss": 1.6673, "step": 20640 }, { "epoch": 3.5128008845793994, "grad_norm": 6.33928918838501, "learning_rate": 2.8882703306575653e-06, "loss": 1.6407, "step": 20650 }, { "epoch": 3.51450199880922, "grad_norm": 5.965478897094727, "learning_rate": 2.8821872594959803e-06, "loss": 1.5833, "step": 20660 }, { "epoch": 3.5162031130390403, "grad_norm": 6.009367942810059, "learning_rate": 2.8761088527206885e-06, "loss": 1.5992, "step": 20670 }, { "epoch": 3.517904227268861, "grad_norm": 5.0004072189331055, "learning_rate": 2.8700351177133862e-06, "loss": 1.6309, "step": 20680 }, { "epoch": 3.5196053414986816, "grad_norm": 5.877298831939697, "learning_rate": 2.863966061850093e-06, "loss": 1.5811, "step": 20690 }, { "epoch": 3.521306455728502, "grad_norm": 5.1309428215026855, "learning_rate": 2.857901692501141e-06, "loss": 1.6566, "step": 20700 }, { "epoch": 3.5230075699583225, "grad_norm": 5.491775035858154, "learning_rate": 2.8518420170311782e-06, "loss": 1.5789, "step": 20710 }, { "epoch": 3.524708684188143, "grad_norm": 6.656279563903809, "learning_rate": 2.8457870427991465e-06, "loss": 1.4972, "step": 20720 }, { "epoch": 3.526409798417964, "grad_norm": 6.993110656738281, "learning_rate": 2.839736777158288e-06, "loss": 1.7199, "step": 20730 }, { "epoch": 3.5281109126477843, "grad_norm": 6.034376621246338, "learning_rate": 2.8336912274561176e-06, "loss": 1.57, "step": 20740 }, { "epoch": 3.5298120268776048, "grad_norm": 5.539135932922363, "learning_rate": 2.8276504010344247e-06, "loss": 1.5519, "step": 20750 }, { "epoch": 3.531513141107425, "grad_norm": 4.798160552978516, "learning_rate": 2.821614305229266e-06, "loss": 1.5829, "step": 20760 }, { "epoch": 3.5332142553372456, "grad_norm": 5.539159774780273, "learning_rate": 2.815582947370949e-06, "loss": 1.6393, "step": 20770 }, { "epoch": 3.5349153695670665, "grad_norm": 6.6422505378723145, "learning_rate": 2.8095563347840314e-06, "loss": 1.5283, "step": 20780 }, { "epoch": 3.536616483796887, "grad_norm": 4.52136754989624, "learning_rate": 2.803534474787306e-06, "loss": 1.6371, "step": 20790 }, { "epoch": 3.5383175980267074, "grad_norm": 6.491845607757568, "learning_rate": 2.797517374693794e-06, "loss": 1.5793, "step": 20800 }, { "epoch": 3.540018712256528, "grad_norm": 6.235819339752197, "learning_rate": 2.7915050418107366e-06, "loss": 1.5977, "step": 20810 }, { "epoch": 3.5417198264863483, "grad_norm": 4.864239692687988, "learning_rate": 2.785497483439586e-06, "loss": 1.5128, "step": 20820 }, { "epoch": 3.543420940716169, "grad_norm": 5.335078716278076, "learning_rate": 2.7794947068759954e-06, "loss": 1.5557, "step": 20830 }, { "epoch": 3.5451220549459896, "grad_norm": 6.0590362548828125, "learning_rate": 2.7734967194098084e-06, "loss": 1.6613, "step": 20840 }, { "epoch": 3.54682316917581, "grad_norm": 5.815928936004639, "learning_rate": 2.7675035283250606e-06, "loss": 1.5621, "step": 20850 }, { "epoch": 3.5485242834056305, "grad_norm": 8.026349067687988, "learning_rate": 2.7615151408999556e-06, "loss": 1.5136, "step": 20860 }, { "epoch": 3.550225397635451, "grad_norm": 5.613368511199951, "learning_rate": 2.755531564406865e-06, "loss": 1.5729, "step": 20870 }, { "epoch": 3.551926511865272, "grad_norm": 6.851166248321533, "learning_rate": 2.7495528061123187e-06, "loss": 1.6317, "step": 20880 }, { "epoch": 3.5536276260950923, "grad_norm": 5.184919357299805, "learning_rate": 2.7435788732769952e-06, "loss": 1.5271, "step": 20890 }, { "epoch": 3.5553287403249128, "grad_norm": 5.281979084014893, "learning_rate": 2.737609773155713e-06, "loss": 1.5594, "step": 20900 }, { "epoch": 3.557029854554733, "grad_norm": 6.160537242889404, "learning_rate": 2.7316455129974204e-06, "loss": 1.6165, "step": 20910 }, { "epoch": 3.5587309687845536, "grad_norm": 6.142460346221924, "learning_rate": 2.72568610004519e-06, "loss": 1.5558, "step": 20920 }, { "epoch": 3.5604320830143745, "grad_norm": 6.025132179260254, "learning_rate": 2.7197315415362057e-06, "loss": 1.645, "step": 20930 }, { "epoch": 3.562133197244195, "grad_norm": 6.387031078338623, "learning_rate": 2.713781844701759e-06, "loss": 1.5794, "step": 20940 }, { "epoch": 3.5638343114740154, "grad_norm": 6.645457744598389, "learning_rate": 2.7078370167672324e-06, "loss": 1.6584, "step": 20950 }, { "epoch": 3.565535425703836, "grad_norm": 4.468255519866943, "learning_rate": 2.701897064952104e-06, "loss": 1.4577, "step": 20960 }, { "epoch": 3.5672365399336563, "grad_norm": 6.84254264831543, "learning_rate": 2.6959619964699215e-06, "loss": 1.5517, "step": 20970 }, { "epoch": 3.568937654163477, "grad_norm": 7.275737762451172, "learning_rate": 2.6900318185283094e-06, "loss": 1.5276, "step": 20980 }, { "epoch": 3.5706387683932976, "grad_norm": 4.726008415222168, "learning_rate": 2.684106538328944e-06, "loss": 1.5729, "step": 20990 }, { "epoch": 3.572339882623118, "grad_norm": 5.600918292999268, "learning_rate": 2.6781861630675627e-06, "loss": 1.7046, "step": 21000 }, { "epoch": 3.5740409968529385, "grad_norm": 5.3228230476379395, "learning_rate": 2.672270699933939e-06, "loss": 1.6861, "step": 21010 }, { "epoch": 3.575742111082759, "grad_norm": 6.673993110656738, "learning_rate": 2.6663601561118895e-06, "loss": 1.5471, "step": 21020 }, { "epoch": 3.57744322531258, "grad_norm": 5.231881618499756, "learning_rate": 2.6604545387792506e-06, "loss": 1.6026, "step": 21030 }, { "epoch": 3.5791443395424003, "grad_norm": 5.421326637268066, "learning_rate": 2.6545538551078772e-06, "loss": 1.5646, "step": 21040 }, { "epoch": 3.5808454537722207, "grad_norm": 5.813632488250732, "learning_rate": 2.648658112263633e-06, "loss": 1.5693, "step": 21050 }, { "epoch": 3.582546568002041, "grad_norm": 5.844841480255127, "learning_rate": 2.642767317406381e-06, "loss": 1.5946, "step": 21060 }, { "epoch": 3.5842476822318616, "grad_norm": 6.236157417297363, "learning_rate": 2.636881477689975e-06, "loss": 1.6562, "step": 21070 }, { "epoch": 3.5859487964616825, "grad_norm": 4.842199325561523, "learning_rate": 2.631000600262253e-06, "loss": 1.6737, "step": 21080 }, { "epoch": 3.587649910691503, "grad_norm": 5.5632452964782715, "learning_rate": 2.6251246922650255e-06, "loss": 1.6204, "step": 21090 }, { "epoch": 3.5893510249213234, "grad_norm": 5.149427890777588, "learning_rate": 2.619253760834067e-06, "loss": 1.5674, "step": 21100 }, { "epoch": 3.591052139151144, "grad_norm": 6.830357551574707, "learning_rate": 2.6133878130991113e-06, "loss": 1.6327, "step": 21110 }, { "epoch": 3.5927532533809643, "grad_norm": 5.267126083374023, "learning_rate": 2.6075268561838346e-06, "loss": 1.6055, "step": 21120 }, { "epoch": 3.594454367610785, "grad_norm": 5.860008239746094, "learning_rate": 2.6016708972058606e-06, "loss": 1.6324, "step": 21130 }, { "epoch": 3.5961554818406056, "grad_norm": 5.772332191467285, "learning_rate": 2.595819943276737e-06, "loss": 1.6781, "step": 21140 }, { "epoch": 3.597856596070426, "grad_norm": 5.773773670196533, "learning_rate": 2.589974001501934e-06, "loss": 1.6179, "step": 21150 }, { "epoch": 3.5995577103002465, "grad_norm": 5.229680061340332, "learning_rate": 2.584133078980837e-06, "loss": 1.7436, "step": 21160 }, { "epoch": 3.601258824530067, "grad_norm": 5.797763347625732, "learning_rate": 2.5782971828067333e-06, "loss": 1.6327, "step": 21170 }, { "epoch": 3.602959938759888, "grad_norm": 4.195744514465332, "learning_rate": 2.5724663200668086e-06, "loss": 1.5473, "step": 21180 }, { "epoch": 3.6046610529897083, "grad_norm": 5.071658134460449, "learning_rate": 2.5666404978421362e-06, "loss": 1.4735, "step": 21190 }, { "epoch": 3.6063621672195287, "grad_norm": 5.922102928161621, "learning_rate": 2.5608197232076656e-06, "loss": 1.6592, "step": 21200 }, { "epoch": 3.608063281449349, "grad_norm": 5.720836639404297, "learning_rate": 2.5550040032322174e-06, "loss": 1.6041, "step": 21210 }, { "epoch": 3.6097643956791696, "grad_norm": 7.563029766082764, "learning_rate": 2.5491933449784756e-06, "loss": 1.6228, "step": 21220 }, { "epoch": 3.6114655099089905, "grad_norm": 5.825148582458496, "learning_rate": 2.5433877555029748e-06, "loss": 1.5348, "step": 21230 }, { "epoch": 3.613166624138811, "grad_norm": 5.527562141418457, "learning_rate": 2.5375872418560922e-06, "loss": 1.6959, "step": 21240 }, { "epoch": 3.6148677383686314, "grad_norm": 5.692595481872559, "learning_rate": 2.5317918110820496e-06, "loss": 1.5923, "step": 21250 }, { "epoch": 3.616568852598452, "grad_norm": 6.298586845397949, "learning_rate": 2.526001470218888e-06, "loss": 1.7093, "step": 21260 }, { "epoch": 3.6182699668282723, "grad_norm": 5.965487003326416, "learning_rate": 2.520216226298467e-06, "loss": 1.6044, "step": 21270 }, { "epoch": 3.619971081058093, "grad_norm": 6.28692626953125, "learning_rate": 2.5144360863464634e-06, "loss": 1.5801, "step": 21280 }, { "epoch": 3.6216721952879136, "grad_norm": 6.619129180908203, "learning_rate": 2.5086610573823425e-06, "loss": 1.6437, "step": 21290 }, { "epoch": 3.623373309517734, "grad_norm": 6.151878833770752, "learning_rate": 2.5028911464193796e-06, "loss": 1.6172, "step": 21300 }, { "epoch": 3.6250744237475545, "grad_norm": 5.117790699005127, "learning_rate": 2.497126360464623e-06, "loss": 1.564, "step": 21310 }, { "epoch": 3.626775537977375, "grad_norm": 6.424371719360352, "learning_rate": 2.4913667065188997e-06, "loss": 1.5033, "step": 21320 }, { "epoch": 3.628476652207196, "grad_norm": 4.737883567810059, "learning_rate": 2.4856121915768064e-06, "loss": 1.6163, "step": 21330 }, { "epoch": 3.6301777664370163, "grad_norm": 6.619544982910156, "learning_rate": 2.479862822626696e-06, "loss": 1.5846, "step": 21340 }, { "epoch": 3.6318788806668367, "grad_norm": 5.334020137786865, "learning_rate": 2.4741186066506735e-06, "loss": 1.6201, "step": 21350 }, { "epoch": 3.633579994896657, "grad_norm": 5.102516174316406, "learning_rate": 2.468379550624589e-06, "loss": 1.6494, "step": 21360 }, { "epoch": 3.6352811091264776, "grad_norm": 6.121121883392334, "learning_rate": 2.462645661518023e-06, "loss": 1.5958, "step": 21370 }, { "epoch": 3.6369822233562985, "grad_norm": 6.0861968994140625, "learning_rate": 2.4569169462942787e-06, "loss": 1.5282, "step": 21380 }, { "epoch": 3.638683337586119, "grad_norm": 6.331408500671387, "learning_rate": 2.4511934119103807e-06, "loss": 1.6055, "step": 21390 }, { "epoch": 3.6403844518159394, "grad_norm": 7.731186866760254, "learning_rate": 2.44547506531706e-06, "loss": 1.6488, "step": 21400 }, { "epoch": 3.64208556604576, "grad_norm": 4.587552547454834, "learning_rate": 2.439761913458745e-06, "loss": 1.6261, "step": 21410 }, { "epoch": 3.6437866802755803, "grad_norm": 5.035861015319824, "learning_rate": 2.434053963273564e-06, "loss": 1.6608, "step": 21420 }, { "epoch": 3.645487794505401, "grad_norm": 5.879154205322266, "learning_rate": 2.4283512216933196e-06, "loss": 1.5426, "step": 21430 }, { "epoch": 3.6471889087352216, "grad_norm": 5.091813564300537, "learning_rate": 2.4226536956434926e-06, "loss": 1.6141, "step": 21440 }, { "epoch": 3.648890022965042, "grad_norm": 5.193233489990234, "learning_rate": 2.4169613920432284e-06, "loss": 1.6456, "step": 21450 }, { "epoch": 3.6505911371948625, "grad_norm": 6.129888534545898, "learning_rate": 2.411274317805332e-06, "loss": 1.546, "step": 21460 }, { "epoch": 3.652292251424683, "grad_norm": 5.277714729309082, "learning_rate": 2.4055924798362564e-06, "loss": 1.6102, "step": 21470 }, { "epoch": 3.653993365654504, "grad_norm": 5.8031511306762695, "learning_rate": 2.3999158850360967e-06, "loss": 1.6409, "step": 21480 }, { "epoch": 3.6556944798843243, "grad_norm": 5.230282783508301, "learning_rate": 2.3942445402985805e-06, "loss": 1.6237, "step": 21490 }, { "epoch": 3.6573955941141447, "grad_norm": 6.073917388916016, "learning_rate": 2.388578452511059e-06, "loss": 1.6167, "step": 21500 }, { "epoch": 3.659096708343965, "grad_norm": 5.9513258934021, "learning_rate": 2.3829176285544988e-06, "loss": 1.6356, "step": 21510 }, { "epoch": 3.6607978225737856, "grad_norm": 5.671891212463379, "learning_rate": 2.3772620753034735e-06, "loss": 1.5689, "step": 21520 }, { "epoch": 3.6624989368036065, "grad_norm": 6.27203369140625, "learning_rate": 2.371611799626162e-06, "loss": 1.4825, "step": 21530 }, { "epoch": 3.664200051033427, "grad_norm": 7.135629653930664, "learning_rate": 2.365966808384326e-06, "loss": 1.6012, "step": 21540 }, { "epoch": 3.6659011652632474, "grad_norm": 6.8408284187316895, "learning_rate": 2.3603271084333134e-06, "loss": 1.6191, "step": 21550 }, { "epoch": 3.667602279493068, "grad_norm": 4.956294536590576, "learning_rate": 2.3546927066220467e-06, "loss": 1.5503, "step": 21560 }, { "epoch": 3.6693033937228883, "grad_norm": 5.815354347229004, "learning_rate": 2.349063609793014e-06, "loss": 1.5498, "step": 21570 }, { "epoch": 3.671004507952709, "grad_norm": 5.707915782928467, "learning_rate": 2.343439824782255e-06, "loss": 1.5665, "step": 21580 }, { "epoch": 3.6727056221825296, "grad_norm": 6.232976913452148, "learning_rate": 2.3378213584193703e-06, "loss": 1.5358, "step": 21590 }, { "epoch": 3.67440673641235, "grad_norm": 5.771692752838135, "learning_rate": 2.3322082175274944e-06, "loss": 1.5688, "step": 21600 }, { "epoch": 3.6761078506421705, "grad_norm": 6.212607383728027, "learning_rate": 2.3266004089232927e-06, "loss": 1.6291, "step": 21610 }, { "epoch": 3.677808964871991, "grad_norm": 5.270486354827881, "learning_rate": 2.3209979394169597e-06, "loss": 1.516, "step": 21620 }, { "epoch": 3.679510079101812, "grad_norm": 5.60796594619751, "learning_rate": 2.315400815812203e-06, "loss": 1.5644, "step": 21630 }, { "epoch": 3.6812111933316323, "grad_norm": 5.498111248016357, "learning_rate": 2.3098090449062372e-06, "loss": 1.5741, "step": 21640 }, { "epoch": 3.6829123075614527, "grad_norm": 6.797544002532959, "learning_rate": 2.304222633489782e-06, "loss": 1.503, "step": 21650 }, { "epoch": 3.684613421791273, "grad_norm": 5.772705078125, "learning_rate": 2.299199451185788e-06, "loss": 1.6701, "step": 21660 }, { "epoch": 3.6863145360210936, "grad_norm": 5.532495975494385, "learning_rate": 2.293623241484506e-06, "loss": 1.5906, "step": 21670 }, { "epoch": 3.6880156502509145, "grad_norm": 6.167540550231934, "learning_rate": 2.288052410928972e-06, "loss": 1.6057, "step": 21680 }, { "epoch": 3.689716764480735, "grad_norm": 5.464738845825195, "learning_rate": 2.282486966284476e-06, "loss": 1.5806, "step": 21690 }, { "epoch": 3.6914178787105554, "grad_norm": 5.4526472091674805, "learning_rate": 2.2769269143097606e-06, "loss": 1.6392, "step": 21700 }, { "epoch": 3.693118992940376, "grad_norm": 4.271237850189209, "learning_rate": 2.271372261757022e-06, "loss": 1.7209, "step": 21710 }, { "epoch": 3.6948201071701963, "grad_norm": 6.350117206573486, "learning_rate": 2.2658230153718993e-06, "loss": 1.5714, "step": 21720 }, { "epoch": 3.696521221400017, "grad_norm": 6.264103412628174, "learning_rate": 2.2602791818934672e-06, "loss": 1.585, "step": 21730 }, { "epoch": 3.6982223356298376, "grad_norm": 6.342548847198486, "learning_rate": 2.2547407680542244e-06, "loss": 1.5491, "step": 21740 }, { "epoch": 3.699923449859658, "grad_norm": 5.97145938873291, "learning_rate": 2.2492077805800908e-06, "loss": 1.5353, "step": 21750 }, { "epoch": 3.7016245640894785, "grad_norm": 5.500047206878662, "learning_rate": 2.2436802261903934e-06, "loss": 1.6206, "step": 21760 }, { "epoch": 3.703325678319299, "grad_norm": 6.11614465713501, "learning_rate": 2.2381581115978653e-06, "loss": 1.5965, "step": 21770 }, { "epoch": 3.70502679254912, "grad_norm": 6.1228179931640625, "learning_rate": 2.232641443508629e-06, "loss": 1.5294, "step": 21780 }, { "epoch": 3.7067279067789403, "grad_norm": 6.533079147338867, "learning_rate": 2.227130228622194e-06, "loss": 1.5897, "step": 21790 }, { "epoch": 3.7084290210087607, "grad_norm": 5.325455665588379, "learning_rate": 2.221624473631452e-06, "loss": 1.5396, "step": 21800 }, { "epoch": 3.710130135238581, "grad_norm": 4.874083995819092, "learning_rate": 2.2161241852226572e-06, "loss": 1.6193, "step": 21810 }, { "epoch": 3.7118312494684016, "grad_norm": 4.729120254516602, "learning_rate": 2.210629370075429e-06, "loss": 1.6007, "step": 21820 }, { "epoch": 3.7135323636982225, "grad_norm": 6.683821678161621, "learning_rate": 2.2051400348627395e-06, "loss": 1.6439, "step": 21830 }, { "epoch": 3.715233477928043, "grad_norm": 7.281447887420654, "learning_rate": 2.1996561862509047e-06, "loss": 1.6405, "step": 21840 }, { "epoch": 3.7169345921578634, "grad_norm": 5.484798431396484, "learning_rate": 2.19417783089958e-06, "loss": 1.6407, "step": 21850 }, { "epoch": 3.718635706387684, "grad_norm": 5.7020344734191895, "learning_rate": 2.1887049754617463e-06, "loss": 1.5905, "step": 21860 }, { "epoch": 3.7203368206175043, "grad_norm": 6.41756534576416, "learning_rate": 2.1832376265837074e-06, "loss": 1.4172, "step": 21870 }, { "epoch": 3.722037934847325, "grad_norm": 6.037424564361572, "learning_rate": 2.177775790905082e-06, "loss": 1.6442, "step": 21880 }, { "epoch": 3.7237390490771456, "grad_norm": 5.487923622131348, "learning_rate": 2.1723194750587883e-06, "loss": 1.5439, "step": 21890 }, { "epoch": 3.725440163306966, "grad_norm": 6.3822340965271, "learning_rate": 2.166868685671044e-06, "loss": 1.619, "step": 21900 }, { "epoch": 3.7271412775367865, "grad_norm": 5.389329433441162, "learning_rate": 2.1614234293613586e-06, "loss": 1.6746, "step": 21910 }, { "epoch": 3.728842391766607, "grad_norm": 5.0422234535217285, "learning_rate": 2.155983712742517e-06, "loss": 1.578, "step": 21920 }, { "epoch": 3.730543505996428, "grad_norm": 4.832094192504883, "learning_rate": 2.1505495424205793e-06, "loss": 1.6345, "step": 21930 }, { "epoch": 3.7322446202262483, "grad_norm": 5.112161159515381, "learning_rate": 2.1451209249948704e-06, "loss": 1.5959, "step": 21940 }, { "epoch": 3.7339457344560687, "grad_norm": 5.519700050354004, "learning_rate": 2.139697867057968e-06, "loss": 1.548, "step": 21950 }, { "epoch": 3.735646848685889, "grad_norm": 6.192723274230957, "learning_rate": 2.134280375195699e-06, "loss": 1.4994, "step": 21960 }, { "epoch": 3.7373479629157096, "grad_norm": 5.92973518371582, "learning_rate": 2.12886845598714e-06, "loss": 1.5361, "step": 21970 }, { "epoch": 3.7390490771455305, "grad_norm": 5.719332695007324, "learning_rate": 2.123462116004588e-06, "loss": 1.5925, "step": 21980 }, { "epoch": 3.740750191375351, "grad_norm": 6.8935160636901855, "learning_rate": 2.11806136181357e-06, "loss": 1.5683, "step": 21990 }, { "epoch": 3.7424513056051714, "grad_norm": 6.860033988952637, "learning_rate": 2.1132054643143583e-06, "loss": 1.5645, "step": 22000 }, { "epoch": 3.744152419834992, "grad_norm": 6.525299072265625, "learning_rate": 2.1078153411909805e-06, "loss": 1.6685, "step": 22010 }, { "epoch": 3.7458535340648123, "grad_norm": 6.222875595092773, "learning_rate": 2.102430822860775e-06, "loss": 1.5352, "step": 22020 }, { "epoch": 3.747554648294633, "grad_norm": 8.409226417541504, "learning_rate": 2.0970519158627674e-06, "loss": 1.595, "step": 22030 }, { "epoch": 3.7492557625244536, "grad_norm": 6.118697643280029, "learning_rate": 2.0916786267291696e-06, "loss": 1.6523, "step": 22040 }, { "epoch": 3.750956876754274, "grad_norm": 5.883594989776611, "learning_rate": 2.0863109619853705e-06, "loss": 1.5977, "step": 22050 }, { "epoch": 3.7526579909840945, "grad_norm": 6.259503364562988, "learning_rate": 2.080948928149927e-06, "loss": 1.6087, "step": 22060 }, { "epoch": 3.754359105213915, "grad_norm": 5.8465447425842285, "learning_rate": 2.0755925317345633e-06, "loss": 1.6201, "step": 22070 }, { "epoch": 3.756060219443736, "grad_norm": 5.4746832847595215, "learning_rate": 2.0702417792441523e-06, "loss": 1.604, "step": 22080 }, { "epoch": 3.7577613336735562, "grad_norm": 5.659877300262451, "learning_rate": 2.0648966771767136e-06, "loss": 1.5995, "step": 22090 }, { "epoch": 3.7594624479033767, "grad_norm": 6.075840473175049, "learning_rate": 2.059557232023406e-06, "loss": 1.6702, "step": 22100 }, { "epoch": 3.761163562133197, "grad_norm": 5.514547824859619, "learning_rate": 2.054223450268518e-06, "loss": 1.5811, "step": 22110 }, { "epoch": 3.7628646763630176, "grad_norm": 5.915891647338867, "learning_rate": 2.04889533838946e-06, "loss": 1.5995, "step": 22120 }, { "epoch": 3.7645657905928385, "grad_norm": 6.892215251922607, "learning_rate": 2.0435729028567573e-06, "loss": 1.6316, "step": 22130 }, { "epoch": 3.766266904822659, "grad_norm": 6.2456889152526855, "learning_rate": 2.0382561501340425e-06, "loss": 1.5633, "step": 22140 }, { "epoch": 3.7679680190524794, "grad_norm": 4.70587158203125, "learning_rate": 2.0329450866780456e-06, "loss": 1.5695, "step": 22150 }, { "epoch": 3.7696691332823, "grad_norm": 6.266761302947998, "learning_rate": 2.0276397189385885e-06, "loss": 1.6148, "step": 22160 }, { "epoch": 3.7713702475121202, "grad_norm": 5.643571376800537, "learning_rate": 2.0223400533585724e-06, "loss": 1.576, "step": 22170 }, { "epoch": 3.773071361741941, "grad_norm": 5.848235607147217, "learning_rate": 2.0170460963739823e-06, "loss": 1.5702, "step": 22180 }, { "epoch": 3.7747724759717616, "grad_norm": 5.9626688957214355, "learning_rate": 2.011757854413863e-06, "loss": 1.4829, "step": 22190 }, { "epoch": 3.776473590201582, "grad_norm": 6.014424800872803, "learning_rate": 2.006475333900321e-06, "loss": 1.5723, "step": 22200 }, { "epoch": 3.7781747044314025, "grad_norm": 4.955949783325195, "learning_rate": 2.0011985412485175e-06, "loss": 1.7041, "step": 22210 }, { "epoch": 3.779875818661223, "grad_norm": 5.689254283905029, "learning_rate": 1.99592748286665e-06, "loss": 1.5068, "step": 22220 }, { "epoch": 3.781576932891044, "grad_norm": 5.831399440765381, "learning_rate": 1.990662165155958e-06, "loss": 1.5543, "step": 22230 }, { "epoch": 3.7832780471208642, "grad_norm": 6.465705871582031, "learning_rate": 1.985402594510713e-06, "loss": 1.5824, "step": 22240 }, { "epoch": 3.7849791613506847, "grad_norm": 5.482491970062256, "learning_rate": 1.9801487773182003e-06, "loss": 1.7232, "step": 22250 }, { "epoch": 3.786680275580505, "grad_norm": 4.863794803619385, "learning_rate": 1.9749007199587203e-06, "loss": 1.6624, "step": 22260 }, { "epoch": 3.7883813898103256, "grad_norm": 6.264886856079102, "learning_rate": 1.9696584288055808e-06, "loss": 1.6755, "step": 22270 }, { "epoch": 3.7900825040401465, "grad_norm": 6.4072699546813965, "learning_rate": 1.9644219102250836e-06, "loss": 1.5626, "step": 22280 }, { "epoch": 3.791783618269967, "grad_norm": 6.039673805236816, "learning_rate": 1.95919117057652e-06, "loss": 1.6024, "step": 22290 }, { "epoch": 3.7934847324997873, "grad_norm": 6.067235469818115, "learning_rate": 1.953966216212173e-06, "loss": 1.6003, "step": 22300 }, { "epoch": 3.795185846729608, "grad_norm": 6.459325313568115, "learning_rate": 1.948747053477284e-06, "loss": 1.5327, "step": 22310 }, { "epoch": 3.7968869609594282, "grad_norm": 6.460668087005615, "learning_rate": 1.943533688710072e-06, "loss": 1.534, "step": 22320 }, { "epoch": 3.798588075189249, "grad_norm": 6.536547660827637, "learning_rate": 1.938326128241712e-06, "loss": 1.5894, "step": 22330 }, { "epoch": 3.8002891894190696, "grad_norm": 6.6990251541137695, "learning_rate": 1.9331243783963274e-06, "loss": 1.582, "step": 22340 }, { "epoch": 3.80199030364889, "grad_norm": 6.024599552154541, "learning_rate": 1.9279284454909928e-06, "loss": 1.5075, "step": 22350 }, { "epoch": 3.8036914178787105, "grad_norm": 4.894837379455566, "learning_rate": 1.9227383358357116e-06, "loss": 1.5959, "step": 22360 }, { "epoch": 3.805392532108531, "grad_norm": 5.122894287109375, "learning_rate": 1.9175540557334166e-06, "loss": 1.681, "step": 22370 }, { "epoch": 3.807093646338352, "grad_norm": 6.1250410079956055, "learning_rate": 1.912375611479963e-06, "loss": 1.6894, "step": 22380 }, { "epoch": 3.8087947605681722, "grad_norm": 6.071530818939209, "learning_rate": 1.907203009364117e-06, "loss": 1.5696, "step": 22390 }, { "epoch": 3.8104958747979927, "grad_norm": 5.952277660369873, "learning_rate": 1.9020362556675514e-06, "loss": 1.6921, "step": 22400 }, { "epoch": 3.812196989027813, "grad_norm": 5.401397705078125, "learning_rate": 1.8968753566648353e-06, "loss": 1.6918, "step": 22410 }, { "epoch": 3.8138981032576336, "grad_norm": 5.378725051879883, "learning_rate": 1.8917203186234292e-06, "loss": 1.5513, "step": 22420 }, { "epoch": 3.8155992174874545, "grad_norm": 4.487756729125977, "learning_rate": 1.8865711478036758e-06, "loss": 1.6278, "step": 22430 }, { "epoch": 3.817300331717275, "grad_norm": 5.296054840087891, "learning_rate": 1.8814278504587913e-06, "loss": 1.5496, "step": 22440 }, { "epoch": 3.8190014459470953, "grad_norm": 6.797940254211426, "learning_rate": 1.8762904328348596e-06, "loss": 1.5314, "step": 22450 }, { "epoch": 3.820702560176916, "grad_norm": 5.526535987854004, "learning_rate": 1.8711589011708233e-06, "loss": 1.6617, "step": 22460 }, { "epoch": 3.8224036744067362, "grad_norm": 6.257224082946777, "learning_rate": 1.8660332616984825e-06, "loss": 1.6189, "step": 22470 }, { "epoch": 3.824104788636557, "grad_norm": 6.83498477935791, "learning_rate": 1.8609135206424742e-06, "loss": 1.6298, "step": 22480 }, { "epoch": 3.8258059028663776, "grad_norm": 6.862575531005859, "learning_rate": 1.8557996842202764e-06, "loss": 1.6964, "step": 22490 }, { "epoch": 3.827507017096198, "grad_norm": 5.854297161102295, "learning_rate": 1.8506917586421957e-06, "loss": 1.6054, "step": 22500 }, { "epoch": 3.8292081313260184, "grad_norm": 7.284146308898926, "learning_rate": 1.8455897501113618e-06, "loss": 1.5447, "step": 22510 }, { "epoch": 3.830909245555839, "grad_norm": 6.290617942810059, "learning_rate": 1.8404936648237115e-06, "loss": 1.6434, "step": 22520 }, { "epoch": 3.83261035978566, "grad_norm": 7.121439456939697, "learning_rate": 1.8354035089680003e-06, "loss": 1.5908, "step": 22530 }, { "epoch": 3.8343114740154802, "grad_norm": 5.1764631271362305, "learning_rate": 1.8303192887257742e-06, "loss": 1.5425, "step": 22540 }, { "epoch": 3.8360125882453007, "grad_norm": 5.700981616973877, "learning_rate": 1.8252410102713733e-06, "loss": 1.5265, "step": 22550 }, { "epoch": 3.837713702475121, "grad_norm": 5.483993053436279, "learning_rate": 1.820168679771923e-06, "loss": 1.6314, "step": 22560 }, { "epoch": 3.8394148167049416, "grad_norm": 4.648709297180176, "learning_rate": 1.815102303387322e-06, "loss": 1.6124, "step": 22570 }, { "epoch": 3.8411159309347624, "grad_norm": 6.244708061218262, "learning_rate": 1.8100418872702444e-06, "loss": 1.6019, "step": 22580 }, { "epoch": 3.842817045164583, "grad_norm": 6.450686454772949, "learning_rate": 1.8049874375661229e-06, "loss": 1.595, "step": 22590 }, { "epoch": 3.8445181593944033, "grad_norm": 6.041715621948242, "learning_rate": 1.7999389604131399e-06, "loss": 1.5446, "step": 22600 }, { "epoch": 3.8462192736242238, "grad_norm": 5.104908466339111, "learning_rate": 1.7948964619422305e-06, "loss": 1.6467, "step": 22610 }, { "epoch": 3.847920387854044, "grad_norm": 6.871789932250977, "learning_rate": 1.7898599482770678e-06, "loss": 1.6949, "step": 22620 }, { "epoch": 3.849621502083865, "grad_norm": 6.004401683807373, "learning_rate": 1.7848294255340538e-06, "loss": 1.6199, "step": 22630 }, { "epoch": 3.8513226163136856, "grad_norm": 7.1538310050964355, "learning_rate": 1.779804899822323e-06, "loss": 1.6496, "step": 22640 }, { "epoch": 3.853023730543506, "grad_norm": 6.285670757293701, "learning_rate": 1.7747863772437187e-06, "loss": 1.5965, "step": 22650 }, { "epoch": 3.8547248447733264, "grad_norm": 6.898594856262207, "learning_rate": 1.7697738638927976e-06, "loss": 1.5917, "step": 22660 }, { "epoch": 3.856425959003147, "grad_norm": 5.652676582336426, "learning_rate": 1.7647673658568186e-06, "loss": 1.5332, "step": 22670 }, { "epoch": 3.8581270732329678, "grad_norm": 5.607017993927002, "learning_rate": 1.7597668892157346e-06, "loss": 1.5489, "step": 22680 }, { "epoch": 3.859828187462788, "grad_norm": 6.1689958572387695, "learning_rate": 1.7547724400421868e-06, "loss": 1.6221, "step": 22690 }, { "epoch": 3.8615293016926087, "grad_norm": 5.738541126251221, "learning_rate": 1.7497840244014958e-06, "loss": 1.5205, "step": 22700 }, { "epoch": 3.863230415922429, "grad_norm": 5.920195579528809, "learning_rate": 1.7448016483516562e-06, "loss": 1.5938, "step": 22710 }, { "epoch": 3.8649315301522496, "grad_norm": 5.464554786682129, "learning_rate": 1.7398253179433267e-06, "loss": 1.6791, "step": 22720 }, { "epoch": 3.8666326443820704, "grad_norm": 6.0924835205078125, "learning_rate": 1.7348550392198253e-06, "loss": 1.6658, "step": 22730 }, { "epoch": 3.868333758611891, "grad_norm": 6.268444061279297, "learning_rate": 1.729890818217118e-06, "loss": 1.6934, "step": 22740 }, { "epoch": 3.8700348728417113, "grad_norm": 4.383749008178711, "learning_rate": 1.7249326609638205e-06, "loss": 1.6304, "step": 22750 }, { "epoch": 3.8717359870715318, "grad_norm": 5.348143100738525, "learning_rate": 1.7199805734811787e-06, "loss": 1.6227, "step": 22760 }, { "epoch": 3.873437101301352, "grad_norm": 6.921054840087891, "learning_rate": 1.715034561783069e-06, "loss": 1.5271, "step": 22770 }, { "epoch": 3.875138215531173, "grad_norm": 6.553023338317871, "learning_rate": 1.7100946318759897e-06, "loss": 1.654, "step": 22780 }, { "epoch": 3.8768393297609935, "grad_norm": 6.585522651672363, "learning_rate": 1.705160789759052e-06, "loss": 1.5698, "step": 22790 }, { "epoch": 3.878540443990814, "grad_norm": 6.700231075286865, "learning_rate": 1.7002330414239764e-06, "loss": 1.6655, "step": 22800 }, { "epoch": 3.8802415582206344, "grad_norm": 5.89600133895874, "learning_rate": 1.6953113928550818e-06, "loss": 1.6149, "step": 22810 }, { "epoch": 3.881942672450455, "grad_norm": 5.654646873474121, "learning_rate": 1.6903958500292783e-06, "loss": 1.653, "step": 22820 }, { "epoch": 3.8836437866802758, "grad_norm": 5.169059753417969, "learning_rate": 1.6854864189160622e-06, "loss": 1.6146, "step": 22830 }, { "epoch": 3.885344900910096, "grad_norm": 5.4262261390686035, "learning_rate": 1.6805831054775081e-06, "loss": 1.5751, "step": 22840 }, { "epoch": 3.8870460151399167, "grad_norm": 5.427737712860107, "learning_rate": 1.6756859156682615e-06, "loss": 1.5781, "step": 22850 }, { "epoch": 3.888747129369737, "grad_norm": 5.8518805503845215, "learning_rate": 1.6707948554355268e-06, "loss": 1.569, "step": 22860 }, { "epoch": 3.8904482435995575, "grad_norm": 5.492292881011963, "learning_rate": 1.6659099307190734e-06, "loss": 1.644, "step": 22870 }, { "epoch": 3.8921493578293784, "grad_norm": 5.627607822418213, "learning_rate": 1.6610311474512128e-06, "loss": 1.699, "step": 22880 }, { "epoch": 3.893850472059199, "grad_norm": 5.437195777893066, "learning_rate": 1.656158511556802e-06, "loss": 1.5462, "step": 22890 }, { "epoch": 3.8955515862890193, "grad_norm": 6.071361541748047, "learning_rate": 1.6512920289532255e-06, "loss": 1.5707, "step": 22900 }, { "epoch": 3.8972527005188398, "grad_norm": 5.888106822967529, "learning_rate": 1.6464317055504013e-06, "loss": 1.574, "step": 22910 }, { "epoch": 3.89895381474866, "grad_norm": 6.413266658782959, "learning_rate": 1.6415775472507699e-06, "loss": 1.6255, "step": 22920 }, { "epoch": 3.900654928978481, "grad_norm": 6.208846569061279, "learning_rate": 1.636729559949279e-06, "loss": 1.5323, "step": 22930 }, { "epoch": 3.9023560432083015, "grad_norm": 6.5904765129089355, "learning_rate": 1.6318877495333865e-06, "loss": 1.6064, "step": 22940 }, { "epoch": 3.904057157438122, "grad_norm": 6.936824798583984, "learning_rate": 1.627052121883045e-06, "loss": 1.5421, "step": 22950 }, { "epoch": 3.9057582716679424, "grad_norm": 6.398342132568359, "learning_rate": 1.622222682870702e-06, "loss": 1.5537, "step": 22960 }, { "epoch": 3.907459385897763, "grad_norm": 7.79954195022583, "learning_rate": 1.6173994383612863e-06, "loss": 1.5808, "step": 22970 }, { "epoch": 3.9091605001275838, "grad_norm": 5.884366512298584, "learning_rate": 1.612582394212211e-06, "loss": 1.6081, "step": 22980 }, { "epoch": 3.910861614357404, "grad_norm": 5.413835525512695, "learning_rate": 1.6077715562733492e-06, "loss": 1.6782, "step": 22990 }, { "epoch": 3.9125627285872246, "grad_norm": 4.960882663726807, "learning_rate": 1.602966930387044e-06, "loss": 1.5789, "step": 23000 }, { "epoch": 3.914263842817045, "grad_norm": 4.974776744842529, "learning_rate": 1.598168522388093e-06, "loss": 1.5556, "step": 23010 }, { "epoch": 3.9159649570468655, "grad_norm": 6.388287544250488, "learning_rate": 1.5933763381037416e-06, "loss": 1.52, "step": 23020 }, { "epoch": 3.9176660712766864, "grad_norm": 6.267454147338867, "learning_rate": 1.5885903833536774e-06, "loss": 1.5922, "step": 23030 }, { "epoch": 3.919367185506507, "grad_norm": 5.785188674926758, "learning_rate": 1.5838106639500258e-06, "loss": 1.6034, "step": 23040 }, { "epoch": 3.9210682997363273, "grad_norm": 6.028636455535889, "learning_rate": 1.579037185697336e-06, "loss": 1.5497, "step": 23050 }, { "epoch": 3.9227694139661478, "grad_norm": 7.901721477508545, "learning_rate": 1.5742699543925796e-06, "loss": 1.5244, "step": 23060 }, { "epoch": 3.924470528195968, "grad_norm": 6.526932239532471, "learning_rate": 1.5695089758251414e-06, "loss": 1.6008, "step": 23070 }, { "epoch": 3.926171642425789, "grad_norm": 5.383223056793213, "learning_rate": 1.5647542557768127e-06, "loss": 1.6404, "step": 23080 }, { "epoch": 3.9278727566556095, "grad_norm": 5.718662738800049, "learning_rate": 1.5600058000217844e-06, "loss": 1.613, "step": 23090 }, { "epoch": 3.92957387088543, "grad_norm": 5.968343257904053, "learning_rate": 1.5552636143266408e-06, "loss": 1.7002, "step": 23100 }, { "epoch": 3.9312749851152504, "grad_norm": 5.775221347808838, "learning_rate": 1.550527704450351e-06, "loss": 1.5685, "step": 23110 }, { "epoch": 3.932976099345071, "grad_norm": 5.761837482452393, "learning_rate": 1.5457980761442623e-06, "loss": 1.6043, "step": 23120 }, { "epoch": 3.9346772135748918, "grad_norm": 3.1403274536132812, "learning_rate": 1.541074735152094e-06, "loss": 1.5386, "step": 23130 }, { "epoch": 3.936378327804712, "grad_norm": 7.0953545570373535, "learning_rate": 1.5363576872099288e-06, "loss": 1.5012, "step": 23140 }, { "epoch": 3.9380794420345326, "grad_norm": 5.90140438079834, "learning_rate": 1.5316469380462119e-06, "loss": 1.5493, "step": 23150 }, { "epoch": 3.939780556264353, "grad_norm": 5.884961128234863, "learning_rate": 1.5269424933817336e-06, "loss": 1.6243, "step": 23160 }, { "epoch": 3.9414816704941735, "grad_norm": 6.109187602996826, "learning_rate": 1.5222443589296305e-06, "loss": 1.6247, "step": 23170 }, { "epoch": 3.9431827847239944, "grad_norm": 5.5967698097229, "learning_rate": 1.5175525403953758e-06, "loss": 1.6835, "step": 23180 }, { "epoch": 3.944883898953815, "grad_norm": 5.781443119049072, "learning_rate": 1.5128670434767698e-06, "loss": 1.6234, "step": 23190 }, { "epoch": 3.9465850131836353, "grad_norm": 8.294487953186035, "learning_rate": 1.5081878738639384e-06, "loss": 1.6517, "step": 23200 }, { "epoch": 3.9482861274134557, "grad_norm": 6.1851091384887695, "learning_rate": 1.5035150372393257e-06, "loss": 1.5225, "step": 23210 }, { "epoch": 3.949987241643276, "grad_norm": 7.144291877746582, "learning_rate": 1.4988485392776821e-06, "loss": 1.7338, "step": 23220 }, { "epoch": 3.951688355873097, "grad_norm": 5.25929069519043, "learning_rate": 1.49418838564606e-06, "loss": 1.5546, "step": 23230 }, { "epoch": 3.9533894701029175, "grad_norm": 6.643313884735107, "learning_rate": 1.4895345820038072e-06, "loss": 1.5909, "step": 23240 }, { "epoch": 3.955090584332738, "grad_norm": 5.798901081085205, "learning_rate": 1.4848871340025624e-06, "loss": 1.5675, "step": 23250 }, { "epoch": 3.9567916985625584, "grad_norm": 5.724977493286133, "learning_rate": 1.4802460472862405e-06, "loss": 1.6318, "step": 23260 }, { "epoch": 3.958492812792379, "grad_norm": 5.830146789550781, "learning_rate": 1.4756113274910413e-06, "loss": 1.6487, "step": 23270 }, { "epoch": 3.9601939270221997, "grad_norm": 5.694972038269043, "learning_rate": 1.4709829802454207e-06, "loss": 1.5821, "step": 23280 }, { "epoch": 3.96189504125202, "grad_norm": 6.186990261077881, "learning_rate": 1.466361011170103e-06, "loss": 1.5545, "step": 23290 }, { "epoch": 3.9635961554818406, "grad_norm": 6.006585597991943, "learning_rate": 1.461745425878064e-06, "loss": 1.5724, "step": 23300 }, { "epoch": 3.965297269711661, "grad_norm": 5.7341742515563965, "learning_rate": 1.4571362299745269e-06, "loss": 1.5861, "step": 23310 }, { "epoch": 3.9669983839414815, "grad_norm": 5.754456043243408, "learning_rate": 1.4525334290569599e-06, "loss": 1.6198, "step": 23320 }, { "epoch": 3.9686994981713024, "grad_norm": 5.866809844970703, "learning_rate": 1.4479370287150595e-06, "loss": 1.7648, "step": 23330 }, { "epoch": 3.970400612401123, "grad_norm": 4.520585060119629, "learning_rate": 1.4433470345307512e-06, "loss": 1.5776, "step": 23340 }, { "epoch": 3.9721017266309433, "grad_norm": 6.06074333190918, "learning_rate": 1.4387634520781819e-06, "loss": 1.565, "step": 23350 }, { "epoch": 3.9738028408607637, "grad_norm": 6.441244125366211, "learning_rate": 1.4341862869237102e-06, "loss": 1.552, "step": 23360 }, { "epoch": 3.975503955090584, "grad_norm": 5.399563789367676, "learning_rate": 1.4296155446259024e-06, "loss": 1.4966, "step": 23370 }, { "epoch": 3.977205069320405, "grad_norm": 6.354936122894287, "learning_rate": 1.4250512307355253e-06, "loss": 1.6199, "step": 23380 }, { "epoch": 3.9789061835502255, "grad_norm": 5.502198219299316, "learning_rate": 1.4204933507955374e-06, "loss": 1.6908, "step": 23390 }, { "epoch": 3.980607297780046, "grad_norm": 5.9025959968566895, "learning_rate": 1.415941910341086e-06, "loss": 1.5611, "step": 23400 }, { "epoch": 3.9823084120098664, "grad_norm": 6.057744979858398, "learning_rate": 1.4113969148994958e-06, "loss": 1.5161, "step": 23410 }, { "epoch": 3.984009526239687, "grad_norm": 5.675593852996826, "learning_rate": 1.406858369990267e-06, "loss": 1.5843, "step": 23420 }, { "epoch": 3.9857106404695077, "grad_norm": 5.631038188934326, "learning_rate": 1.402326281125061e-06, "loss": 1.6411, "step": 23430 }, { "epoch": 3.987411754699328, "grad_norm": 6.619244575500488, "learning_rate": 1.397800653807709e-06, "loss": 1.5863, "step": 23440 }, { "epoch": 3.9891128689291486, "grad_norm": 5.93541955947876, "learning_rate": 1.393281493534185e-06, "loss": 1.6251, "step": 23450 }, { "epoch": 3.990813983158969, "grad_norm": 6.078390598297119, "learning_rate": 1.3887688057926153e-06, "loss": 1.5593, "step": 23460 }, { "epoch": 3.9925150973887895, "grad_norm": 6.589411735534668, "learning_rate": 1.384262596063262e-06, "loss": 1.6533, "step": 23470 }, { "epoch": 3.9942162116186104, "grad_norm": 5.347785949707031, "learning_rate": 1.3797628698185241e-06, "loss": 1.5861, "step": 23480 }, { "epoch": 3.995917325848431, "grad_norm": 6.955543041229248, "learning_rate": 1.3752696325229234e-06, "loss": 1.6355, "step": 23490 }, { "epoch": 3.9976184400782513, "grad_norm": 6.043464660644531, "learning_rate": 1.3707828896331037e-06, "loss": 1.649, "step": 23500 }, { "epoch": 3.9993195543080717, "grad_norm": 5.864923000335693, "learning_rate": 1.3663026465978225e-06, "loss": 1.6235, "step": 23510 }, { "epoch": 4.001020668537892, "grad_norm": 5.533880710601807, "learning_rate": 1.3618289088579407e-06, "loss": 1.6016, "step": 23520 }, { "epoch": 4.002721782767713, "grad_norm": 5.245782852172852, "learning_rate": 1.3573616818464215e-06, "loss": 1.5046, "step": 23530 }, { "epoch": 4.004422896997533, "grad_norm": 7.184761047363281, "learning_rate": 1.3529009709883203e-06, "loss": 1.5545, "step": 23540 }, { "epoch": 4.006124011227354, "grad_norm": 5.881488800048828, "learning_rate": 1.3484467817007815e-06, "loss": 1.5075, "step": 23550 }, { "epoch": 4.007825125457175, "grad_norm": 5.9063920974731445, "learning_rate": 1.3439991193930276e-06, "loss": 1.5791, "step": 23560 }, { "epoch": 4.009526239686995, "grad_norm": 6.223015308380127, "learning_rate": 1.3395579894663567e-06, "loss": 1.5318, "step": 23570 }, { "epoch": 4.011227353916816, "grad_norm": 5.480140686035156, "learning_rate": 1.3351233973141272e-06, "loss": 1.636, "step": 23580 }, { "epoch": 4.012928468146636, "grad_norm": 5.175749778747559, "learning_rate": 1.3306953483217657e-06, "loss": 1.5678, "step": 23590 }, { "epoch": 4.014629582376457, "grad_norm": 5.125709056854248, "learning_rate": 1.3262738478667487e-06, "loss": 1.6087, "step": 23600 }, { "epoch": 4.0163306966062775, "grad_norm": 6.26732873916626, "learning_rate": 1.321858901318604e-06, "loss": 1.5714, "step": 23610 }, { "epoch": 4.0180318108360975, "grad_norm": 6.341707706451416, "learning_rate": 1.317450514038895e-06, "loss": 1.4367, "step": 23620 }, { "epoch": 4.019732925065918, "grad_norm": 5.748213291168213, "learning_rate": 1.3130486913812236e-06, "loss": 1.5628, "step": 23630 }, { "epoch": 4.021434039295738, "grad_norm": 5.659625053405762, "learning_rate": 1.3086534386912171e-06, "loss": 1.4844, "step": 23640 }, { "epoch": 4.023135153525559, "grad_norm": 5.962423801422119, "learning_rate": 1.3042647613065255e-06, "loss": 1.5308, "step": 23650 }, { "epoch": 4.02483626775538, "grad_norm": 6.780612945556641, "learning_rate": 1.2998826645568116e-06, "loss": 1.5014, "step": 23660 }, { "epoch": 4.0265373819852, "grad_norm": 5.359397888183594, "learning_rate": 1.2955071537637503e-06, "loss": 1.503, "step": 23670 }, { "epoch": 4.028238496215021, "grad_norm": 7.911985397338867, "learning_rate": 1.2911382342410145e-06, "loss": 1.4167, "step": 23680 }, { "epoch": 4.029939610444841, "grad_norm": 5.2868876457214355, "learning_rate": 1.2867759112942756e-06, "loss": 1.4317, "step": 23690 }, { "epoch": 4.031640724674662, "grad_norm": 6.8723978996276855, "learning_rate": 1.2824201902211918e-06, "loss": 1.5133, "step": 23700 }, { "epoch": 4.033341838904483, "grad_norm": 6.042233943939209, "learning_rate": 1.2780710763114038e-06, "loss": 1.587, "step": 23710 }, { "epoch": 4.035042953134303, "grad_norm": 5.142142295837402, "learning_rate": 1.2737285748465322e-06, "loss": 1.4903, "step": 23720 }, { "epoch": 4.036744067364124, "grad_norm": 5.050568103790283, "learning_rate": 1.2693926911001645e-06, "loss": 1.5208, "step": 23730 }, { "epoch": 4.038445181593944, "grad_norm": 6.0770182609558105, "learning_rate": 1.2650634303378506e-06, "loss": 1.5963, "step": 23740 }, { "epoch": 4.040146295823765, "grad_norm": 4.7540974617004395, "learning_rate": 1.2607407978170987e-06, "loss": 1.5353, "step": 23750 }, { "epoch": 4.0418474100535855, "grad_norm": 5.933732509613037, "learning_rate": 1.2564247987873672e-06, "loss": 1.5335, "step": 23760 }, { "epoch": 4.0435485242834055, "grad_norm": 6.690618991851807, "learning_rate": 1.2521154384900597e-06, "loss": 1.5351, "step": 23770 }, { "epoch": 4.045249638513226, "grad_norm": 6.390956401824951, "learning_rate": 1.247812722158516e-06, "loss": 1.6059, "step": 23780 }, { "epoch": 4.046950752743046, "grad_norm": 5.966754913330078, "learning_rate": 1.2435166550180092e-06, "loss": 1.5127, "step": 23790 }, { "epoch": 4.048651866972867, "grad_norm": 6.638439655303955, "learning_rate": 1.239227242285736e-06, "loss": 1.6776, "step": 23800 }, { "epoch": 4.050352981202688, "grad_norm": 5.8886895179748535, "learning_rate": 1.2349444891708132e-06, "loss": 1.5403, "step": 23810 }, { "epoch": 4.052054095432508, "grad_norm": 6.793363094329834, "learning_rate": 1.2306684008742683e-06, "loss": 1.5017, "step": 23820 }, { "epoch": 4.053755209662329, "grad_norm": 4.266611576080322, "learning_rate": 1.2263989825890345e-06, "loss": 1.6085, "step": 23830 }, { "epoch": 4.055456323892149, "grad_norm": 6.3949432373046875, "learning_rate": 1.2221362394999506e-06, "loss": 1.5812, "step": 23840 }, { "epoch": 4.05715743812197, "grad_norm": 6.3855204582214355, "learning_rate": 1.2178801767837432e-06, "loss": 1.5012, "step": 23850 }, { "epoch": 4.058858552351791, "grad_norm": 5.799639701843262, "learning_rate": 1.2136307996090278e-06, "loss": 1.5891, "step": 23860 }, { "epoch": 4.060559666581611, "grad_norm": 5.730774402618408, "learning_rate": 1.2093881131362994e-06, "loss": 1.5129, "step": 23870 }, { "epoch": 4.062260780811432, "grad_norm": 6.441750526428223, "learning_rate": 1.2051521225179277e-06, "loss": 1.4608, "step": 23880 }, { "epoch": 4.063961895041252, "grad_norm": 6.180943965911865, "learning_rate": 1.2009228328981571e-06, "loss": 1.7483, "step": 23890 }, { "epoch": 4.065663009271073, "grad_norm": 5.97748327255249, "learning_rate": 1.1967002494130864e-06, "loss": 1.5257, "step": 23900 }, { "epoch": 4.0673641235008935, "grad_norm": 5.881425857543945, "learning_rate": 1.1924843771906737e-06, "loss": 1.6079, "step": 23910 }, { "epoch": 4.0690652377307135, "grad_norm": 6.427362442016602, "learning_rate": 1.1882752213507273e-06, "loss": 1.4936, "step": 23920 }, { "epoch": 4.070766351960534, "grad_norm": 6.745382785797119, "learning_rate": 1.1840727870048976e-06, "loss": 1.6802, "step": 23930 }, { "epoch": 4.072467466190354, "grad_norm": 6.204290866851807, "learning_rate": 1.1798770792566723e-06, "loss": 1.5797, "step": 23940 }, { "epoch": 4.074168580420175, "grad_norm": 4.805713653564453, "learning_rate": 1.1756881032013753e-06, "loss": 1.6186, "step": 23950 }, { "epoch": 4.075869694649996, "grad_norm": 5.432549476623535, "learning_rate": 1.171505863926146e-06, "loss": 1.5508, "step": 23960 }, { "epoch": 4.077570808879816, "grad_norm": 6.900859355926514, "learning_rate": 1.16733036650995e-06, "loss": 1.5908, "step": 23970 }, { "epoch": 4.079271923109637, "grad_norm": 4.752617359161377, "learning_rate": 1.1631616160235624e-06, "loss": 1.534, "step": 23980 }, { "epoch": 4.080973037339457, "grad_norm": 7.080214500427246, "learning_rate": 1.158999617529567e-06, "loss": 1.5779, "step": 23990 }, { "epoch": 4.082674151569278, "grad_norm": 6.091671943664551, "learning_rate": 1.154844376082343e-06, "loss": 1.605, "step": 24000 }, { "epoch": 4.084375265799099, "grad_norm": 6.665387153625488, "learning_rate": 1.1506958967280725e-06, "loss": 1.3982, "step": 24010 }, { "epoch": 4.086076380028919, "grad_norm": 7.309806823730469, "learning_rate": 1.1465541845047173e-06, "loss": 1.5491, "step": 24020 }, { "epoch": 4.08777749425874, "grad_norm": 6.847934722900391, "learning_rate": 1.1424192444420241e-06, "loss": 1.5341, "step": 24030 }, { "epoch": 4.08947860848856, "grad_norm": 3.901092767715454, "learning_rate": 1.1382910815615167e-06, "loss": 1.6616, "step": 24040 }, { "epoch": 4.091179722718381, "grad_norm": 5.525901794433594, "learning_rate": 1.134169700876486e-06, "loss": 1.5667, "step": 24050 }, { "epoch": 4.0928808369482015, "grad_norm": 6.85420036315918, "learning_rate": 1.1300551073919891e-06, "loss": 1.5278, "step": 24060 }, { "epoch": 4.0945819511780215, "grad_norm": 5.8117265701293945, "learning_rate": 1.1259473061048387e-06, "loss": 1.5882, "step": 24070 }, { "epoch": 4.096283065407842, "grad_norm": 7.019490718841553, "learning_rate": 1.1218463020035997e-06, "loss": 1.6056, "step": 24080 }, { "epoch": 4.097984179637662, "grad_norm": 5.084234237670898, "learning_rate": 1.117752100068583e-06, "loss": 1.6973, "step": 24090 }, { "epoch": 4.099685293867483, "grad_norm": 5.35864782333374, "learning_rate": 1.113664705271838e-06, "loss": 1.6448, "step": 24100 }, { "epoch": 4.101386408097304, "grad_norm": 6.192386627197266, "learning_rate": 1.109584122577146e-06, "loss": 1.534, "step": 24110 }, { "epoch": 4.103087522327124, "grad_norm": 6.709290981292725, "learning_rate": 1.1055103569400195e-06, "loss": 1.5793, "step": 24120 }, { "epoch": 4.104788636556945, "grad_norm": 6.019222736358643, "learning_rate": 1.101443413307691e-06, "loss": 1.4722, "step": 24130 }, { "epoch": 4.106489750786765, "grad_norm": 5.954079627990723, "learning_rate": 1.0973832966191049e-06, "loss": 1.4929, "step": 24140 }, { "epoch": 4.108190865016586, "grad_norm": 6.824218273162842, "learning_rate": 1.0933300118049192e-06, "loss": 1.5698, "step": 24150 }, { "epoch": 4.109891979246407, "grad_norm": 5.82558012008667, "learning_rate": 1.0892835637874938e-06, "loss": 1.6172, "step": 24160 }, { "epoch": 4.111593093476227, "grad_norm": 6.433945655822754, "learning_rate": 1.0852439574808808e-06, "loss": 1.4807, "step": 24170 }, { "epoch": 4.113294207706048, "grad_norm": 6.1400909423828125, "learning_rate": 1.0812111977908331e-06, "loss": 1.5646, "step": 24180 }, { "epoch": 4.114995321935868, "grad_norm": 6.493676662445068, "learning_rate": 1.0771852896147828e-06, "loss": 1.5641, "step": 24190 }, { "epoch": 4.116696436165689, "grad_norm": 5.852802276611328, "learning_rate": 1.0731662378418425e-06, "loss": 1.5711, "step": 24200 }, { "epoch": 4.1183975503955095, "grad_norm": 6.089602470397949, "learning_rate": 1.0691540473527989e-06, "loss": 1.5124, "step": 24210 }, { "epoch": 4.1200986646253295, "grad_norm": 7.201757907867432, "learning_rate": 1.065148723020105e-06, "loss": 1.5698, "step": 24220 }, { "epoch": 4.12179977885515, "grad_norm": 5.866385459899902, "learning_rate": 1.061150269707878e-06, "loss": 1.5362, "step": 24230 }, { "epoch": 4.12350089308497, "grad_norm": 5.761326313018799, "learning_rate": 1.05715869227189e-06, "loss": 1.4711, "step": 24240 }, { "epoch": 4.125202007314791, "grad_norm": 7.943985939025879, "learning_rate": 1.0531739955595599e-06, "loss": 1.5031, "step": 24250 }, { "epoch": 4.126903121544612, "grad_norm": 5.94057035446167, "learning_rate": 1.0491961844099538e-06, "loss": 1.5621, "step": 24260 }, { "epoch": 4.128604235774432, "grad_norm": 6.1100993156433105, "learning_rate": 1.0452252636537741e-06, "loss": 1.6238, "step": 24270 }, { "epoch": 4.130305350004253, "grad_norm": 6.636387348175049, "learning_rate": 1.0412612381133568e-06, "loss": 1.5536, "step": 24280 }, { "epoch": 4.132006464234073, "grad_norm": 6.823086261749268, "learning_rate": 1.037304112602665e-06, "loss": 1.6157, "step": 24290 }, { "epoch": 4.133707578463894, "grad_norm": 5.04664945602417, "learning_rate": 1.0333538919272803e-06, "loss": 1.4875, "step": 24300 }, { "epoch": 4.135408692693715, "grad_norm": 6.446927070617676, "learning_rate": 1.0294105808843993e-06, "loss": 1.5516, "step": 24310 }, { "epoch": 4.137109806923535, "grad_norm": 4.810828685760498, "learning_rate": 1.025474184262829e-06, "loss": 1.5258, "step": 24320 }, { "epoch": 4.138810921153356, "grad_norm": 6.347567081451416, "learning_rate": 1.021544706842977e-06, "loss": 1.6322, "step": 24330 }, { "epoch": 4.140512035383176, "grad_norm": 5.357848644256592, "learning_rate": 1.0176221533968506e-06, "loss": 1.5757, "step": 24340 }, { "epoch": 4.142213149612997, "grad_norm": 5.994540691375732, "learning_rate": 1.0137065286880458e-06, "loss": 1.5293, "step": 24350 }, { "epoch": 4.1439142638428175, "grad_norm": 5.068691253662109, "learning_rate": 1.0097978374717488e-06, "loss": 1.6628, "step": 24360 }, { "epoch": 4.1456153780726375, "grad_norm": 6.810344696044922, "learning_rate": 1.0058960844947197e-06, "loss": 1.5297, "step": 24370 }, { "epoch": 4.147316492302458, "grad_norm": 7.203075408935547, "learning_rate": 1.0020012744952977e-06, "loss": 1.5712, "step": 24380 }, { "epoch": 4.149017606532278, "grad_norm": 5.671009063720703, "learning_rate": 9.981134122033878e-07, "loss": 1.592, "step": 24390 }, { "epoch": 4.150718720762099, "grad_norm": 5.365944862365723, "learning_rate": 9.94232502340456e-07, "loss": 1.5454, "step": 24400 }, { "epoch": 4.15241983499192, "grad_norm": 7.130199432373047, "learning_rate": 9.903585496195326e-07, "loss": 1.4981, "step": 24410 }, { "epoch": 4.15412094922174, "grad_norm": 6.007693290710449, "learning_rate": 9.864915587451904e-07, "loss": 1.494, "step": 24420 }, { "epoch": 4.155822063451561, "grad_norm": 6.625998497009277, "learning_rate": 9.826315344135521e-07, "loss": 1.4852, "step": 24430 }, { "epoch": 4.157523177681381, "grad_norm": 7.0912065505981445, "learning_rate": 9.787784813122784e-07, "loss": 1.5454, "step": 24440 }, { "epoch": 4.159224291911202, "grad_norm": 6.79998254776001, "learning_rate": 9.749324041205656e-07, "loss": 1.5747, "step": 24450 }, { "epoch": 4.160925406141023, "grad_norm": 6.3917555809021, "learning_rate": 9.71093307509138e-07, "loss": 1.4357, "step": 24460 }, { "epoch": 4.162626520370843, "grad_norm": 5.7439775466918945, "learning_rate": 9.672611961402406e-07, "loss": 1.5676, "step": 24470 }, { "epoch": 4.164327634600664, "grad_norm": 5.881418704986572, "learning_rate": 9.634360746676387e-07, "loss": 1.615, "step": 24480 }, { "epoch": 4.166028748830484, "grad_norm": 6.05189847946167, "learning_rate": 9.596179477366064e-07, "loss": 1.6308, "step": 24490 }, { "epoch": 4.167729863060305, "grad_norm": 5.774824142456055, "learning_rate": 9.55806819983925e-07, "loss": 1.6272, "step": 24500 }, { "epoch": 4.1694309772901255, "grad_norm": 6.309668064117432, "learning_rate": 9.520026960378738e-07, "loss": 1.5436, "step": 24510 }, { "epoch": 4.1711320915199455, "grad_norm": 7.101531028747559, "learning_rate": 9.482055805182315e-07, "loss": 1.575, "step": 24520 }, { "epoch": 4.172833205749766, "grad_norm": 5.314475059509277, "learning_rate": 9.444154780362593e-07, "loss": 1.5636, "step": 24530 }, { "epoch": 4.174534319979586, "grad_norm": 6.091689586639404, "learning_rate": 9.406323931947083e-07, "loss": 1.5368, "step": 24540 }, { "epoch": 4.176235434209407, "grad_norm": 6.525472164154053, "learning_rate": 9.36856330587799e-07, "loss": 1.5087, "step": 24550 }, { "epoch": 4.177936548439227, "grad_norm": 7.5823540687561035, "learning_rate": 9.330872948012317e-07, "loss": 1.6523, "step": 24560 }, { "epoch": 4.179637662669048, "grad_norm": 6.965406894683838, "learning_rate": 9.293252904121677e-07, "loss": 1.5556, "step": 24570 }, { "epoch": 4.181338776898869, "grad_norm": 5.5527753829956055, "learning_rate": 9.255703219892354e-07, "loss": 1.6019, "step": 24580 }, { "epoch": 4.183039891128689, "grad_norm": 7.09881591796875, "learning_rate": 9.218223940925138e-07, "loss": 1.5558, "step": 24590 }, { "epoch": 4.18474100535851, "grad_norm": 6.202441215515137, "learning_rate": 9.180815112735318e-07, "loss": 1.618, "step": 24600 }, { "epoch": 4.18644211958833, "grad_norm": 6.6593098640441895, "learning_rate": 9.143476780752655e-07, "loss": 1.4606, "step": 24610 }, { "epoch": 4.188143233818151, "grad_norm": 5.991734981536865, "learning_rate": 9.106208990321262e-07, "loss": 1.5272, "step": 24620 }, { "epoch": 4.189844348047972, "grad_norm": 6.9438910484313965, "learning_rate": 9.069011786699657e-07, "loss": 1.5686, "step": 24630 }, { "epoch": 4.191545462277792, "grad_norm": 7.357669353485107, "learning_rate": 9.031885215060529e-07, "loss": 1.5204, "step": 24640 }, { "epoch": 4.193246576507613, "grad_norm": 5.462925910949707, "learning_rate": 8.994829320490877e-07, "loss": 1.5844, "step": 24650 }, { "epoch": 4.194947690737433, "grad_norm": 6.265537738800049, "learning_rate": 8.95784414799183e-07, "loss": 1.4904, "step": 24660 }, { "epoch": 4.1966488049672535, "grad_norm": 5.956068515777588, "learning_rate": 8.920929742478644e-07, "loss": 1.4445, "step": 24670 }, { "epoch": 4.198349919197074, "grad_norm": 6.320197105407715, "learning_rate": 8.884086148780609e-07, "loss": 1.6526, "step": 24680 }, { "epoch": 4.200051033426894, "grad_norm": 5.123306751251221, "learning_rate": 8.847313411641087e-07, "loss": 1.53, "step": 24690 }, { "epoch": 4.201752147656715, "grad_norm": 5.411904811859131, "learning_rate": 8.810611575717326e-07, "loss": 1.5991, "step": 24700 }, { "epoch": 4.203453261886535, "grad_norm": 6.095061779022217, "learning_rate": 8.773980685580491e-07, "loss": 1.4786, "step": 24710 }, { "epoch": 4.205154376116356, "grad_norm": 6.627599716186523, "learning_rate": 8.737420785715589e-07, "loss": 1.6051, "step": 24720 }, { "epoch": 4.206855490346177, "grad_norm": 8.003243446350098, "learning_rate": 8.700931920521412e-07, "loss": 1.511, "step": 24730 }, { "epoch": 4.208556604575997, "grad_norm": 5.817923069000244, "learning_rate": 8.664514134310501e-07, "loss": 1.6458, "step": 24740 }, { "epoch": 4.210257718805818, "grad_norm": 6.329773902893066, "learning_rate": 8.628167471309068e-07, "loss": 1.6663, "step": 24750 }, { "epoch": 4.211958833035638, "grad_norm": 4.948273181915283, "learning_rate": 8.591891975656939e-07, "loss": 1.6422, "step": 24760 }, { "epoch": 4.213659947265459, "grad_norm": 7.214920520782471, "learning_rate": 8.555687691407538e-07, "loss": 1.5631, "step": 24770 }, { "epoch": 4.21536106149528, "grad_norm": 6.368295669555664, "learning_rate": 8.519554662527801e-07, "loss": 1.6313, "step": 24780 }, { "epoch": 4.2170621757251, "grad_norm": 5.196382522583008, "learning_rate": 8.483492932898098e-07, "loss": 1.4689, "step": 24790 }, { "epoch": 4.218763289954921, "grad_norm": 6.417914867401123, "learning_rate": 8.447502546312278e-07, "loss": 1.517, "step": 24800 }, { "epoch": 4.220464404184741, "grad_norm": 6.153533458709717, "learning_rate": 8.41158354647751e-07, "loss": 1.6234, "step": 24810 }, { "epoch": 4.2221655184145614, "grad_norm": 6.462419033050537, "learning_rate": 8.375735977014251e-07, "loss": 1.5166, "step": 24820 }, { "epoch": 4.223866632644382, "grad_norm": 6.8873467445373535, "learning_rate": 8.339959881456272e-07, "loss": 1.5588, "step": 24830 }, { "epoch": 4.225567746874202, "grad_norm": 6.402502536773682, "learning_rate": 8.30425530325046e-07, "loss": 1.4579, "step": 24840 }, { "epoch": 4.227268861104023, "grad_norm": 7.729940414428711, "learning_rate": 8.268622285756913e-07, "loss": 1.4804, "step": 24850 }, { "epoch": 4.228969975333843, "grad_norm": 6.577713489532471, "learning_rate": 8.233060872248829e-07, "loss": 1.6403, "step": 24860 }, { "epoch": 4.230671089563664, "grad_norm": 5.388174057006836, "learning_rate": 8.19757110591242e-07, "loss": 1.5362, "step": 24870 }, { "epoch": 4.232372203793485, "grad_norm": 5.965421676635742, "learning_rate": 8.162153029846898e-07, "loss": 1.5833, "step": 24880 }, { "epoch": 4.234073318023305, "grad_norm": 7.016608715057373, "learning_rate": 8.126806687064414e-07, "loss": 1.6166, "step": 24890 }, { "epoch": 4.235774432253126, "grad_norm": 6.407740116119385, "learning_rate": 8.091532120490011e-07, "loss": 1.6687, "step": 24900 }, { "epoch": 4.237475546482946, "grad_norm": 6.694855690002441, "learning_rate": 8.056329372961547e-07, "loss": 1.5413, "step": 24910 }, { "epoch": 4.239176660712767, "grad_norm": 5.850477695465088, "learning_rate": 8.021198487229706e-07, "loss": 1.6718, "step": 24920 }, { "epoch": 4.240877774942588, "grad_norm": 4.856194019317627, "learning_rate": 7.98613950595784e-07, "loss": 1.5363, "step": 24930 }, { "epoch": 4.242578889172408, "grad_norm": 7.285855770111084, "learning_rate": 7.951152471722027e-07, "loss": 1.5563, "step": 24940 }, { "epoch": 4.2442800034022286, "grad_norm": 6.888695240020752, "learning_rate": 7.916237427010946e-07, "loss": 1.5032, "step": 24950 }, { "epoch": 4.2459811176320486, "grad_norm": 5.297257423400879, "learning_rate": 7.881394414225872e-07, "loss": 1.61, "step": 24960 }, { "epoch": 4.247682231861869, "grad_norm": 5.850154876708984, "learning_rate": 7.84662347568058e-07, "loss": 1.425, "step": 24970 }, { "epoch": 4.24938334609169, "grad_norm": 5.764167785644531, "learning_rate": 7.81192465360134e-07, "loss": 1.6219, "step": 24980 }, { "epoch": 4.25108446032151, "grad_norm": 5.481007099151611, "learning_rate": 7.777297990126833e-07, "loss": 1.5636, "step": 24990 }, { "epoch": 4.252785574551331, "grad_norm": 6.768319129943848, "learning_rate": 7.742743527308116e-07, "loss": 1.5378, "step": 25000 }, { "epoch": 4.254486688781151, "grad_norm": 6.511185169219971, "learning_rate": 7.708261307108543e-07, "loss": 1.6191, "step": 25010 }, { "epoch": 4.256187803010972, "grad_norm": 6.309399604797363, "learning_rate": 7.673851371403754e-07, "loss": 1.5455, "step": 25020 }, { "epoch": 4.257888917240793, "grad_norm": 6.184329032897949, "learning_rate": 7.639513761981605e-07, "loss": 1.481, "step": 25030 }, { "epoch": 4.259590031470613, "grad_norm": 6.8204545974731445, "learning_rate": 7.605248520542113e-07, "loss": 1.5367, "step": 25040 }, { "epoch": 4.261291145700434, "grad_norm": 5.148837089538574, "learning_rate": 7.571055688697419e-07, "loss": 1.5995, "step": 25050 }, { "epoch": 4.262992259930254, "grad_norm": 5.682942867279053, "learning_rate": 7.536935307971718e-07, "loss": 1.5473, "step": 25060 }, { "epoch": 4.264693374160075, "grad_norm": 6.7601165771484375, "learning_rate": 7.502887419801217e-07, "loss": 1.6096, "step": 25070 }, { "epoch": 4.266394488389896, "grad_norm": 4.368737697601318, "learning_rate": 7.468912065534099e-07, "loss": 1.5496, "step": 25080 }, { "epoch": 4.268095602619716, "grad_norm": 4.591710090637207, "learning_rate": 7.435009286430469e-07, "loss": 1.5739, "step": 25090 }, { "epoch": 4.2697967168495365, "grad_norm": 5.633373260498047, "learning_rate": 7.401179123662278e-07, "loss": 1.5376, "step": 25100 }, { "epoch": 4.2714978310793565, "grad_norm": 6.092051029205322, "learning_rate": 7.367421618313284e-07, "loss": 1.5793, "step": 25110 }, { "epoch": 4.273198945309177, "grad_norm": 6.855485916137695, "learning_rate": 7.333736811379033e-07, "loss": 1.5421, "step": 25120 }, { "epoch": 4.274900059538998, "grad_norm": 6.99765157699585, "learning_rate": 7.300124743766778e-07, "loss": 1.6597, "step": 25130 }, { "epoch": 4.276601173768818, "grad_norm": 6.900430679321289, "learning_rate": 7.266585456295398e-07, "loss": 1.5686, "step": 25140 }, { "epoch": 4.278302287998639, "grad_norm": 5.6722893714904785, "learning_rate": 7.233118989695459e-07, "loss": 1.5508, "step": 25150 }, { "epoch": 4.280003402228459, "grad_norm": 5.736631393432617, "learning_rate": 7.199725384609033e-07, "loss": 1.6568, "step": 25160 }, { "epoch": 4.28170451645828, "grad_norm": 7.063878536224365, "learning_rate": 7.166404681589739e-07, "loss": 1.469, "step": 25170 }, { "epoch": 4.283405630688101, "grad_norm": 6.601837635040283, "learning_rate": 7.133156921102655e-07, "loss": 1.5388, "step": 25180 }, { "epoch": 4.285106744917921, "grad_norm": 6.6246819496154785, "learning_rate": 7.099982143524243e-07, "loss": 1.5674, "step": 25190 }, { "epoch": 4.286807859147742, "grad_norm": 6.178937911987305, "learning_rate": 7.066880389142407e-07, "loss": 1.6288, "step": 25200 }, { "epoch": 4.288508973377562, "grad_norm": 5.929183006286621, "learning_rate": 7.033851698156318e-07, "loss": 1.563, "step": 25210 }, { "epoch": 4.290210087607383, "grad_norm": 6.338135719299316, "learning_rate": 7.000896110676427e-07, "loss": 1.5498, "step": 25220 }, { "epoch": 4.291911201837204, "grad_norm": 7.406379699707031, "learning_rate": 6.9680136667244e-07, "loss": 1.5654, "step": 25230 }, { "epoch": 4.293612316067024, "grad_norm": 5.393094539642334, "learning_rate": 6.935204406233082e-07, "loss": 1.5828, "step": 25240 }, { "epoch": 4.2953134302968445, "grad_norm": 4.678094387054443, "learning_rate": 6.902468369046441e-07, "loss": 1.5138, "step": 25250 }, { "epoch": 4.2970145445266645, "grad_norm": 5.554921627044678, "learning_rate": 6.869805594919559e-07, "loss": 1.4475, "step": 25260 }, { "epoch": 4.298715658756485, "grad_norm": 5.954467296600342, "learning_rate": 6.837216123518489e-07, "loss": 1.4953, "step": 25270 }, { "epoch": 4.300416772986306, "grad_norm": 5.233072280883789, "learning_rate": 6.804699994420301e-07, "loss": 1.555, "step": 25280 }, { "epoch": 4.302117887216126, "grad_norm": 4.819551944732666, "learning_rate": 6.772257247112979e-07, "loss": 1.5833, "step": 25290 }, { "epoch": 4.303819001445947, "grad_norm": 6.186118125915527, "learning_rate": 6.73988792099541e-07, "loss": 1.5618, "step": 25300 }, { "epoch": 4.305520115675767, "grad_norm": 5.4033966064453125, "learning_rate": 6.707592055377291e-07, "loss": 1.5009, "step": 25310 }, { "epoch": 4.307221229905588, "grad_norm": 5.955039978027344, "learning_rate": 6.675369689479125e-07, "loss": 1.5152, "step": 25320 }, { "epoch": 4.308922344135409, "grad_norm": 5.828554630279541, "learning_rate": 6.643220862432158e-07, "loss": 1.5833, "step": 25330 }, { "epoch": 4.310623458365229, "grad_norm": 5.943400859832764, "learning_rate": 6.61114561327833e-07, "loss": 1.648, "step": 25340 }, { "epoch": 4.31232457259505, "grad_norm": 7.454378128051758, "learning_rate": 6.579143980970211e-07, "loss": 1.5626, "step": 25350 }, { "epoch": 4.31402568682487, "grad_norm": 6.213706970214844, "learning_rate": 6.547216004370977e-07, "loss": 1.6978, "step": 25360 }, { "epoch": 4.315726801054691, "grad_norm": 5.878660678863525, "learning_rate": 6.51536172225438e-07, "loss": 1.5151, "step": 25370 }, { "epoch": 4.317427915284512, "grad_norm": 5.611740589141846, "learning_rate": 6.483581173304637e-07, "loss": 1.573, "step": 25380 }, { "epoch": 4.319129029514332, "grad_norm": 6.904658317565918, "learning_rate": 6.451874396116454e-07, "loss": 1.6441, "step": 25390 }, { "epoch": 4.3208301437441525, "grad_norm": 5.6536030769348145, "learning_rate": 6.420241429194932e-07, "loss": 1.542, "step": 25400 }, { "epoch": 4.3225312579739725, "grad_norm": 6.491143226623535, "learning_rate": 6.388682310955532e-07, "loss": 1.5492, "step": 25410 }, { "epoch": 4.324232372203793, "grad_norm": 6.302955150604248, "learning_rate": 6.357197079724048e-07, "loss": 1.5434, "step": 25420 }, { "epoch": 4.325933486433614, "grad_norm": 5.890113830566406, "learning_rate": 6.325785773736533e-07, "loss": 1.4887, "step": 25430 }, { "epoch": 4.327634600663434, "grad_norm": 5.251640319824219, "learning_rate": 6.294448431139262e-07, "loss": 1.5869, "step": 25440 }, { "epoch": 4.329335714893255, "grad_norm": 5.661231994628906, "learning_rate": 6.263185089988697e-07, "loss": 1.6795, "step": 25450 }, { "epoch": 4.331036829123075, "grad_norm": 5.819065093994141, "learning_rate": 6.231995788251436e-07, "loss": 1.5383, "step": 25460 }, { "epoch": 4.332737943352896, "grad_norm": 6.791083335876465, "learning_rate": 6.200880563804138e-07, "loss": 1.4843, "step": 25470 }, { "epoch": 4.334439057582717, "grad_norm": 6.878266334533691, "learning_rate": 6.169839454433517e-07, "loss": 1.5374, "step": 25480 }, { "epoch": 4.336140171812537, "grad_norm": 6.439700603485107, "learning_rate": 6.138872497836306e-07, "loss": 1.5041, "step": 25490 }, { "epoch": 4.337841286042358, "grad_norm": 6.028425693511963, "learning_rate": 6.107979731619145e-07, "loss": 1.6296, "step": 25500 }, { "epoch": 4.339542400272178, "grad_norm": 6.454329490661621, "learning_rate": 6.077161193298606e-07, "loss": 1.5674, "step": 25510 }, { "epoch": 4.341243514501999, "grad_norm": 6.224812030792236, "learning_rate": 6.04641692030108e-07, "loss": 1.5929, "step": 25520 }, { "epoch": 4.34294462873182, "grad_norm": 7.219799995422363, "learning_rate": 6.0157469499628e-07, "loss": 1.5856, "step": 25530 }, { "epoch": 4.34464574296164, "grad_norm": 6.953548908233643, "learning_rate": 5.98515131952978e-07, "loss": 1.5167, "step": 25540 }, { "epoch": 4.3463468571914605, "grad_norm": 6.339746475219727, "learning_rate": 5.954630066157725e-07, "loss": 1.5609, "step": 25550 }, { "epoch": 4.3480479714212805, "grad_norm": 7.047362327575684, "learning_rate": 5.924183226912013e-07, "loss": 1.5132, "step": 25560 }, { "epoch": 4.349749085651101, "grad_norm": 7.085602760314941, "learning_rate": 5.893810838767672e-07, "loss": 1.5972, "step": 25570 }, { "epoch": 4.351450199880922, "grad_norm": 7.212773323059082, "learning_rate": 5.863512938609308e-07, "loss": 1.5564, "step": 25580 }, { "epoch": 4.353151314110742, "grad_norm": 6.354076862335205, "learning_rate": 5.833289563231057e-07, "loss": 1.4931, "step": 25590 }, { "epoch": 4.354852428340563, "grad_norm": 5.628859043121338, "learning_rate": 5.803140749336587e-07, "loss": 1.5475, "step": 25600 }, { "epoch": 4.356553542570383, "grad_norm": 6.166843891143799, "learning_rate": 5.773066533538966e-07, "loss": 1.6855, "step": 25610 }, { "epoch": 4.358254656800204, "grad_norm": 6.750080585479736, "learning_rate": 5.743066952360714e-07, "loss": 1.5174, "step": 25620 }, { "epoch": 4.359955771030025, "grad_norm": 5.141016483306885, "learning_rate": 5.713142042233677e-07, "loss": 1.4891, "step": 25630 }, { "epoch": 4.361656885259845, "grad_norm": 6.939876556396484, "learning_rate": 5.683291839499045e-07, "loss": 1.6594, "step": 25640 }, { "epoch": 4.363357999489666, "grad_norm": 6.1759867668151855, "learning_rate": 5.653516380407264e-07, "loss": 1.5642, "step": 25650 }, { "epoch": 4.365059113719486, "grad_norm": 5.9017181396484375, "learning_rate": 5.623815701118051e-07, "loss": 1.5946, "step": 25660 }, { "epoch": 4.366760227949307, "grad_norm": 5.6289472579956055, "learning_rate": 5.594189837700257e-07, "loss": 1.5393, "step": 25670 }, { "epoch": 4.368461342179128, "grad_norm": 6.2212700843811035, "learning_rate": 5.564638826131908e-07, "loss": 1.5124, "step": 25680 }, { "epoch": 4.370162456408948, "grad_norm": 5.144286632537842, "learning_rate": 5.53516270230012e-07, "loss": 1.5942, "step": 25690 }, { "epoch": 4.3718635706387685, "grad_norm": 6.080607891082764, "learning_rate": 5.505761502001057e-07, "loss": 1.5799, "step": 25700 }, { "epoch": 4.3735646848685885, "grad_norm": 5.448061943054199, "learning_rate": 5.476435260939898e-07, "loss": 1.587, "step": 25710 }, { "epoch": 4.375265799098409, "grad_norm": 6.332202911376953, "learning_rate": 5.447184014730809e-07, "loss": 1.5345, "step": 25720 }, { "epoch": 4.37696691332823, "grad_norm": 4.908957481384277, "learning_rate": 5.418007798896856e-07, "loss": 1.6112, "step": 25730 }, { "epoch": 4.37866802755805, "grad_norm": 6.102475643157959, "learning_rate": 5.388906648870004e-07, "loss": 1.5951, "step": 25740 }, { "epoch": 4.380369141787871, "grad_norm": 6.507709503173828, "learning_rate": 5.359880599991039e-07, "loss": 1.4762, "step": 25750 }, { "epoch": 4.382070256017691, "grad_norm": 6.359583377838135, "learning_rate": 5.330929687509564e-07, "loss": 1.5631, "step": 25760 }, { "epoch": 4.383771370247512, "grad_norm": 6.574647903442383, "learning_rate": 5.302053946583929e-07, "loss": 1.5766, "step": 25770 }, { "epoch": 4.385472484477333, "grad_norm": 6.09061861038208, "learning_rate": 5.273253412281195e-07, "loss": 1.5324, "step": 25780 }, { "epoch": 4.387173598707153, "grad_norm": 6.537026882171631, "learning_rate": 5.244528119577085e-07, "loss": 1.5823, "step": 25790 }, { "epoch": 4.388874712936974, "grad_norm": 5.031495094299316, "learning_rate": 5.215878103355952e-07, "loss": 1.4866, "step": 25800 }, { "epoch": 4.390575827166794, "grad_norm": 6.294393062591553, "learning_rate": 5.187303398410738e-07, "loss": 1.5707, "step": 25810 }, { "epoch": 4.392276941396615, "grad_norm": 7.1139631271362305, "learning_rate": 5.158804039442892e-07, "loss": 1.5829, "step": 25820 }, { "epoch": 4.393978055626436, "grad_norm": 6.569021224975586, "learning_rate": 5.130380061062422e-07, "loss": 1.5318, "step": 25830 }, { "epoch": 4.395679169856256, "grad_norm": 5.906765460968018, "learning_rate": 5.102031497787743e-07, "loss": 1.4922, "step": 25840 }, { "epoch": 4.3973802840860765, "grad_norm": 6.8976616859436035, "learning_rate": 5.073758384045713e-07, "loss": 1.5555, "step": 25850 }, { "epoch": 4.3990813983158965, "grad_norm": 6.1184983253479, "learning_rate": 5.045560754171539e-07, "loss": 1.5352, "step": 25860 }, { "epoch": 4.400782512545717, "grad_norm": 5.493861198425293, "learning_rate": 5.01743864240879e-07, "loss": 1.5153, "step": 25870 }, { "epoch": 4.402483626775538, "grad_norm": 4.283514022827148, "learning_rate": 4.989392082909284e-07, "loss": 1.6017, "step": 25880 }, { "epoch": 4.404184741005358, "grad_norm": 6.298120498657227, "learning_rate": 4.961421109733143e-07, "loss": 1.4176, "step": 25890 }, { "epoch": 4.405885855235179, "grad_norm": 6.147403717041016, "learning_rate": 4.933525756848641e-07, "loss": 1.5778, "step": 25900 }, { "epoch": 4.407586969464999, "grad_norm": 5.236167907714844, "learning_rate": 4.905706058132251e-07, "loss": 1.6218, "step": 25910 }, { "epoch": 4.40928808369482, "grad_norm": 6.064047813415527, "learning_rate": 4.877962047368553e-07, "loss": 1.5278, "step": 25920 }, { "epoch": 4.410989197924641, "grad_norm": 5.734475135803223, "learning_rate": 4.850293758250207e-07, "loss": 1.5434, "step": 25930 }, { "epoch": 4.412690312154461, "grad_norm": 7.101593017578125, "learning_rate": 4.82270122437796e-07, "loss": 1.5119, "step": 25940 }, { "epoch": 4.414391426384282, "grad_norm": 5.9773969650268555, "learning_rate": 4.795184479260509e-07, "loss": 1.5912, "step": 25950 }, { "epoch": 4.416092540614102, "grad_norm": 6.129922389984131, "learning_rate": 4.7677435563145313e-07, "loss": 1.4223, "step": 25960 }, { "epoch": 4.417793654843923, "grad_norm": 6.448030948638916, "learning_rate": 4.7403784888646313e-07, "loss": 1.5637, "step": 25970 }, { "epoch": 4.419494769073744, "grad_norm": 5.361682415008545, "learning_rate": 4.7130893101432745e-07, "loss": 1.5282, "step": 25980 }, { "epoch": 4.421195883303564, "grad_norm": 6.881174087524414, "learning_rate": 4.6858760532907837e-07, "loss": 1.6037, "step": 25990 }, { "epoch": 4.4228969975333845, "grad_norm": 6.652550220489502, "learning_rate": 4.658738751355284e-07, "loss": 1.5783, "step": 26000 }, { "epoch": 4.4245981117632045, "grad_norm": 6.802812099456787, "learning_rate": 4.631677437292645e-07, "loss": 1.5237, "step": 26010 }, { "epoch": 4.426299225993025, "grad_norm": 5.49721622467041, "learning_rate": 4.604692143966459e-07, "loss": 1.4783, "step": 26020 }, { "epoch": 4.428000340222846, "grad_norm": 6.297482967376709, "learning_rate": 4.5777829041480083e-07, "loss": 1.5841, "step": 26030 }, { "epoch": 4.429701454452666, "grad_norm": 7.4595537185668945, "learning_rate": 4.550949750516208e-07, "loss": 1.6048, "step": 26040 }, { "epoch": 4.431402568682487, "grad_norm": 5.517505645751953, "learning_rate": 4.5241927156575604e-07, "loss": 1.4844, "step": 26050 }, { "epoch": 4.433103682912307, "grad_norm": 6.22703218460083, "learning_rate": 4.4975118320661623e-07, "loss": 1.6691, "step": 26060 }, { "epoch": 4.434804797142128, "grad_norm": 7.3646039962768555, "learning_rate": 4.470907132143594e-07, "loss": 1.5742, "step": 26070 }, { "epoch": 4.436505911371949, "grad_norm": 6.23193883895874, "learning_rate": 4.444378648198939e-07, "loss": 1.5626, "step": 26080 }, { "epoch": 4.438207025601769, "grad_norm": 6.38597297668457, "learning_rate": 4.4179264124487213e-07, "loss": 1.601, "step": 26090 }, { "epoch": 4.43990813983159, "grad_norm": 5.653160572052002, "learning_rate": 4.391550457016864e-07, "loss": 1.5889, "step": 26100 }, { "epoch": 4.44160925406141, "grad_norm": 5.833569049835205, "learning_rate": 4.365250813934646e-07, "loss": 1.6619, "step": 26110 }, { "epoch": 4.443310368291231, "grad_norm": 5.669712066650391, "learning_rate": 4.3390275151406945e-07, "loss": 1.5954, "step": 26120 }, { "epoch": 4.445011482521052, "grad_norm": 6.262190818786621, "learning_rate": 4.312880592480897e-07, "loss": 1.4984, "step": 26130 }, { "epoch": 4.446712596750872, "grad_norm": 5.5008344650268555, "learning_rate": 4.286810077708405e-07, "loss": 1.4724, "step": 26140 }, { "epoch": 4.4484137109806925, "grad_norm": 5.778617858886719, "learning_rate": 4.2608160024835784e-07, "loss": 1.5239, "step": 26150 }, { "epoch": 4.4501148252105125, "grad_norm": 6.689032554626465, "learning_rate": 4.23489839837393e-07, "loss": 1.6242, "step": 26160 }, { "epoch": 4.451815939440333, "grad_norm": 5.875165939331055, "learning_rate": 4.2090572968541434e-07, "loss": 1.5732, "step": 26170 }, { "epoch": 4.453517053670154, "grad_norm": 7.6699748039245605, "learning_rate": 4.1832927293059584e-07, "loss": 1.5344, "step": 26180 }, { "epoch": 4.455218167899974, "grad_norm": 6.2144951820373535, "learning_rate": 4.157604727018209e-07, "loss": 1.5593, "step": 26190 }, { "epoch": 4.456919282129795, "grad_norm": 6.983086109161377, "learning_rate": 4.1319933211866903e-07, "loss": 1.5265, "step": 26200 }, { "epoch": 4.458620396359615, "grad_norm": 5.426665306091309, "learning_rate": 4.1064585429142326e-07, "loss": 1.5845, "step": 26210 }, { "epoch": 4.460321510589436, "grad_norm": 6.375173568725586, "learning_rate": 4.081000423210565e-07, "loss": 1.5287, "step": 26220 }, { "epoch": 4.462022624819257, "grad_norm": 5.231582164764404, "learning_rate": 4.0556189929923794e-07, "loss": 1.5301, "step": 26230 }, { "epoch": 4.463723739049077, "grad_norm": 6.492799282073975, "learning_rate": 4.0303142830831763e-07, "loss": 1.5539, "step": 26240 }, { "epoch": 4.465424853278898, "grad_norm": 6.998684406280518, "learning_rate": 4.00508632421332e-07, "loss": 1.5151, "step": 26250 }, { "epoch": 4.467125967508718, "grad_norm": 6.301689147949219, "learning_rate": 3.97993514701995e-07, "loss": 1.5371, "step": 26260 }, { "epoch": 4.468827081738539, "grad_norm": 6.627586364746094, "learning_rate": 3.954860782046977e-07, "loss": 1.5115, "step": 26270 }, { "epoch": 4.47052819596836, "grad_norm": 6.861813545227051, "learning_rate": 3.929863259745006e-07, "loss": 1.5546, "step": 26280 }, { "epoch": 4.47222931019818, "grad_norm": 6.784213542938232, "learning_rate": 3.9049426104713526e-07, "loss": 1.5602, "step": 26290 }, { "epoch": 4.4739304244280005, "grad_norm": 6.133279800415039, "learning_rate": 3.880098864489959e-07, "loss": 1.5719, "step": 26300 }, { "epoch": 4.4756315386578205, "grad_norm": 6.854134559631348, "learning_rate": 3.855332051971385e-07, "loss": 1.5037, "step": 26310 }, { "epoch": 4.477332652887641, "grad_norm": 5.71651029586792, "learning_rate": 3.830642202992738e-07, "loss": 1.6071, "step": 26320 }, { "epoch": 4.479033767117462, "grad_norm": 6.421847820281982, "learning_rate": 3.8060293475376833e-07, "loss": 1.4797, "step": 26330 }, { "epoch": 4.480734881347282, "grad_norm": 6.611272811889648, "learning_rate": 3.781493515496392e-07, "loss": 1.5943, "step": 26340 }, { "epoch": 4.482435995577103, "grad_norm": 6.366344928741455, "learning_rate": 3.757034736665479e-07, "loss": 1.5623, "step": 26350 }, { "epoch": 4.484137109806923, "grad_norm": 6.059317111968994, "learning_rate": 3.7326530407479763e-07, "loss": 1.6145, "step": 26360 }, { "epoch": 4.485838224036744, "grad_norm": 6.124782562255859, "learning_rate": 3.70834845735333e-07, "loss": 1.5629, "step": 26370 }, { "epoch": 4.487539338266565, "grad_norm": 4.732071399688721, "learning_rate": 3.684121015997327e-07, "loss": 1.4886, "step": 26380 }, { "epoch": 4.489240452496385, "grad_norm": 5.714395999908447, "learning_rate": 3.6599707461020724e-07, "loss": 1.5639, "step": 26390 }, { "epoch": 4.490941566726206, "grad_norm": 5.7255024909973145, "learning_rate": 3.635897676995946e-07, "loss": 1.4485, "step": 26400 }, { "epoch": 4.492642680956026, "grad_norm": 6.477797985076904, "learning_rate": 3.6119018379135927e-07, "loss": 1.5884, "step": 26410 }, { "epoch": 4.494343795185847, "grad_norm": 6.513632774353027, "learning_rate": 3.587983257995856e-07, "loss": 1.5626, "step": 26420 }, { "epoch": 4.496044909415668, "grad_norm": 6.233043193817139, "learning_rate": 3.5641419662897493e-07, "loss": 1.5763, "step": 26430 }, { "epoch": 4.497746023645488, "grad_norm": 6.740249156951904, "learning_rate": 3.54037799174844e-07, "loss": 1.5187, "step": 26440 }, { "epoch": 4.4994471378753085, "grad_norm": 5.980910778045654, "learning_rate": 3.516691363231181e-07, "loss": 1.4677, "step": 26450 }, { "epoch": 4.5011482521051285, "grad_norm": 6.505252838134766, "learning_rate": 3.4930821095033297e-07, "loss": 1.6046, "step": 26460 }, { "epoch": 4.502849366334949, "grad_norm": 5.84398889541626, "learning_rate": 3.4695502592362467e-07, "loss": 1.5715, "step": 26470 }, { "epoch": 4.50455048056477, "grad_norm": 6.0322747230529785, "learning_rate": 3.4460958410073116e-07, "loss": 1.6288, "step": 26480 }, { "epoch": 4.50625159479459, "grad_norm": 5.351884365081787, "learning_rate": 3.422718883299843e-07, "loss": 1.5561, "step": 26490 }, { "epoch": 4.507952709024411, "grad_norm": 5.781704425811768, "learning_rate": 3.399419414503105e-07, "loss": 1.5336, "step": 26500 }, { "epoch": 4.509653823254231, "grad_norm": 5.929610729217529, "learning_rate": 3.376197462912291e-07, "loss": 1.4502, "step": 26510 }, { "epoch": 4.511354937484052, "grad_norm": 6.840120792388916, "learning_rate": 3.353053056728412e-07, "loss": 1.6567, "step": 26520 }, { "epoch": 4.513056051713873, "grad_norm": 6.549271583557129, "learning_rate": 3.329986224058321e-07, "loss": 1.5748, "step": 26530 }, { "epoch": 4.514757165943693, "grad_norm": 6.0582499504089355, "learning_rate": 3.3069969929146656e-07, "loss": 1.5188, "step": 26540 }, { "epoch": 4.516458280173514, "grad_norm": 5.637298107147217, "learning_rate": 3.284085391215854e-07, "loss": 1.6182, "step": 26550 }, { "epoch": 4.518159394403334, "grad_norm": 4.3700785636901855, "learning_rate": 3.26125144678602e-07, "loss": 1.5956, "step": 26560 }, { "epoch": 4.519860508633155, "grad_norm": 5.547298908233643, "learning_rate": 3.2407673166848086e-07, "loss": 1.5279, "step": 26570 }, { "epoch": 4.521561622862976, "grad_norm": 5.613595485687256, "learning_rate": 3.218080997383742e-07, "loss": 1.586, "step": 26580 }, { "epoch": 4.523262737092796, "grad_norm": 5.9025492668151855, "learning_rate": 3.1954724155082025e-07, "loss": 1.6782, "step": 26590 }, { "epoch": 4.5249638513226165, "grad_norm": 5.403884410858154, "learning_rate": 3.1729415985143313e-07, "loss": 1.5698, "step": 26600 }, { "epoch": 4.5266649655524365, "grad_norm": 6.578800201416016, "learning_rate": 3.150488573763856e-07, "loss": 1.6192, "step": 26610 }, { "epoch": 4.528366079782257, "grad_norm": 5.751435279846191, "learning_rate": 3.128113368523995e-07, "loss": 1.4884, "step": 26620 }, { "epoch": 4.530067194012078, "grad_norm": 6.38753080368042, "learning_rate": 3.105816009967472e-07, "loss": 1.4877, "step": 26630 }, { "epoch": 4.531768308241898, "grad_norm": 5.90024471282959, "learning_rate": 3.083596525172483e-07, "loss": 1.6379, "step": 26640 }, { "epoch": 4.533469422471719, "grad_norm": 6.648676872253418, "learning_rate": 3.061454941122634e-07, "loss": 1.6367, "step": 26650 }, { "epoch": 4.535170536701539, "grad_norm": 7.269857406616211, "learning_rate": 3.039391284706961e-07, "loss": 1.5853, "step": 26660 }, { "epoch": 4.53687165093136, "grad_norm": 5.743645668029785, "learning_rate": 3.0174055827198176e-07, "loss": 1.5216, "step": 26670 }, { "epoch": 4.538572765161181, "grad_norm": 6.32423734664917, "learning_rate": 2.995497861860935e-07, "loss": 1.5555, "step": 26680 }, { "epoch": 4.540273879391001, "grad_norm": 6.08209753036499, "learning_rate": 2.9736681487353105e-07, "loss": 1.5388, "step": 26690 }, { "epoch": 4.541974993620822, "grad_norm": 6.673112869262695, "learning_rate": 2.951916469853215e-07, "loss": 1.4634, "step": 26700 }, { "epoch": 4.543676107850642, "grad_norm": 5.572475433349609, "learning_rate": 2.930242851630162e-07, "loss": 1.6853, "step": 26710 }, { "epoch": 4.545377222080463, "grad_norm": 7.366125583648682, "learning_rate": 2.9086473203868655e-07, "loss": 1.5605, "step": 26720 }, { "epoch": 4.547078336310284, "grad_norm": 5.890539646148682, "learning_rate": 2.8871299023492066e-07, "loss": 1.4455, "step": 26730 }, { "epoch": 4.548779450540104, "grad_norm": 6.030550003051758, "learning_rate": 2.865690623648207e-07, "loss": 1.5712, "step": 26740 }, { "epoch": 4.5504805647699245, "grad_norm": 5.688420295715332, "learning_rate": 2.8443295103199955e-07, "loss": 1.5425, "step": 26750 }, { "epoch": 4.5521816789997445, "grad_norm": 6.9885969161987305, "learning_rate": 2.8230465883057673e-07, "loss": 1.5411, "step": 26760 }, { "epoch": 4.553882793229565, "grad_norm": 6.742706775665283, "learning_rate": 2.801841883451751e-07, "loss": 1.5059, "step": 26770 }, { "epoch": 4.555583907459386, "grad_norm": 6.455204963684082, "learning_rate": 2.7807154215092315e-07, "loss": 1.6318, "step": 26780 }, { "epoch": 4.557285021689206, "grad_norm": 6.3332438468933105, "learning_rate": 2.759667228134441e-07, "loss": 1.4313, "step": 26790 }, { "epoch": 4.558986135919027, "grad_norm": 5.9591593742370605, "learning_rate": 2.738697328888554e-07, "loss": 1.6398, "step": 26800 }, { "epoch": 4.560687250148847, "grad_norm": 5.817910671234131, "learning_rate": 2.717805749237684e-07, "loss": 1.6467, "step": 26810 }, { "epoch": 4.562388364378668, "grad_norm": 6.373818874359131, "learning_rate": 2.6969925145528293e-07, "loss": 1.6025, "step": 26820 }, { "epoch": 4.564089478608489, "grad_norm": 6.842092037200928, "learning_rate": 2.6762576501098285e-07, "loss": 1.5696, "step": 26830 }, { "epoch": 4.565790592838309, "grad_norm": 6.427211284637451, "learning_rate": 2.655601181089368e-07, "loss": 1.606, "step": 26840 }, { "epoch": 4.56749170706813, "grad_norm": 6.684589862823486, "learning_rate": 2.6350231325769386e-07, "loss": 1.5685, "step": 26850 }, { "epoch": 4.56919282129795, "grad_norm": 6.076772689819336, "learning_rate": 2.6145235295627427e-07, "loss": 1.5562, "step": 26860 }, { "epoch": 4.570893935527771, "grad_norm": 6.500069618225098, "learning_rate": 2.594102396941772e-07, "loss": 1.5452, "step": 26870 }, { "epoch": 4.572595049757592, "grad_norm": 6.09898042678833, "learning_rate": 2.573759759513694e-07, "loss": 1.5775, "step": 26880 }, { "epoch": 4.574296163987412, "grad_norm": 5.9650492668151855, "learning_rate": 2.553495641982861e-07, "loss": 1.589, "step": 26890 }, { "epoch": 4.5759972782172325, "grad_norm": 6.79572057723999, "learning_rate": 2.533310068958285e-07, "loss": 1.5511, "step": 26900 }, { "epoch": 4.5776983924470525, "grad_norm": 6.421900749206543, "learning_rate": 2.5132030649535657e-07, "loss": 1.5457, "step": 26910 }, { "epoch": 4.579399506676873, "grad_norm": 6.944169521331787, "learning_rate": 2.4931746543868967e-07, "loss": 1.6124, "step": 26920 }, { "epoch": 4.581100620906694, "grad_norm": 6.094720363616943, "learning_rate": 2.4732248615810356e-07, "loss": 1.4469, "step": 26930 }, { "epoch": 4.582801735136514, "grad_norm": 5.676002502441406, "learning_rate": 2.4533537107632523e-07, "loss": 1.5275, "step": 26940 }, { "epoch": 4.584502849366335, "grad_norm": 6.113831996917725, "learning_rate": 2.433561226065321e-07, "loss": 1.5873, "step": 26950 }, { "epoch": 4.586203963596155, "grad_norm": 6.3694000244140625, "learning_rate": 2.413847431523476e-07, "loss": 1.5317, "step": 26960 }, { "epoch": 4.587905077825976, "grad_norm": 5.022193908691406, "learning_rate": 2.3942123510784047e-07, "loss": 1.6011, "step": 26970 }, { "epoch": 4.589606192055797, "grad_norm": 6.505123138427734, "learning_rate": 2.3746560085751847e-07, "loss": 1.5448, "step": 26980 }, { "epoch": 4.591307306285617, "grad_norm": 7.246630668640137, "learning_rate": 2.3551784277632884e-07, "loss": 1.5249, "step": 26990 }, { "epoch": 4.593008420515438, "grad_norm": 5.185988426208496, "learning_rate": 2.3357796322965128e-07, "loss": 1.5699, "step": 27000 }, { "epoch": 4.594709534745258, "grad_norm": 7.010645389556885, "learning_rate": 2.3164596457330253e-07, "loss": 1.4652, "step": 27010 }, { "epoch": 4.596410648975079, "grad_norm": 6.542299270629883, "learning_rate": 2.2972184915352362e-07, "loss": 1.4913, "step": 27020 }, { "epoch": 4.5981117632049, "grad_norm": 7.340641498565674, "learning_rate": 2.2780561930698522e-07, "loss": 1.5091, "step": 27030 }, { "epoch": 4.59981287743472, "grad_norm": 6.255001544952393, "learning_rate": 2.2589727736078071e-07, "loss": 1.5317, "step": 27040 }, { "epoch": 4.6015139916645404, "grad_norm": 5.629670143127441, "learning_rate": 2.2399682563242422e-07, "loss": 1.6179, "step": 27050 }, { "epoch": 4.6032151058943604, "grad_norm": 6.794814109802246, "learning_rate": 2.221042664298459e-07, "loss": 1.4689, "step": 27060 }, { "epoch": 4.604916220124181, "grad_norm": 5.563018321990967, "learning_rate": 2.2021960205139574e-07, "loss": 1.5535, "step": 27070 }, { "epoch": 4.606617334354002, "grad_norm": 6.172718524932861, "learning_rate": 2.183428347858325e-07, "loss": 1.5345, "step": 27080 }, { "epoch": 4.608318448583822, "grad_norm": 6.285296440124512, "learning_rate": 2.1647396691232645e-07, "loss": 1.5687, "step": 27090 }, { "epoch": 4.610019562813643, "grad_norm": 6.232929229736328, "learning_rate": 2.1461300070045285e-07, "loss": 1.5277, "step": 27100 }, { "epoch": 4.611720677043463, "grad_norm": 5.748643398284912, "learning_rate": 2.1275993841019192e-07, "loss": 1.4778, "step": 27110 }, { "epoch": 4.613421791273284, "grad_norm": 5.501770973205566, "learning_rate": 2.1091478229192721e-07, "loss": 1.5975, "step": 27120 }, { "epoch": 4.615122905503105, "grad_norm": 5.694693088531494, "learning_rate": 2.0907753458643836e-07, "loss": 1.4357, "step": 27130 }, { "epoch": 4.616824019732925, "grad_norm": 7.41944694519043, "learning_rate": 2.0724819752490235e-07, "loss": 1.5026, "step": 27140 }, { "epoch": 4.618525133962746, "grad_norm": 5.143401622772217, "learning_rate": 2.0542677332888775e-07, "loss": 1.5638, "step": 27150 }, { "epoch": 4.620226248192566, "grad_norm": 6.477053165435791, "learning_rate": 2.0361326421035533e-07, "loss": 1.5626, "step": 27160 }, { "epoch": 4.621927362422387, "grad_norm": 6.121665954589844, "learning_rate": 2.0180767237165135e-07, "loss": 1.6153, "step": 27170 }, { "epoch": 4.6236284766522076, "grad_norm": 5.462719440460205, "learning_rate": 2.000100000055121e-07, "loss": 1.5212, "step": 27180 }, { "epoch": 4.6253295908820276, "grad_norm": 6.056469440460205, "learning_rate": 1.982202492950519e-07, "loss": 1.5548, "step": 27190 }, { "epoch": 4.627030705111848, "grad_norm": 6.080183982849121, "learning_rate": 1.9643842241376537e-07, "loss": 1.6376, "step": 27200 }, { "epoch": 4.628731819341668, "grad_norm": 5.385749340057373, "learning_rate": 1.9466452152552673e-07, "loss": 1.5231, "step": 27210 }, { "epoch": 4.630432933571489, "grad_norm": 7.34156608581543, "learning_rate": 1.928985487845822e-07, "loss": 1.6556, "step": 27220 }, { "epoch": 4.63213404780131, "grad_norm": 6.492604732513428, "learning_rate": 1.911405063355509e-07, "loss": 1.6597, "step": 27230 }, { "epoch": 4.63383516203113, "grad_norm": 6.061577796936035, "learning_rate": 1.8939039631342245e-07, "loss": 1.5342, "step": 27240 }, { "epoch": 4.635536276260951, "grad_norm": 5.184089660644531, "learning_rate": 1.8764822084355118e-07, "loss": 1.5325, "step": 27250 }, { "epoch": 4.637237390490771, "grad_norm": 6.684836387634277, "learning_rate": 1.8591398204165678e-07, "loss": 1.4896, "step": 27260 }, { "epoch": 4.638938504720592, "grad_norm": 6.151313304901123, "learning_rate": 1.841876820138205e-07, "loss": 1.4978, "step": 27270 }, { "epoch": 4.640639618950413, "grad_norm": 5.921164512634277, "learning_rate": 1.8246932285648228e-07, "loss": 1.6357, "step": 27280 }, { "epoch": 4.642340733180233, "grad_norm": 5.848247528076172, "learning_rate": 1.8075890665644e-07, "loss": 1.5606, "step": 27290 }, { "epoch": 4.644041847410054, "grad_norm": 5.273307800292969, "learning_rate": 1.7905643549084268e-07, "loss": 1.4879, "step": 27300 }, { "epoch": 4.645742961639874, "grad_norm": 6.233772277832031, "learning_rate": 1.773619114271942e-07, "loss": 1.526, "step": 27310 }, { "epoch": 4.647444075869695, "grad_norm": 6.645590782165527, "learning_rate": 1.7567533652334457e-07, "loss": 1.6352, "step": 27320 }, { "epoch": 4.6491451900995155, "grad_norm": 4.733581066131592, "learning_rate": 1.7399671282749103e-07, "loss": 1.5203, "step": 27330 }, { "epoch": 4.6508463043293355, "grad_norm": 6.412637233734131, "learning_rate": 1.7232604237817623e-07, "loss": 1.5169, "step": 27340 }, { "epoch": 4.652547418559156, "grad_norm": 5.247931480407715, "learning_rate": 1.7066332720428192e-07, "loss": 1.5933, "step": 27350 }, { "epoch": 4.654248532788976, "grad_norm": 6.40666389465332, "learning_rate": 1.6900856932503066e-07, "loss": 1.5831, "step": 27360 }, { "epoch": 4.655949647018797, "grad_norm": 6.300868511199951, "learning_rate": 1.6736177074998052e-07, "loss": 1.4672, "step": 27370 }, { "epoch": 4.657650761248618, "grad_norm": 7.2393035888671875, "learning_rate": 1.65722933479025e-07, "loss": 1.5738, "step": 27380 }, { "epoch": 4.659351875478438, "grad_norm": 6.379903793334961, "learning_rate": 1.6409205950238725e-07, "loss": 1.5274, "step": 27390 }, { "epoch": 4.661052989708259, "grad_norm": 4.601118087768555, "learning_rate": 1.6246915080062138e-07, "loss": 1.6328, "step": 27400 }, { "epoch": 4.662754103938079, "grad_norm": 5.284566402435303, "learning_rate": 1.6085420934460806e-07, "loss": 1.5417, "step": 27410 }, { "epoch": 4.6644552181679, "grad_norm": 8.52403450012207, "learning_rate": 1.5924723709555303e-07, "loss": 1.5184, "step": 27420 }, { "epoch": 4.666156332397721, "grad_norm": 6.385263442993164, "learning_rate": 1.5764823600498187e-07, "loss": 1.4659, "step": 27430 }, { "epoch": 4.667857446627541, "grad_norm": 6.358104705810547, "learning_rate": 1.5605720801474369e-07, "loss": 1.6184, "step": 27440 }, { "epoch": 4.669558560857362, "grad_norm": 6.500202178955078, "learning_rate": 1.5447415505699927e-07, "loss": 1.617, "step": 27450 }, { "epoch": 4.671259675087182, "grad_norm": 5.578927993774414, "learning_rate": 1.5289907905422921e-07, "loss": 1.4817, "step": 27460 }, { "epoch": 4.672960789317003, "grad_norm": 5.1049604415893555, "learning_rate": 1.5133198191922647e-07, "loss": 1.5698, "step": 27470 }, { "epoch": 4.6746619035468235, "grad_norm": 6.624853134155273, "learning_rate": 1.4977286555509265e-07, "loss": 1.4749, "step": 27480 }, { "epoch": 4.6763630177766435, "grad_norm": 5.951974391937256, "learning_rate": 1.48221731855238e-07, "loss": 1.3458, "step": 27490 }, { "epoch": 4.678064132006464, "grad_norm": 6.709983825683594, "learning_rate": 1.466785827033784e-07, "loss": 1.6426, "step": 27500 }, { "epoch": 4.679765246236284, "grad_norm": 5.524947643280029, "learning_rate": 1.4514341997353242e-07, "loss": 1.5079, "step": 27510 }, { "epoch": 4.681466360466105, "grad_norm": 7.322075366973877, "learning_rate": 1.4361624553002282e-07, "loss": 1.5107, "step": 27520 }, { "epoch": 4.683167474695926, "grad_norm": 7.2505340576171875, "learning_rate": 1.4209706122746746e-07, "loss": 1.5047, "step": 27530 }, { "epoch": 4.684868588925746, "grad_norm": 5.918062210083008, "learning_rate": 1.4058586891078397e-07, "loss": 1.6389, "step": 27540 }, { "epoch": 4.686569703155567, "grad_norm": 4.98066520690918, "learning_rate": 1.390826704151822e-07, "loss": 1.5393, "step": 27550 }, { "epoch": 4.688270817385387, "grad_norm": 6.098052024841309, "learning_rate": 1.375874675661649e-07, "loss": 1.5437, "step": 27560 }, { "epoch": 4.689971931615208, "grad_norm": 6.57733154296875, "learning_rate": 1.3610026217952485e-07, "loss": 1.5263, "step": 27570 }, { "epoch": 4.691673045845029, "grad_norm": 5.79831600189209, "learning_rate": 1.3462105606134405e-07, "loss": 1.4929, "step": 27580 }, { "epoch": 4.693374160074849, "grad_norm": 7.397582530975342, "learning_rate": 1.3314985100798777e-07, "loss": 1.5243, "step": 27590 }, { "epoch": 4.69507527430467, "grad_norm": 7.244752407073975, "learning_rate": 1.3168664880610598e-07, "loss": 1.462, "step": 27600 }, { "epoch": 4.69677638853449, "grad_norm": 6.627752780914307, "learning_rate": 1.3023145123262976e-07, "loss": 1.5952, "step": 27610 }, { "epoch": 4.698477502764311, "grad_norm": 6.352685451507568, "learning_rate": 1.2878426005476813e-07, "loss": 1.5902, "step": 27620 }, { "epoch": 4.7001786169941315, "grad_norm": 6.046491622924805, "learning_rate": 1.2734507703000974e-07, "loss": 1.4715, "step": 27630 }, { "epoch": 4.7018797312239515, "grad_norm": 5.038861274719238, "learning_rate": 1.2591390390611518e-07, "loss": 1.4988, "step": 27640 }, { "epoch": 4.703580845453772, "grad_norm": 6.450471878051758, "learning_rate": 1.2449074242111873e-07, "loss": 1.4618, "step": 27650 }, { "epoch": 4.705281959683592, "grad_norm": 7.268611907958984, "learning_rate": 1.2307559430332588e-07, "loss": 1.5477, "step": 27660 }, { "epoch": 4.706983073913413, "grad_norm": 5.249655723571777, "learning_rate": 1.2166846127130977e-07, "loss": 1.5072, "step": 27670 }, { "epoch": 4.708684188143234, "grad_norm": 5.183140754699707, "learning_rate": 1.2026934503391037e-07, "loss": 1.5799, "step": 27680 }, { "epoch": 4.710385302373054, "grad_norm": 6.299753665924072, "learning_rate": 1.1887824729023222e-07, "loss": 1.5838, "step": 27690 }, { "epoch": 4.712086416602875, "grad_norm": 5.463996887207031, "learning_rate": 1.1749516972964151e-07, "loss": 1.5262, "step": 27700 }, { "epoch": 4.713787530832695, "grad_norm": 5.149056911468506, "learning_rate": 1.1612011403176379e-07, "loss": 1.4921, "step": 27710 }, { "epoch": 4.715488645062516, "grad_norm": 6.004656791687012, "learning_rate": 1.1475308186648545e-07, "loss": 1.5962, "step": 27720 }, { "epoch": 4.717189759292337, "grad_norm": 6.959606647491455, "learning_rate": 1.1339407489394709e-07, "loss": 1.5032, "step": 27730 }, { "epoch": 4.718890873522157, "grad_norm": 6.741888046264648, "learning_rate": 1.1204309476454192e-07, "loss": 1.6347, "step": 27740 }, { "epoch": 4.720591987751978, "grad_norm": 8.727808952331543, "learning_rate": 1.1070014311891806e-07, "loss": 1.5043, "step": 27750 }, { "epoch": 4.722293101981798, "grad_norm": 5.873548984527588, "learning_rate": 1.0936522158797331e-07, "loss": 1.5971, "step": 27760 }, { "epoch": 4.723994216211619, "grad_norm": 5.455075740814209, "learning_rate": 1.0803833179285291e-07, "loss": 1.5516, "step": 27770 }, { "epoch": 4.7256953304414395, "grad_norm": 5.840266227722168, "learning_rate": 1.0671947534494727e-07, "loss": 1.5351, "step": 27780 }, { "epoch": 4.7273964446712595, "grad_norm": 3.786184787750244, "learning_rate": 1.0540865384589275e-07, "loss": 1.5615, "step": 27790 }, { "epoch": 4.72909755890108, "grad_norm": 7.517036437988281, "learning_rate": 1.0410586888756642e-07, "loss": 1.6074, "step": 27800 }, { "epoch": 4.7307986731309, "grad_norm": 6.73851203918457, "learning_rate": 1.0281112205208758e-07, "loss": 1.6169, "step": 27810 }, { "epoch": 4.732499787360721, "grad_norm": 6.1009440422058105, "learning_rate": 1.0152441491181323e-07, "loss": 1.6053, "step": 27820 }, { "epoch": 4.734200901590542, "grad_norm": 6.914463996887207, "learning_rate": 1.0024574902933513e-07, "loss": 1.5295, "step": 27830 }, { "epoch": 4.735902015820362, "grad_norm": 4.348443031311035, "learning_rate": 9.897512595748203e-08, "loss": 1.6825, "step": 27840 }, { "epoch": 4.737603130050183, "grad_norm": 6.353835105895996, "learning_rate": 9.771254723931293e-08, "loss": 1.7024, "step": 27850 }, { "epoch": 4.739304244280003, "grad_norm": 7.957035541534424, "learning_rate": 9.645801440812157e-08, "loss": 1.561, "step": 27860 }, { "epoch": 4.741005358509824, "grad_norm": 6.56823205947876, "learning_rate": 9.521152898742601e-08, "loss": 1.4863, "step": 27870 }, { "epoch": 4.742706472739645, "grad_norm": 5.221477031707764, "learning_rate": 9.397309249097455e-08, "loss": 1.6663, "step": 27880 }, { "epoch": 4.744407586969465, "grad_norm": 6.17525053024292, "learning_rate": 9.27427064227398e-08, "loss": 1.5474, "step": 27890 }, { "epoch": 4.746108701199286, "grad_norm": 6.125594615936279, "learning_rate": 9.15203722769179e-08, "loss": 1.5277, "step": 27900 }, { "epoch": 4.747809815429106, "grad_norm": 5.346764087677002, "learning_rate": 9.030609153792708e-08, "loss": 1.4824, "step": 27910 }, { "epoch": 4.749510929658927, "grad_norm": 6.787766456604004, "learning_rate": 8.909986568040384e-08, "loss": 1.5468, "step": 27920 }, { "epoch": 4.7512120438887475, "grad_norm": 5.578236103057861, "learning_rate": 8.790169616920454e-08, "loss": 1.5302, "step": 27930 }, { "epoch": 4.7529131581185675, "grad_norm": 7.268450736999512, "learning_rate": 8.671158445940085e-08, "loss": 1.5299, "step": 27940 }, { "epoch": 4.754614272348388, "grad_norm": 4.132484436035156, "learning_rate": 8.552953199627981e-08, "loss": 1.6264, "step": 27950 }, { "epoch": 4.756315386578208, "grad_norm": 6.548470973968506, "learning_rate": 8.43555402153393e-08, "loss": 1.4976, "step": 27960 }, { "epoch": 4.758016500808029, "grad_norm": 7.993465900421143, "learning_rate": 8.3189610542291e-08, "loss": 1.5844, "step": 27970 }, { "epoch": 4.75971761503785, "grad_norm": 6.39706563949585, "learning_rate": 8.203174439305381e-08, "loss": 1.459, "step": 27980 }, { "epoch": 4.76141872926767, "grad_norm": 7.2803778648376465, "learning_rate": 8.088194317375442e-08, "loss": 1.5256, "step": 27990 }, { "epoch": 4.763119843497491, "grad_norm": 6.996268272399902, "learning_rate": 7.97402082807267e-08, "loss": 1.5922, "step": 28000 }, { "epoch": 4.764820957727311, "grad_norm": 7.688847541809082, "learning_rate": 7.860654110050639e-08, "loss": 1.508, "step": 28010 }, { "epoch": 4.766522071957132, "grad_norm": 5.641658306121826, "learning_rate": 7.748094300983411e-08, "loss": 1.5541, "step": 28020 }, { "epoch": 4.768223186186953, "grad_norm": 5.608862400054932, "learning_rate": 7.636341537564947e-08, "loss": 1.6232, "step": 28030 }, { "epoch": 4.769924300416773, "grad_norm": 6.867303371429443, "learning_rate": 7.525395955509162e-08, "loss": 1.5451, "step": 28040 }, { "epoch": 4.771625414646594, "grad_norm": 8.263049125671387, "learning_rate": 7.415257689549874e-08, "loss": 1.5382, "step": 28050 }, { "epoch": 4.773326528876414, "grad_norm": 6.207581043243408, "learning_rate": 7.305926873440264e-08, "loss": 1.6129, "step": 28060 }, { "epoch": 4.775027643106235, "grad_norm": 6.10377836227417, "learning_rate": 7.197403639953109e-08, "loss": 1.5315, "step": 28070 }, { "epoch": 4.7767287573360555, "grad_norm": 6.987624645233154, "learning_rate": 7.089688120880178e-08, "loss": 1.5435, "step": 28080 }, { "epoch": 4.7784298715658755, "grad_norm": 6.28458833694458, "learning_rate": 6.982780447032763e-08, "loss": 1.6071, "step": 28090 }, { "epoch": 4.780130985795696, "grad_norm": 6.522460460662842, "learning_rate": 6.876680748240849e-08, "loss": 1.5836, "step": 28100 }, { "epoch": 4.781832100025516, "grad_norm": 6.875545501708984, "learning_rate": 6.771389153353124e-08, "loss": 1.5461, "step": 28110 }, { "epoch": 4.783533214255337, "grad_norm": 6.85712194442749, "learning_rate": 6.666905790237041e-08, "loss": 1.6475, "step": 28120 }, { "epoch": 4.785234328485158, "grad_norm": 5.073885917663574, "learning_rate": 6.563230785778535e-08, "loss": 1.5565, "step": 28130 }, { "epoch": 4.786935442714978, "grad_norm": 6.406914710998535, "learning_rate": 6.460364265881706e-08, "loss": 1.4715, "step": 28140 }, { "epoch": 4.788636556944799, "grad_norm": 7.194412708282471, "learning_rate": 6.35830635546906e-08, "loss": 1.4826, "step": 28150 }, { "epoch": 4.790337671174619, "grad_norm": 7.12645149230957, "learning_rate": 6.257057178480904e-08, "loss": 1.5125, "step": 28160 }, { "epoch": 4.79203878540444, "grad_norm": 5.810079574584961, "learning_rate": 6.156616857875641e-08, "loss": 1.5234, "step": 28170 }, { "epoch": 4.793739899634261, "grad_norm": 6.861225605010986, "learning_rate": 6.056985515629033e-08, "loss": 1.6324, "step": 28180 }, { "epoch": 4.795441013864081, "grad_norm": 6.956240177154541, "learning_rate": 5.958163272734789e-08, "loss": 1.5656, "step": 28190 }, { "epoch": 4.797142128093902, "grad_norm": 5.78271484375, "learning_rate": 5.8601502492038254e-08, "loss": 1.4416, "step": 28200 }, { "epoch": 4.798843242323722, "grad_norm": 5.435102939605713, "learning_rate": 5.7629465640643356e-08, "loss": 1.6197, "step": 28210 }, { "epoch": 4.800544356553543, "grad_norm": 6.35980749130249, "learning_rate": 5.666552335361644e-08, "loss": 1.5614, "step": 28220 }, { "epoch": 4.8022454707833635, "grad_norm": 5.305126190185547, "learning_rate": 5.5709676801582056e-08, "loss": 1.5634, "step": 28230 }, { "epoch": 4.8039465850131835, "grad_norm": 7.220401763916016, "learning_rate": 5.47619271453323e-08, "loss": 1.5344, "step": 28240 }, { "epoch": 4.805647699243004, "grad_norm": 6.255699157714844, "learning_rate": 5.382227553582463e-08, "loss": 1.5944, "step": 28250 }, { "epoch": 4.807348813472824, "grad_norm": 5.981529235839844, "learning_rate": 5.2890723114184774e-08, "loss": 1.5448, "step": 28260 }, { "epoch": 4.809049927702645, "grad_norm": 6.389290809631348, "learning_rate": 5.1967271011701577e-08, "loss": 1.6385, "step": 28270 }, { "epoch": 4.810751041932466, "grad_norm": 6.058357238769531, "learning_rate": 5.105192034982623e-08, "loss": 1.5977, "step": 28280 }, { "epoch": 4.812452156162286, "grad_norm": 6.061993598937988, "learning_rate": 5.0144672240172246e-08, "loss": 1.5187, "step": 28290 }, { "epoch": 4.814153270392107, "grad_norm": 5.450552463531494, "learning_rate": 4.9245527784512514e-08, "loss": 1.4993, "step": 28300 }, { "epoch": 4.815854384621927, "grad_norm": 5.724689483642578, "learning_rate": 4.8354488074780013e-08, "loss": 1.5683, "step": 28310 }, { "epoch": 4.817555498851748, "grad_norm": 4.988497734069824, "learning_rate": 4.7471554193064094e-08, "loss": 1.5693, "step": 28320 }, { "epoch": 4.819256613081569, "grad_norm": 7.25648832321167, "learning_rate": 4.6596727211611236e-08, "loss": 1.5494, "step": 28330 }, { "epoch": 4.820957727311389, "grad_norm": 6.171444416046143, "learning_rate": 4.573000819282055e-08, "loss": 1.4583, "step": 28340 }, { "epoch": 4.82265884154121, "grad_norm": 6.128518581390381, "learning_rate": 4.4871398189248257e-08, "loss": 1.6047, "step": 28350 }, { "epoch": 4.82435995577103, "grad_norm": 5.2125935554504395, "learning_rate": 4.40208982435995e-08, "loss": 1.6133, "step": 28360 }, { "epoch": 4.826061070000851, "grad_norm": 7.181666851043701, "learning_rate": 4.317850938873279e-08, "loss": 1.5734, "step": 28370 }, { "epoch": 4.8277621842306715, "grad_norm": 7.01062536239624, "learning_rate": 4.2344232647655566e-08, "loss": 1.5536, "step": 28380 }, { "epoch": 4.8294632984604915, "grad_norm": 7.739013671875, "learning_rate": 4.151806903352341e-08, "loss": 1.5742, "step": 28390 }, { "epoch": 4.831164412690312, "grad_norm": 6.069748878479004, "learning_rate": 4.0700019549640814e-08, "loss": 1.5094, "step": 28400 }, { "epoch": 4.832865526920132, "grad_norm": 6.2133307456970215, "learning_rate": 3.989008518945596e-08, "loss": 1.6012, "step": 28410 }, { "epoch": 4.834566641149953, "grad_norm": 6.628243923187256, "learning_rate": 3.908826693656369e-08, "loss": 1.5492, "step": 28420 }, { "epoch": 4.836267755379774, "grad_norm": 6.392285346984863, "learning_rate": 3.829456576470254e-08, "loss": 1.6312, "step": 28430 }, { "epoch": 4.837968869609594, "grad_norm": 6.148524761199951, "learning_rate": 3.7508982637751715e-08, "loss": 1.5596, "step": 28440 }, { "epoch": 4.839669983839415, "grad_norm": 5.219897270202637, "learning_rate": 3.6731518509734885e-08, "loss": 1.6073, "step": 28450 }, { "epoch": 4.841371098069235, "grad_norm": 5.39851188659668, "learning_rate": 3.596217432481266e-08, "loss": 1.5382, "step": 28460 }, { "epoch": 4.843072212299056, "grad_norm": 5.752907752990723, "learning_rate": 3.5200951017287095e-08, "loss": 1.4574, "step": 28470 }, { "epoch": 4.844773326528877, "grad_norm": 5.786567687988281, "learning_rate": 3.444784951159647e-08, "loss": 1.5123, "step": 28480 }, { "epoch": 4.846474440758697, "grad_norm": 5.8161115646362305, "learning_rate": 3.3702870722316775e-08, "loss": 1.567, "step": 28490 }, { "epoch": 4.848175554988518, "grad_norm": 5.222090244293213, "learning_rate": 3.2966015554160956e-08, "loss": 1.6132, "step": 28500 }, { "epoch": 4.849876669218338, "grad_norm": 6.055660247802734, "learning_rate": 3.2237284901972964e-08, "loss": 1.6142, "step": 28510 }, { "epoch": 4.851577783448159, "grad_norm": 6.723481178283691, "learning_rate": 3.151667965073371e-08, "loss": 1.5036, "step": 28520 }, { "epoch": 4.8532788976779795, "grad_norm": 7.013541221618652, "learning_rate": 3.080420067555509e-08, "loss": 1.5581, "step": 28530 }, { "epoch": 4.8549800119077995, "grad_norm": 6.067813396453857, "learning_rate": 3.009984884167926e-08, "loss": 1.4148, "step": 28540 }, { "epoch": 4.85668112613762, "grad_norm": 7.258260250091553, "learning_rate": 2.9403625004480853e-08, "loss": 1.5148, "step": 28550 }, { "epoch": 4.85838224036744, "grad_norm": 6.704066276550293, "learning_rate": 2.8715530009462512e-08, "loss": 1.5143, "step": 28560 }, { "epoch": 4.860083354597261, "grad_norm": 6.565433025360107, "learning_rate": 2.803556469225564e-08, "loss": 1.6015, "step": 28570 }, { "epoch": 4.861784468827082, "grad_norm": 5.494863033294678, "learning_rate": 2.736372987861814e-08, "loss": 1.4758, "step": 28580 }, { "epoch": 4.863485583056902, "grad_norm": 6.6142354011535645, "learning_rate": 2.6700026384435187e-08, "loss": 1.5951, "step": 28590 }, { "epoch": 4.865186697286723, "grad_norm": 6.28970193862915, "learning_rate": 2.6044455015715486e-08, "loss": 1.4526, "step": 28600 }, { "epoch": 4.866887811516543, "grad_norm": 5.576261520385742, "learning_rate": 2.5397016568594253e-08, "loss": 1.5508, "step": 28610 }, { "epoch": 4.868588925746364, "grad_norm": 5.773414611816406, "learning_rate": 2.4757711829327996e-08, "loss": 1.4665, "step": 28620 }, { "epoch": 4.870290039976185, "grad_norm": 6.628889083862305, "learning_rate": 2.4126541574296e-08, "loss": 1.5606, "step": 28630 }, { "epoch": 4.871991154206005, "grad_norm": 6.1548237800598145, "learning_rate": 2.3503506570000334e-08, "loss": 1.5788, "step": 28640 }, { "epoch": 4.873692268435826, "grad_norm": 6.75529670715332, "learning_rate": 2.2888607573061382e-08, "loss": 1.5508, "step": 28650 }, { "epoch": 4.875393382665646, "grad_norm": 5.715561389923096, "learning_rate": 2.2281845330220808e-08, "loss": 1.5172, "step": 28660 }, { "epoch": 4.877094496895467, "grad_norm": 6.15458869934082, "learning_rate": 2.168322057833858e-08, "loss": 1.6079, "step": 28670 }, { "epoch": 4.8787956111252875, "grad_norm": 7.746540069580078, "learning_rate": 2.109273404439073e-08, "loss": 1.5471, "step": 28680 }, { "epoch": 4.8804967253551075, "grad_norm": 5.893173694610596, "learning_rate": 2.0510386445473097e-08, "loss": 1.4594, "step": 28690 }, { "epoch": 4.882197839584928, "grad_norm": 7.44713020324707, "learning_rate": 1.9936178488794588e-08, "loss": 1.5551, "step": 28700 }, { "epoch": 4.883898953814748, "grad_norm": 5.640448093414307, "learning_rate": 1.937011087168018e-08, "loss": 1.5991, "step": 28710 }, { "epoch": 4.885600068044569, "grad_norm": 6.992408752441406, "learning_rate": 1.8812184281570913e-08, "loss": 1.5025, "step": 28720 }, { "epoch": 4.88730118227439, "grad_norm": 6.222136974334717, "learning_rate": 1.826239939601867e-08, "loss": 1.5883, "step": 28730 }, { "epoch": 4.88900229650421, "grad_norm": 6.587214946746826, "learning_rate": 1.772075688268916e-08, "loss": 1.5208, "step": 28740 }, { "epoch": 4.890703410734031, "grad_norm": 7.627504825592041, "learning_rate": 1.718725739936042e-08, "loss": 1.603, "step": 28750 }, { "epoch": 4.892404524963851, "grad_norm": 5.792977809906006, "learning_rate": 1.6661901593919838e-08, "loss": 1.5085, "step": 28760 }, { "epoch": 4.894105639193672, "grad_norm": 5.654362678527832, "learning_rate": 1.614469010436639e-08, "loss": 1.4997, "step": 28770 }, { "epoch": 4.895806753423493, "grad_norm": 5.740152359008789, "learning_rate": 1.5635623558808395e-08, "loss": 1.5557, "step": 28780 }, { "epoch": 4.897507867653313, "grad_norm": 6.2546162605285645, "learning_rate": 1.5134702575463524e-08, "loss": 1.5566, "step": 28790 }, { "epoch": 4.899208981883134, "grad_norm": 6.2122015953063965, "learning_rate": 1.464192776265431e-08, "loss": 1.5772, "step": 28800 }, { "epoch": 4.900910096112954, "grad_norm": 5.178815841674805, "learning_rate": 1.4157299718814137e-08, "loss": 1.6597, "step": 28810 }, { "epoch": 4.902611210342775, "grad_norm": 6.3028564453125, "learning_rate": 1.36808190324805e-08, "loss": 1.4925, "step": 28820 }, { "epoch": 4.9043123245725955, "grad_norm": 7.456543445587158, "learning_rate": 1.321248628229801e-08, "loss": 1.474, "step": 28830 }, { "epoch": 4.9060134388024155, "grad_norm": 6.090532302856445, "learning_rate": 1.2752302037016141e-08, "loss": 1.5729, "step": 28840 }, { "epoch": 4.907714553032236, "grad_norm": 5.7161173820495605, "learning_rate": 1.2300266855487749e-08, "loss": 1.5853, "step": 28850 }, { "epoch": 4.909415667262056, "grad_norm": 6.457132816314697, "learning_rate": 1.1856381286669807e-08, "loss": 1.5537, "step": 28860 }, { "epoch": 4.911116781491877, "grad_norm": 7.032171249389648, "learning_rate": 1.1420645869622669e-08, "loss": 1.5618, "step": 28870 }, { "epoch": 4.912817895721698, "grad_norm": 5.624493598937988, "learning_rate": 1.0993061133508568e-08, "loss": 1.5175, "step": 28880 }, { "epoch": 4.914519009951518, "grad_norm": 6.3817057609558105, "learning_rate": 1.057362759759237e-08, "loss": 1.55, "step": 28890 }, { "epoch": 4.916220124181339, "grad_norm": 6.508059501647949, "learning_rate": 1.0162345771239327e-08, "loss": 1.6487, "step": 28900 }, { "epoch": 4.917921238411159, "grad_norm": 5.9004340171813965, "learning_rate": 9.759216153915087e-09, "loss": 1.539, "step": 28910 }, { "epoch": 4.91962235264098, "grad_norm": 5.990011215209961, "learning_rate": 9.36423923518419e-09, "loss": 1.6223, "step": 28920 }, { "epoch": 4.921323466870801, "grad_norm": 6.900084972381592, "learning_rate": 8.977415494713067e-09, "loss": 1.4912, "step": 28930 }, { "epoch": 4.923024581100621, "grad_norm": 7.24298620223999, "learning_rate": 8.598745402264797e-09, "loss": 1.5822, "step": 28940 }, { "epoch": 4.924725695330442, "grad_norm": 6.8726348876953125, "learning_rate": 8.22822941770062e-09, "loss": 1.5516, "step": 28950 }, { "epoch": 4.926426809560262, "grad_norm": 5.8677825927734375, "learning_rate": 7.865867990979918e-09, "loss": 1.5089, "step": 28960 }, { "epoch": 4.928127923790083, "grad_norm": 6.619903087615967, "learning_rate": 7.51166156215874e-09, "loss": 1.4692, "step": 28970 }, { "epoch": 4.9298290380199035, "grad_norm": 5.755423069000244, "learning_rate": 7.165610561390534e-09, "loss": 1.5079, "step": 28980 }, { "epoch": 4.9315301522497235, "grad_norm": 5.638603687286377, "learning_rate": 6.827715408923169e-09, "loss": 1.6086, "step": 28990 }, { "epoch": 4.933231266479544, "grad_norm": 6.027549743652344, "learning_rate": 6.497976515101174e-09, "loss": 1.5588, "step": 29000 }, { "epoch": 4.934932380709364, "grad_norm": 6.331847667694092, "learning_rate": 6.176394280362749e-09, "loss": 1.5272, "step": 29010 }, { "epoch": 4.936633494939185, "grad_norm": 5.185964584350586, "learning_rate": 5.862969095242751e-09, "loss": 1.5543, "step": 29020 }, { "epoch": 4.938334609169006, "grad_norm": 6.43770694732666, "learning_rate": 5.557701340367474e-09, "loss": 1.4981, "step": 29030 }, { "epoch": 4.940035723398826, "grad_norm": 7.014509201049805, "learning_rate": 5.260591386458374e-09, "loss": 1.5031, "step": 29040 }, { "epoch": 4.941736837628647, "grad_norm": 7.215841293334961, "learning_rate": 4.9716395943290895e-09, "loss": 1.5035, "step": 29050 }, { "epoch": 4.943437951858467, "grad_norm": 8.555038452148438, "learning_rate": 4.69084631488693e-09, "loss": 1.5901, "step": 29060 }, { "epoch": 4.945139066088288, "grad_norm": 5.919086456298828, "learning_rate": 4.418211889129894e-09, "loss": 1.546, "step": 29070 }, { "epoch": 4.946840180318109, "grad_norm": 5.5981974601745605, "learning_rate": 4.153736648149651e-09, "loss": 1.5385, "step": 29080 }, { "epoch": 4.948541294547929, "grad_norm": 5.864959716796875, "learning_rate": 3.897420913127065e-09, "loss": 1.5689, "step": 29090 }, { "epoch": 4.95024240877775, "grad_norm": 6.871073246002197, "learning_rate": 3.649264995335928e-09, "loss": 1.5557, "step": 29100 }, { "epoch": 4.95194352300757, "grad_norm": 7.135589599609375, "learning_rate": 3.4092691961392237e-09, "loss": 1.5018, "step": 29110 }, { "epoch": 4.953644637237391, "grad_norm": 6.150665283203125, "learning_rate": 3.1774338069913717e-09, "loss": 1.4963, "step": 29120 }, { "epoch": 4.9553457514672115, "grad_norm": 6.790001392364502, "learning_rate": 2.9537591094359843e-09, "loss": 1.4249, "step": 29130 }, { "epoch": 4.9570468656970315, "grad_norm": 7.3811492919921875, "learning_rate": 2.7382453751066152e-09, "loss": 1.546, "step": 29140 }, { "epoch": 4.958747979926852, "grad_norm": 5.9309797286987305, "learning_rate": 2.5308928657260123e-09, "loss": 1.5004, "step": 29150 }, { "epoch": 4.960449094156672, "grad_norm": 5.938831806182861, "learning_rate": 2.331701833104625e-09, "loss": 1.5158, "step": 29160 }, { "epoch": 4.962150208386493, "grad_norm": 5.254824161529541, "learning_rate": 2.1406725191435896e-09, "loss": 1.5662, "step": 29170 }, { "epoch": 4.963851322616314, "grad_norm": 5.960704326629639, "learning_rate": 1.957805155830251e-09, "loss": 1.5568, "step": 29180 }, { "epoch": 4.965552436846134, "grad_norm": 7.006120204925537, "learning_rate": 1.7830999652426422e-09, "loss": 1.6309, "step": 29190 }, { "epoch": 4.967253551075955, "grad_norm": 7.150751113891602, "learning_rate": 1.6165571595427641e-09, "loss": 1.6519, "step": 29200 }, { "epoch": 4.968954665305775, "grad_norm": 6.631286144256592, "learning_rate": 1.458176940984052e-09, "loss": 1.5314, "step": 29210 }, { "epoch": 4.970655779535596, "grad_norm": 5.777578353881836, "learning_rate": 1.307959501904657e-09, "loss": 1.6618, "step": 29220 }, { "epoch": 4.972356893765417, "grad_norm": 7.999058723449707, "learning_rate": 1.1659050247304316e-09, "loss": 1.5696, "step": 29230 }, { "epoch": 4.974058007995237, "grad_norm": 6.014828681945801, "learning_rate": 1.0320136819741833e-09, "loss": 1.5464, "step": 29240 }, { "epoch": 4.975759122225058, "grad_norm": 5.929403781890869, "learning_rate": 9.062856362349282e-10, "loss": 1.5707, "step": 29250 }, { "epoch": 4.977460236454878, "grad_norm": 7.148036956787109, "learning_rate": 7.887210401993842e-10, "loss": 1.5358, "step": 29260 }, { "epoch": 4.979161350684699, "grad_norm": 7.831778049468994, "learning_rate": 6.793200366382383e-10, "loss": 1.5649, "step": 29270 }, { "epoch": 4.9808624649145194, "grad_norm": 6.543948650360107, "learning_rate": 5.780827584106254e-10, "loss": 1.601, "step": 29280 }, { "epoch": 4.9825635791443394, "grad_norm": 6.772243976593018, "learning_rate": 4.850093284596501e-10, "loss": 1.567, "step": 29290 }, { "epoch": 4.98426469337416, "grad_norm": 5.369320869445801, "learning_rate": 4.0009985981462535e-10, "loss": 1.6045, "step": 29300 }, { "epoch": 4.98596580760398, "grad_norm": 5.320074081420898, "learning_rate": 3.2335445559032656e-10, "loss": 1.5856, "step": 29310 }, { "epoch": 4.987666921833801, "grad_norm": 6.3092145919799805, "learning_rate": 2.547732089892306e-10, "loss": 1.5139, "step": 29320 }, { "epoch": 4.989368036063622, "grad_norm": 6.682821750640869, "learning_rate": 1.9435620329479807e-10, "loss": 1.5759, "step": 29330 }, { "epoch": 4.991069150293442, "grad_norm": 6.385833263397217, "learning_rate": 1.4210351188043023e-10, "loss": 1.49, "step": 29340 }, { "epoch": 4.992770264523263, "grad_norm": 7.004361629486084, "learning_rate": 9.801519820051227e-11, "loss": 1.5654, "step": 29350 }, { "epoch": 4.994471378753083, "grad_norm": 5.7715020179748535, "learning_rate": 6.209131579787716e-11, "loss": 1.4253, "step": 29360 }, { "epoch": 4.996172492982904, "grad_norm": 4.990482330322266, "learning_rate": 3.4331908297834544e-11, "loss": 1.561, "step": 29370 }, { "epoch": 4.997873607212725, "grad_norm": 6.701514720916748, "learning_rate": 1.4737009412649115e-11, "loss": 1.5542, "step": 29380 }, { "epoch": 4.999574721442545, "grad_norm": 6.899890899658203, "learning_rate": 3.3066429378086008e-12, "loss": 1.5503, "step": 29390 } ], "logging_steps": 10, "max_steps": 29390, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 0, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9.207322559602852e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }