diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,136248 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 19458, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 5.139274334463974e-05, + "grad_norm": 18.983932495117188, + "learning_rate": 1.7123287671232876e-08, + "loss": 1.5915, + "step": 1 + }, + { + "epoch": 0.00010278548668927948, + "grad_norm": 4.5546159744262695, + "learning_rate": 3.424657534246575e-08, + "loss": 1.0546, + "step": 2 + }, + { + "epoch": 0.0001541782300339192, + "grad_norm": 16.91658592224121, + "learning_rate": 5.136986301369863e-08, + "loss": 1.6223, + "step": 3 + }, + { + "epoch": 0.00020557097337855896, + "grad_norm": 17.328807830810547, + "learning_rate": 6.84931506849315e-08, + "loss": 1.5198, + "step": 4 + }, + { + "epoch": 0.00025696371672319867, + "grad_norm": 18.038570404052734, + "learning_rate": 8.561643835616439e-08, + "loss": 1.6088, + "step": 5 + }, + { + "epoch": 0.0003083564600678384, + "grad_norm": 17.907302856445312, + "learning_rate": 1.0273972602739726e-07, + "loss": 1.5136, + "step": 6 + }, + { + "epoch": 0.00035974920341247813, + "grad_norm": 18.671998977661133, + "learning_rate": 1.1986301369863014e-07, + "loss": 1.5237, + "step": 7 + }, + { + "epoch": 0.0004111419467571179, + "grad_norm": 15.614900588989258, + "learning_rate": 1.36986301369863e-07, + "loss": 1.5378, + "step": 8 + }, + { + "epoch": 0.00046253469010175765, + "grad_norm": 19.567293167114258, + "learning_rate": 1.541095890410959e-07, + "loss": 1.5818, + "step": 9 + }, + { + "epoch": 0.0005139274334463973, + "grad_norm": 15.693636894226074, + "learning_rate": 1.7123287671232878e-07, + "loss": 1.5181, + "step": 10 + }, + { + "epoch": 0.0005653201767910371, + "grad_norm": 16.947132110595703, + "learning_rate": 1.8835616438356165e-07, + "loss": 1.5241, + "step": 11 + }, + { + "epoch": 0.0006167129201356768, + "grad_norm": 19.202796936035156, + "learning_rate": 2.0547945205479452e-07, + "loss": 1.5925, + "step": 12 + }, + { + "epoch": 0.0006681056634803166, + "grad_norm": 20.05133628845215, + "learning_rate": 2.226027397260274e-07, + "loss": 1.5909, + "step": 13 + }, + { + "epoch": 0.0007194984068249563, + "grad_norm": 18.02488899230957, + "learning_rate": 2.397260273972603e-07, + "loss": 1.5972, + "step": 14 + }, + { + "epoch": 0.000770891150169596, + "grad_norm": 19.896997451782227, + "learning_rate": 2.568493150684932e-07, + "loss": 1.6129, + "step": 15 + }, + { + "epoch": 0.0008222838935142358, + "grad_norm": 4.352841377258301, + "learning_rate": 2.73972602739726e-07, + "loss": 1.0081, + "step": 16 + }, + { + "epoch": 0.0008736766368588755, + "grad_norm": 14.912474632263184, + "learning_rate": 2.910958904109589e-07, + "loss": 1.4785, + "step": 17 + }, + { + "epoch": 0.0009250693802035153, + "grad_norm": 15.636301040649414, + "learning_rate": 3.082191780821918e-07, + "loss": 1.566, + "step": 18 + }, + { + "epoch": 0.000976462123548155, + "grad_norm": 13.412144660949707, + "learning_rate": 3.2534246575342466e-07, + "loss": 1.5415, + "step": 19 + }, + { + "epoch": 0.0010278548668927947, + "grad_norm": 14.40302562713623, + "learning_rate": 3.4246575342465755e-07, + "loss": 1.5296, + "step": 20 + }, + { + "epoch": 0.0010792476102374346, + "grad_norm": 3.5788261890411377, + "learning_rate": 3.595890410958904e-07, + "loss": 0.9648, + "step": 21 + }, + { + "epoch": 0.0011306403535820742, + "grad_norm": 4.096730709075928, + "learning_rate": 3.767123287671233e-07, + "loss": 0.9836, + "step": 22 + }, + { + "epoch": 0.001182033096926714, + "grad_norm": 15.651545524597168, + "learning_rate": 3.938356164383562e-07, + "loss": 1.4893, + "step": 23 + }, + { + "epoch": 0.0012334258402713536, + "grad_norm": 11.431192398071289, + "learning_rate": 4.1095890410958903e-07, + "loss": 1.4979, + "step": 24 + }, + { + "epoch": 0.0012848185836159935, + "grad_norm": 8.430842399597168, + "learning_rate": 4.2808219178082193e-07, + "loss": 1.4355, + "step": 25 + }, + { + "epoch": 0.0013362113269606332, + "grad_norm": 8.259258270263672, + "learning_rate": 4.452054794520548e-07, + "loss": 1.4671, + "step": 26 + }, + { + "epoch": 0.0013876040703052729, + "grad_norm": 8.58527946472168, + "learning_rate": 4.6232876712328767e-07, + "loss": 1.4885, + "step": 27 + }, + { + "epoch": 0.0014389968136499125, + "grad_norm": 8.001357078552246, + "learning_rate": 4.794520547945206e-07, + "loss": 1.4421, + "step": 28 + }, + { + "epoch": 0.0014903895569945524, + "grad_norm": 6.4084343910217285, + "learning_rate": 4.965753424657534e-07, + "loss": 1.3488, + "step": 29 + }, + { + "epoch": 0.001541782300339192, + "grad_norm": 7.5306291580200195, + "learning_rate": 5.136986301369864e-07, + "loss": 1.4144, + "step": 30 + }, + { + "epoch": 0.0015931750436838318, + "grad_norm": 3.1045236587524414, + "learning_rate": 5.308219178082192e-07, + "loss": 0.9378, + "step": 31 + }, + { + "epoch": 0.0016445677870284717, + "grad_norm": 6.288414001464844, + "learning_rate": 5.47945205479452e-07, + "loss": 1.4156, + "step": 32 + }, + { + "epoch": 0.0016959605303731114, + "grad_norm": 2.602916717529297, + "learning_rate": 5.65068493150685e-07, + "loss": 0.9508, + "step": 33 + }, + { + "epoch": 0.001747353273717751, + "grad_norm": 3.513256549835205, + "learning_rate": 5.821917808219178e-07, + "loss": 1.3227, + "step": 34 + }, + { + "epoch": 0.0017987460170623907, + "grad_norm": 2.450483560562134, + "learning_rate": 5.993150684931507e-07, + "loss": 0.9727, + "step": 35 + }, + { + "epoch": 0.0018501387604070306, + "grad_norm": 2.507052183151245, + "learning_rate": 6.164383561643836e-07, + "loss": 0.9407, + "step": 36 + }, + { + "epoch": 0.0019015315037516703, + "grad_norm": 3.8305482864379883, + "learning_rate": 6.335616438356165e-07, + "loss": 1.2981, + "step": 37 + }, + { + "epoch": 0.00195292424709631, + "grad_norm": 3.7786405086517334, + "learning_rate": 6.506849315068493e-07, + "loss": 1.3007, + "step": 38 + }, + { + "epoch": 0.0020043169904409497, + "grad_norm": 3.929419994354248, + "learning_rate": 6.678082191780823e-07, + "loss": 1.2698, + "step": 39 + }, + { + "epoch": 0.0020557097337855893, + "grad_norm": 2.5162405967712402, + "learning_rate": 6.849315068493151e-07, + "loss": 0.9581, + "step": 40 + }, + { + "epoch": 0.002107102477130229, + "grad_norm": 3.632524013519287, + "learning_rate": 7.020547945205481e-07, + "loss": 1.2698, + "step": 41 + }, + { + "epoch": 0.002158495220474869, + "grad_norm": 3.509355068206787, + "learning_rate": 7.191780821917808e-07, + "loss": 1.2689, + "step": 42 + }, + { + "epoch": 0.002209887963819509, + "grad_norm": 3.567049741744995, + "learning_rate": 7.363013698630137e-07, + "loss": 1.3343, + "step": 43 + }, + { + "epoch": 0.0022612807071641485, + "grad_norm": 2.3726067543029785, + "learning_rate": 7.534246575342466e-07, + "loss": 0.9122, + "step": 44 + }, + { + "epoch": 0.002312673450508788, + "grad_norm": 3.2103631496429443, + "learning_rate": 7.705479452054795e-07, + "loss": 1.2341, + "step": 45 + }, + { + "epoch": 0.002364066193853428, + "grad_norm": 3.190967321395874, + "learning_rate": 7.876712328767124e-07, + "loss": 1.306, + "step": 46 + }, + { + "epoch": 0.0024154589371980675, + "grad_norm": 2.694685459136963, + "learning_rate": 8.047945205479453e-07, + "loss": 1.2526, + "step": 47 + }, + { + "epoch": 0.002466851680542707, + "grad_norm": 2.587993860244751, + "learning_rate": 8.219178082191781e-07, + "loss": 1.2065, + "step": 48 + }, + { + "epoch": 0.0025182444238873473, + "grad_norm": 1.8106046915054321, + "learning_rate": 8.39041095890411e-07, + "loss": 0.8855, + "step": 49 + }, + { + "epoch": 0.002569637167231987, + "grad_norm": 2.268317937850952, + "learning_rate": 8.561643835616439e-07, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.0026210299105766267, + "grad_norm": 1.9886833429336548, + "learning_rate": 8.732876712328768e-07, + "loss": 1.2747, + "step": 51 + }, + { + "epoch": 0.0026724226539212663, + "grad_norm": 1.6132230758666992, + "learning_rate": 8.904109589041097e-07, + "loss": 0.9159, + "step": 52 + }, + { + "epoch": 0.002723815397265906, + "grad_norm": 2.5420403480529785, + "learning_rate": 9.075342465753426e-07, + "loss": 1.2213, + "step": 53 + }, + { + "epoch": 0.0027752081406105457, + "grad_norm": 2.235058069229126, + "learning_rate": 9.246575342465753e-07, + "loss": 1.2533, + "step": 54 + }, + { + "epoch": 0.0028266008839551854, + "grad_norm": 2.079479932785034, + "learning_rate": 9.417808219178083e-07, + "loss": 1.1482, + "step": 55 + }, + { + "epoch": 0.002877993627299825, + "grad_norm": 2.2093863487243652, + "learning_rate": 9.589041095890411e-07, + "loss": 1.2019, + "step": 56 + }, + { + "epoch": 0.002929386370644465, + "grad_norm": 2.105095148086548, + "learning_rate": 9.76027397260274e-07, + "loss": 1.1674, + "step": 57 + }, + { + "epoch": 0.002980779113989105, + "grad_norm": 1.8603713512420654, + "learning_rate": 9.931506849315068e-07, + "loss": 0.9176, + "step": 58 + }, + { + "epoch": 0.0030321718573337445, + "grad_norm": 1.8567322492599487, + "learning_rate": 1.0102739726027399e-06, + "loss": 0.9331, + "step": 59 + }, + { + "epoch": 0.003083564600678384, + "grad_norm": 1.9976909160614014, + "learning_rate": 1.0273972602739727e-06, + "loss": 1.1061, + "step": 60 + }, + { + "epoch": 0.003134957344023024, + "grad_norm": 1.964990496635437, + "learning_rate": 1.0445205479452056e-06, + "loss": 1.2088, + "step": 61 + }, + { + "epoch": 0.0031863500873676636, + "grad_norm": 1.89922297000885, + "learning_rate": 1.0616438356164384e-06, + "loss": 1.2079, + "step": 62 + }, + { + "epoch": 0.0032377428307123032, + "grad_norm": 1.9209692478179932, + "learning_rate": 1.0787671232876712e-06, + "loss": 1.1916, + "step": 63 + }, + { + "epoch": 0.0032891355740569434, + "grad_norm": 1.6601163148880005, + "learning_rate": 1.095890410958904e-06, + "loss": 1.1852, + "step": 64 + }, + { + "epoch": 0.003340528317401583, + "grad_norm": 1.662931203842163, + "learning_rate": 1.1130136986301371e-06, + "loss": 0.8447, + "step": 65 + }, + { + "epoch": 0.0033919210607462227, + "grad_norm": 1.6068320274353027, + "learning_rate": 1.13013698630137e-06, + "loss": 1.1932, + "step": 66 + }, + { + "epoch": 0.0034433138040908624, + "grad_norm": 1.7915514707565308, + "learning_rate": 1.1472602739726028e-06, + "loss": 1.1881, + "step": 67 + }, + { + "epoch": 0.003494706547435502, + "grad_norm": 1.7644329071044922, + "learning_rate": 1.1643835616438357e-06, + "loss": 1.1785, + "step": 68 + }, + { + "epoch": 0.0035460992907801418, + "grad_norm": 1.5297054052352905, + "learning_rate": 1.1815068493150685e-06, + "loss": 1.0823, + "step": 69 + }, + { + "epoch": 0.0035974920341247814, + "grad_norm": 1.6290435791015625, + "learning_rate": 1.1986301369863014e-06, + "loss": 1.1546, + "step": 70 + }, + { + "epoch": 0.003648884777469421, + "grad_norm": 1.503154993057251, + "learning_rate": 1.2157534246575344e-06, + "loss": 0.8808, + "step": 71 + }, + { + "epoch": 0.0037002775208140612, + "grad_norm": 1.669655680656433, + "learning_rate": 1.2328767123287673e-06, + "loss": 1.2138, + "step": 72 + }, + { + "epoch": 0.003751670264158701, + "grad_norm": 1.6407079696655273, + "learning_rate": 1.25e-06, + "loss": 1.1743, + "step": 73 + }, + { + "epoch": 0.0038030630075033406, + "grad_norm": 1.433539867401123, + "learning_rate": 1.267123287671233e-06, + "loss": 0.9028, + "step": 74 + }, + { + "epoch": 0.0038544557508479803, + "grad_norm": 1.5575098991394043, + "learning_rate": 1.284246575342466e-06, + "loss": 1.1462, + "step": 75 + }, + { + "epoch": 0.00390584849419262, + "grad_norm": 1.3791413307189941, + "learning_rate": 1.3013698630136986e-06, + "loss": 0.9162, + "step": 76 + }, + { + "epoch": 0.00395724123753726, + "grad_norm": 1.2825815677642822, + "learning_rate": 1.3184931506849317e-06, + "loss": 0.8641, + "step": 77 + }, + { + "epoch": 0.004008633980881899, + "grad_norm": 1.521395206451416, + "learning_rate": 1.3356164383561645e-06, + "loss": 1.1824, + "step": 78 + }, + { + "epoch": 0.004060026724226539, + "grad_norm": 1.635690689086914, + "learning_rate": 1.3527397260273976e-06, + "loss": 1.2042, + "step": 79 + }, + { + "epoch": 0.004111419467571179, + "grad_norm": 1.5896871089935303, + "learning_rate": 1.3698630136986302e-06, + "loss": 1.1835, + "step": 80 + }, + { + "epoch": 0.004162812210915819, + "grad_norm": 1.5003917217254639, + "learning_rate": 1.386986301369863e-06, + "loss": 1.2263, + "step": 81 + }, + { + "epoch": 0.004214204954260458, + "grad_norm": 1.2084776163101196, + "learning_rate": 1.4041095890410961e-06, + "loss": 0.8795, + "step": 82 + }, + { + "epoch": 0.004265597697605098, + "grad_norm": 1.4843071699142456, + "learning_rate": 1.421232876712329e-06, + "loss": 1.2149, + "step": 83 + }, + { + "epoch": 0.004316990440949738, + "grad_norm": 1.675994873046875, + "learning_rate": 1.4383561643835616e-06, + "loss": 1.1447, + "step": 84 + }, + { + "epoch": 0.0043683831842943775, + "grad_norm": 1.1407861709594727, + "learning_rate": 1.4554794520547946e-06, + "loss": 0.868, + "step": 85 + }, + { + "epoch": 0.004419775927639018, + "grad_norm": 1.8884973526000977, + "learning_rate": 1.4726027397260275e-06, + "loss": 1.202, + "step": 86 + }, + { + "epoch": 0.004471168670983657, + "grad_norm": 1.434266448020935, + "learning_rate": 1.4897260273972605e-06, + "loss": 1.1098, + "step": 87 + }, + { + "epoch": 0.004522561414328297, + "grad_norm": 1.4622822999954224, + "learning_rate": 1.5068493150684932e-06, + "loss": 1.0677, + "step": 88 + }, + { + "epoch": 0.004573954157672936, + "grad_norm": 1.4630745649337769, + "learning_rate": 1.5239726027397262e-06, + "loss": 1.0797, + "step": 89 + }, + { + "epoch": 0.004625346901017576, + "grad_norm": 1.4478847980499268, + "learning_rate": 1.541095890410959e-06, + "loss": 1.1446, + "step": 90 + }, + { + "epoch": 0.004676739644362216, + "grad_norm": 1.4457114934921265, + "learning_rate": 1.5582191780821921e-06, + "loss": 1.0545, + "step": 91 + }, + { + "epoch": 0.004728132387706856, + "grad_norm": 1.4008527994155884, + "learning_rate": 1.5753424657534248e-06, + "loss": 1.11, + "step": 92 + }, + { + "epoch": 0.004779525131051496, + "grad_norm": 1.3423473834991455, + "learning_rate": 1.5924657534246576e-06, + "loss": 1.1122, + "step": 93 + }, + { + "epoch": 0.004830917874396135, + "grad_norm": 1.3709416389465332, + "learning_rate": 1.6095890410958907e-06, + "loss": 1.0117, + "step": 94 + }, + { + "epoch": 0.004882310617740775, + "grad_norm": 1.381400465965271, + "learning_rate": 1.6267123287671235e-06, + "loss": 0.9166, + "step": 95 + }, + { + "epoch": 0.004933703361085414, + "grad_norm": 1.9969120025634766, + "learning_rate": 1.6438356164383561e-06, + "loss": 1.1557, + "step": 96 + }, + { + "epoch": 0.0049850961044300545, + "grad_norm": 1.4465864896774292, + "learning_rate": 1.6609589041095892e-06, + "loss": 1.1365, + "step": 97 + }, + { + "epoch": 0.005036488847774695, + "grad_norm": 2.0803210735321045, + "learning_rate": 1.678082191780822e-06, + "loss": 1.1198, + "step": 98 + }, + { + "epoch": 0.005087881591119334, + "grad_norm": 1.6235849857330322, + "learning_rate": 1.695205479452055e-06, + "loss": 1.1256, + "step": 99 + }, + { + "epoch": 0.005139274334463974, + "grad_norm": 1.590221881866455, + "learning_rate": 1.7123287671232877e-06, + "loss": 1.0791, + "step": 100 + }, + { + "epoch": 0.005190667077808613, + "grad_norm": 2.3823463916778564, + "learning_rate": 1.7294520547945206e-06, + "loss": 0.9377, + "step": 101 + }, + { + "epoch": 0.005242059821153253, + "grad_norm": 1.3421955108642578, + "learning_rate": 1.7465753424657536e-06, + "loss": 1.054, + "step": 102 + }, + { + "epoch": 0.005293452564497893, + "grad_norm": 1.121549129486084, + "learning_rate": 1.7636986301369865e-06, + "loss": 0.8505, + "step": 103 + }, + { + "epoch": 0.005344845307842533, + "grad_norm": 1.8572452068328857, + "learning_rate": 1.7808219178082193e-06, + "loss": 1.0778, + "step": 104 + }, + { + "epoch": 0.005396238051187172, + "grad_norm": 1.1325087547302246, + "learning_rate": 1.7979452054794521e-06, + "loss": 0.7966, + "step": 105 + }, + { + "epoch": 0.005447630794531812, + "grad_norm": 1.4741758108139038, + "learning_rate": 1.8150684931506852e-06, + "loss": 1.0987, + "step": 106 + }, + { + "epoch": 0.005499023537876452, + "grad_norm": 1.4123742580413818, + "learning_rate": 1.832191780821918e-06, + "loss": 1.1147, + "step": 107 + }, + { + "epoch": 0.005550416281221091, + "grad_norm": 1.3640860319137573, + "learning_rate": 1.8493150684931507e-06, + "loss": 1.1028, + "step": 108 + }, + { + "epoch": 0.0056018090245657315, + "grad_norm": 1.4527561664581299, + "learning_rate": 1.8664383561643837e-06, + "loss": 1.0789, + "step": 109 + }, + { + "epoch": 0.005653201767910371, + "grad_norm": 1.0783603191375732, + "learning_rate": 1.8835616438356166e-06, + "loss": 0.9282, + "step": 110 + }, + { + "epoch": 0.005704594511255011, + "grad_norm": 1.396845817565918, + "learning_rate": 1.9006849315068496e-06, + "loss": 1.0918, + "step": 111 + }, + { + "epoch": 0.00575598725459965, + "grad_norm": 0.992274820804596, + "learning_rate": 1.9178082191780823e-06, + "loss": 0.776, + "step": 112 + }, + { + "epoch": 0.00580737999794429, + "grad_norm": 1.4029620885849, + "learning_rate": 1.9349315068493153e-06, + "loss": 1.0743, + "step": 113 + }, + { + "epoch": 0.00585877274128893, + "grad_norm": 1.3273193836212158, + "learning_rate": 1.952054794520548e-06, + "loss": 1.0451, + "step": 114 + }, + { + "epoch": 0.00591016548463357, + "grad_norm": 1.0597772598266602, + "learning_rate": 1.969178082191781e-06, + "loss": 0.8557, + "step": 115 + }, + { + "epoch": 0.00596155822797821, + "grad_norm": 1.3844999074935913, + "learning_rate": 1.9863013698630136e-06, + "loss": 1.05, + "step": 116 + }, + { + "epoch": 0.006012950971322849, + "grad_norm": 1.449572205543518, + "learning_rate": 2.0034246575342467e-06, + "loss": 1.0657, + "step": 117 + }, + { + "epoch": 0.006064343714667489, + "grad_norm": 1.3290841579437256, + "learning_rate": 2.0205479452054797e-06, + "loss": 0.9605, + "step": 118 + }, + { + "epoch": 0.006115736458012128, + "grad_norm": 1.4795900583267212, + "learning_rate": 2.037671232876713e-06, + "loss": 1.1017, + "step": 119 + }, + { + "epoch": 0.006167129201356768, + "grad_norm": 1.4142014980316162, + "learning_rate": 2.0547945205479454e-06, + "loss": 1.1145, + "step": 120 + }, + { + "epoch": 0.0062185219447014085, + "grad_norm": 1.7041820287704468, + "learning_rate": 2.071917808219178e-06, + "loss": 1.0972, + "step": 121 + }, + { + "epoch": 0.006269914688046048, + "grad_norm": 1.4172296524047852, + "learning_rate": 2.089041095890411e-06, + "loss": 1.0592, + "step": 122 + }, + { + "epoch": 0.006321307431390688, + "grad_norm": 1.5352330207824707, + "learning_rate": 2.106164383561644e-06, + "loss": 1.088, + "step": 123 + }, + { + "epoch": 0.006372700174735327, + "grad_norm": 1.5836726427078247, + "learning_rate": 2.123287671232877e-06, + "loss": 1.0782, + "step": 124 + }, + { + "epoch": 0.006424092918079967, + "grad_norm": 1.33640718460083, + "learning_rate": 2.14041095890411e-06, + "loss": 1.0236, + "step": 125 + }, + { + "epoch": 0.0064754856614246065, + "grad_norm": 1.4273734092712402, + "learning_rate": 2.1575342465753425e-06, + "loss": 1.054, + "step": 126 + }, + { + "epoch": 0.006526878404769247, + "grad_norm": 1.447994351387024, + "learning_rate": 2.1746575342465755e-06, + "loss": 1.0304, + "step": 127 + }, + { + "epoch": 0.006578271148113887, + "grad_norm": 1.3829752206802368, + "learning_rate": 2.191780821917808e-06, + "loss": 1.0589, + "step": 128 + }, + { + "epoch": 0.006629663891458526, + "grad_norm": 0.9735933542251587, + "learning_rate": 2.2089041095890412e-06, + "loss": 0.8343, + "step": 129 + }, + { + "epoch": 0.006681056634803166, + "grad_norm": 1.5326259136199951, + "learning_rate": 2.2260273972602743e-06, + "loss": 1.013, + "step": 130 + }, + { + "epoch": 0.006732449378147805, + "grad_norm": 1.454871654510498, + "learning_rate": 2.243150684931507e-06, + "loss": 1.0101, + "step": 131 + }, + { + "epoch": 0.0067838421214924454, + "grad_norm": 1.1300477981567383, + "learning_rate": 2.26027397260274e-06, + "loss": 0.8717, + "step": 132 + }, + { + "epoch": 0.006835234864837085, + "grad_norm": 1.4412310123443604, + "learning_rate": 2.2773972602739726e-06, + "loss": 1.0502, + "step": 133 + }, + { + "epoch": 0.006886627608181725, + "grad_norm": 1.4205049276351929, + "learning_rate": 2.2945205479452057e-06, + "loss": 1.0533, + "step": 134 + }, + { + "epoch": 0.006938020351526364, + "grad_norm": 1.7551305294036865, + "learning_rate": 2.3116438356164387e-06, + "loss": 1.0119, + "step": 135 + }, + { + "epoch": 0.006989413094871004, + "grad_norm": 1.3921761512756348, + "learning_rate": 2.3287671232876713e-06, + "loss": 1.0708, + "step": 136 + }, + { + "epoch": 0.007040805838215644, + "grad_norm": 1.415696382522583, + "learning_rate": 2.3458904109589044e-06, + "loss": 1.0456, + "step": 137 + }, + { + "epoch": 0.0070921985815602835, + "grad_norm": 1.12046217918396, + "learning_rate": 2.363013698630137e-06, + "loss": 0.8092, + "step": 138 + }, + { + "epoch": 0.007143591324904924, + "grad_norm": 1.3467131853103638, + "learning_rate": 2.38013698630137e-06, + "loss": 1.0546, + "step": 139 + }, + { + "epoch": 0.007194984068249563, + "grad_norm": 0.8997917771339417, + "learning_rate": 2.3972602739726027e-06, + "loss": 0.822, + "step": 140 + }, + { + "epoch": 0.007246376811594203, + "grad_norm": 1.397667646408081, + "learning_rate": 2.4143835616438358e-06, + "loss": 1.0567, + "step": 141 + }, + { + "epoch": 0.007297769554938842, + "grad_norm": 1.604783296585083, + "learning_rate": 2.431506849315069e-06, + "loss": 1.0272, + "step": 142 + }, + { + "epoch": 0.007349162298283482, + "grad_norm": 1.4480196237564087, + "learning_rate": 2.4486301369863015e-06, + "loss": 0.8425, + "step": 143 + }, + { + "epoch": 0.0074005550416281225, + "grad_norm": 1.4750083684921265, + "learning_rate": 2.4657534246575345e-06, + "loss": 0.9614, + "step": 144 + }, + { + "epoch": 0.007451947784972762, + "grad_norm": 1.3589426279067993, + "learning_rate": 2.482876712328767e-06, + "loss": 0.9821, + "step": 145 + }, + { + "epoch": 0.007503340528317402, + "grad_norm": 1.4352126121520996, + "learning_rate": 2.5e-06, + "loss": 1.0243, + "step": 146 + }, + { + "epoch": 0.007554733271662041, + "grad_norm": 1.4147143363952637, + "learning_rate": 2.5171232876712333e-06, + "loss": 0.9968, + "step": 147 + }, + { + "epoch": 0.007606126015006681, + "grad_norm": 1.359907627105713, + "learning_rate": 2.534246575342466e-06, + "loss": 1.0272, + "step": 148 + }, + { + "epoch": 0.00765751875835132, + "grad_norm": 1.3496214151382446, + "learning_rate": 2.551369863013699e-06, + "loss": 0.9633, + "step": 149 + }, + { + "epoch": 0.0077089115016959605, + "grad_norm": 1.859374761581421, + "learning_rate": 2.568493150684932e-06, + "loss": 1.0322, + "step": 150 + }, + { + "epoch": 0.007760304245040601, + "grad_norm": 1.4341025352478027, + "learning_rate": 2.585616438356164e-06, + "loss": 1.0475, + "step": 151 + }, + { + "epoch": 0.00781169698838524, + "grad_norm": 1.4478822946548462, + "learning_rate": 2.6027397260273973e-06, + "loss": 1.0347, + "step": 152 + }, + { + "epoch": 0.00786308973172988, + "grad_norm": 1.4878865480422974, + "learning_rate": 2.6198630136986303e-06, + "loss": 1.0536, + "step": 153 + }, + { + "epoch": 0.00791448247507452, + "grad_norm": 1.0657761096954346, + "learning_rate": 2.6369863013698634e-06, + "loss": 0.8161, + "step": 154 + }, + { + "epoch": 0.007965875218419158, + "grad_norm": 1.97149658203125, + "learning_rate": 2.654109589041096e-06, + "loss": 1.0389, + "step": 155 + }, + { + "epoch": 0.008017267961763799, + "grad_norm": 1.1665364503860474, + "learning_rate": 2.671232876712329e-06, + "loss": 0.8622, + "step": 156 + }, + { + "epoch": 0.008068660705108439, + "grad_norm": 1.52482008934021, + "learning_rate": 2.688356164383562e-06, + "loss": 0.9671, + "step": 157 + }, + { + "epoch": 0.008120053448453079, + "grad_norm": 1.8569509983062744, + "learning_rate": 2.705479452054795e-06, + "loss": 1.009, + "step": 158 + }, + { + "epoch": 0.008171446191797719, + "grad_norm": 1.0221922397613525, + "learning_rate": 2.7226027397260274e-06, + "loss": 0.8084, + "step": 159 + }, + { + "epoch": 0.008222838935142357, + "grad_norm": 1.399552822113037, + "learning_rate": 2.7397260273972604e-06, + "loss": 1.0087, + "step": 160 + }, + { + "epoch": 0.008274231678486997, + "grad_norm": 1.404884934425354, + "learning_rate": 2.7568493150684935e-06, + "loss": 0.9931, + "step": 161 + }, + { + "epoch": 0.008325624421831638, + "grad_norm": 1.3287042379379272, + "learning_rate": 2.773972602739726e-06, + "loss": 0.9632, + "step": 162 + }, + { + "epoch": 0.008377017165176278, + "grad_norm": 1.4677836894989014, + "learning_rate": 2.791095890410959e-06, + "loss": 0.9791, + "step": 163 + }, + { + "epoch": 0.008428409908520916, + "grad_norm": 1.1199952363967896, + "learning_rate": 2.8082191780821922e-06, + "loss": 0.8202, + "step": 164 + }, + { + "epoch": 0.008479802651865556, + "grad_norm": 1.3308522701263428, + "learning_rate": 2.8253424657534253e-06, + "loss": 0.9557, + "step": 165 + }, + { + "epoch": 0.008531195395210196, + "grad_norm": 1.4022819995880127, + "learning_rate": 2.842465753424658e-06, + "loss": 0.9773, + "step": 166 + }, + { + "epoch": 0.008582588138554836, + "grad_norm": 1.3995765447616577, + "learning_rate": 2.8595890410958905e-06, + "loss": 1.0606, + "step": 167 + }, + { + "epoch": 0.008633980881899476, + "grad_norm": 1.7663745880126953, + "learning_rate": 2.876712328767123e-06, + "loss": 1.0134, + "step": 168 + }, + { + "epoch": 0.008685373625244115, + "grad_norm": 1.3537906408309937, + "learning_rate": 2.8938356164383562e-06, + "loss": 0.9966, + "step": 169 + }, + { + "epoch": 0.008736766368588755, + "grad_norm": 1.5769232511520386, + "learning_rate": 2.9109589041095893e-06, + "loss": 0.9464, + "step": 170 + }, + { + "epoch": 0.008788159111933395, + "grad_norm": 1.6474496126174927, + "learning_rate": 2.9280821917808223e-06, + "loss": 0.9946, + "step": 171 + }, + { + "epoch": 0.008839551855278035, + "grad_norm": 1.3077877759933472, + "learning_rate": 2.945205479452055e-06, + "loss": 0.9827, + "step": 172 + }, + { + "epoch": 0.008890944598622675, + "grad_norm": 1.4552854299545288, + "learning_rate": 2.962328767123288e-06, + "loss": 1.0465, + "step": 173 + }, + { + "epoch": 0.008942337341967314, + "grad_norm": 1.3246502876281738, + "learning_rate": 2.979452054794521e-06, + "loss": 1.0203, + "step": 174 + }, + { + "epoch": 0.008993730085311954, + "grad_norm": 1.292330026626587, + "learning_rate": 2.9965753424657533e-06, + "loss": 1.0213, + "step": 175 + }, + { + "epoch": 0.009045122828656594, + "grad_norm": 1.3146753311157227, + "learning_rate": 3.0136986301369864e-06, + "loss": 0.9799, + "step": 176 + }, + { + "epoch": 0.009096515572001234, + "grad_norm": 0.9643856883049011, + "learning_rate": 3.0308219178082194e-06, + "loss": 0.814, + "step": 177 + }, + { + "epoch": 0.009147908315345872, + "grad_norm": 1.344741940498352, + "learning_rate": 3.0479452054794525e-06, + "loss": 1.0283, + "step": 178 + }, + { + "epoch": 0.009199301058690513, + "grad_norm": 1.3446404933929443, + "learning_rate": 3.065068493150685e-06, + "loss": 1.0285, + "step": 179 + }, + { + "epoch": 0.009250693802035153, + "grad_norm": 1.33372962474823, + "learning_rate": 3.082191780821918e-06, + "loss": 1.0179, + "step": 180 + }, + { + "epoch": 0.009302086545379793, + "grad_norm": 1.2783104181289673, + "learning_rate": 3.099315068493151e-06, + "loss": 1.0117, + "step": 181 + }, + { + "epoch": 0.009353479288724433, + "grad_norm": 1.0819905996322632, + "learning_rate": 3.1164383561643843e-06, + "loss": 0.7637, + "step": 182 + }, + { + "epoch": 0.009404872032069071, + "grad_norm": 1.5114994049072266, + "learning_rate": 3.1335616438356165e-06, + "loss": 0.9671, + "step": 183 + }, + { + "epoch": 0.009456264775413711, + "grad_norm": 0.8833361268043518, + "learning_rate": 3.1506849315068495e-06, + "loss": 0.7806, + "step": 184 + }, + { + "epoch": 0.009507657518758351, + "grad_norm": 1.5377718210220337, + "learning_rate": 3.167808219178082e-06, + "loss": 1.0183, + "step": 185 + }, + { + "epoch": 0.009559050262102992, + "grad_norm": 1.4501428604125977, + "learning_rate": 3.184931506849315e-06, + "loss": 1.049, + "step": 186 + }, + { + "epoch": 0.00961044300544763, + "grad_norm": 1.1223877668380737, + "learning_rate": 3.2020547945205483e-06, + "loss": 0.8187, + "step": 187 + }, + { + "epoch": 0.00966183574879227, + "grad_norm": 1.33689546585083, + "learning_rate": 3.2191780821917813e-06, + "loss": 0.8652, + "step": 188 + }, + { + "epoch": 0.00971322849213691, + "grad_norm": 1.5084279775619507, + "learning_rate": 3.236301369863014e-06, + "loss": 0.997, + "step": 189 + }, + { + "epoch": 0.00976462123548155, + "grad_norm": 1.3489594459533691, + "learning_rate": 3.253424657534247e-06, + "loss": 1.0134, + "step": 190 + }, + { + "epoch": 0.00981601397882619, + "grad_norm": 0.8779999017715454, + "learning_rate": 3.2705479452054796e-06, + "loss": 0.8041, + "step": 191 + }, + { + "epoch": 0.009867406722170829, + "grad_norm": 1.529397964477539, + "learning_rate": 3.2876712328767123e-06, + "loss": 1.0341, + "step": 192 + }, + { + "epoch": 0.009918799465515469, + "grad_norm": 1.4480550289154053, + "learning_rate": 3.3047945205479453e-06, + "loss": 0.9562, + "step": 193 + }, + { + "epoch": 0.009970192208860109, + "grad_norm": 1.3672980070114136, + "learning_rate": 3.3219178082191784e-06, + "loss": 0.9988, + "step": 194 + }, + { + "epoch": 0.010021584952204749, + "grad_norm": 0.8857347965240479, + "learning_rate": 3.3390410958904114e-06, + "loss": 0.7854, + "step": 195 + }, + { + "epoch": 0.01007297769554939, + "grad_norm": 0.9103947877883911, + "learning_rate": 3.356164383561644e-06, + "loss": 0.7833, + "step": 196 + }, + { + "epoch": 0.010124370438894028, + "grad_norm": 1.821307897567749, + "learning_rate": 3.373287671232877e-06, + "loss": 0.9508, + "step": 197 + }, + { + "epoch": 0.010175763182238668, + "grad_norm": 1.4973331689834595, + "learning_rate": 3.39041095890411e-06, + "loss": 1.0175, + "step": 198 + }, + { + "epoch": 0.010227155925583308, + "grad_norm": 1.3801848888397217, + "learning_rate": 3.4075342465753424e-06, + "loss": 1.046, + "step": 199 + }, + { + "epoch": 0.010278548668927948, + "grad_norm": 1.4681309461593628, + "learning_rate": 3.4246575342465754e-06, + "loss": 0.9831, + "step": 200 + }, + { + "epoch": 0.010329941412272586, + "grad_norm": 1.5392931699752808, + "learning_rate": 3.4417808219178085e-06, + "loss": 0.9838, + "step": 201 + }, + { + "epoch": 0.010381334155617226, + "grad_norm": 1.4939289093017578, + "learning_rate": 3.458904109589041e-06, + "loss": 1.0093, + "step": 202 + }, + { + "epoch": 0.010432726898961867, + "grad_norm": 1.4377801418304443, + "learning_rate": 3.476027397260274e-06, + "loss": 0.9876, + "step": 203 + }, + { + "epoch": 0.010484119642306507, + "grad_norm": 1.546335220336914, + "learning_rate": 3.4931506849315072e-06, + "loss": 0.9303, + "step": 204 + }, + { + "epoch": 0.010535512385651147, + "grad_norm": 1.359356164932251, + "learning_rate": 3.5102739726027403e-06, + "loss": 0.929, + "step": 205 + }, + { + "epoch": 0.010586905128995785, + "grad_norm": 0.9822274446487427, + "learning_rate": 3.527397260273973e-06, + "loss": 0.8617, + "step": 206 + }, + { + "epoch": 0.010638297872340425, + "grad_norm": 0.9313762784004211, + "learning_rate": 3.5445205479452056e-06, + "loss": 0.8215, + "step": 207 + }, + { + "epoch": 0.010689690615685065, + "grad_norm": 1.4884226322174072, + "learning_rate": 3.5616438356164386e-06, + "loss": 0.9872, + "step": 208 + }, + { + "epoch": 0.010741083359029705, + "grad_norm": 1.4232925176620483, + "learning_rate": 3.5787671232876712e-06, + "loss": 0.9577, + "step": 209 + }, + { + "epoch": 0.010792476102374344, + "grad_norm": 1.4278874397277832, + "learning_rate": 3.5958904109589043e-06, + "loss": 0.9844, + "step": 210 + }, + { + "epoch": 0.010843868845718984, + "grad_norm": 0.8510668873786926, + "learning_rate": 3.6130136986301373e-06, + "loss": 0.8278, + "step": 211 + }, + { + "epoch": 0.010895261589063624, + "grad_norm": 0.9740152359008789, + "learning_rate": 3.6301369863013704e-06, + "loss": 0.798, + "step": 212 + }, + { + "epoch": 0.010946654332408264, + "grad_norm": 1.3978337049484253, + "learning_rate": 3.647260273972603e-06, + "loss": 1.0321, + "step": 213 + }, + { + "epoch": 0.010998047075752904, + "grad_norm": 0.9594858884811401, + "learning_rate": 3.664383561643836e-06, + "loss": 0.8215, + "step": 214 + }, + { + "epoch": 0.011049439819097543, + "grad_norm": 1.3392057418823242, + "learning_rate": 3.6815068493150687e-06, + "loss": 0.9828, + "step": 215 + }, + { + "epoch": 0.011100832562442183, + "grad_norm": 0.940901517868042, + "learning_rate": 3.6986301369863014e-06, + "loss": 0.7873, + "step": 216 + }, + { + "epoch": 0.011152225305786823, + "grad_norm": 1.3940353393554688, + "learning_rate": 3.7157534246575344e-06, + "loss": 0.9713, + "step": 217 + }, + { + "epoch": 0.011203618049131463, + "grad_norm": 1.3776757717132568, + "learning_rate": 3.7328767123287675e-06, + "loss": 0.9929, + "step": 218 + }, + { + "epoch": 0.011255010792476103, + "grad_norm": 1.338098406791687, + "learning_rate": 3.7500000000000005e-06, + "loss": 1.0037, + "step": 219 + }, + { + "epoch": 0.011306403535820742, + "grad_norm": 1.431422233581543, + "learning_rate": 3.767123287671233e-06, + "loss": 0.9562, + "step": 220 + }, + { + "epoch": 0.011357796279165382, + "grad_norm": 1.3269257545471191, + "learning_rate": 3.784246575342466e-06, + "loss": 1.0014, + "step": 221 + }, + { + "epoch": 0.011409189022510022, + "grad_norm": 1.2928478717803955, + "learning_rate": 3.8013698630136993e-06, + "loss": 0.9367, + "step": 222 + }, + { + "epoch": 0.011460581765854662, + "grad_norm": 0.9002137780189514, + "learning_rate": 3.818493150684932e-06, + "loss": 0.8075, + "step": 223 + }, + { + "epoch": 0.0115119745091993, + "grad_norm": 1.3738528490066528, + "learning_rate": 3.8356164383561645e-06, + "loss": 0.9403, + "step": 224 + }, + { + "epoch": 0.01156336725254394, + "grad_norm": 1.4318796396255493, + "learning_rate": 3.852739726027397e-06, + "loss": 0.9459, + "step": 225 + }, + { + "epoch": 0.01161475999588858, + "grad_norm": 1.1008661985397339, + "learning_rate": 3.869863013698631e-06, + "loss": 0.8388, + "step": 226 + }, + { + "epoch": 0.01166615273923322, + "grad_norm": 1.5036805868148804, + "learning_rate": 3.886986301369863e-06, + "loss": 0.9753, + "step": 227 + }, + { + "epoch": 0.01171754548257786, + "grad_norm": 1.336403250694275, + "learning_rate": 3.904109589041096e-06, + "loss": 1.0148, + "step": 228 + }, + { + "epoch": 0.011768938225922499, + "grad_norm": 1.4252920150756836, + "learning_rate": 3.921232876712329e-06, + "loss": 0.9182, + "step": 229 + }, + { + "epoch": 0.01182033096926714, + "grad_norm": 1.3516569137573242, + "learning_rate": 3.938356164383562e-06, + "loss": 0.989, + "step": 230 + }, + { + "epoch": 0.01187172371261178, + "grad_norm": 1.0649734735488892, + "learning_rate": 3.9554794520547955e-06, + "loss": 0.7999, + "step": 231 + }, + { + "epoch": 0.01192311645595642, + "grad_norm": 1.3591573238372803, + "learning_rate": 3.972602739726027e-06, + "loss": 0.9598, + "step": 232 + }, + { + "epoch": 0.01197450919930106, + "grad_norm": 1.8600594997406006, + "learning_rate": 3.989726027397261e-06, + "loss": 0.933, + "step": 233 + }, + { + "epoch": 0.012025901942645698, + "grad_norm": 1.3544315099716187, + "learning_rate": 4.006849315068493e-06, + "loss": 0.9473, + "step": 234 + }, + { + "epoch": 0.012077294685990338, + "grad_norm": 1.410604476928711, + "learning_rate": 4.023972602739726e-06, + "loss": 0.9927, + "step": 235 + }, + { + "epoch": 0.012128687429334978, + "grad_norm": 1.3216272592544556, + "learning_rate": 4.0410958904109595e-06, + "loss": 0.9414, + "step": 236 + }, + { + "epoch": 0.012180080172679618, + "grad_norm": 1.4487719535827637, + "learning_rate": 4.058219178082192e-06, + "loss": 0.9896, + "step": 237 + }, + { + "epoch": 0.012231472916024257, + "grad_norm": 1.48616361618042, + "learning_rate": 4.075342465753426e-06, + "loss": 1.0043, + "step": 238 + }, + { + "epoch": 0.012282865659368897, + "grad_norm": 1.428780436515808, + "learning_rate": 4.092465753424658e-06, + "loss": 0.9794, + "step": 239 + }, + { + "epoch": 0.012334258402713537, + "grad_norm": 1.4327056407928467, + "learning_rate": 4.109589041095891e-06, + "loss": 0.9936, + "step": 240 + }, + { + "epoch": 0.012385651146058177, + "grad_norm": 1.3653701543807983, + "learning_rate": 4.1267123287671235e-06, + "loss": 0.9727, + "step": 241 + }, + { + "epoch": 0.012437043889402817, + "grad_norm": 1.2695425748825073, + "learning_rate": 4.143835616438356e-06, + "loss": 1.0017, + "step": 242 + }, + { + "epoch": 0.012488436632747455, + "grad_norm": 1.2896161079406738, + "learning_rate": 4.16095890410959e-06, + "loss": 0.9541, + "step": 243 + }, + { + "epoch": 0.012539829376092096, + "grad_norm": 1.2927240133285522, + "learning_rate": 4.178082191780822e-06, + "loss": 0.9417, + "step": 244 + }, + { + "epoch": 0.012591222119436736, + "grad_norm": 0.9451867938041687, + "learning_rate": 4.195205479452055e-06, + "loss": 0.8099, + "step": 245 + }, + { + "epoch": 0.012642614862781376, + "grad_norm": 1.327837347984314, + "learning_rate": 4.212328767123288e-06, + "loss": 1.0247, + "step": 246 + }, + { + "epoch": 0.012694007606126014, + "grad_norm": 1.7542698383331299, + "learning_rate": 4.229452054794521e-06, + "loss": 0.9664, + "step": 247 + }, + { + "epoch": 0.012745400349470654, + "grad_norm": 1.5239324569702148, + "learning_rate": 4.246575342465754e-06, + "loss": 0.9556, + "step": 248 + }, + { + "epoch": 0.012796793092815294, + "grad_norm": 1.2914499044418335, + "learning_rate": 4.263698630136986e-06, + "loss": 0.8311, + "step": 249 + }, + { + "epoch": 0.012848185836159935, + "grad_norm": 0.9499532580375671, + "learning_rate": 4.28082191780822e-06, + "loss": 0.7882, + "step": 250 + }, + { + "epoch": 0.012899578579504575, + "grad_norm": 1.4977997541427612, + "learning_rate": 4.297945205479452e-06, + "loss": 0.9427, + "step": 251 + }, + { + "epoch": 0.012950971322849213, + "grad_norm": 1.5635406970977783, + "learning_rate": 4.315068493150685e-06, + "loss": 0.9925, + "step": 252 + }, + { + "epoch": 0.013002364066193853, + "grad_norm": 1.3498114347457886, + "learning_rate": 4.3321917808219185e-06, + "loss": 0.9673, + "step": 253 + }, + { + "epoch": 0.013053756809538493, + "grad_norm": 1.3927500247955322, + "learning_rate": 4.349315068493151e-06, + "loss": 0.9581, + "step": 254 + }, + { + "epoch": 0.013105149552883133, + "grad_norm": 1.3908201456069946, + "learning_rate": 4.3664383561643846e-06, + "loss": 0.9689, + "step": 255 + }, + { + "epoch": 0.013156542296227773, + "grad_norm": 1.3558933734893799, + "learning_rate": 4.383561643835616e-06, + "loss": 0.9759, + "step": 256 + }, + { + "epoch": 0.013207935039572412, + "grad_norm": 1.5187759399414062, + "learning_rate": 4.40068493150685e-06, + "loss": 0.9272, + "step": 257 + }, + { + "epoch": 0.013259327782917052, + "grad_norm": 1.4035674333572388, + "learning_rate": 4.4178082191780825e-06, + "loss": 0.9814, + "step": 258 + }, + { + "epoch": 0.013310720526261692, + "grad_norm": 1.3661476373672485, + "learning_rate": 4.434931506849315e-06, + "loss": 0.9428, + "step": 259 + }, + { + "epoch": 0.013362113269606332, + "grad_norm": 1.3462390899658203, + "learning_rate": 4.4520547945205486e-06, + "loss": 0.9524, + "step": 260 + }, + { + "epoch": 0.01341350601295097, + "grad_norm": 1.2937759160995483, + "learning_rate": 4.469178082191781e-06, + "loss": 0.9337, + "step": 261 + }, + { + "epoch": 0.01346489875629561, + "grad_norm": 1.3532419204711914, + "learning_rate": 4.486301369863014e-06, + "loss": 1.0307, + "step": 262 + }, + { + "epoch": 0.01351629149964025, + "grad_norm": 0.8749688267707825, + "learning_rate": 4.503424657534247e-06, + "loss": 0.7494, + "step": 263 + }, + { + "epoch": 0.013567684242984891, + "grad_norm": 0.9858847260475159, + "learning_rate": 4.52054794520548e-06, + "loss": 0.8273, + "step": 264 + }, + { + "epoch": 0.013619076986329531, + "grad_norm": 1.7489287853240967, + "learning_rate": 4.537671232876713e-06, + "loss": 0.9371, + "step": 265 + }, + { + "epoch": 0.01367046972967417, + "grad_norm": 1.5460171699523926, + "learning_rate": 4.554794520547945e-06, + "loss": 0.8878, + "step": 266 + }, + { + "epoch": 0.01372186247301881, + "grad_norm": 1.434395432472229, + "learning_rate": 4.571917808219179e-06, + "loss": 0.9427, + "step": 267 + }, + { + "epoch": 0.01377325521636345, + "grad_norm": 1.308001160621643, + "learning_rate": 4.589041095890411e-06, + "loss": 0.9972, + "step": 268 + }, + { + "epoch": 0.01382464795970809, + "grad_norm": 1.2756896018981934, + "learning_rate": 4.606164383561644e-06, + "loss": 0.9386, + "step": 269 + }, + { + "epoch": 0.013876040703052728, + "grad_norm": 1.363455057144165, + "learning_rate": 4.6232876712328774e-06, + "loss": 0.981, + "step": 270 + }, + { + "epoch": 0.013927433446397368, + "grad_norm": 0.8435012102127075, + "learning_rate": 4.64041095890411e-06, + "loss": 0.7865, + "step": 271 + }, + { + "epoch": 0.013978826189742008, + "grad_norm": 1.3855482339859009, + "learning_rate": 4.657534246575343e-06, + "loss": 0.7666, + "step": 272 + }, + { + "epoch": 0.014030218933086648, + "grad_norm": 1.3576053380966187, + "learning_rate": 4.674657534246575e-06, + "loss": 0.9482, + "step": 273 + }, + { + "epoch": 0.014081611676431289, + "grad_norm": 1.412172555923462, + "learning_rate": 4.691780821917809e-06, + "loss": 0.9507, + "step": 274 + }, + { + "epoch": 0.014133004419775927, + "grad_norm": 1.417295217514038, + "learning_rate": 4.7089041095890414e-06, + "loss": 0.9619, + "step": 275 + }, + { + "epoch": 0.014184397163120567, + "grad_norm": 1.2438583374023438, + "learning_rate": 4.726027397260274e-06, + "loss": 0.9134, + "step": 276 + }, + { + "epoch": 0.014235789906465207, + "grad_norm": 1.4614331722259521, + "learning_rate": 4.7431506849315075e-06, + "loss": 0.9364, + "step": 277 + }, + { + "epoch": 0.014287182649809847, + "grad_norm": 1.36215078830719, + "learning_rate": 4.76027397260274e-06, + "loss": 0.9903, + "step": 278 + }, + { + "epoch": 0.014338575393154487, + "grad_norm": 1.529297113418579, + "learning_rate": 4.777397260273973e-06, + "loss": 0.9316, + "step": 279 + }, + { + "epoch": 0.014389968136499126, + "grad_norm": 0.9258805513381958, + "learning_rate": 4.7945205479452054e-06, + "loss": 0.8018, + "step": 280 + }, + { + "epoch": 0.014441360879843766, + "grad_norm": 1.5578103065490723, + "learning_rate": 4.811643835616439e-06, + "loss": 0.9279, + "step": 281 + }, + { + "epoch": 0.014492753623188406, + "grad_norm": 1.422542691230774, + "learning_rate": 4.8287671232876716e-06, + "loss": 0.9283, + "step": 282 + }, + { + "epoch": 0.014544146366533046, + "grad_norm": 1.4068387746810913, + "learning_rate": 4.845890410958904e-06, + "loss": 0.9731, + "step": 283 + }, + { + "epoch": 0.014595539109877684, + "grad_norm": 0.9610908031463623, + "learning_rate": 4.863013698630138e-06, + "loss": 0.7616, + "step": 284 + }, + { + "epoch": 0.014646931853222325, + "grad_norm": 1.8511143922805786, + "learning_rate": 4.88013698630137e-06, + "loss": 0.91, + "step": 285 + }, + { + "epoch": 0.014698324596566965, + "grad_norm": 1.3814449310302734, + "learning_rate": 4.897260273972603e-06, + "loss": 0.9226, + "step": 286 + }, + { + "epoch": 0.014749717339911605, + "grad_norm": 1.5602917671203613, + "learning_rate": 4.914383561643836e-06, + "loss": 0.9199, + "step": 287 + }, + { + "epoch": 0.014801110083256245, + "grad_norm": 1.2778757810592651, + "learning_rate": 4.931506849315069e-06, + "loss": 0.9865, + "step": 288 + }, + { + "epoch": 0.014852502826600883, + "grad_norm": 1.3599287271499634, + "learning_rate": 4.948630136986302e-06, + "loss": 0.9854, + "step": 289 + }, + { + "epoch": 0.014903895569945523, + "grad_norm": 1.4347745180130005, + "learning_rate": 4.965753424657534e-06, + "loss": 0.9572, + "step": 290 + }, + { + "epoch": 0.014955288313290164, + "grad_norm": 1.7798638343811035, + "learning_rate": 4.982876712328768e-06, + "loss": 0.9056, + "step": 291 + }, + { + "epoch": 0.015006681056634804, + "grad_norm": 1.6638036966323853, + "learning_rate": 5e-06, + "loss": 0.9379, + "step": 292 + }, + { + "epoch": 0.015058073799979444, + "grad_norm": 1.4086577892303467, + "learning_rate": 5.017123287671233e-06, + "loss": 0.9601, + "step": 293 + }, + { + "epoch": 0.015109466543324082, + "grad_norm": 1.3726524114608765, + "learning_rate": 5.0342465753424665e-06, + "loss": 0.8875, + "step": 294 + }, + { + "epoch": 0.015160859286668722, + "grad_norm": 1.4501359462738037, + "learning_rate": 5.051369863013698e-06, + "loss": 0.9678, + "step": 295 + }, + { + "epoch": 0.015212252030013362, + "grad_norm": 1.3798588514328003, + "learning_rate": 5.068493150684932e-06, + "loss": 0.982, + "step": 296 + }, + { + "epoch": 0.015263644773358002, + "grad_norm": 1.5755680799484253, + "learning_rate": 5.085616438356164e-06, + "loss": 0.9253, + "step": 297 + }, + { + "epoch": 0.01531503751670264, + "grad_norm": 0.8342780470848083, + "learning_rate": 5.102739726027398e-06, + "loss": 0.7714, + "step": 298 + }, + { + "epoch": 0.015366430260047281, + "grad_norm": 2.09627628326416, + "learning_rate": 5.1198630136986305e-06, + "loss": 0.9229, + "step": 299 + }, + { + "epoch": 0.015417823003391921, + "grad_norm": 1.3578782081604004, + "learning_rate": 5.136986301369864e-06, + "loss": 0.9518, + "step": 300 + }, + { + "epoch": 0.015469215746736561, + "grad_norm": 1.680617094039917, + "learning_rate": 5.154109589041097e-06, + "loss": 0.9778, + "step": 301 + }, + { + "epoch": 0.015520608490081201, + "grad_norm": 1.3613545894622803, + "learning_rate": 5.171232876712328e-06, + "loss": 0.9288, + "step": 302 + }, + { + "epoch": 0.01557200123342584, + "grad_norm": 1.403126835823059, + "learning_rate": 5.188356164383562e-06, + "loss": 0.8962, + "step": 303 + }, + { + "epoch": 0.01562339397677048, + "grad_norm": 1.5144846439361572, + "learning_rate": 5.2054794520547945e-06, + "loss": 0.9268, + "step": 304 + }, + { + "epoch": 0.01567478672011512, + "grad_norm": 1.4168750047683716, + "learning_rate": 5.222602739726028e-06, + "loss": 0.9976, + "step": 305 + }, + { + "epoch": 0.01572617946345976, + "grad_norm": 1.2738122940063477, + "learning_rate": 5.239726027397261e-06, + "loss": 0.9101, + "step": 306 + }, + { + "epoch": 0.0157775722068044, + "grad_norm": 1.5010753870010376, + "learning_rate": 5.256849315068494e-06, + "loss": 0.9762, + "step": 307 + }, + { + "epoch": 0.01582896495014904, + "grad_norm": 1.3929429054260254, + "learning_rate": 5.273972602739727e-06, + "loss": 0.9544, + "step": 308 + }, + { + "epoch": 0.01588035769349368, + "grad_norm": 1.0012789964675903, + "learning_rate": 5.2910958904109585e-06, + "loss": 0.7541, + "step": 309 + }, + { + "epoch": 0.015931750436838317, + "grad_norm": 1.007140874862671, + "learning_rate": 5.308219178082192e-06, + "loss": 0.7853, + "step": 310 + }, + { + "epoch": 0.015983143180182957, + "grad_norm": 0.9868108034133911, + "learning_rate": 5.325342465753425e-06, + "loss": 0.7855, + "step": 311 + }, + { + "epoch": 0.016034535923527597, + "grad_norm": 1.3924250602722168, + "learning_rate": 5.342465753424658e-06, + "loss": 0.9485, + "step": 312 + }, + { + "epoch": 0.016085928666872237, + "grad_norm": 1.3775475025177002, + "learning_rate": 5.359589041095891e-06, + "loss": 0.957, + "step": 313 + }, + { + "epoch": 0.016137321410216877, + "grad_norm": 1.4916207790374756, + "learning_rate": 5.376712328767124e-06, + "loss": 0.9483, + "step": 314 + }, + { + "epoch": 0.016188714153561518, + "grad_norm": 0.9480423331260681, + "learning_rate": 5.393835616438357e-06, + "loss": 0.7487, + "step": 315 + }, + { + "epoch": 0.016240106896906158, + "grad_norm": 1.4612795114517212, + "learning_rate": 5.41095890410959e-06, + "loss": 0.9515, + "step": 316 + }, + { + "epoch": 0.016291499640250798, + "grad_norm": 1.4398325681686401, + "learning_rate": 5.428082191780822e-06, + "loss": 0.9681, + "step": 317 + }, + { + "epoch": 0.016342892383595438, + "grad_norm": 1.3018696308135986, + "learning_rate": 5.445205479452055e-06, + "loss": 0.9708, + "step": 318 + }, + { + "epoch": 0.016394285126940075, + "grad_norm": 1.6679677963256836, + "learning_rate": 5.462328767123288e-06, + "loss": 0.9553, + "step": 319 + }, + { + "epoch": 0.016445677870284715, + "grad_norm": 1.6321253776550293, + "learning_rate": 5.479452054794521e-06, + "loss": 0.9212, + "step": 320 + }, + { + "epoch": 0.016497070613629355, + "grad_norm": 1.4244147539138794, + "learning_rate": 5.496575342465754e-06, + "loss": 0.9129, + "step": 321 + }, + { + "epoch": 0.016548463356973995, + "grad_norm": 1.3892810344696045, + "learning_rate": 5.513698630136987e-06, + "loss": 0.9415, + "step": 322 + }, + { + "epoch": 0.016599856100318635, + "grad_norm": 1.5489944219589233, + "learning_rate": 5.5308219178082205e-06, + "loss": 0.9071, + "step": 323 + }, + { + "epoch": 0.016651248843663275, + "grad_norm": 1.471104383468628, + "learning_rate": 5.547945205479452e-06, + "loss": 0.9639, + "step": 324 + }, + { + "epoch": 0.016702641587007915, + "grad_norm": 1.2654674053192139, + "learning_rate": 5.565068493150685e-06, + "loss": 0.9327, + "step": 325 + }, + { + "epoch": 0.016754034330352555, + "grad_norm": 1.489034652709961, + "learning_rate": 5.582191780821918e-06, + "loss": 0.9138, + "step": 326 + }, + { + "epoch": 0.016805427073697195, + "grad_norm": 1.2891885042190552, + "learning_rate": 5.599315068493151e-06, + "loss": 0.9494, + "step": 327 + }, + { + "epoch": 0.016856819817041832, + "grad_norm": 1.5262436866760254, + "learning_rate": 5.6164383561643845e-06, + "loss": 0.9181, + "step": 328 + }, + { + "epoch": 0.016908212560386472, + "grad_norm": 1.3686035871505737, + "learning_rate": 5.633561643835616e-06, + "loss": 0.9706, + "step": 329 + }, + { + "epoch": 0.016959605303731112, + "grad_norm": 1.0391219854354858, + "learning_rate": 5.6506849315068506e-06, + "loss": 0.7452, + "step": 330 + }, + { + "epoch": 0.017010998047075752, + "grad_norm": 1.3939989805221558, + "learning_rate": 5.667808219178082e-06, + "loss": 0.9516, + "step": 331 + }, + { + "epoch": 0.017062390790420393, + "grad_norm": 1.5979833602905273, + "learning_rate": 5.684931506849316e-06, + "loss": 0.9319, + "step": 332 + }, + { + "epoch": 0.017113783533765033, + "grad_norm": 1.290928602218628, + "learning_rate": 5.7020547945205485e-06, + "loss": 0.9187, + "step": 333 + }, + { + "epoch": 0.017165176277109673, + "grad_norm": 1.3770060539245605, + "learning_rate": 5.719178082191781e-06, + "loss": 0.8851, + "step": 334 + }, + { + "epoch": 0.017216569020454313, + "grad_norm": 1.4167877435684204, + "learning_rate": 5.736301369863015e-06, + "loss": 0.8445, + "step": 335 + }, + { + "epoch": 0.017267961763798953, + "grad_norm": 1.4726027250289917, + "learning_rate": 5.753424657534246e-06, + "loss": 0.9331, + "step": 336 + }, + { + "epoch": 0.01731935450714359, + "grad_norm": 1.5692436695098877, + "learning_rate": 5.77054794520548e-06, + "loss": 0.9434, + "step": 337 + }, + { + "epoch": 0.01737074725048823, + "grad_norm": 1.3294492959976196, + "learning_rate": 5.7876712328767125e-06, + "loss": 0.8812, + "step": 338 + }, + { + "epoch": 0.01742213999383287, + "grad_norm": 1.3776077032089233, + "learning_rate": 5.804794520547946e-06, + "loss": 0.9406, + "step": 339 + }, + { + "epoch": 0.01747353273717751, + "grad_norm": 1.1556237936019897, + "learning_rate": 5.821917808219179e-06, + "loss": 0.7863, + "step": 340 + }, + { + "epoch": 0.01752492548052215, + "grad_norm": 1.296207308769226, + "learning_rate": 5.839041095890411e-06, + "loss": 0.8985, + "step": 341 + }, + { + "epoch": 0.01757631822386679, + "grad_norm": 0.9795817732810974, + "learning_rate": 5.856164383561645e-06, + "loss": 0.8046, + "step": 342 + }, + { + "epoch": 0.01762771096721143, + "grad_norm": 1.393006443977356, + "learning_rate": 5.8732876712328765e-06, + "loss": 0.9877, + "step": 343 + }, + { + "epoch": 0.01767910371055607, + "grad_norm": 1.3946648836135864, + "learning_rate": 5.89041095890411e-06, + "loss": 0.8758, + "step": 344 + }, + { + "epoch": 0.01773049645390071, + "grad_norm": 1.7366056442260742, + "learning_rate": 5.907534246575343e-06, + "loss": 0.8851, + "step": 345 + }, + { + "epoch": 0.01778188919724535, + "grad_norm": 1.3458216190338135, + "learning_rate": 5.924657534246576e-06, + "loss": 0.9078, + "step": 346 + }, + { + "epoch": 0.017833281940589987, + "grad_norm": 1.3426355123519897, + "learning_rate": 5.941780821917809e-06, + "loss": 0.9457, + "step": 347 + }, + { + "epoch": 0.017884674683934627, + "grad_norm": 1.40995454788208, + "learning_rate": 5.958904109589042e-06, + "loss": 0.8889, + "step": 348 + }, + { + "epoch": 0.017936067427279268, + "grad_norm": 1.346014142036438, + "learning_rate": 5.976027397260275e-06, + "loss": 0.8812, + "step": 349 + }, + { + "epoch": 0.017987460170623908, + "grad_norm": 1.356227993965149, + "learning_rate": 5.993150684931507e-06, + "loss": 0.9212, + "step": 350 + }, + { + "epoch": 0.018038852913968548, + "grad_norm": 1.462105631828308, + "learning_rate": 6.01027397260274e-06, + "loss": 0.8681, + "step": 351 + }, + { + "epoch": 0.018090245657313188, + "grad_norm": 1.391425609588623, + "learning_rate": 6.027397260273973e-06, + "loss": 0.9499, + "step": 352 + }, + { + "epoch": 0.018141638400657828, + "grad_norm": 1.4402172565460205, + "learning_rate": 6.044520547945206e-06, + "loss": 0.9208, + "step": 353 + }, + { + "epoch": 0.018193031144002468, + "grad_norm": 1.3497949838638306, + "learning_rate": 6.061643835616439e-06, + "loss": 0.9442, + "step": 354 + }, + { + "epoch": 0.018244423887347108, + "grad_norm": 2.0256710052490234, + "learning_rate": 6.078767123287672e-06, + "loss": 0.933, + "step": 355 + }, + { + "epoch": 0.018295816630691745, + "grad_norm": 1.3752402067184448, + "learning_rate": 6.095890410958905e-06, + "loss": 0.989, + "step": 356 + }, + { + "epoch": 0.018347209374036385, + "grad_norm": 1.2419018745422363, + "learning_rate": 6.113013698630137e-06, + "loss": 0.932, + "step": 357 + }, + { + "epoch": 0.018398602117381025, + "grad_norm": 1.3438737392425537, + "learning_rate": 6.13013698630137e-06, + "loss": 0.902, + "step": 358 + }, + { + "epoch": 0.018449994860725665, + "grad_norm": 1.2710260152816772, + "learning_rate": 6.147260273972603e-06, + "loss": 0.9042, + "step": 359 + }, + { + "epoch": 0.018501387604070305, + "grad_norm": 1.0422375202178955, + "learning_rate": 6.164383561643836e-06, + "loss": 0.7595, + "step": 360 + }, + { + "epoch": 0.018552780347414945, + "grad_norm": 1.309249997138977, + "learning_rate": 6.181506849315069e-06, + "loss": 0.8895, + "step": 361 + }, + { + "epoch": 0.018604173090759586, + "grad_norm": 1.3443186283111572, + "learning_rate": 6.198630136986302e-06, + "loss": 0.9764, + "step": 362 + }, + { + "epoch": 0.018655565834104226, + "grad_norm": 1.0971837043762207, + "learning_rate": 6.215753424657535e-06, + "loss": 0.7716, + "step": 363 + }, + { + "epoch": 0.018706958577448866, + "grad_norm": 1.3132166862487793, + "learning_rate": 6.2328767123287685e-06, + "loss": 0.9934, + "step": 364 + }, + { + "epoch": 0.018758351320793502, + "grad_norm": 0.8206494450569153, + "learning_rate": 6.25e-06, + "loss": 0.8413, + "step": 365 + }, + { + "epoch": 0.018809744064138142, + "grad_norm": 1.3536328077316284, + "learning_rate": 6.267123287671233e-06, + "loss": 0.9598, + "step": 366 + }, + { + "epoch": 0.018861136807482783, + "grad_norm": 1.3234273195266724, + "learning_rate": 6.284246575342466e-06, + "loss": 0.9691, + "step": 367 + }, + { + "epoch": 0.018912529550827423, + "grad_norm": 1.3051198720932007, + "learning_rate": 6.301369863013699e-06, + "loss": 1.0015, + "step": 368 + }, + { + "epoch": 0.018963922294172063, + "grad_norm": 2.0231826305389404, + "learning_rate": 6.3184931506849325e-06, + "loss": 1.0115, + "step": 369 + }, + { + "epoch": 0.019015315037516703, + "grad_norm": 1.624998688697815, + "learning_rate": 6.335616438356164e-06, + "loss": 0.9672, + "step": 370 + }, + { + "epoch": 0.019066707780861343, + "grad_norm": 1.3017244338989258, + "learning_rate": 6.352739726027398e-06, + "loss": 0.8957, + "step": 371 + }, + { + "epoch": 0.019118100524205983, + "grad_norm": 1.3124035596847534, + "learning_rate": 6.36986301369863e-06, + "loss": 0.886, + "step": 372 + }, + { + "epoch": 0.019169493267550623, + "grad_norm": 1.5079002380371094, + "learning_rate": 6.386986301369864e-06, + "loss": 0.8884, + "step": 373 + }, + { + "epoch": 0.01922088601089526, + "grad_norm": 0.9560659527778625, + "learning_rate": 6.4041095890410965e-06, + "loss": 0.7692, + "step": 374 + }, + { + "epoch": 0.0192722787542399, + "grad_norm": 1.3236057758331299, + "learning_rate": 6.421232876712329e-06, + "loss": 0.8956, + "step": 375 + }, + { + "epoch": 0.01932367149758454, + "grad_norm": 1.3703919649124146, + "learning_rate": 6.438356164383563e-06, + "loss": 0.9326, + "step": 376 + }, + { + "epoch": 0.01937506424092918, + "grad_norm": 1.3508483171463013, + "learning_rate": 6.455479452054794e-06, + "loss": 0.915, + "step": 377 + }, + { + "epoch": 0.01942645698427382, + "grad_norm": 0.864358127117157, + "learning_rate": 6.472602739726028e-06, + "loss": 0.775, + "step": 378 + }, + { + "epoch": 0.01947784972761846, + "grad_norm": 1.316990613937378, + "learning_rate": 6.4897260273972605e-06, + "loss": 0.8833, + "step": 379 + }, + { + "epoch": 0.0195292424709631, + "grad_norm": 1.3886998891830444, + "learning_rate": 6.506849315068494e-06, + "loss": 0.9576, + "step": 380 + }, + { + "epoch": 0.01958063521430774, + "grad_norm": 0.8977193236351013, + "learning_rate": 6.523972602739727e-06, + "loss": 0.7429, + "step": 381 + }, + { + "epoch": 0.01963202795765238, + "grad_norm": 1.2974063158035278, + "learning_rate": 6.541095890410959e-06, + "loss": 0.9836, + "step": 382 + }, + { + "epoch": 0.01968342070099702, + "grad_norm": 1.2336817979812622, + "learning_rate": 6.558219178082193e-06, + "loss": 0.8745, + "step": 383 + }, + { + "epoch": 0.019734813444341658, + "grad_norm": 1.3238983154296875, + "learning_rate": 6.5753424657534245e-06, + "loss": 0.9416, + "step": 384 + }, + { + "epoch": 0.019786206187686298, + "grad_norm": 1.4125125408172607, + "learning_rate": 6.592465753424658e-06, + "loss": 0.9572, + "step": 385 + }, + { + "epoch": 0.019837598931030938, + "grad_norm": 1.3972201347351074, + "learning_rate": 6.609589041095891e-06, + "loss": 0.9207, + "step": 386 + }, + { + "epoch": 0.019888991674375578, + "grad_norm": 0.9599208831787109, + "learning_rate": 6.626712328767124e-06, + "loss": 0.819, + "step": 387 + }, + { + "epoch": 0.019940384417720218, + "grad_norm": 1.2984942197799683, + "learning_rate": 6.643835616438357e-06, + "loss": 0.9084, + "step": 388 + }, + { + "epoch": 0.019991777161064858, + "grad_norm": 1.420408010482788, + "learning_rate": 6.66095890410959e-06, + "loss": 0.9027, + "step": 389 + }, + { + "epoch": 0.020043169904409498, + "grad_norm": 1.3168015480041504, + "learning_rate": 6.678082191780823e-06, + "loss": 0.9178, + "step": 390 + }, + { + "epoch": 0.02009456264775414, + "grad_norm": 1.3716025352478027, + "learning_rate": 6.695205479452055e-06, + "loss": 0.9449, + "step": 391 + }, + { + "epoch": 0.02014595539109878, + "grad_norm": 1.3068482875823975, + "learning_rate": 6.712328767123288e-06, + "loss": 0.8779, + "step": 392 + }, + { + "epoch": 0.020197348134443415, + "grad_norm": 1.261688470840454, + "learning_rate": 6.729452054794521e-06, + "loss": 0.9063, + "step": 393 + }, + { + "epoch": 0.020248740877788055, + "grad_norm": 1.3852274417877197, + "learning_rate": 6.746575342465754e-06, + "loss": 0.9187, + "step": 394 + }, + { + "epoch": 0.020300133621132695, + "grad_norm": 1.3238940238952637, + "learning_rate": 6.763698630136987e-06, + "loss": 0.8916, + "step": 395 + }, + { + "epoch": 0.020351526364477335, + "grad_norm": 1.2999320030212402, + "learning_rate": 6.78082191780822e-06, + "loss": 0.9293, + "step": 396 + }, + { + "epoch": 0.020402919107821976, + "grad_norm": 1.3329417705535889, + "learning_rate": 6.797945205479453e-06, + "loss": 0.9258, + "step": 397 + }, + { + "epoch": 0.020454311851166616, + "grad_norm": 1.3602274656295776, + "learning_rate": 6.815068493150685e-06, + "loss": 0.9048, + "step": 398 + }, + { + "epoch": 0.020505704594511256, + "grad_norm": 1.3849248886108398, + "learning_rate": 6.832191780821918e-06, + "loss": 0.9595, + "step": 399 + }, + { + "epoch": 0.020557097337855896, + "grad_norm": 1.168339490890503, + "learning_rate": 6.849315068493151e-06, + "loss": 0.7628, + "step": 400 + }, + { + "epoch": 0.020608490081200536, + "grad_norm": 1.285650610923767, + "learning_rate": 6.866438356164384e-06, + "loss": 0.9392, + "step": 401 + }, + { + "epoch": 0.020659882824545173, + "grad_norm": 1.033577561378479, + "learning_rate": 6.883561643835617e-06, + "loss": 0.7979, + "step": 402 + }, + { + "epoch": 0.020711275567889813, + "grad_norm": 1.316782832145691, + "learning_rate": 6.9006849315068505e-06, + "loss": 0.9075, + "step": 403 + }, + { + "epoch": 0.020762668311234453, + "grad_norm": 1.3603746891021729, + "learning_rate": 6.917808219178082e-06, + "loss": 0.9979, + "step": 404 + }, + { + "epoch": 0.020814061054579093, + "grad_norm": 0.8924427032470703, + "learning_rate": 6.9349315068493166e-06, + "loss": 0.7678, + "step": 405 + }, + { + "epoch": 0.020865453797923733, + "grad_norm": 1.3670562505722046, + "learning_rate": 6.952054794520548e-06, + "loss": 0.9005, + "step": 406 + }, + { + "epoch": 0.020916846541268373, + "grad_norm": 1.270111083984375, + "learning_rate": 6.969178082191781e-06, + "loss": 0.9192, + "step": 407 + }, + { + "epoch": 0.020968239284613013, + "grad_norm": 1.3472708463668823, + "learning_rate": 6.9863013698630145e-06, + "loss": 0.9591, + "step": 408 + }, + { + "epoch": 0.021019632027957653, + "grad_norm": 1.2749837636947632, + "learning_rate": 7.003424657534247e-06, + "loss": 0.9058, + "step": 409 + }, + { + "epoch": 0.021071024771302294, + "grad_norm": 1.2710347175598145, + "learning_rate": 7.020547945205481e-06, + "loss": 0.9332, + "step": 410 + }, + { + "epoch": 0.02112241751464693, + "grad_norm": 1.3473694324493408, + "learning_rate": 7.037671232876712e-06, + "loss": 0.8746, + "step": 411 + }, + { + "epoch": 0.02117381025799157, + "grad_norm": 1.4851934909820557, + "learning_rate": 7.054794520547946e-06, + "loss": 0.9648, + "step": 412 + }, + { + "epoch": 0.02122520300133621, + "grad_norm": 0.9628872275352478, + "learning_rate": 7.0719178082191785e-06, + "loss": 0.7489, + "step": 413 + }, + { + "epoch": 0.02127659574468085, + "grad_norm": 1.3260000944137573, + "learning_rate": 7.089041095890411e-06, + "loss": 0.9467, + "step": 414 + }, + { + "epoch": 0.02132798848802549, + "grad_norm": 0.8726317286491394, + "learning_rate": 7.106164383561645e-06, + "loss": 0.7638, + "step": 415 + }, + { + "epoch": 0.02137938123137013, + "grad_norm": 0.8331014513969421, + "learning_rate": 7.123287671232877e-06, + "loss": 0.7773, + "step": 416 + }, + { + "epoch": 0.02143077397471477, + "grad_norm": 1.250875473022461, + "learning_rate": 7.140410958904111e-06, + "loss": 0.8514, + "step": 417 + }, + { + "epoch": 0.02148216671805941, + "grad_norm": 1.4555552005767822, + "learning_rate": 7.1575342465753425e-06, + "loss": 0.9066, + "step": 418 + }, + { + "epoch": 0.02153355946140405, + "grad_norm": 1.4497421979904175, + "learning_rate": 7.174657534246576e-06, + "loss": 0.9651, + "step": 419 + }, + { + "epoch": 0.021584952204748688, + "grad_norm": 1.360961675643921, + "learning_rate": 7.191780821917809e-06, + "loss": 0.9091, + "step": 420 + }, + { + "epoch": 0.021636344948093328, + "grad_norm": 0.9046820402145386, + "learning_rate": 7.208904109589042e-06, + "loss": 0.7895, + "step": 421 + }, + { + "epoch": 0.021687737691437968, + "grad_norm": 0.9685566425323486, + "learning_rate": 7.226027397260275e-06, + "loss": 0.8109, + "step": 422 + }, + { + "epoch": 0.021739130434782608, + "grad_norm": 1.4642139673233032, + "learning_rate": 7.243150684931507e-06, + "loss": 0.8898, + "step": 423 + }, + { + "epoch": 0.021790523178127248, + "grad_norm": 0.941369891166687, + "learning_rate": 7.260273972602741e-06, + "loss": 0.7847, + "step": 424 + }, + { + "epoch": 0.02184191592147189, + "grad_norm": 1.502695918083191, + "learning_rate": 7.277397260273973e-06, + "loss": 0.9316, + "step": 425 + }, + { + "epoch": 0.02189330866481653, + "grad_norm": 1.435257911682129, + "learning_rate": 7.294520547945206e-06, + "loss": 0.86, + "step": 426 + }, + { + "epoch": 0.02194470140816117, + "grad_norm": 0.8365346789360046, + "learning_rate": 7.311643835616439e-06, + "loss": 0.7686, + "step": 427 + }, + { + "epoch": 0.02199609415150581, + "grad_norm": 1.302642583847046, + "learning_rate": 7.328767123287672e-06, + "loss": 0.9244, + "step": 428 + }, + { + "epoch": 0.02204748689485045, + "grad_norm": 1.379760980606079, + "learning_rate": 7.345890410958905e-06, + "loss": 0.923, + "step": 429 + }, + { + "epoch": 0.022098879638195085, + "grad_norm": 0.8242074847221375, + "learning_rate": 7.3630136986301374e-06, + "loss": 0.7484, + "step": 430 + }, + { + "epoch": 0.022150272381539726, + "grad_norm": 1.5371723175048828, + "learning_rate": 7.380136986301371e-06, + "loss": 0.8992, + "step": 431 + }, + { + "epoch": 0.022201665124884366, + "grad_norm": 1.2551140785217285, + "learning_rate": 7.397260273972603e-06, + "loss": 0.9111, + "step": 432 + }, + { + "epoch": 0.022253057868229006, + "grad_norm": 1.2591822147369385, + "learning_rate": 7.414383561643836e-06, + "loss": 0.9252, + "step": 433 + }, + { + "epoch": 0.022304450611573646, + "grad_norm": 1.4434462785720825, + "learning_rate": 7.431506849315069e-06, + "loss": 0.8905, + "step": 434 + }, + { + "epoch": 0.022355843354918286, + "grad_norm": 1.3594499826431274, + "learning_rate": 7.448630136986302e-06, + "loss": 0.9449, + "step": 435 + }, + { + "epoch": 0.022407236098262926, + "grad_norm": 1.3536311388015747, + "learning_rate": 7.465753424657535e-06, + "loss": 0.8685, + "step": 436 + }, + { + "epoch": 0.022458628841607566, + "grad_norm": 1.2333133220672607, + "learning_rate": 7.482876712328768e-06, + "loss": 0.8983, + "step": 437 + }, + { + "epoch": 0.022510021584952206, + "grad_norm": 1.5317007303237915, + "learning_rate": 7.500000000000001e-06, + "loss": 0.9548, + "step": 438 + }, + { + "epoch": 0.022561414328296843, + "grad_norm": 1.0413998365402222, + "learning_rate": 7.517123287671233e-06, + "loss": 0.8, + "step": 439 + }, + { + "epoch": 0.022612807071641483, + "grad_norm": 1.3448002338409424, + "learning_rate": 7.534246575342466e-06, + "loss": 0.952, + "step": 440 + }, + { + "epoch": 0.022664199814986123, + "grad_norm": 1.3198351860046387, + "learning_rate": 7.551369863013699e-06, + "loss": 0.9142, + "step": 441 + }, + { + "epoch": 0.022715592558330763, + "grad_norm": 0.9853560328483582, + "learning_rate": 7.568493150684932e-06, + "loss": 0.7682, + "step": 442 + }, + { + "epoch": 0.022766985301675403, + "grad_norm": 1.3470577001571655, + "learning_rate": 7.585616438356165e-06, + "loss": 0.8641, + "step": 443 + }, + { + "epoch": 0.022818378045020044, + "grad_norm": 1.5047709941864014, + "learning_rate": 7.6027397260273985e-06, + "loss": 0.8773, + "step": 444 + }, + { + "epoch": 0.022869770788364684, + "grad_norm": 1.2822844982147217, + "learning_rate": 7.61986301369863e-06, + "loss": 0.9194, + "step": 445 + }, + { + "epoch": 0.022921163531709324, + "grad_norm": 1.2968412637710571, + "learning_rate": 7.636986301369864e-06, + "loss": 0.877, + "step": 446 + }, + { + "epoch": 0.022972556275053964, + "grad_norm": 1.3752939701080322, + "learning_rate": 7.654109589041097e-06, + "loss": 0.927, + "step": 447 + }, + { + "epoch": 0.0230239490183986, + "grad_norm": 1.317786455154419, + "learning_rate": 7.671232876712329e-06, + "loss": 0.9084, + "step": 448 + }, + { + "epoch": 0.02307534176174324, + "grad_norm": 1.365004539489746, + "learning_rate": 7.688356164383563e-06, + "loss": 0.8406, + "step": 449 + }, + { + "epoch": 0.02312673450508788, + "grad_norm": 1.2234840393066406, + "learning_rate": 7.705479452054794e-06, + "loss": 0.8853, + "step": 450 + }, + { + "epoch": 0.02317812724843252, + "grad_norm": 1.3845592737197876, + "learning_rate": 7.722602739726028e-06, + "loss": 0.8775, + "step": 451 + }, + { + "epoch": 0.02322951999177716, + "grad_norm": 1.2836191654205322, + "learning_rate": 7.739726027397261e-06, + "loss": 0.8983, + "step": 452 + }, + { + "epoch": 0.0232809127351218, + "grad_norm": 1.307465672492981, + "learning_rate": 7.756849315068495e-06, + "loss": 0.9377, + "step": 453 + }, + { + "epoch": 0.02333230547846644, + "grad_norm": 1.323665976524353, + "learning_rate": 7.773972602739727e-06, + "loss": 0.9041, + "step": 454 + }, + { + "epoch": 0.02338369822181108, + "grad_norm": 0.9895176887512207, + "learning_rate": 7.791095890410958e-06, + "loss": 0.781, + "step": 455 + }, + { + "epoch": 0.02343509096515572, + "grad_norm": 1.4118356704711914, + "learning_rate": 7.808219178082192e-06, + "loss": 0.9338, + "step": 456 + }, + { + "epoch": 0.023486483708500358, + "grad_norm": 1.2677648067474365, + "learning_rate": 7.825342465753425e-06, + "loss": 0.8752, + "step": 457 + }, + { + "epoch": 0.023537876451844998, + "grad_norm": 0.8953911662101746, + "learning_rate": 7.842465753424659e-06, + "loss": 0.7926, + "step": 458 + }, + { + "epoch": 0.02358926919518964, + "grad_norm": 0.9235230684280396, + "learning_rate": 7.85958904109589e-06, + "loss": 0.779, + "step": 459 + }, + { + "epoch": 0.02364066193853428, + "grad_norm": 1.4078550338745117, + "learning_rate": 7.876712328767124e-06, + "loss": 0.8697, + "step": 460 + }, + { + "epoch": 0.02369205468187892, + "grad_norm": 1.4137729406356812, + "learning_rate": 7.893835616438357e-06, + "loss": 0.9036, + "step": 461 + }, + { + "epoch": 0.02374344742522356, + "grad_norm": 1.3060131072998047, + "learning_rate": 7.910958904109591e-06, + "loss": 0.889, + "step": 462 + }, + { + "epoch": 0.0237948401685682, + "grad_norm": 1.2938730716705322, + "learning_rate": 7.928082191780823e-06, + "loss": 0.8787, + "step": 463 + }, + { + "epoch": 0.02384623291191284, + "grad_norm": 1.2133954763412476, + "learning_rate": 7.945205479452055e-06, + "loss": 0.8951, + "step": 464 + }, + { + "epoch": 0.02389762565525748, + "grad_norm": 1.0642343759536743, + "learning_rate": 7.962328767123288e-06, + "loss": 0.7783, + "step": 465 + }, + { + "epoch": 0.02394901839860212, + "grad_norm": 1.457010269165039, + "learning_rate": 7.979452054794521e-06, + "loss": 0.9638, + "step": 466 + }, + { + "epoch": 0.024000411141946756, + "grad_norm": 1.1099587678909302, + "learning_rate": 7.996575342465755e-06, + "loss": 0.7329, + "step": 467 + }, + { + "epoch": 0.024051803885291396, + "grad_norm": 1.3938628435134888, + "learning_rate": 8.013698630136987e-06, + "loss": 0.9033, + "step": 468 + }, + { + "epoch": 0.024103196628636036, + "grad_norm": 0.8189371824264526, + "learning_rate": 8.03082191780822e-06, + "loss": 0.7044, + "step": 469 + }, + { + "epoch": 0.024154589371980676, + "grad_norm": 1.444404125213623, + "learning_rate": 8.047945205479452e-06, + "loss": 0.9838, + "step": 470 + }, + { + "epoch": 0.024205982115325316, + "grad_norm": 1.3004415035247803, + "learning_rate": 8.065068493150686e-06, + "loss": 0.9098, + "step": 471 + }, + { + "epoch": 0.024257374858669956, + "grad_norm": 1.2714608907699585, + "learning_rate": 8.082191780821919e-06, + "loss": 0.8452, + "step": 472 + }, + { + "epoch": 0.024308767602014596, + "grad_norm": 0.916312575340271, + "learning_rate": 8.09931506849315e-06, + "loss": 0.808, + "step": 473 + }, + { + "epoch": 0.024360160345359236, + "grad_norm": 1.299035906791687, + "learning_rate": 8.116438356164384e-06, + "loss": 0.9937, + "step": 474 + }, + { + "epoch": 0.024411553088703877, + "grad_norm": 1.332391381263733, + "learning_rate": 8.133561643835616e-06, + "loss": 0.9253, + "step": 475 + }, + { + "epoch": 0.024462945832048513, + "grad_norm": 1.3227243423461914, + "learning_rate": 8.150684931506851e-06, + "loss": 0.9039, + "step": 476 + }, + { + "epoch": 0.024514338575393153, + "grad_norm": 0.9378124475479126, + "learning_rate": 8.167808219178083e-06, + "loss": 0.7843, + "step": 477 + }, + { + "epoch": 0.024565731318737793, + "grad_norm": 1.27048659324646, + "learning_rate": 8.184931506849316e-06, + "loss": 0.8905, + "step": 478 + }, + { + "epoch": 0.024617124062082434, + "grad_norm": 1.4072848558425903, + "learning_rate": 8.202054794520548e-06, + "loss": 0.9027, + "step": 479 + }, + { + "epoch": 0.024668516805427074, + "grad_norm": 1.315452218055725, + "learning_rate": 8.219178082191782e-06, + "loss": 0.8958, + "step": 480 + }, + { + "epoch": 0.024719909548771714, + "grad_norm": 1.2800546884536743, + "learning_rate": 8.236301369863015e-06, + "loss": 0.9197, + "step": 481 + }, + { + "epoch": 0.024771302292116354, + "grad_norm": 0.9843120574951172, + "learning_rate": 8.253424657534247e-06, + "loss": 0.722, + "step": 482 + }, + { + "epoch": 0.024822695035460994, + "grad_norm": 1.2540639638900757, + "learning_rate": 8.27054794520548e-06, + "loss": 0.8471, + "step": 483 + }, + { + "epoch": 0.024874087778805634, + "grad_norm": 1.417625069618225, + "learning_rate": 8.287671232876712e-06, + "loss": 0.938, + "step": 484 + }, + { + "epoch": 0.02492548052215027, + "grad_norm": 1.3447060585021973, + "learning_rate": 8.304794520547946e-06, + "loss": 0.9048, + "step": 485 + }, + { + "epoch": 0.02497687326549491, + "grad_norm": 1.3468190431594849, + "learning_rate": 8.32191780821918e-06, + "loss": 0.95, + "step": 486 + }, + { + "epoch": 0.02502826600883955, + "grad_norm": 1.3093456029891968, + "learning_rate": 8.339041095890411e-06, + "loss": 0.8961, + "step": 487 + }, + { + "epoch": 0.02507965875218419, + "grad_norm": 1.320758581161499, + "learning_rate": 8.356164383561644e-06, + "loss": 0.8272, + "step": 488 + }, + { + "epoch": 0.02513105149552883, + "grad_norm": 1.4054956436157227, + "learning_rate": 8.373287671232876e-06, + "loss": 0.9157, + "step": 489 + }, + { + "epoch": 0.02518244423887347, + "grad_norm": 1.3265403509140015, + "learning_rate": 8.39041095890411e-06, + "loss": 0.8638, + "step": 490 + }, + { + "epoch": 0.02523383698221811, + "grad_norm": 1.2834113836288452, + "learning_rate": 8.407534246575343e-06, + "loss": 0.8892, + "step": 491 + }, + { + "epoch": 0.02528522972556275, + "grad_norm": 1.4444869756698608, + "learning_rate": 8.424657534246577e-06, + "loss": 0.9675, + "step": 492 + }, + { + "epoch": 0.02533662246890739, + "grad_norm": 1.3552523851394653, + "learning_rate": 8.441780821917808e-06, + "loss": 0.8755, + "step": 493 + }, + { + "epoch": 0.02538801521225203, + "grad_norm": 1.2685366868972778, + "learning_rate": 8.458904109589042e-06, + "loss": 0.8719, + "step": 494 + }, + { + "epoch": 0.02543940795559667, + "grad_norm": 1.3015787601470947, + "learning_rate": 8.476027397260275e-06, + "loss": 0.8681, + "step": 495 + }, + { + "epoch": 0.02549080069894131, + "grad_norm": 0.9308233261108398, + "learning_rate": 8.493150684931507e-06, + "loss": 0.7643, + "step": 496 + }, + { + "epoch": 0.02554219344228595, + "grad_norm": 1.3988115787506104, + "learning_rate": 8.51027397260274e-06, + "loss": 0.9531, + "step": 497 + }, + { + "epoch": 0.02559358618563059, + "grad_norm": 1.3057786226272583, + "learning_rate": 8.527397260273972e-06, + "loss": 0.94, + "step": 498 + }, + { + "epoch": 0.02564497892897523, + "grad_norm": 1.245055079460144, + "learning_rate": 8.544520547945206e-06, + "loss": 0.9073, + "step": 499 + }, + { + "epoch": 0.02569637167231987, + "grad_norm": 1.2349739074707031, + "learning_rate": 8.56164383561644e-06, + "loss": 0.8856, + "step": 500 + }, + { + "epoch": 0.02574776441566451, + "grad_norm": 1.3133636713027954, + "learning_rate": 8.578767123287673e-06, + "loss": 0.9515, + "step": 501 + }, + { + "epoch": 0.02579915715900915, + "grad_norm": 1.3320437669754028, + "learning_rate": 8.595890410958905e-06, + "loss": 0.9599, + "step": 502 + }, + { + "epoch": 0.02585054990235379, + "grad_norm": 1.3448456525802612, + "learning_rate": 8.613013698630136e-06, + "loss": 0.919, + "step": 503 + }, + { + "epoch": 0.025901942645698426, + "grad_norm": 1.631572961807251, + "learning_rate": 8.63013698630137e-06, + "loss": 0.8978, + "step": 504 + }, + { + "epoch": 0.025953335389043066, + "grad_norm": 1.336680293083191, + "learning_rate": 8.647260273972603e-06, + "loss": 0.9386, + "step": 505 + }, + { + "epoch": 0.026004728132387706, + "grad_norm": 1.5338842868804932, + "learning_rate": 8.664383561643837e-06, + "loss": 0.9319, + "step": 506 + }, + { + "epoch": 0.026056120875732346, + "grad_norm": 1.3168104887008667, + "learning_rate": 8.681506849315069e-06, + "loss": 0.9353, + "step": 507 + }, + { + "epoch": 0.026107513619076986, + "grad_norm": 1.422582983970642, + "learning_rate": 8.698630136986302e-06, + "loss": 0.8878, + "step": 508 + }, + { + "epoch": 0.026158906362421627, + "grad_norm": 1.3373196125030518, + "learning_rate": 8.715753424657536e-06, + "loss": 0.8858, + "step": 509 + }, + { + "epoch": 0.026210299105766267, + "grad_norm": 1.4007619619369507, + "learning_rate": 8.732876712328769e-06, + "loss": 0.9865, + "step": 510 + }, + { + "epoch": 0.026261691849110907, + "grad_norm": 1.2220408916473389, + "learning_rate": 8.750000000000001e-06, + "loss": 0.843, + "step": 511 + }, + { + "epoch": 0.026313084592455547, + "grad_norm": 1.3026251792907715, + "learning_rate": 8.767123287671233e-06, + "loss": 0.886, + "step": 512 + }, + { + "epoch": 0.026364477335800184, + "grad_norm": 1.3168457746505737, + "learning_rate": 8.784246575342466e-06, + "loss": 0.8811, + "step": 513 + }, + { + "epoch": 0.026415870079144824, + "grad_norm": 0.8794660568237305, + "learning_rate": 8.8013698630137e-06, + "loss": 0.745, + "step": 514 + }, + { + "epoch": 0.026467262822489464, + "grad_norm": 1.1745655536651611, + "learning_rate": 8.818493150684933e-06, + "loss": 0.8788, + "step": 515 + }, + { + "epoch": 0.026518655565834104, + "grad_norm": 1.6665301322937012, + "learning_rate": 8.835616438356165e-06, + "loss": 0.9026, + "step": 516 + }, + { + "epoch": 0.026570048309178744, + "grad_norm": 0.81757652759552, + "learning_rate": 8.852739726027398e-06, + "loss": 0.7922, + "step": 517 + }, + { + "epoch": 0.026621441052523384, + "grad_norm": 1.3101197481155396, + "learning_rate": 8.86986301369863e-06, + "loss": 0.9025, + "step": 518 + }, + { + "epoch": 0.026672833795868024, + "grad_norm": 1.31574285030365, + "learning_rate": 8.886986301369864e-06, + "loss": 0.8853, + "step": 519 + }, + { + "epoch": 0.026724226539212664, + "grad_norm": 1.3258775472640991, + "learning_rate": 8.904109589041097e-06, + "loss": 0.9031, + "step": 520 + }, + { + "epoch": 0.026775619282557304, + "grad_norm": 1.3346577882766724, + "learning_rate": 8.921232876712329e-06, + "loss": 0.8933, + "step": 521 + }, + { + "epoch": 0.02682701202590194, + "grad_norm": 1.2473400831222534, + "learning_rate": 8.938356164383562e-06, + "loss": 0.895, + "step": 522 + }, + { + "epoch": 0.02687840476924658, + "grad_norm": 1.393160104751587, + "learning_rate": 8.955479452054794e-06, + "loss": 0.9309, + "step": 523 + }, + { + "epoch": 0.02692979751259122, + "grad_norm": 1.2653206586837769, + "learning_rate": 8.972602739726028e-06, + "loss": 0.8854, + "step": 524 + }, + { + "epoch": 0.02698119025593586, + "grad_norm": 1.2554978132247925, + "learning_rate": 8.989726027397261e-06, + "loss": 0.8798, + "step": 525 + }, + { + "epoch": 0.0270325829992805, + "grad_norm": 1.053855061531067, + "learning_rate": 9.006849315068495e-06, + "loss": 0.7405, + "step": 526 + }, + { + "epoch": 0.02708397574262514, + "grad_norm": 1.3175573348999023, + "learning_rate": 9.023972602739726e-06, + "loss": 0.9455, + "step": 527 + }, + { + "epoch": 0.027135368485969782, + "grad_norm": 1.4462525844573975, + "learning_rate": 9.04109589041096e-06, + "loss": 0.8866, + "step": 528 + }, + { + "epoch": 0.027186761229314422, + "grad_norm": 1.3384977579116821, + "learning_rate": 9.058219178082193e-06, + "loss": 0.928, + "step": 529 + }, + { + "epoch": 0.027238153972659062, + "grad_norm": 1.3296003341674805, + "learning_rate": 9.075342465753425e-06, + "loss": 0.9485, + "step": 530 + }, + { + "epoch": 0.0272895467160037, + "grad_norm": 1.2948641777038574, + "learning_rate": 9.092465753424659e-06, + "loss": 0.9269, + "step": 531 + }, + { + "epoch": 0.02734093945934834, + "grad_norm": 1.2651878595352173, + "learning_rate": 9.10958904109589e-06, + "loss": 0.8796, + "step": 532 + }, + { + "epoch": 0.02739233220269298, + "grad_norm": 1.3631980419158936, + "learning_rate": 9.126712328767124e-06, + "loss": 0.875, + "step": 533 + }, + { + "epoch": 0.02744372494603762, + "grad_norm": 1.3452153205871582, + "learning_rate": 9.143835616438357e-06, + "loss": 0.9005, + "step": 534 + }, + { + "epoch": 0.02749511768938226, + "grad_norm": 1.597040057182312, + "learning_rate": 9.160958904109591e-06, + "loss": 0.936, + "step": 535 + }, + { + "epoch": 0.0275465104327269, + "grad_norm": 0.9114028811454773, + "learning_rate": 9.178082191780823e-06, + "loss": 0.7766, + "step": 536 + }, + { + "epoch": 0.02759790317607154, + "grad_norm": 1.4212180376052856, + "learning_rate": 9.195205479452054e-06, + "loss": 0.8899, + "step": 537 + }, + { + "epoch": 0.02764929591941618, + "grad_norm": 0.9036937355995178, + "learning_rate": 9.212328767123288e-06, + "loss": 0.7256, + "step": 538 + }, + { + "epoch": 0.02770068866276082, + "grad_norm": 1.7179898023605347, + "learning_rate": 9.229452054794521e-06, + "loss": 0.8575, + "step": 539 + }, + { + "epoch": 0.027752081406105456, + "grad_norm": 1.0034892559051514, + "learning_rate": 9.246575342465755e-06, + "loss": 0.7628, + "step": 540 + }, + { + "epoch": 0.027803474149450096, + "grad_norm": 1.3620257377624512, + "learning_rate": 9.263698630136987e-06, + "loss": 0.8688, + "step": 541 + }, + { + "epoch": 0.027854866892794736, + "grad_norm": 1.2762998342514038, + "learning_rate": 9.28082191780822e-06, + "loss": 0.8426, + "step": 542 + }, + { + "epoch": 0.027906259636139377, + "grad_norm": 1.327167272567749, + "learning_rate": 9.297945205479454e-06, + "loss": 0.9141, + "step": 543 + }, + { + "epoch": 0.027957652379484017, + "grad_norm": 1.724090337753296, + "learning_rate": 9.315068493150685e-06, + "loss": 0.9725, + "step": 544 + }, + { + "epoch": 0.028009045122828657, + "grad_norm": 1.2443183660507202, + "learning_rate": 9.332191780821919e-06, + "loss": 0.9013, + "step": 545 + }, + { + "epoch": 0.028060437866173297, + "grad_norm": 1.4725433588027954, + "learning_rate": 9.34931506849315e-06, + "loss": 0.9119, + "step": 546 + }, + { + "epoch": 0.028111830609517937, + "grad_norm": 1.154873013496399, + "learning_rate": 9.366438356164384e-06, + "loss": 0.9116, + "step": 547 + }, + { + "epoch": 0.028163223352862577, + "grad_norm": 1.2435468435287476, + "learning_rate": 9.383561643835618e-06, + "loss": 0.9111, + "step": 548 + }, + { + "epoch": 0.028214616096207217, + "grad_norm": 1.277804970741272, + "learning_rate": 9.400684931506851e-06, + "loss": 0.816, + "step": 549 + }, + { + "epoch": 0.028266008839551854, + "grad_norm": 1.4964183568954468, + "learning_rate": 9.417808219178083e-06, + "loss": 0.9148, + "step": 550 + }, + { + "epoch": 0.028317401582896494, + "grad_norm": 0.9280702471733093, + "learning_rate": 9.434931506849316e-06, + "loss": 0.7587, + "step": 551 + }, + { + "epoch": 0.028368794326241134, + "grad_norm": 1.3291866779327393, + "learning_rate": 9.452054794520548e-06, + "loss": 0.9138, + "step": 552 + }, + { + "epoch": 0.028420187069585774, + "grad_norm": 1.3056268692016602, + "learning_rate": 9.469178082191782e-06, + "loss": 0.9014, + "step": 553 + }, + { + "epoch": 0.028471579812930414, + "grad_norm": 1.3049315214157104, + "learning_rate": 9.486301369863015e-06, + "loss": 0.9186, + "step": 554 + }, + { + "epoch": 0.028522972556275054, + "grad_norm": 0.9101473689079285, + "learning_rate": 9.503424657534247e-06, + "loss": 0.7749, + "step": 555 + }, + { + "epoch": 0.028574365299619695, + "grad_norm": 1.24942946434021, + "learning_rate": 9.52054794520548e-06, + "loss": 0.9414, + "step": 556 + }, + { + "epoch": 0.028625758042964335, + "grad_norm": 0.7815028429031372, + "learning_rate": 9.537671232876712e-06, + "loss": 0.7474, + "step": 557 + }, + { + "epoch": 0.028677150786308975, + "grad_norm": 1.3021060228347778, + "learning_rate": 9.554794520547946e-06, + "loss": 0.904, + "step": 558 + }, + { + "epoch": 0.02872854352965361, + "grad_norm": 1.4569242000579834, + "learning_rate": 9.571917808219179e-06, + "loss": 0.9555, + "step": 559 + }, + { + "epoch": 0.02877993627299825, + "grad_norm": 1.3382290601730347, + "learning_rate": 9.589041095890411e-06, + "loss": 0.9057, + "step": 560 + }, + { + "epoch": 0.02883132901634289, + "grad_norm": 1.2955886125564575, + "learning_rate": 9.606164383561644e-06, + "loss": 0.9103, + "step": 561 + }, + { + "epoch": 0.02888272175968753, + "grad_norm": 0.9059459567070007, + "learning_rate": 9.623287671232878e-06, + "loss": 0.7646, + "step": 562 + }, + { + "epoch": 0.028934114503032172, + "grad_norm": 1.2799419164657593, + "learning_rate": 9.640410958904111e-06, + "loss": 0.9653, + "step": 563 + }, + { + "epoch": 0.028985507246376812, + "grad_norm": 1.4008582830429077, + "learning_rate": 9.657534246575343e-06, + "loss": 0.9203, + "step": 564 + }, + { + "epoch": 0.029036899989721452, + "grad_norm": 1.3185688257217407, + "learning_rate": 9.674657534246577e-06, + "loss": 0.9308, + "step": 565 + }, + { + "epoch": 0.029088292733066092, + "grad_norm": 1.2655415534973145, + "learning_rate": 9.691780821917808e-06, + "loss": 0.9055, + "step": 566 + }, + { + "epoch": 0.029139685476410732, + "grad_norm": 1.2992969751358032, + "learning_rate": 9.708904109589042e-06, + "loss": 0.9107, + "step": 567 + }, + { + "epoch": 0.02919107821975537, + "grad_norm": 0.9581859707832336, + "learning_rate": 9.726027397260275e-06, + "loss": 0.8054, + "step": 568 + }, + { + "epoch": 0.02924247096310001, + "grad_norm": 1.3011211156845093, + "learning_rate": 9.743150684931507e-06, + "loss": 0.9577, + "step": 569 + }, + { + "epoch": 0.02929386370644465, + "grad_norm": 0.9357772469520569, + "learning_rate": 9.76027397260274e-06, + "loss": 0.7503, + "step": 570 + }, + { + "epoch": 0.02934525644978929, + "grad_norm": 0.9294967651367188, + "learning_rate": 9.777397260273972e-06, + "loss": 0.7447, + "step": 571 + }, + { + "epoch": 0.02939664919313393, + "grad_norm": 1.2695695161819458, + "learning_rate": 9.794520547945206e-06, + "loss": 0.8848, + "step": 572 + }, + { + "epoch": 0.02944804193647857, + "grad_norm": 0.9333198070526123, + "learning_rate": 9.81164383561644e-06, + "loss": 0.7458, + "step": 573 + }, + { + "epoch": 0.02949943467982321, + "grad_norm": 1.3401907682418823, + "learning_rate": 9.828767123287673e-06, + "loss": 0.8545, + "step": 574 + }, + { + "epoch": 0.02955082742316785, + "grad_norm": 1.2322946786880493, + "learning_rate": 9.845890410958905e-06, + "loss": 0.899, + "step": 575 + }, + { + "epoch": 0.02960222016651249, + "grad_norm": 1.2715824842453003, + "learning_rate": 9.863013698630138e-06, + "loss": 0.8651, + "step": 576 + }, + { + "epoch": 0.029653612909857126, + "grad_norm": 1.333211064338684, + "learning_rate": 9.880136986301372e-06, + "loss": 0.9101, + "step": 577 + }, + { + "epoch": 0.029705005653201767, + "grad_norm": 1.2988038063049316, + "learning_rate": 9.897260273972603e-06, + "loss": 0.88, + "step": 578 + }, + { + "epoch": 0.029756398396546407, + "grad_norm": 1.460774302482605, + "learning_rate": 9.914383561643837e-06, + "loss": 0.9268, + "step": 579 + }, + { + "epoch": 0.029807791139891047, + "grad_norm": 1.265076756477356, + "learning_rate": 9.931506849315069e-06, + "loss": 0.8335, + "step": 580 + }, + { + "epoch": 0.029859183883235687, + "grad_norm": 0.943951427936554, + "learning_rate": 9.948630136986302e-06, + "loss": 0.7368, + "step": 581 + }, + { + "epoch": 0.029910576626580327, + "grad_norm": 1.2957152128219604, + "learning_rate": 9.965753424657536e-06, + "loss": 0.9397, + "step": 582 + }, + { + "epoch": 0.029961969369924967, + "grad_norm": 1.28718101978302, + "learning_rate": 9.982876712328769e-06, + "loss": 0.9139, + "step": 583 + }, + { + "epoch": 0.030013362113269607, + "grad_norm": 1.2743099927902222, + "learning_rate": 1e-05, + "loss": 0.9163, + "step": 584 + }, + { + "epoch": 0.030064754856614247, + "grad_norm": 1.2341121435165405, + "learning_rate": 9.999999930735318e-06, + "loss": 0.9249, + "step": 585 + }, + { + "epoch": 0.030116147599958887, + "grad_norm": 0.8864800333976746, + "learning_rate": 9.99999972294127e-06, + "loss": 0.7115, + "step": 586 + }, + { + "epoch": 0.030167540343303524, + "grad_norm": 1.2989847660064697, + "learning_rate": 9.999999376617863e-06, + "loss": 0.8635, + "step": 587 + }, + { + "epoch": 0.030218933086648164, + "grad_norm": 1.0578948259353638, + "learning_rate": 9.99999889176511e-06, + "loss": 0.7805, + "step": 588 + }, + { + "epoch": 0.030270325829992804, + "grad_norm": 1.2896292209625244, + "learning_rate": 9.999998268383018e-06, + "loss": 0.9167, + "step": 589 + }, + { + "epoch": 0.030321718573337444, + "grad_norm": 1.466029167175293, + "learning_rate": 9.99999750647161e-06, + "loss": 0.9525, + "step": 590 + }, + { + "epoch": 0.030373111316682085, + "grad_norm": 1.366807222366333, + "learning_rate": 9.999996606030905e-06, + "loss": 0.875, + "step": 591 + }, + { + "epoch": 0.030424504060026725, + "grad_norm": 1.3337703943252563, + "learning_rate": 9.999995567060927e-06, + "loss": 0.9312, + "step": 592 + }, + { + "epoch": 0.030475896803371365, + "grad_norm": 1.2864301204681396, + "learning_rate": 9.999994389561704e-06, + "loss": 0.906, + "step": 593 + }, + { + "epoch": 0.030527289546716005, + "grad_norm": 1.3428853750228882, + "learning_rate": 9.999993073533273e-06, + "loss": 0.8645, + "step": 594 + }, + { + "epoch": 0.030578682290060645, + "grad_norm": 1.2185192108154297, + "learning_rate": 9.999991618975667e-06, + "loss": 0.9117, + "step": 595 + }, + { + "epoch": 0.03063007503340528, + "grad_norm": 1.3805981874465942, + "learning_rate": 9.999990025888925e-06, + "loss": 0.9444, + "step": 596 + }, + { + "epoch": 0.030681467776749922, + "grad_norm": 1.3172290325164795, + "learning_rate": 9.999988294273095e-06, + "loss": 0.9011, + "step": 597 + }, + { + "epoch": 0.030732860520094562, + "grad_norm": 1.5256799459457397, + "learning_rate": 9.999986424128224e-06, + "loss": 0.9358, + "step": 598 + }, + { + "epoch": 0.030784253263439202, + "grad_norm": 1.2593270540237427, + "learning_rate": 9.999984415454362e-06, + "loss": 0.9033, + "step": 599 + }, + { + "epoch": 0.030835646006783842, + "grad_norm": 1.292752742767334, + "learning_rate": 9.999982268251565e-06, + "loss": 0.8908, + "step": 600 + }, + { + "epoch": 0.030887038750128482, + "grad_norm": 1.512018084526062, + "learning_rate": 9.999979982519892e-06, + "loss": 0.9191, + "step": 601 + }, + { + "epoch": 0.030938431493473122, + "grad_norm": 1.3083957433700562, + "learning_rate": 9.99997755825941e-06, + "loss": 0.9545, + "step": 602 + }, + { + "epoch": 0.030989824236817762, + "grad_norm": 1.592584490776062, + "learning_rate": 9.99997499547018e-06, + "loss": 0.9306, + "step": 603 + }, + { + "epoch": 0.031041216980162403, + "grad_norm": 1.25839102268219, + "learning_rate": 9.99997229415228e-06, + "loss": 0.8516, + "step": 604 + }, + { + "epoch": 0.03109260972350704, + "grad_norm": 1.3436771631240845, + "learning_rate": 9.99996945430578e-06, + "loss": 0.9107, + "step": 605 + }, + { + "epoch": 0.03114400246685168, + "grad_norm": 1.5542758703231812, + "learning_rate": 9.99996647593076e-06, + "loss": 0.9661, + "step": 606 + }, + { + "epoch": 0.03119539521019632, + "grad_norm": 1.3693493604660034, + "learning_rate": 9.999963359027303e-06, + "loss": 0.8683, + "step": 607 + }, + { + "epoch": 0.03124678795354096, + "grad_norm": 1.2629454135894775, + "learning_rate": 9.999960103595495e-06, + "loss": 0.8985, + "step": 608 + }, + { + "epoch": 0.0312981806968856, + "grad_norm": 1.2224509716033936, + "learning_rate": 9.999956709635427e-06, + "loss": 0.8551, + "step": 609 + }, + { + "epoch": 0.03134957344023024, + "grad_norm": 1.2754219770431519, + "learning_rate": 9.99995317714719e-06, + "loss": 0.9594, + "step": 610 + }, + { + "epoch": 0.03140096618357488, + "grad_norm": 1.2681502103805542, + "learning_rate": 9.999949506130886e-06, + "loss": 0.8778, + "step": 611 + }, + { + "epoch": 0.03145235892691952, + "grad_norm": 1.3756245374679565, + "learning_rate": 9.999945696586613e-06, + "loss": 0.8867, + "step": 612 + }, + { + "epoch": 0.03150375167026416, + "grad_norm": 1.2620686292648315, + "learning_rate": 9.99994174851448e-06, + "loss": 0.9525, + "step": 613 + }, + { + "epoch": 0.0315551444136088, + "grad_norm": 1.2894796133041382, + "learning_rate": 9.999937661914593e-06, + "loss": 0.8999, + "step": 614 + }, + { + "epoch": 0.03160653715695344, + "grad_norm": 1.348724365234375, + "learning_rate": 9.999933436787068e-06, + "loss": 0.9606, + "step": 615 + }, + { + "epoch": 0.03165792990029808, + "grad_norm": 1.2647407054901123, + "learning_rate": 9.999929073132022e-06, + "loss": 0.846, + "step": 616 + }, + { + "epoch": 0.03170932264364272, + "grad_norm": 1.4494421482086182, + "learning_rate": 9.999924570949573e-06, + "loss": 0.8995, + "step": 617 + }, + { + "epoch": 0.03176071538698736, + "grad_norm": 1.3663712739944458, + "learning_rate": 9.999919930239847e-06, + "loss": 0.9398, + "step": 618 + }, + { + "epoch": 0.031812108130331994, + "grad_norm": 1.2619757652282715, + "learning_rate": 9.999915151002976e-06, + "loss": 0.9251, + "step": 619 + }, + { + "epoch": 0.031863500873676634, + "grad_norm": 1.265712857246399, + "learning_rate": 9.999910233239087e-06, + "loss": 0.925, + "step": 620 + }, + { + "epoch": 0.031914893617021274, + "grad_norm": 1.3284708261489868, + "learning_rate": 9.999905176948321e-06, + "loss": 0.9401, + "step": 621 + }, + { + "epoch": 0.031966286360365914, + "grad_norm": 1.212500810623169, + "learning_rate": 9.999899982130814e-06, + "loss": 0.9478, + "step": 622 + }, + { + "epoch": 0.032017679103710554, + "grad_norm": 1.14016592502594, + "learning_rate": 9.999894648786713e-06, + "loss": 0.7327, + "step": 623 + }, + { + "epoch": 0.032069071847055194, + "grad_norm": 1.397247314453125, + "learning_rate": 9.999889176916164e-06, + "loss": 0.9271, + "step": 624 + }, + { + "epoch": 0.032120464590399835, + "grad_norm": 1.258782982826233, + "learning_rate": 9.99988356651932e-06, + "loss": 0.8644, + "step": 625 + }, + { + "epoch": 0.032171857333744475, + "grad_norm": 1.7937285900115967, + "learning_rate": 9.999877817596336e-06, + "loss": 0.9062, + "step": 626 + }, + { + "epoch": 0.032223250077089115, + "grad_norm": 1.538447618484497, + "learning_rate": 9.999871930147369e-06, + "loss": 0.8829, + "step": 627 + }, + { + "epoch": 0.032274642820433755, + "grad_norm": 1.2555336952209473, + "learning_rate": 9.999865904172585e-06, + "loss": 0.8701, + "step": 628 + }, + { + "epoch": 0.032326035563778395, + "grad_norm": 1.2921088933944702, + "learning_rate": 9.999859739672151e-06, + "loss": 0.9332, + "step": 629 + }, + { + "epoch": 0.032377428307123035, + "grad_norm": 1.2067265510559082, + "learning_rate": 9.999853436646237e-06, + "loss": 0.8662, + "step": 630 + }, + { + "epoch": 0.032428821050467675, + "grad_norm": 1.271276831626892, + "learning_rate": 9.999846995095016e-06, + "loss": 0.8084, + "step": 631 + }, + { + "epoch": 0.032480213793812315, + "grad_norm": 1.3928368091583252, + "learning_rate": 9.99984041501867e-06, + "loss": 0.868, + "step": 632 + }, + { + "epoch": 0.032531606537156955, + "grad_norm": 1.2404413223266602, + "learning_rate": 9.999833696417376e-06, + "loss": 0.9162, + "step": 633 + }, + { + "epoch": 0.032582999280501596, + "grad_norm": 1.3089007139205933, + "learning_rate": 9.999826839291325e-06, + "loss": 0.8752, + "step": 634 + }, + { + "epoch": 0.032634392023846236, + "grad_norm": 1.3305209875106812, + "learning_rate": 9.999819843640706e-06, + "loss": 0.8634, + "step": 635 + }, + { + "epoch": 0.032685784767190876, + "grad_norm": 1.265342116355896, + "learning_rate": 9.999812709465711e-06, + "loss": 0.8658, + "step": 636 + }, + { + "epoch": 0.03273717751053551, + "grad_norm": 1.3872308731079102, + "learning_rate": 9.99980543676654e-06, + "loss": 0.8684, + "step": 637 + }, + { + "epoch": 0.03278857025388015, + "grad_norm": 1.2806648015975952, + "learning_rate": 9.999798025543393e-06, + "loss": 0.9003, + "step": 638 + }, + { + "epoch": 0.03283996299722479, + "grad_norm": 1.2658412456512451, + "learning_rate": 9.999790475796475e-06, + "loss": 0.8383, + "step": 639 + }, + { + "epoch": 0.03289135574056943, + "grad_norm": 1.6125788688659668, + "learning_rate": 9.999782787525995e-06, + "loss": 0.8849, + "step": 640 + }, + { + "epoch": 0.03294274848391407, + "grad_norm": 1.1932075023651123, + "learning_rate": 9.99977496073217e-06, + "loss": 0.8996, + "step": 641 + }, + { + "epoch": 0.03299414122725871, + "grad_norm": 1.2844421863555908, + "learning_rate": 9.99976699541521e-06, + "loss": 0.8828, + "step": 642 + }, + { + "epoch": 0.03304553397060335, + "grad_norm": 1.053963541984558, + "learning_rate": 9.999758891575342e-06, + "loss": 0.6973, + "step": 643 + }, + { + "epoch": 0.03309692671394799, + "grad_norm": 1.3341604471206665, + "learning_rate": 9.999750649212787e-06, + "loss": 0.898, + "step": 644 + }, + { + "epoch": 0.03314831945729263, + "grad_norm": 1.2578569650650024, + "learning_rate": 9.999742268327774e-06, + "loss": 0.7745, + "step": 645 + }, + { + "epoch": 0.03319971220063727, + "grad_norm": 1.382563591003418, + "learning_rate": 9.999733748920537e-06, + "loss": 0.9263, + "step": 646 + }, + { + "epoch": 0.03325110494398191, + "grad_norm": 1.3626760244369507, + "learning_rate": 9.999725090991308e-06, + "loss": 0.9254, + "step": 647 + }, + { + "epoch": 0.03330249768732655, + "grad_norm": 0.9993988871574402, + "learning_rate": 9.999716294540331e-06, + "loss": 0.7546, + "step": 648 + }, + { + "epoch": 0.03335389043067119, + "grad_norm": 0.9365474581718445, + "learning_rate": 9.999707359567847e-06, + "loss": 0.7375, + "step": 649 + }, + { + "epoch": 0.03340528317401583, + "grad_norm": 1.3606112003326416, + "learning_rate": 9.999698286074107e-06, + "loss": 0.8897, + "step": 650 + }, + { + "epoch": 0.03345667591736047, + "grad_norm": 1.243239164352417, + "learning_rate": 9.999689074059358e-06, + "loss": 0.8646, + "step": 651 + }, + { + "epoch": 0.03350806866070511, + "grad_norm": 1.31145179271698, + "learning_rate": 9.999679723523857e-06, + "loss": 0.889, + "step": 652 + }, + { + "epoch": 0.03355946140404975, + "grad_norm": 1.3311896324157715, + "learning_rate": 9.999670234467864e-06, + "loss": 0.9205, + "step": 653 + }, + { + "epoch": 0.03361085414739439, + "grad_norm": 1.4149863719940186, + "learning_rate": 9.999660606891642e-06, + "loss": 0.8216, + "step": 654 + }, + { + "epoch": 0.03366224689073903, + "grad_norm": 1.2291474342346191, + "learning_rate": 9.999650840795456e-06, + "loss": 0.8304, + "step": 655 + }, + { + "epoch": 0.033713639634083664, + "grad_norm": 1.907629370689392, + "learning_rate": 9.999640936179578e-06, + "loss": 0.9434, + "step": 656 + }, + { + "epoch": 0.033765032377428304, + "grad_norm": 1.2146872282028198, + "learning_rate": 9.99963089304428e-06, + "loss": 0.8204, + "step": 657 + }, + { + "epoch": 0.033816425120772944, + "grad_norm": 1.2001131772994995, + "learning_rate": 9.999620711389846e-06, + "loss": 0.9003, + "step": 658 + }, + { + "epoch": 0.033867817864117584, + "grad_norm": 1.2128374576568604, + "learning_rate": 9.999610391216552e-06, + "loss": 0.8561, + "step": 659 + }, + { + "epoch": 0.033919210607462225, + "grad_norm": 1.2235902547836304, + "learning_rate": 9.999599932524686e-06, + "loss": 0.8785, + "step": 660 + }, + { + "epoch": 0.033970603350806865, + "grad_norm": 1.276451587677002, + "learning_rate": 9.99958933531454e-06, + "loss": 0.9213, + "step": 661 + }, + { + "epoch": 0.034021996094151505, + "grad_norm": 1.3629931211471558, + "learning_rate": 9.999578599586403e-06, + "loss": 0.845, + "step": 662 + }, + { + "epoch": 0.034073388837496145, + "grad_norm": 1.4644831418991089, + "learning_rate": 9.999567725340576e-06, + "loss": 0.9308, + "step": 663 + }, + { + "epoch": 0.034124781580840785, + "grad_norm": 1.230907917022705, + "learning_rate": 9.99955671257736e-06, + "loss": 0.8848, + "step": 664 + }, + { + "epoch": 0.034176174324185425, + "grad_norm": 1.3106567859649658, + "learning_rate": 9.999545561297056e-06, + "loss": 0.8804, + "step": 665 + }, + { + "epoch": 0.034227567067530065, + "grad_norm": 1.1782751083374023, + "learning_rate": 9.999534271499982e-06, + "loss": 0.7919, + "step": 666 + }, + { + "epoch": 0.034278959810874705, + "grad_norm": 1.2492984533309937, + "learning_rate": 9.999522843186442e-06, + "loss": 0.8357, + "step": 667 + }, + { + "epoch": 0.034330352554219346, + "grad_norm": 1.1943573951721191, + "learning_rate": 9.999511276356755e-06, + "loss": 0.9432, + "step": 668 + }, + { + "epoch": 0.034381745297563986, + "grad_norm": 1.321894645690918, + "learning_rate": 9.999499571011244e-06, + "loss": 0.9181, + "step": 669 + }, + { + "epoch": 0.034433138040908626, + "grad_norm": 1.2689727544784546, + "learning_rate": 9.999487727150232e-06, + "loss": 0.9105, + "step": 670 + }, + { + "epoch": 0.034484530784253266, + "grad_norm": 1.1956273317337036, + "learning_rate": 9.999475744774046e-06, + "loss": 0.8256, + "step": 671 + }, + { + "epoch": 0.034535923527597906, + "grad_norm": 1.2164257764816284, + "learning_rate": 9.999463623883017e-06, + "loss": 0.8054, + "step": 672 + }, + { + "epoch": 0.034587316270942546, + "grad_norm": 0.8890982270240784, + "learning_rate": 9.999451364477487e-06, + "loss": 0.735, + "step": 673 + }, + { + "epoch": 0.03463870901428718, + "grad_norm": 1.3526277542114258, + "learning_rate": 9.999438966557787e-06, + "loss": 0.9192, + "step": 674 + }, + { + "epoch": 0.03469010175763182, + "grad_norm": 1.432132363319397, + "learning_rate": 9.999426430124266e-06, + "loss": 0.8689, + "step": 675 + }, + { + "epoch": 0.03474149450097646, + "grad_norm": 0.9870145320892334, + "learning_rate": 9.999413755177269e-06, + "loss": 0.7423, + "step": 676 + }, + { + "epoch": 0.0347928872443211, + "grad_norm": 0.8942204117774963, + "learning_rate": 9.999400941717151e-06, + "loss": 0.7597, + "step": 677 + }, + { + "epoch": 0.03484427998766574, + "grad_norm": 1.3862026929855347, + "learning_rate": 9.999387989744262e-06, + "loss": 0.8977, + "step": 678 + }, + { + "epoch": 0.03489567273101038, + "grad_norm": 1.256982684135437, + "learning_rate": 9.999374899258964e-06, + "loss": 0.8729, + "step": 679 + }, + { + "epoch": 0.03494706547435502, + "grad_norm": 1.264243721961975, + "learning_rate": 9.999361670261618e-06, + "loss": 0.8814, + "step": 680 + }, + { + "epoch": 0.03499845821769966, + "grad_norm": 0.9732556343078613, + "learning_rate": 9.999348302752592e-06, + "loss": 0.8128, + "step": 681 + }, + { + "epoch": 0.0350498509610443, + "grad_norm": 1.3299076557159424, + "learning_rate": 9.999334796732255e-06, + "loss": 0.9228, + "step": 682 + }, + { + "epoch": 0.03510124370438894, + "grad_norm": 0.9515122175216675, + "learning_rate": 9.999321152200982e-06, + "loss": 0.7468, + "step": 683 + }, + { + "epoch": 0.03515263644773358, + "grad_norm": 1.2781256437301636, + "learning_rate": 9.99930736915915e-06, + "loss": 0.9191, + "step": 684 + }, + { + "epoch": 0.03520402919107822, + "grad_norm": 1.3453145027160645, + "learning_rate": 9.999293447607143e-06, + "loss": 0.8895, + "step": 685 + }, + { + "epoch": 0.03525542193442286, + "grad_norm": 1.3089325428009033, + "learning_rate": 9.999279387545346e-06, + "loss": 0.919, + "step": 686 + }, + { + "epoch": 0.0353068146777675, + "grad_norm": 1.3162271976470947, + "learning_rate": 9.999265188974149e-06, + "loss": 0.9113, + "step": 687 + }, + { + "epoch": 0.03535820742111214, + "grad_norm": 1.1579227447509766, + "learning_rate": 9.99925085189394e-06, + "loss": 0.8468, + "step": 688 + }, + { + "epoch": 0.03540960016445678, + "grad_norm": 1.318410873413086, + "learning_rate": 9.999236376305123e-06, + "loss": 0.8899, + "step": 689 + }, + { + "epoch": 0.03546099290780142, + "grad_norm": 1.266695499420166, + "learning_rate": 9.999221762208098e-06, + "loss": 0.8224, + "step": 690 + }, + { + "epoch": 0.03551238565114606, + "grad_norm": 1.184025764465332, + "learning_rate": 9.999207009603266e-06, + "loss": 0.8368, + "step": 691 + }, + { + "epoch": 0.0355637783944907, + "grad_norm": 1.245678186416626, + "learning_rate": 9.99919211849104e-06, + "loss": 0.8565, + "step": 692 + }, + { + "epoch": 0.035615171137835334, + "grad_norm": 1.2107875347137451, + "learning_rate": 9.99917708887183e-06, + "loss": 0.8637, + "step": 693 + }, + { + "epoch": 0.035666563881179975, + "grad_norm": 1.2797000408172607, + "learning_rate": 9.999161920746055e-06, + "loss": 0.7954, + "step": 694 + }, + { + "epoch": 0.035717956624524615, + "grad_norm": 0.9214054346084595, + "learning_rate": 9.99914661411413e-06, + "loss": 0.7751, + "step": 695 + }, + { + "epoch": 0.035769349367869255, + "grad_norm": 1.3945088386535645, + "learning_rate": 9.999131168976485e-06, + "loss": 0.9364, + "step": 696 + }, + { + "epoch": 0.035820742111213895, + "grad_norm": 1.3009486198425293, + "learning_rate": 9.999115585333546e-06, + "loss": 0.8712, + "step": 697 + }, + { + "epoch": 0.035872134854558535, + "grad_norm": 1.2121648788452148, + "learning_rate": 9.999099863185743e-06, + "loss": 0.8373, + "step": 698 + }, + { + "epoch": 0.035923527597903175, + "grad_norm": 1.3146964311599731, + "learning_rate": 9.999084002533513e-06, + "loss": 0.9365, + "step": 699 + }, + { + "epoch": 0.035974920341247815, + "grad_norm": 1.4433462619781494, + "learning_rate": 9.999068003377296e-06, + "loss": 0.8918, + "step": 700 + }, + { + "epoch": 0.036026313084592455, + "grad_norm": 1.297878623008728, + "learning_rate": 9.999051865717535e-06, + "loss": 0.8625, + "step": 701 + }, + { + "epoch": 0.036077705827937095, + "grad_norm": 1.3019086122512817, + "learning_rate": 9.999035589554675e-06, + "loss": 0.9551, + "step": 702 + }, + { + "epoch": 0.036129098571281736, + "grad_norm": 1.201297402381897, + "learning_rate": 9.99901917488917e-06, + "loss": 0.8528, + "step": 703 + }, + { + "epoch": 0.036180491314626376, + "grad_norm": 1.2464736700057983, + "learning_rate": 9.999002621721473e-06, + "loss": 0.901, + "step": 704 + }, + { + "epoch": 0.036231884057971016, + "grad_norm": 1.7273004055023193, + "learning_rate": 9.998985930052042e-06, + "loss": 0.7966, + "step": 705 + }, + { + "epoch": 0.036283276801315656, + "grad_norm": 1.2638431787490845, + "learning_rate": 9.998969099881341e-06, + "loss": 0.967, + "step": 706 + }, + { + "epoch": 0.036334669544660296, + "grad_norm": 1.2900207042694092, + "learning_rate": 9.998952131209836e-06, + "loss": 0.8496, + "step": 707 + }, + { + "epoch": 0.036386062288004936, + "grad_norm": 0.8974463939666748, + "learning_rate": 9.998935024037998e-06, + "loss": 0.748, + "step": 708 + }, + { + "epoch": 0.036437455031349576, + "grad_norm": 1.2598536014556885, + "learning_rate": 9.998917778366299e-06, + "loss": 0.8948, + "step": 709 + }, + { + "epoch": 0.036488847774694216, + "grad_norm": 1.433349370956421, + "learning_rate": 9.998900394195217e-06, + "loss": 0.8779, + "step": 710 + }, + { + "epoch": 0.03654024051803885, + "grad_norm": 1.191897988319397, + "learning_rate": 9.998882871525234e-06, + "loss": 0.8856, + "step": 711 + }, + { + "epoch": 0.03659163326138349, + "grad_norm": 1.0730814933776855, + "learning_rate": 9.998865210356839e-06, + "loss": 0.7432, + "step": 712 + }, + { + "epoch": 0.03664302600472813, + "grad_norm": 1.4379756450653076, + "learning_rate": 9.998847410690515e-06, + "loss": 0.9208, + "step": 713 + }, + { + "epoch": 0.03669441874807277, + "grad_norm": 1.2651309967041016, + "learning_rate": 9.998829472526758e-06, + "loss": 0.8665, + "step": 714 + }, + { + "epoch": 0.03674581149141741, + "grad_norm": 1.178600549697876, + "learning_rate": 9.998811395866067e-06, + "loss": 0.8208, + "step": 715 + }, + { + "epoch": 0.03679720423476205, + "grad_norm": 1.254520297050476, + "learning_rate": 9.99879318070894e-06, + "loss": 0.8775, + "step": 716 + }, + { + "epoch": 0.03684859697810669, + "grad_norm": 1.3362984657287598, + "learning_rate": 9.998774827055884e-06, + "loss": 0.916, + "step": 717 + }, + { + "epoch": 0.03689998972145133, + "grad_norm": 1.2543765306472778, + "learning_rate": 9.998756334907404e-06, + "loss": 0.8354, + "step": 718 + }, + { + "epoch": 0.03695138246479597, + "grad_norm": 1.393196702003479, + "learning_rate": 9.998737704264017e-06, + "loss": 0.8723, + "step": 719 + }, + { + "epoch": 0.03700277520814061, + "grad_norm": 1.2626738548278809, + "learning_rate": 9.998718935126236e-06, + "loss": 0.8213, + "step": 720 + }, + { + "epoch": 0.03705416795148525, + "grad_norm": 1.3131617307662964, + "learning_rate": 9.99870002749458e-06, + "loss": 0.8805, + "step": 721 + }, + { + "epoch": 0.03710556069482989, + "grad_norm": 1.271390438079834, + "learning_rate": 9.998680981369577e-06, + "loss": 0.8748, + "step": 722 + }, + { + "epoch": 0.03715695343817453, + "grad_norm": 1.1442046165466309, + "learning_rate": 9.998661796751751e-06, + "loss": 0.8429, + "step": 723 + }, + { + "epoch": 0.03720834618151917, + "grad_norm": 0.9757936596870422, + "learning_rate": 9.998642473641638e-06, + "loss": 0.7885, + "step": 724 + }, + { + "epoch": 0.03725973892486381, + "grad_norm": 1.2529871463775635, + "learning_rate": 9.998623012039768e-06, + "loss": 0.9013, + "step": 725 + }, + { + "epoch": 0.03731113166820845, + "grad_norm": 1.3068562746047974, + "learning_rate": 9.99860341194668e-06, + "loss": 0.9318, + "step": 726 + }, + { + "epoch": 0.03736252441155309, + "grad_norm": 0.9531645774841309, + "learning_rate": 9.998583673362922e-06, + "loss": 0.7402, + "step": 727 + }, + { + "epoch": 0.03741391715489773, + "grad_norm": 1.2776199579238892, + "learning_rate": 9.998563796289038e-06, + "loss": 0.9033, + "step": 728 + }, + { + "epoch": 0.03746530989824237, + "grad_norm": 0.8968835473060608, + "learning_rate": 9.99854378072558e-06, + "loss": 0.7731, + "step": 729 + }, + { + "epoch": 0.037516702641587005, + "grad_norm": 1.2996851205825806, + "learning_rate": 9.9985236266731e-06, + "loss": 0.8855, + "step": 730 + }, + { + "epoch": 0.037568095384931645, + "grad_norm": 1.360831618309021, + "learning_rate": 9.998503334132159e-06, + "loss": 0.859, + "step": 731 + }, + { + "epoch": 0.037619488128276285, + "grad_norm": 1.271178126335144, + "learning_rate": 9.998482903103318e-06, + "loss": 0.8943, + "step": 732 + }, + { + "epoch": 0.037670880871620925, + "grad_norm": 1.3013023138046265, + "learning_rate": 9.998462333587143e-06, + "loss": 0.9497, + "step": 733 + }, + { + "epoch": 0.037722273614965565, + "grad_norm": 1.2942689657211304, + "learning_rate": 9.998441625584206e-06, + "loss": 0.9516, + "step": 734 + }, + { + "epoch": 0.037773666358310205, + "grad_norm": 1.2302870750427246, + "learning_rate": 9.998420779095076e-06, + "loss": 0.9122, + "step": 735 + }, + { + "epoch": 0.037825059101654845, + "grad_norm": 0.7748158574104309, + "learning_rate": 9.998399794120335e-06, + "loss": 0.7811, + "step": 736 + }, + { + "epoch": 0.037876451844999486, + "grad_norm": 1.2053734064102173, + "learning_rate": 9.998378670660563e-06, + "loss": 0.8755, + "step": 737 + }, + { + "epoch": 0.037927844588344126, + "grad_norm": 1.5758365392684937, + "learning_rate": 9.998357408716345e-06, + "loss": 0.95, + "step": 738 + }, + { + "epoch": 0.037979237331688766, + "grad_norm": 1.298250436782837, + "learning_rate": 9.998336008288269e-06, + "loss": 0.7766, + "step": 739 + }, + { + "epoch": 0.038030630075033406, + "grad_norm": 1.288628101348877, + "learning_rate": 9.99831446937693e-06, + "loss": 0.8274, + "step": 740 + }, + { + "epoch": 0.038082022818378046, + "grad_norm": 1.2925089597702026, + "learning_rate": 9.998292791982924e-06, + "loss": 0.9455, + "step": 741 + }, + { + "epoch": 0.038133415561722686, + "grad_norm": 1.236112117767334, + "learning_rate": 9.998270976106852e-06, + "loss": 0.937, + "step": 742 + }, + { + "epoch": 0.038184808305067326, + "grad_norm": 0.9334130883216858, + "learning_rate": 9.998249021749317e-06, + "loss": 0.7603, + "step": 743 + }, + { + "epoch": 0.038236201048411966, + "grad_norm": 0.9988341927528381, + "learning_rate": 9.998226928910928e-06, + "loss": 0.7624, + "step": 744 + }, + { + "epoch": 0.038287593791756606, + "grad_norm": 1.3378779888153076, + "learning_rate": 9.998204697592298e-06, + "loss": 0.8701, + "step": 745 + }, + { + "epoch": 0.03833898653510125, + "grad_norm": 1.3172721862792969, + "learning_rate": 9.998182327794042e-06, + "loss": 0.8697, + "step": 746 + }, + { + "epoch": 0.03839037927844589, + "grad_norm": 1.5073113441467285, + "learning_rate": 9.99815981951678e-06, + "loss": 0.8773, + "step": 747 + }, + { + "epoch": 0.03844177202179052, + "grad_norm": 1.2601313591003418, + "learning_rate": 9.998137172761136e-06, + "loss": 0.8611, + "step": 748 + }, + { + "epoch": 0.03849316476513516, + "grad_norm": 1.2902367115020752, + "learning_rate": 9.998114387527736e-06, + "loss": 0.8753, + "step": 749 + }, + { + "epoch": 0.0385445575084798, + "grad_norm": 1.341047763824463, + "learning_rate": 9.998091463817214e-06, + "loss": 0.8848, + "step": 750 + }, + { + "epoch": 0.03859595025182444, + "grad_norm": 1.2246860265731812, + "learning_rate": 9.9980684016302e-06, + "loss": 0.8842, + "step": 751 + }, + { + "epoch": 0.03864734299516908, + "grad_norm": 1.3612843751907349, + "learning_rate": 9.99804520096734e-06, + "loss": 0.9267, + "step": 752 + }, + { + "epoch": 0.03869873573851372, + "grad_norm": 1.2748205661773682, + "learning_rate": 9.998021861829272e-06, + "loss": 0.9499, + "step": 753 + }, + { + "epoch": 0.03875012848185836, + "grad_norm": 1.2747546434402466, + "learning_rate": 9.997998384216645e-06, + "loss": 0.9278, + "step": 754 + }, + { + "epoch": 0.038801521225203, + "grad_norm": 1.2689528465270996, + "learning_rate": 9.997974768130106e-06, + "loss": 0.8811, + "step": 755 + }, + { + "epoch": 0.03885291396854764, + "grad_norm": 1.2552950382232666, + "learning_rate": 9.997951013570312e-06, + "loss": 0.925, + "step": 756 + }, + { + "epoch": 0.03890430671189228, + "grad_norm": 1.4981372356414795, + "learning_rate": 9.997927120537923e-06, + "loss": 0.8707, + "step": 757 + }, + { + "epoch": 0.03895569945523692, + "grad_norm": 1.0191279649734497, + "learning_rate": 9.997903089033596e-06, + "loss": 0.7708, + "step": 758 + }, + { + "epoch": 0.03900709219858156, + "grad_norm": 1.2214558124542236, + "learning_rate": 9.997878919058001e-06, + "loss": 0.8456, + "step": 759 + }, + { + "epoch": 0.0390584849419262, + "grad_norm": 0.8248938918113708, + "learning_rate": 9.997854610611805e-06, + "loss": 0.7085, + "step": 760 + }, + { + "epoch": 0.03910987768527084, + "grad_norm": 1.341586709022522, + "learning_rate": 9.997830163695685e-06, + "loss": 0.8779, + "step": 761 + }, + { + "epoch": 0.03916127042861548, + "grad_norm": 1.238930344581604, + "learning_rate": 9.997805578310313e-06, + "loss": 0.8587, + "step": 762 + }, + { + "epoch": 0.03921266317196012, + "grad_norm": 1.192710280418396, + "learning_rate": 9.997780854456376e-06, + "loss": 0.7974, + "step": 763 + }, + { + "epoch": 0.03926405591530476, + "grad_norm": 1.2870088815689087, + "learning_rate": 9.997755992134554e-06, + "loss": 0.8906, + "step": 764 + }, + { + "epoch": 0.0393154486586494, + "grad_norm": 0.9952512979507446, + "learning_rate": 9.99773099134554e-06, + "loss": 0.7892, + "step": 765 + }, + { + "epoch": 0.03936684140199404, + "grad_norm": 1.2427223920822144, + "learning_rate": 9.997705852090023e-06, + "loss": 0.8255, + "step": 766 + }, + { + "epoch": 0.039418234145338675, + "grad_norm": 0.998755931854248, + "learning_rate": 9.9976805743687e-06, + "loss": 0.7362, + "step": 767 + }, + { + "epoch": 0.039469626888683315, + "grad_norm": 1.1804059743881226, + "learning_rate": 9.997655158182274e-06, + "loss": 0.8316, + "step": 768 + }, + { + "epoch": 0.039521019632027955, + "grad_norm": 1.2887158393859863, + "learning_rate": 9.99762960353145e-06, + "loss": 0.9099, + "step": 769 + }, + { + "epoch": 0.039572412375372595, + "grad_norm": 1.2008178234100342, + "learning_rate": 9.99760391041693e-06, + "loss": 0.8307, + "step": 770 + }, + { + "epoch": 0.039623805118717235, + "grad_norm": 1.2601547241210938, + "learning_rate": 9.99757807883943e-06, + "loss": 0.9175, + "step": 771 + }, + { + "epoch": 0.039675197862061876, + "grad_norm": 1.3057823181152344, + "learning_rate": 9.997552108799667e-06, + "loss": 0.8564, + "step": 772 + }, + { + "epoch": 0.039726590605406516, + "grad_norm": 1.3248363733291626, + "learning_rate": 9.997526000298357e-06, + "loss": 0.9223, + "step": 773 + }, + { + "epoch": 0.039777983348751156, + "grad_norm": 1.6168568134307861, + "learning_rate": 9.997499753336225e-06, + "loss": 0.9364, + "step": 774 + }, + { + "epoch": 0.039829376092095796, + "grad_norm": 0.9902132153511047, + "learning_rate": 9.997473367914002e-06, + "loss": 0.769, + "step": 775 + }, + { + "epoch": 0.039880768835440436, + "grad_norm": 1.4149781465530396, + "learning_rate": 9.997446844032412e-06, + "loss": 0.8625, + "step": 776 + }, + { + "epoch": 0.039932161578785076, + "grad_norm": 1.4048004150390625, + "learning_rate": 9.997420181692194e-06, + "loss": 0.8149, + "step": 777 + }, + { + "epoch": 0.039983554322129716, + "grad_norm": 1.2902519702911377, + "learning_rate": 9.997393380894085e-06, + "loss": 0.8474, + "step": 778 + }, + { + "epoch": 0.040034947065474356, + "grad_norm": 1.215725302696228, + "learning_rate": 9.997366441638829e-06, + "loss": 0.8152, + "step": 779 + }, + { + "epoch": 0.040086339808818996, + "grad_norm": 1.2510340213775635, + "learning_rate": 9.997339363927172e-06, + "loss": 0.8811, + "step": 780 + }, + { + "epoch": 0.04013773255216364, + "grad_norm": 1.3313243389129639, + "learning_rate": 9.997312147759864e-06, + "loss": 0.8737, + "step": 781 + }, + { + "epoch": 0.04018912529550828, + "grad_norm": 1.2366669178009033, + "learning_rate": 9.99728479313766e-06, + "loss": 0.8386, + "step": 782 + }, + { + "epoch": 0.04024051803885292, + "grad_norm": 1.2371160984039307, + "learning_rate": 9.997257300061316e-06, + "loss": 0.874, + "step": 783 + }, + { + "epoch": 0.04029191078219756, + "grad_norm": 1.276847243309021, + "learning_rate": 9.997229668531595e-06, + "loss": 0.8282, + "step": 784 + }, + { + "epoch": 0.04034330352554219, + "grad_norm": 1.2422157526016235, + "learning_rate": 9.997201898549262e-06, + "loss": 0.8679, + "step": 785 + }, + { + "epoch": 0.04039469626888683, + "grad_norm": 1.412040114402771, + "learning_rate": 9.997173990115085e-06, + "loss": 0.8773, + "step": 786 + }, + { + "epoch": 0.04044608901223147, + "grad_norm": 1.271793007850647, + "learning_rate": 9.997145943229843e-06, + "loss": 0.9256, + "step": 787 + }, + { + "epoch": 0.04049748175557611, + "grad_norm": 1.2193374633789062, + "learning_rate": 9.997117757894306e-06, + "loss": 0.8797, + "step": 788 + }, + { + "epoch": 0.04054887449892075, + "grad_norm": 1.1020599603652954, + "learning_rate": 9.997089434109257e-06, + "loss": 0.7367, + "step": 789 + }, + { + "epoch": 0.04060026724226539, + "grad_norm": 1.3232066631317139, + "learning_rate": 9.997060971875483e-06, + "loss": 0.8628, + "step": 790 + }, + { + "epoch": 0.04065165998561003, + "grad_norm": 1.250009298324585, + "learning_rate": 9.997032371193771e-06, + "loss": 0.892, + "step": 791 + }, + { + "epoch": 0.04070305272895467, + "grad_norm": 1.2399975061416626, + "learning_rate": 9.997003632064914e-06, + "loss": 0.8727, + "step": 792 + }, + { + "epoch": 0.04075444547229931, + "grad_norm": 1.1934688091278076, + "learning_rate": 9.996974754489707e-06, + "loss": 0.7545, + "step": 793 + }, + { + "epoch": 0.04080583821564395, + "grad_norm": 0.8413489460945129, + "learning_rate": 9.99694573846895e-06, + "loss": 0.7173, + "step": 794 + }, + { + "epoch": 0.04085723095898859, + "grad_norm": 1.244056224822998, + "learning_rate": 9.996916584003448e-06, + "loss": 0.8666, + "step": 795 + }, + { + "epoch": 0.04090862370233323, + "grad_norm": 1.2638477087020874, + "learning_rate": 9.996887291094011e-06, + "loss": 0.8823, + "step": 796 + }, + { + "epoch": 0.04096001644567787, + "grad_norm": 1.3062065839767456, + "learning_rate": 9.996857859741447e-06, + "loss": 0.8987, + "step": 797 + }, + { + "epoch": 0.04101140918902251, + "grad_norm": 1.2934308052062988, + "learning_rate": 9.996828289946571e-06, + "loss": 0.9168, + "step": 798 + }, + { + "epoch": 0.04106280193236715, + "grad_norm": 3.6413021087646484, + "learning_rate": 9.996798581710205e-06, + "loss": 0.8263, + "step": 799 + }, + { + "epoch": 0.04111419467571179, + "grad_norm": 1.3083852529525757, + "learning_rate": 9.99676873503317e-06, + "loss": 0.8921, + "step": 800 + }, + { + "epoch": 0.04116558741905643, + "grad_norm": 1.2100480794906616, + "learning_rate": 9.996738749916294e-06, + "loss": 0.9232, + "step": 801 + }, + { + "epoch": 0.04121698016240107, + "grad_norm": 1.220504879951477, + "learning_rate": 9.99670862636041e-06, + "loss": 0.8708, + "step": 802 + }, + { + "epoch": 0.04126837290574571, + "grad_norm": 1.1817091703414917, + "learning_rate": 9.996678364366347e-06, + "loss": 0.7598, + "step": 803 + }, + { + "epoch": 0.041319765649090345, + "grad_norm": 1.2372924089431763, + "learning_rate": 9.996647963934946e-06, + "loss": 0.8688, + "step": 804 + }, + { + "epoch": 0.041371158392434985, + "grad_norm": 1.2500534057617188, + "learning_rate": 9.996617425067052e-06, + "loss": 0.8714, + "step": 805 + }, + { + "epoch": 0.041422551135779626, + "grad_norm": 0.849418580532074, + "learning_rate": 9.99658674776351e-06, + "loss": 0.7227, + "step": 806 + }, + { + "epoch": 0.041473943879124266, + "grad_norm": 1.2983394861221313, + "learning_rate": 9.996555932025167e-06, + "loss": 0.8657, + "step": 807 + }, + { + "epoch": 0.041525336622468906, + "grad_norm": 1.3199512958526611, + "learning_rate": 9.99652497785288e-06, + "loss": 0.8479, + "step": 808 + }, + { + "epoch": 0.041576729365813546, + "grad_norm": 1.2835640907287598, + "learning_rate": 9.996493885247504e-06, + "loss": 0.9049, + "step": 809 + }, + { + "epoch": 0.041628122109158186, + "grad_norm": 1.361087441444397, + "learning_rate": 9.996462654209903e-06, + "loss": 0.8891, + "step": 810 + }, + { + "epoch": 0.041679514852502826, + "grad_norm": 1.259198546409607, + "learning_rate": 9.99643128474094e-06, + "loss": 0.8741, + "step": 811 + }, + { + "epoch": 0.041730907595847466, + "grad_norm": 1.2737828493118286, + "learning_rate": 9.996399776841484e-06, + "loss": 0.8768, + "step": 812 + }, + { + "epoch": 0.041782300339192106, + "grad_norm": 1.2765504121780396, + "learning_rate": 9.99636813051241e-06, + "loss": 0.922, + "step": 813 + }, + { + "epoch": 0.041833693082536746, + "grad_norm": 1.311645269393921, + "learning_rate": 9.996336345754597e-06, + "loss": 0.9281, + "step": 814 + }, + { + "epoch": 0.04188508582588139, + "grad_norm": 1.3025455474853516, + "learning_rate": 9.996304422568919e-06, + "loss": 0.8609, + "step": 815 + }, + { + "epoch": 0.04193647856922603, + "grad_norm": 0.9871379137039185, + "learning_rate": 9.996272360956265e-06, + "loss": 0.7395, + "step": 816 + }, + { + "epoch": 0.04198787131257067, + "grad_norm": 1.3791704177856445, + "learning_rate": 9.99624016091752e-06, + "loss": 0.8602, + "step": 817 + }, + { + "epoch": 0.04203926405591531, + "grad_norm": 1.3354383707046509, + "learning_rate": 9.996207822453583e-06, + "loss": 0.9623, + "step": 818 + }, + { + "epoch": 0.04209065679925995, + "grad_norm": 1.2503334283828735, + "learning_rate": 9.996175345565342e-06, + "loss": 0.8866, + "step": 819 + }, + { + "epoch": 0.04214204954260459, + "grad_norm": 1.2029733657836914, + "learning_rate": 9.996142730253701e-06, + "loss": 0.8049, + "step": 820 + }, + { + "epoch": 0.04219344228594923, + "grad_norm": 0.9518265724182129, + "learning_rate": 9.996109976519564e-06, + "loss": 0.7469, + "step": 821 + }, + { + "epoch": 0.04224483502929386, + "grad_norm": 0.8854423761367798, + "learning_rate": 9.996077084363836e-06, + "loss": 0.7245, + "step": 822 + }, + { + "epoch": 0.0422962277726385, + "grad_norm": 0.7791023850440979, + "learning_rate": 9.996044053787428e-06, + "loss": 0.7654, + "step": 823 + }, + { + "epoch": 0.04234762051598314, + "grad_norm": 1.2723650932312012, + "learning_rate": 9.996010884791258e-06, + "loss": 0.8454, + "step": 824 + }, + { + "epoch": 0.04239901325932778, + "grad_norm": 1.2480024099349976, + "learning_rate": 9.995977577376245e-06, + "loss": 0.8405, + "step": 825 + }, + { + "epoch": 0.04245040600267242, + "grad_norm": 1.2302253246307373, + "learning_rate": 9.995944131543311e-06, + "loss": 0.8855, + "step": 826 + }, + { + "epoch": 0.04250179874601706, + "grad_norm": 4.765591144561768, + "learning_rate": 9.99591054729338e-06, + "loss": 0.7343, + "step": 827 + }, + { + "epoch": 0.0425531914893617, + "grad_norm": 0.977520227432251, + "learning_rate": 9.995876824627386e-06, + "loss": 0.7399, + "step": 828 + }, + { + "epoch": 0.04260458423270634, + "grad_norm": 1.3106623888015747, + "learning_rate": 9.995842963546261e-06, + "loss": 0.9051, + "step": 829 + }, + { + "epoch": 0.04265597697605098, + "grad_norm": 1.2262039184570312, + "learning_rate": 9.995808964050946e-06, + "loss": 0.9119, + "step": 830 + }, + { + "epoch": 0.04270736971939562, + "grad_norm": 1.2501107454299927, + "learning_rate": 9.99577482614238e-06, + "loss": 0.9084, + "step": 831 + }, + { + "epoch": 0.04275876246274026, + "grad_norm": 1.253807544708252, + "learning_rate": 9.99574054982151e-06, + "loss": 0.9051, + "step": 832 + }, + { + "epoch": 0.0428101552060849, + "grad_norm": 1.3432157039642334, + "learning_rate": 9.995706135089283e-06, + "loss": 0.8671, + "step": 833 + }, + { + "epoch": 0.04286154794942954, + "grad_norm": 1.2076985836029053, + "learning_rate": 9.995671581946658e-06, + "loss": 0.8541, + "step": 834 + }, + { + "epoch": 0.04291294069277418, + "grad_norm": 1.402858018875122, + "learning_rate": 9.995636890394588e-06, + "loss": 0.826, + "step": 835 + }, + { + "epoch": 0.04296433343611882, + "grad_norm": 1.1987099647521973, + "learning_rate": 9.995602060434036e-06, + "loss": 0.883, + "step": 836 + }, + { + "epoch": 0.04301572617946346, + "grad_norm": 1.6601828336715698, + "learning_rate": 9.995567092065967e-06, + "loss": 0.8607, + "step": 837 + }, + { + "epoch": 0.0430671189228081, + "grad_norm": 1.201905369758606, + "learning_rate": 9.99553198529135e-06, + "loss": 0.8441, + "step": 838 + }, + { + "epoch": 0.04311851166615274, + "grad_norm": 1.1625796556472778, + "learning_rate": 9.995496740111155e-06, + "loss": 0.8303, + "step": 839 + }, + { + "epoch": 0.043169904409497376, + "grad_norm": 1.228757381439209, + "learning_rate": 9.995461356526362e-06, + "loss": 0.8511, + "step": 840 + }, + { + "epoch": 0.043221297152842016, + "grad_norm": 1.1575579643249512, + "learning_rate": 9.99542583453795e-06, + "loss": 0.9641, + "step": 841 + }, + { + "epoch": 0.043272689896186656, + "grad_norm": 1.2157249450683594, + "learning_rate": 9.995390174146901e-06, + "loss": 0.8376, + "step": 842 + }, + { + "epoch": 0.043324082639531296, + "grad_norm": 1.3628737926483154, + "learning_rate": 9.995354375354207e-06, + "loss": 0.7744, + "step": 843 + }, + { + "epoch": 0.043375475382875936, + "grad_norm": 1.309718132019043, + "learning_rate": 9.995318438160858e-06, + "loss": 0.9427, + "step": 844 + }, + { + "epoch": 0.043426868126220576, + "grad_norm": 1.284839153289795, + "learning_rate": 9.995282362567848e-06, + "loss": 0.8414, + "step": 845 + }, + { + "epoch": 0.043478260869565216, + "grad_norm": 1.5498034954071045, + "learning_rate": 9.99524614857618e-06, + "loss": 0.847, + "step": 846 + }, + { + "epoch": 0.043529653612909856, + "grad_norm": 0.9326204061508179, + "learning_rate": 9.995209796186854e-06, + "loss": 0.7553, + "step": 847 + }, + { + "epoch": 0.043581046356254496, + "grad_norm": 0.8366790413856506, + "learning_rate": 9.99517330540088e-06, + "loss": 0.7461, + "step": 848 + }, + { + "epoch": 0.043632439099599137, + "grad_norm": 1.3054695129394531, + "learning_rate": 9.995136676219265e-06, + "loss": 0.8842, + "step": 849 + }, + { + "epoch": 0.04368383184294378, + "grad_norm": 0.9366014003753662, + "learning_rate": 9.99509990864303e-06, + "loss": 0.7269, + "step": 850 + }, + { + "epoch": 0.04373522458628842, + "grad_norm": 1.3300738334655762, + "learning_rate": 9.995063002673186e-06, + "loss": 0.9163, + "step": 851 + }, + { + "epoch": 0.04378661732963306, + "grad_norm": 0.9600208401679993, + "learning_rate": 9.995025958310762e-06, + "loss": 0.8206, + "step": 852 + }, + { + "epoch": 0.0438380100729777, + "grad_norm": 1.2376947402954102, + "learning_rate": 9.994988775556782e-06, + "loss": 0.91, + "step": 853 + }, + { + "epoch": 0.04388940281632234, + "grad_norm": 1.197518229484558, + "learning_rate": 9.994951454412276e-06, + "loss": 0.8496, + "step": 854 + }, + { + "epoch": 0.04394079555966698, + "grad_norm": 1.2506026029586792, + "learning_rate": 9.994913994878276e-06, + "loss": 0.8787, + "step": 855 + }, + { + "epoch": 0.04399218830301162, + "grad_norm": 1.1781127452850342, + "learning_rate": 9.994876396955827e-06, + "loss": 0.878, + "step": 856 + }, + { + "epoch": 0.04404358104635626, + "grad_norm": 1.1351370811462402, + "learning_rate": 9.994838660645961e-06, + "loss": 0.8494, + "step": 857 + }, + { + "epoch": 0.0440949737897009, + "grad_norm": 1.331215262413025, + "learning_rate": 9.99480078594973e-06, + "loss": 0.8764, + "step": 858 + }, + { + "epoch": 0.04414636653304553, + "grad_norm": 1.2526382207870483, + "learning_rate": 9.994762772868181e-06, + "loss": 0.8266, + "step": 859 + }, + { + "epoch": 0.04419775927639017, + "grad_norm": 0.9349003434181213, + "learning_rate": 9.994724621402367e-06, + "loss": 0.7088, + "step": 860 + }, + { + "epoch": 0.04424915201973481, + "grad_norm": 1.250608205795288, + "learning_rate": 9.994686331553347e-06, + "loss": 0.9288, + "step": 861 + }, + { + "epoch": 0.04430054476307945, + "grad_norm": 1.1939023733139038, + "learning_rate": 9.99464790332218e-06, + "loss": 0.9197, + "step": 862 + }, + { + "epoch": 0.04435193750642409, + "grad_norm": 0.9250991344451904, + "learning_rate": 9.994609336709932e-06, + "loss": 0.7363, + "step": 863 + }, + { + "epoch": 0.04440333024976873, + "grad_norm": 1.230297565460205, + "learning_rate": 9.994570631717672e-06, + "loss": 0.8224, + "step": 864 + }, + { + "epoch": 0.04445472299311337, + "grad_norm": 1.2914111614227295, + "learning_rate": 9.994531788346468e-06, + "loss": 0.8489, + "step": 865 + }, + { + "epoch": 0.04450611573645801, + "grad_norm": 1.2369592189788818, + "learning_rate": 9.994492806597402e-06, + "loss": 0.857, + "step": 866 + }, + { + "epoch": 0.04455750847980265, + "grad_norm": 1.362882375717163, + "learning_rate": 9.99445368647155e-06, + "loss": 0.8636, + "step": 867 + }, + { + "epoch": 0.04460890122314729, + "grad_norm": 1.0259376764297485, + "learning_rate": 9.994414427969999e-06, + "loss": 0.7544, + "step": 868 + }, + { + "epoch": 0.04466029396649193, + "grad_norm": 1.288793683052063, + "learning_rate": 9.994375031093833e-06, + "loss": 0.9061, + "step": 869 + }, + { + "epoch": 0.04471168670983657, + "grad_norm": 0.7983279228210449, + "learning_rate": 9.994335495844145e-06, + "loss": 0.765, + "step": 870 + }, + { + "epoch": 0.04476307945318121, + "grad_norm": 1.2740875482559204, + "learning_rate": 9.994295822222032e-06, + "loss": 0.8894, + "step": 871 + }, + { + "epoch": 0.04481447219652585, + "grad_norm": 0.8956534266471863, + "learning_rate": 9.994256010228592e-06, + "loss": 0.7289, + "step": 872 + }, + { + "epoch": 0.04486586493987049, + "grad_norm": 0.9120482802391052, + "learning_rate": 9.994216059864928e-06, + "loss": 0.7659, + "step": 873 + }, + { + "epoch": 0.04491725768321513, + "grad_norm": 1.2172009944915771, + "learning_rate": 9.994175971132147e-06, + "loss": 0.814, + "step": 874 + }, + { + "epoch": 0.04496865042655977, + "grad_norm": 1.3630836009979248, + "learning_rate": 9.99413574403136e-06, + "loss": 0.9081, + "step": 875 + }, + { + "epoch": 0.04502004316990441, + "grad_norm": 1.2624825239181519, + "learning_rate": 9.994095378563679e-06, + "loss": 0.8703, + "step": 876 + }, + { + "epoch": 0.045071435913249046, + "grad_norm": 1.2351304292678833, + "learning_rate": 9.994054874730227e-06, + "loss": 0.865, + "step": 877 + }, + { + "epoch": 0.045122828656593686, + "grad_norm": 1.1374231576919556, + "learning_rate": 9.994014232532123e-06, + "loss": 0.8449, + "step": 878 + }, + { + "epoch": 0.045174221399938326, + "grad_norm": 1.2344890832901, + "learning_rate": 9.993973451970493e-06, + "loss": 0.9139, + "step": 879 + }, + { + "epoch": 0.045225614143282966, + "grad_norm": 1.2068499326705933, + "learning_rate": 9.993932533046469e-06, + "loss": 0.858, + "step": 880 + }, + { + "epoch": 0.045277006886627606, + "grad_norm": 1.2650738954544067, + "learning_rate": 9.993891475761181e-06, + "loss": 0.8465, + "step": 881 + }, + { + "epoch": 0.045328399629972246, + "grad_norm": 1.2620521783828735, + "learning_rate": 9.99385028011577e-06, + "loss": 0.8735, + "step": 882 + }, + { + "epoch": 0.045379792373316886, + "grad_norm": 1.1206152439117432, + "learning_rate": 9.993808946111376e-06, + "loss": 0.8119, + "step": 883 + }, + { + "epoch": 0.04543118511666153, + "grad_norm": 1.4009485244750977, + "learning_rate": 9.993767473749145e-06, + "loss": 0.861, + "step": 884 + }, + { + "epoch": 0.04548257786000617, + "grad_norm": 1.3079158067703247, + "learning_rate": 9.993725863030224e-06, + "loss": 0.8456, + "step": 885 + }, + { + "epoch": 0.04553397060335081, + "grad_norm": 1.1648167371749878, + "learning_rate": 9.993684113955769e-06, + "loss": 0.935, + "step": 886 + }, + { + "epoch": 0.04558536334669545, + "grad_norm": 1.1732007265090942, + "learning_rate": 9.993642226526934e-06, + "loss": 0.923, + "step": 887 + }, + { + "epoch": 0.04563675609004009, + "grad_norm": 1.198224425315857, + "learning_rate": 9.99360020074488e-06, + "loss": 0.8903, + "step": 888 + }, + { + "epoch": 0.04568814883338473, + "grad_norm": 1.4259309768676758, + "learning_rate": 9.99355803661077e-06, + "loss": 0.9653, + "step": 889 + }, + { + "epoch": 0.04573954157672937, + "grad_norm": 1.2178943157196045, + "learning_rate": 9.993515734125777e-06, + "loss": 0.9011, + "step": 890 + }, + { + "epoch": 0.04579093432007401, + "grad_norm": 1.1884068250656128, + "learning_rate": 9.99347329329107e-06, + "loss": 0.8365, + "step": 891 + }, + { + "epoch": 0.04584232706341865, + "grad_norm": 1.3186900615692139, + "learning_rate": 9.993430714107823e-06, + "loss": 0.8671, + "step": 892 + }, + { + "epoch": 0.04589371980676329, + "grad_norm": 1.2285939455032349, + "learning_rate": 9.99338799657722e-06, + "loss": 0.8665, + "step": 893 + }, + { + "epoch": 0.04594511255010793, + "grad_norm": 1.2262039184570312, + "learning_rate": 9.99334514070044e-06, + "loss": 0.8828, + "step": 894 + }, + { + "epoch": 0.04599650529345257, + "grad_norm": 1.2611392736434937, + "learning_rate": 9.993302146478673e-06, + "loss": 0.8595, + "step": 895 + }, + { + "epoch": 0.0460478980367972, + "grad_norm": 1.150963544845581, + "learning_rate": 9.99325901391311e-06, + "loss": 0.7621, + "step": 896 + }, + { + "epoch": 0.04609929078014184, + "grad_norm": 1.3334376811981201, + "learning_rate": 9.993215743004947e-06, + "loss": 0.9106, + "step": 897 + }, + { + "epoch": 0.04615068352348648, + "grad_norm": 1.216707706451416, + "learning_rate": 9.993172333755379e-06, + "loss": 0.8735, + "step": 898 + }, + { + "epoch": 0.04620207626683112, + "grad_norm": 1.1890424489974976, + "learning_rate": 9.993128786165613e-06, + "loss": 0.8786, + "step": 899 + }, + { + "epoch": 0.04625346901017576, + "grad_norm": 1.1397758722305298, + "learning_rate": 9.993085100236852e-06, + "loss": 0.8319, + "step": 900 + }, + { + "epoch": 0.0463048617535204, + "grad_norm": 1.268941044807434, + "learning_rate": 9.99304127597031e-06, + "loss": 0.8706, + "step": 901 + }, + { + "epoch": 0.04635625449686504, + "grad_norm": 1.1660728454589844, + "learning_rate": 9.992997313367199e-06, + "loss": 0.8663, + "step": 902 + }, + { + "epoch": 0.04640764724020968, + "grad_norm": 1.2808856964111328, + "learning_rate": 9.992953212428738e-06, + "loss": 0.8469, + "step": 903 + }, + { + "epoch": 0.04645903998355432, + "grad_norm": 1.2366071939468384, + "learning_rate": 9.992908973156145e-06, + "loss": 0.8916, + "step": 904 + }, + { + "epoch": 0.04651043272689896, + "grad_norm": 1.21945321559906, + "learning_rate": 9.992864595550653e-06, + "loss": 0.8588, + "step": 905 + }, + { + "epoch": 0.0465618254702436, + "grad_norm": 0.7959684729576111, + "learning_rate": 9.992820079613484e-06, + "loss": 0.7348, + "step": 906 + }, + { + "epoch": 0.04661321821358824, + "grad_norm": 1.2446109056472778, + "learning_rate": 9.992775425345877e-06, + "loss": 0.8705, + "step": 907 + }, + { + "epoch": 0.04666461095693288, + "grad_norm": 1.2366207838058472, + "learning_rate": 9.992730632749065e-06, + "loss": 0.8227, + "step": 908 + }, + { + "epoch": 0.04671600370027752, + "grad_norm": 1.2123759984970093, + "learning_rate": 9.992685701824292e-06, + "loss": 0.8488, + "step": 909 + }, + { + "epoch": 0.04676739644362216, + "grad_norm": 0.9096382260322571, + "learning_rate": 9.992640632572802e-06, + "loss": 0.7521, + "step": 910 + }, + { + "epoch": 0.0468187891869668, + "grad_norm": 0.8697991967201233, + "learning_rate": 9.992595424995843e-06, + "loss": 0.7857, + "step": 911 + }, + { + "epoch": 0.04687018193031144, + "grad_norm": 1.2709344625473022, + "learning_rate": 9.99255007909467e-06, + "loss": 0.8836, + "step": 912 + }, + { + "epoch": 0.04692157467365608, + "grad_norm": 1.2813383340835571, + "learning_rate": 9.992504594870535e-06, + "loss": 0.8368, + "step": 913 + }, + { + "epoch": 0.046972967417000716, + "grad_norm": 1.4158188104629517, + "learning_rate": 9.992458972324702e-06, + "loss": 0.9485, + "step": 914 + }, + { + "epoch": 0.047024360160345356, + "grad_norm": 1.161807656288147, + "learning_rate": 9.992413211458431e-06, + "loss": 0.8275, + "step": 915 + }, + { + "epoch": 0.047075752903689996, + "grad_norm": 1.1344105005264282, + "learning_rate": 9.992367312272995e-06, + "loss": 0.854, + "step": 916 + }, + { + "epoch": 0.047127145647034636, + "grad_norm": 1.260993480682373, + "learning_rate": 9.992321274769661e-06, + "loss": 0.869, + "step": 917 + }, + { + "epoch": 0.04717853839037928, + "grad_norm": 1.3351908922195435, + "learning_rate": 9.992275098949709e-06, + "loss": 0.8803, + "step": 918 + }, + { + "epoch": 0.04722993113372392, + "grad_norm": 1.2215328216552734, + "learning_rate": 9.992228784814414e-06, + "loss": 0.8758, + "step": 919 + }, + { + "epoch": 0.04728132387706856, + "grad_norm": 1.2368913888931274, + "learning_rate": 9.99218233236506e-06, + "loss": 0.8615, + "step": 920 + }, + { + "epoch": 0.0473327166204132, + "grad_norm": 0.9433385133743286, + "learning_rate": 9.992135741602937e-06, + "loss": 0.7385, + "step": 921 + }, + { + "epoch": 0.04738410936375784, + "grad_norm": 1.1929795742034912, + "learning_rate": 9.992089012529335e-06, + "loss": 0.8962, + "step": 922 + }, + { + "epoch": 0.04743550210710248, + "grad_norm": 1.2160569429397583, + "learning_rate": 9.992042145145547e-06, + "loss": 0.9045, + "step": 923 + }, + { + "epoch": 0.04748689485044712, + "grad_norm": 1.5480883121490479, + "learning_rate": 9.99199513945287e-06, + "loss": 0.8098, + "step": 924 + }, + { + "epoch": 0.04753828759379176, + "grad_norm": 1.2664825916290283, + "learning_rate": 9.991947995452612e-06, + "loss": 0.9039, + "step": 925 + }, + { + "epoch": 0.0475896803371364, + "grad_norm": 1.2778738737106323, + "learning_rate": 9.991900713146073e-06, + "loss": 0.8572, + "step": 926 + }, + { + "epoch": 0.04764107308048104, + "grad_norm": 1.5822649002075195, + "learning_rate": 9.991853292534565e-06, + "loss": 0.8533, + "step": 927 + }, + { + "epoch": 0.04769246582382568, + "grad_norm": 1.2170166969299316, + "learning_rate": 9.991805733619405e-06, + "loss": 0.9285, + "step": 928 + }, + { + "epoch": 0.04774385856717032, + "grad_norm": 1.2741129398345947, + "learning_rate": 9.991758036401905e-06, + "loss": 0.8095, + "step": 929 + }, + { + "epoch": 0.04779525131051496, + "grad_norm": 0.8206681609153748, + "learning_rate": 9.991710200883391e-06, + "loss": 0.7744, + "step": 930 + }, + { + "epoch": 0.0478466440538596, + "grad_norm": 1.1823898553848267, + "learning_rate": 9.991662227065187e-06, + "loss": 0.8293, + "step": 931 + }, + { + "epoch": 0.04789803679720424, + "grad_norm": 1.1964441537857056, + "learning_rate": 9.991614114948623e-06, + "loss": 0.8378, + "step": 932 + }, + { + "epoch": 0.04794942954054887, + "grad_norm": 1.189880609512329, + "learning_rate": 9.991565864535028e-06, + "loss": 0.8139, + "step": 933 + }, + { + "epoch": 0.04800082228389351, + "grad_norm": 1.2207995653152466, + "learning_rate": 9.991517475825744e-06, + "loss": 0.9251, + "step": 934 + }, + { + "epoch": 0.04805221502723815, + "grad_norm": 1.1745632886886597, + "learning_rate": 9.991468948822111e-06, + "loss": 0.857, + "step": 935 + }, + { + "epoch": 0.04810360777058279, + "grad_norm": 1.2542591094970703, + "learning_rate": 9.99142028352547e-06, + "loss": 0.9277, + "step": 936 + }, + { + "epoch": 0.04815500051392743, + "grad_norm": 1.1780142784118652, + "learning_rate": 9.991371479937174e-06, + "loss": 0.8924, + "step": 937 + }, + { + "epoch": 0.04820639325727207, + "grad_norm": 1.2028493881225586, + "learning_rate": 9.99132253805857e-06, + "loss": 0.8033, + "step": 938 + }, + { + "epoch": 0.04825778600061671, + "grad_norm": 1.1642537117004395, + "learning_rate": 9.991273457891015e-06, + "loss": 0.8291, + "step": 939 + }, + { + "epoch": 0.04830917874396135, + "grad_norm": 1.2911299467086792, + "learning_rate": 9.991224239435873e-06, + "loss": 0.8627, + "step": 940 + }, + { + "epoch": 0.04836057148730599, + "grad_norm": 1.182978630065918, + "learning_rate": 9.991174882694504e-06, + "loss": 0.9141, + "step": 941 + }, + { + "epoch": 0.04841196423065063, + "grad_norm": 1.2126566171646118, + "learning_rate": 9.991125387668276e-06, + "loss": 0.896, + "step": 942 + }, + { + "epoch": 0.04846335697399527, + "grad_norm": 1.1788886785507202, + "learning_rate": 9.99107575435856e-06, + "loss": 0.9205, + "step": 943 + }, + { + "epoch": 0.04851474971733991, + "grad_norm": 0.8734241127967834, + "learning_rate": 9.991025982766733e-06, + "loss": 0.7539, + "step": 944 + }, + { + "epoch": 0.04856614246068455, + "grad_norm": 1.3815383911132812, + "learning_rate": 9.990976072894172e-06, + "loss": 0.8866, + "step": 945 + }, + { + "epoch": 0.04861753520402919, + "grad_norm": 1.3776768445968628, + "learning_rate": 9.990926024742262e-06, + "loss": 0.8968, + "step": 946 + }, + { + "epoch": 0.04866892794737383, + "grad_norm": 1.209430456161499, + "learning_rate": 9.990875838312387e-06, + "loss": 0.896, + "step": 947 + }, + { + "epoch": 0.04872032069071847, + "grad_norm": 0.752593994140625, + "learning_rate": 9.99082551360594e-06, + "loss": 0.7264, + "step": 948 + }, + { + "epoch": 0.04877171343406311, + "grad_norm": 1.249581217765808, + "learning_rate": 9.990775050624312e-06, + "loss": 0.8631, + "step": 949 + }, + { + "epoch": 0.04882310617740775, + "grad_norm": 1.3245007991790771, + "learning_rate": 9.990724449368903e-06, + "loss": 0.8771, + "step": 950 + }, + { + "epoch": 0.048874498920752386, + "grad_norm": 1.2435704469680786, + "learning_rate": 9.990673709841117e-06, + "loss": 0.888, + "step": 951 + }, + { + "epoch": 0.048925891664097027, + "grad_norm": 1.1820589303970337, + "learning_rate": 9.990622832042355e-06, + "loss": 0.8308, + "step": 952 + }, + { + "epoch": 0.04897728440744167, + "grad_norm": 0.8945339918136597, + "learning_rate": 9.990571815974032e-06, + "loss": 0.673, + "step": 953 + }, + { + "epoch": 0.04902867715078631, + "grad_norm": 1.3911305665969849, + "learning_rate": 9.990520661637559e-06, + "loss": 0.8685, + "step": 954 + }, + { + "epoch": 0.04908006989413095, + "grad_norm": 1.5091290473937988, + "learning_rate": 9.990469369034353e-06, + "loss": 0.8097, + "step": 955 + }, + { + "epoch": 0.04913146263747559, + "grad_norm": 1.2043997049331665, + "learning_rate": 9.990417938165834e-06, + "loss": 0.8348, + "step": 956 + }, + { + "epoch": 0.04918285538082023, + "grad_norm": 1.2417445182800293, + "learning_rate": 9.990366369033428e-06, + "loss": 0.8868, + "step": 957 + }, + { + "epoch": 0.04923424812416487, + "grad_norm": 1.203045129776001, + "learning_rate": 9.990314661638563e-06, + "loss": 0.8806, + "step": 958 + }, + { + "epoch": 0.04928564086750951, + "grad_norm": 1.1912086009979248, + "learning_rate": 9.990262815982674e-06, + "loss": 0.8932, + "step": 959 + }, + { + "epoch": 0.04933703361085415, + "grad_norm": 1.2181487083435059, + "learning_rate": 9.990210832067197e-06, + "loss": 0.8746, + "step": 960 + }, + { + "epoch": 0.04938842635419879, + "grad_norm": 1.251497507095337, + "learning_rate": 9.99015870989357e-06, + "loss": 0.8681, + "step": 961 + }, + { + "epoch": 0.04943981909754343, + "grad_norm": 1.2161120176315308, + "learning_rate": 9.99010644946324e-06, + "loss": 0.866, + "step": 962 + }, + { + "epoch": 0.04949121184088807, + "grad_norm": 1.2148690223693848, + "learning_rate": 9.990054050777652e-06, + "loss": 0.8245, + "step": 963 + }, + { + "epoch": 0.04954260458423271, + "grad_norm": 1.3033994436264038, + "learning_rate": 9.990001513838257e-06, + "loss": 0.8874, + "step": 964 + }, + { + "epoch": 0.04959399732757735, + "grad_norm": 1.1783607006072998, + "learning_rate": 9.989948838646515e-06, + "loss": 0.8553, + "step": 965 + }, + { + "epoch": 0.04964539007092199, + "grad_norm": 1.2339223623275757, + "learning_rate": 9.989896025203882e-06, + "loss": 0.8185, + "step": 966 + }, + { + "epoch": 0.04969678281426663, + "grad_norm": 1.2227520942687988, + "learning_rate": 9.989843073511823e-06, + "loss": 0.8142, + "step": 967 + }, + { + "epoch": 0.04974817555761127, + "grad_norm": 0.8825535774230957, + "learning_rate": 9.989789983571803e-06, + "loss": 0.7025, + "step": 968 + }, + { + "epoch": 0.04979956830095591, + "grad_norm": 0.8393763899803162, + "learning_rate": 9.989736755385296e-06, + "loss": 0.7113, + "step": 969 + }, + { + "epoch": 0.04985096104430054, + "grad_norm": 1.344678521156311, + "learning_rate": 9.989683388953772e-06, + "loss": 0.8574, + "step": 970 + }, + { + "epoch": 0.04990235378764518, + "grad_norm": 1.5798983573913574, + "learning_rate": 9.989629884278715e-06, + "loss": 0.9022, + "step": 971 + }, + { + "epoch": 0.04995374653098982, + "grad_norm": 1.1685906648635864, + "learning_rate": 9.989576241361606e-06, + "loss": 0.8992, + "step": 972 + }, + { + "epoch": 0.05000513927433446, + "grad_norm": 1.1721597909927368, + "learning_rate": 9.989522460203927e-06, + "loss": 0.8778, + "step": 973 + }, + { + "epoch": 0.0500565320176791, + "grad_norm": 1.5442276000976562, + "learning_rate": 9.989468540807173e-06, + "loss": 0.7839, + "step": 974 + }, + { + "epoch": 0.05010792476102374, + "grad_norm": 1.2165648937225342, + "learning_rate": 9.989414483172836e-06, + "loss": 0.897, + "step": 975 + }, + { + "epoch": 0.05015931750436838, + "grad_norm": 1.1980541944503784, + "learning_rate": 9.989360287302414e-06, + "loss": 0.8156, + "step": 976 + }, + { + "epoch": 0.05021071024771302, + "grad_norm": 1.239953637123108, + "learning_rate": 9.989305953197407e-06, + "loss": 0.8944, + "step": 977 + }, + { + "epoch": 0.05026210299105766, + "grad_norm": 1.1867806911468506, + "learning_rate": 9.989251480859322e-06, + "loss": 0.9201, + "step": 978 + }, + { + "epoch": 0.0503134957344023, + "grad_norm": 1.2487953901290894, + "learning_rate": 9.989196870289668e-06, + "loss": 0.8963, + "step": 979 + }, + { + "epoch": 0.05036488847774694, + "grad_norm": 1.2734178304672241, + "learning_rate": 9.989142121489958e-06, + "loss": 0.8605, + "step": 980 + }, + { + "epoch": 0.05041628122109158, + "grad_norm": 0.9181957244873047, + "learning_rate": 9.98908723446171e-06, + "loss": 0.7451, + "step": 981 + }, + { + "epoch": 0.05046767396443622, + "grad_norm": 1.2341207265853882, + "learning_rate": 9.989032209206441e-06, + "loss": 0.8784, + "step": 982 + }, + { + "epoch": 0.05051906670778086, + "grad_norm": 1.2605217695236206, + "learning_rate": 9.98897704572568e-06, + "loss": 0.8552, + "step": 983 + }, + { + "epoch": 0.0505704594511255, + "grad_norm": 0.7708296179771423, + "learning_rate": 9.988921744020953e-06, + "loss": 0.7355, + "step": 984 + }, + { + "epoch": 0.05062185219447014, + "grad_norm": 0.8412183523178101, + "learning_rate": 9.988866304093794e-06, + "loss": 0.7563, + "step": 985 + }, + { + "epoch": 0.05067324493781478, + "grad_norm": 1.17894446849823, + "learning_rate": 9.988810725945736e-06, + "loss": 0.7985, + "step": 986 + }, + { + "epoch": 0.050724637681159424, + "grad_norm": 1.2365716695785522, + "learning_rate": 9.98875500957832e-06, + "loss": 0.8291, + "step": 987 + }, + { + "epoch": 0.05077603042450406, + "grad_norm": 0.9223111867904663, + "learning_rate": 9.98869915499309e-06, + "loss": 0.812, + "step": 988 + }, + { + "epoch": 0.0508274231678487, + "grad_norm": 1.2350711822509766, + "learning_rate": 9.988643162191594e-06, + "loss": 0.928, + "step": 989 + }, + { + "epoch": 0.05087881591119334, + "grad_norm": 1.1816688776016235, + "learning_rate": 9.988587031175384e-06, + "loss": 0.8543, + "step": 990 + }, + { + "epoch": 0.05093020865453798, + "grad_norm": 1.224225640296936, + "learning_rate": 9.98853076194601e-06, + "loss": 0.8251, + "step": 991 + }, + { + "epoch": 0.05098160139788262, + "grad_norm": 0.9485036134719849, + "learning_rate": 9.98847435450504e-06, + "loss": 0.7785, + "step": 992 + }, + { + "epoch": 0.05103299414122726, + "grad_norm": 1.2254879474639893, + "learning_rate": 9.988417808854029e-06, + "loss": 0.8685, + "step": 993 + }, + { + "epoch": 0.0510843868845719, + "grad_norm": 1.220816731452942, + "learning_rate": 9.988361124994547e-06, + "loss": 0.8996, + "step": 994 + }, + { + "epoch": 0.05113577962791654, + "grad_norm": 1.2276394367218018, + "learning_rate": 9.988304302928165e-06, + "loss": 0.8858, + "step": 995 + }, + { + "epoch": 0.05118717237126118, + "grad_norm": 1.2470687627792358, + "learning_rate": 9.988247342656456e-06, + "loss": 0.871, + "step": 996 + }, + { + "epoch": 0.05123856511460582, + "grad_norm": 1.3044482469558716, + "learning_rate": 9.988190244180998e-06, + "loss": 0.8866, + "step": 997 + }, + { + "epoch": 0.05128995785795046, + "grad_norm": 0.8551509976387024, + "learning_rate": 9.988133007503374e-06, + "loss": 0.7724, + "step": 998 + }, + { + "epoch": 0.0513413506012951, + "grad_norm": 1.4605845212936401, + "learning_rate": 9.988075632625168e-06, + "loss": 0.9155, + "step": 999 + }, + { + "epoch": 0.05139274334463974, + "grad_norm": 1.2902426719665527, + "learning_rate": 9.988018119547971e-06, + "loss": 0.7996, + "step": 1000 + }, + { + "epoch": 0.05144413608798438, + "grad_norm": 1.2236751317977905, + "learning_rate": 9.987960468273377e-06, + "loss": 0.9907, + "step": 1001 + }, + { + "epoch": 0.05149552883132902, + "grad_norm": 1.1850382089614868, + "learning_rate": 9.987902678802983e-06, + "loss": 0.8435, + "step": 1002 + }, + { + "epoch": 0.05154692157467366, + "grad_norm": 1.1683807373046875, + "learning_rate": 9.987844751138389e-06, + "loss": 0.7919, + "step": 1003 + }, + { + "epoch": 0.0515983143180183, + "grad_norm": 1.381160020828247, + "learning_rate": 9.9877866852812e-06, + "loss": 0.8719, + "step": 1004 + }, + { + "epoch": 0.05164970706136294, + "grad_norm": 1.1939815282821655, + "learning_rate": 9.987728481233025e-06, + "loss": 0.869, + "step": 1005 + }, + { + "epoch": 0.05170109980470758, + "grad_norm": 1.1784522533416748, + "learning_rate": 9.987670138995478e-06, + "loss": 0.8336, + "step": 1006 + }, + { + "epoch": 0.05175249254805221, + "grad_norm": 1.1805102825164795, + "learning_rate": 9.987611658570174e-06, + "loss": 0.9248, + "step": 1007 + }, + { + "epoch": 0.05180388529139685, + "grad_norm": 0.8179561495780945, + "learning_rate": 9.987553039958732e-06, + "loss": 0.7525, + "step": 1008 + }, + { + "epoch": 0.05185527803474149, + "grad_norm": 1.22408926486969, + "learning_rate": 9.98749428316278e-06, + "loss": 0.8827, + "step": 1009 + }, + { + "epoch": 0.05190667077808613, + "grad_norm": 0.7488641142845154, + "learning_rate": 9.987435388183944e-06, + "loss": 0.7476, + "step": 1010 + }, + { + "epoch": 0.05195806352143077, + "grad_norm": 1.181216835975647, + "learning_rate": 9.987376355023853e-06, + "loss": 0.9104, + "step": 1011 + }, + { + "epoch": 0.05200945626477541, + "grad_norm": 1.1705787181854248, + "learning_rate": 9.987317183684146e-06, + "loss": 0.8537, + "step": 1012 + }, + { + "epoch": 0.05206084900812005, + "grad_norm": 1.3003146648406982, + "learning_rate": 9.987257874166461e-06, + "loss": 0.8231, + "step": 1013 + }, + { + "epoch": 0.05211224175146469, + "grad_norm": 1.1806389093399048, + "learning_rate": 9.987198426472442e-06, + "loss": 0.8397, + "step": 1014 + }, + { + "epoch": 0.05216363449480933, + "grad_norm": 1.2405699491500854, + "learning_rate": 9.987138840603735e-06, + "loss": 0.8915, + "step": 1015 + }, + { + "epoch": 0.05221502723815397, + "grad_norm": 1.2919827699661255, + "learning_rate": 9.987079116561993e-06, + "loss": 0.9294, + "step": 1016 + }, + { + "epoch": 0.05226641998149861, + "grad_norm": 1.185698390007019, + "learning_rate": 9.987019254348867e-06, + "loss": 0.8878, + "step": 1017 + }, + { + "epoch": 0.05231781272484325, + "grad_norm": 1.253759503364563, + "learning_rate": 9.986959253966018e-06, + "loss": 0.8949, + "step": 1018 + }, + { + "epoch": 0.05236920546818789, + "grad_norm": 1.3481354713439941, + "learning_rate": 9.98689911541511e-06, + "loss": 0.8776, + "step": 1019 + }, + { + "epoch": 0.05242059821153253, + "grad_norm": 1.232424020767212, + "learning_rate": 9.986838838697806e-06, + "loss": 0.8595, + "step": 1020 + }, + { + "epoch": 0.05247199095487717, + "grad_norm": 0.9810240864753723, + "learning_rate": 9.986778423815777e-06, + "loss": 0.7827, + "step": 1021 + }, + { + "epoch": 0.052523383698221814, + "grad_norm": 1.2821382284164429, + "learning_rate": 9.986717870770697e-06, + "loss": 0.9027, + "step": 1022 + }, + { + "epoch": 0.052574776441566454, + "grad_norm": 1.1697237491607666, + "learning_rate": 9.986657179564244e-06, + "loss": 0.8659, + "step": 1023 + }, + { + "epoch": 0.052626169184911094, + "grad_norm": 1.1487267017364502, + "learning_rate": 9.986596350198099e-06, + "loss": 0.8716, + "step": 1024 + }, + { + "epoch": 0.05267756192825573, + "grad_norm": 1.1695270538330078, + "learning_rate": 9.986535382673947e-06, + "loss": 0.9102, + "step": 1025 + }, + { + "epoch": 0.05272895467160037, + "grad_norm": 1.4008209705352783, + "learning_rate": 9.98647427699348e-06, + "loss": 0.902, + "step": 1026 + }, + { + "epoch": 0.05278034741494501, + "grad_norm": 1.5199717283248901, + "learning_rate": 9.986413033158386e-06, + "loss": 0.9022, + "step": 1027 + }, + { + "epoch": 0.05283174015828965, + "grad_norm": 0.9533934593200684, + "learning_rate": 9.986351651170367e-06, + "loss": 0.7045, + "step": 1028 + }, + { + "epoch": 0.05288313290163429, + "grad_norm": 1.3180456161499023, + "learning_rate": 9.98629013103112e-06, + "loss": 0.8411, + "step": 1029 + }, + { + "epoch": 0.05293452564497893, + "grad_norm": 1.2371271848678589, + "learning_rate": 9.986228472742352e-06, + "loss": 0.8687, + "step": 1030 + }, + { + "epoch": 0.05298591838832357, + "grad_norm": 1.2746150493621826, + "learning_rate": 9.986166676305767e-06, + "loss": 0.881, + "step": 1031 + }, + { + "epoch": 0.05303731113166821, + "grad_norm": 1.211668610572815, + "learning_rate": 9.98610474172308e-06, + "loss": 0.8549, + "step": 1032 + }, + { + "epoch": 0.05308870387501285, + "grad_norm": 1.3090100288391113, + "learning_rate": 9.986042668996012e-06, + "loss": 0.8788, + "step": 1033 + }, + { + "epoch": 0.05314009661835749, + "grad_norm": 1.2651063203811646, + "learning_rate": 9.985980458126275e-06, + "loss": 0.8467, + "step": 1034 + }, + { + "epoch": 0.05319148936170213, + "grad_norm": 1.2291208505630493, + "learning_rate": 9.985918109115594e-06, + "loss": 0.8443, + "step": 1035 + }, + { + "epoch": 0.05324288210504677, + "grad_norm": 1.2718762159347534, + "learning_rate": 9.985855621965699e-06, + "loss": 0.8285, + "step": 1036 + }, + { + "epoch": 0.05329427484839141, + "grad_norm": 1.2881598472595215, + "learning_rate": 9.98579299667832e-06, + "loss": 0.9003, + "step": 1037 + }, + { + "epoch": 0.05334566759173605, + "grad_norm": 1.1758185625076294, + "learning_rate": 9.985730233255193e-06, + "loss": 0.8947, + "step": 1038 + }, + { + "epoch": 0.05339706033508069, + "grad_norm": 1.0350744724273682, + "learning_rate": 9.985667331698056e-06, + "loss": 0.7741, + "step": 1039 + }, + { + "epoch": 0.05344845307842533, + "grad_norm": 1.23697829246521, + "learning_rate": 9.985604292008651e-06, + "loss": 0.8552, + "step": 1040 + }, + { + "epoch": 0.05349984582176997, + "grad_norm": 1.2260633707046509, + "learning_rate": 9.985541114188727e-06, + "loss": 0.907, + "step": 1041 + }, + { + "epoch": 0.05355123856511461, + "grad_norm": 0.8537032008171082, + "learning_rate": 9.985477798240031e-06, + "loss": 0.7334, + "step": 1042 + }, + { + "epoch": 0.05360263130845925, + "grad_norm": 0.8612209558486938, + "learning_rate": 9.985414344164319e-06, + "loss": 0.7084, + "step": 1043 + }, + { + "epoch": 0.05365402405180388, + "grad_norm": 1.2466439008712769, + "learning_rate": 9.985350751963349e-06, + "loss": 0.8141, + "step": 1044 + }, + { + "epoch": 0.05370541679514852, + "grad_norm": 1.2100951671600342, + "learning_rate": 9.985287021638885e-06, + "loss": 0.8426, + "step": 1045 + }, + { + "epoch": 0.05375680953849316, + "grad_norm": 1.2189053297042847, + "learning_rate": 9.985223153192688e-06, + "loss": 0.8117, + "step": 1046 + }, + { + "epoch": 0.0538082022818378, + "grad_norm": 1.2657212018966675, + "learning_rate": 9.985159146626533e-06, + "loss": 0.8353, + "step": 1047 + }, + { + "epoch": 0.05385959502518244, + "grad_norm": 1.265740156173706, + "learning_rate": 9.985095001942189e-06, + "loss": 0.9153, + "step": 1048 + }, + { + "epoch": 0.05391098776852708, + "grad_norm": 1.2464641332626343, + "learning_rate": 9.985030719141435e-06, + "loss": 0.8382, + "step": 1049 + }, + { + "epoch": 0.05396238051187172, + "grad_norm": 1.208024501800537, + "learning_rate": 9.984966298226052e-06, + "loss": 0.89, + "step": 1050 + }, + { + "epoch": 0.05401377325521636, + "grad_norm": 1.2079896926879883, + "learning_rate": 9.984901739197826e-06, + "loss": 0.9109, + "step": 1051 + }, + { + "epoch": 0.054065165998561, + "grad_norm": 1.1374354362487793, + "learning_rate": 9.984837042058541e-06, + "loss": 0.8636, + "step": 1052 + }, + { + "epoch": 0.05411655874190564, + "grad_norm": 1.2355918884277344, + "learning_rate": 9.984772206809995e-06, + "loss": 0.8409, + "step": 1053 + }, + { + "epoch": 0.05416795148525028, + "grad_norm": 0.7978951930999756, + "learning_rate": 9.984707233453981e-06, + "loss": 0.7453, + "step": 1054 + }, + { + "epoch": 0.05421934422859492, + "grad_norm": 1.1659529209136963, + "learning_rate": 9.984642121992302e-06, + "loss": 0.8486, + "step": 1055 + }, + { + "epoch": 0.054270736971939564, + "grad_norm": 1.1831881999969482, + "learning_rate": 9.984576872426758e-06, + "loss": 0.8362, + "step": 1056 + }, + { + "epoch": 0.054322129715284204, + "grad_norm": 1.197871446609497, + "learning_rate": 9.98451148475916e-06, + "loss": 0.8395, + "step": 1057 + }, + { + "epoch": 0.054373522458628844, + "grad_norm": 1.2231751680374146, + "learning_rate": 9.98444595899132e-06, + "loss": 0.8576, + "step": 1058 + }, + { + "epoch": 0.054424915201973484, + "grad_norm": 1.2782758474349976, + "learning_rate": 9.984380295125052e-06, + "loss": 0.9037, + "step": 1059 + }, + { + "epoch": 0.054476307945318124, + "grad_norm": 1.1367336511611938, + "learning_rate": 9.984314493162172e-06, + "loss": 0.8509, + "step": 1060 + }, + { + "epoch": 0.054527700688662764, + "grad_norm": 0.8485698103904724, + "learning_rate": 9.98424855310451e-06, + "loss": 0.7943, + "step": 1061 + }, + { + "epoch": 0.0545790934320074, + "grad_norm": 0.8685941696166992, + "learning_rate": 9.984182474953887e-06, + "loss": 0.7202, + "step": 1062 + }, + { + "epoch": 0.05463048617535204, + "grad_norm": 1.2617424726486206, + "learning_rate": 9.984116258712138e-06, + "loss": 0.8514, + "step": 1063 + }, + { + "epoch": 0.05468187891869668, + "grad_norm": 0.7770624756813049, + "learning_rate": 9.984049904381095e-06, + "loss": 0.7428, + "step": 1064 + }, + { + "epoch": 0.05473327166204132, + "grad_norm": 0.9104249477386475, + "learning_rate": 9.983983411962597e-06, + "loss": 0.7272, + "step": 1065 + }, + { + "epoch": 0.05478466440538596, + "grad_norm": 1.2631593942642212, + "learning_rate": 9.983916781458485e-06, + "loss": 0.8651, + "step": 1066 + }, + { + "epoch": 0.0548360571487306, + "grad_norm": 1.2397582530975342, + "learning_rate": 9.983850012870609e-06, + "loss": 0.8765, + "step": 1067 + }, + { + "epoch": 0.05488744989207524, + "grad_norm": 1.2812387943267822, + "learning_rate": 9.983783106200814e-06, + "loss": 0.8428, + "step": 1068 + }, + { + "epoch": 0.05493884263541988, + "grad_norm": 1.2409775257110596, + "learning_rate": 9.983716061450957e-06, + "loss": 0.8867, + "step": 1069 + }, + { + "epoch": 0.05499023537876452, + "grad_norm": 0.9984674453735352, + "learning_rate": 9.983648878622894e-06, + "loss": 0.7219, + "step": 1070 + }, + { + "epoch": 0.05504162812210916, + "grad_norm": 1.2058324813842773, + "learning_rate": 9.983581557718487e-06, + "loss": 0.8227, + "step": 1071 + }, + { + "epoch": 0.0550930208654538, + "grad_norm": 1.1356886625289917, + "learning_rate": 9.983514098739602e-06, + "loss": 0.8367, + "step": 1072 + }, + { + "epoch": 0.05514441360879844, + "grad_norm": 0.8446734547615051, + "learning_rate": 9.983446501688105e-06, + "loss": 0.758, + "step": 1073 + }, + { + "epoch": 0.05519580635214308, + "grad_norm": 1.3006490468978882, + "learning_rate": 9.983378766565874e-06, + "loss": 0.9235, + "step": 1074 + }, + { + "epoch": 0.05524719909548772, + "grad_norm": 1.3172494173049927, + "learning_rate": 9.983310893374781e-06, + "loss": 0.8969, + "step": 1075 + }, + { + "epoch": 0.05529859183883236, + "grad_norm": 1.1803165674209595, + "learning_rate": 9.983242882116707e-06, + "loss": 0.848, + "step": 1076 + }, + { + "epoch": 0.055349984582177, + "grad_norm": 1.2701787948608398, + "learning_rate": 9.98317473279354e-06, + "loss": 0.884, + "step": 1077 + }, + { + "epoch": 0.05540137732552164, + "grad_norm": 0.791339635848999, + "learning_rate": 9.983106445407162e-06, + "loss": 0.7379, + "step": 1078 + }, + { + "epoch": 0.05545277006886628, + "grad_norm": 1.2090282440185547, + "learning_rate": 9.98303801995947e-06, + "loss": 0.8823, + "step": 1079 + }, + { + "epoch": 0.05550416281221091, + "grad_norm": 1.262412190437317, + "learning_rate": 9.98296945645236e-06, + "loss": 0.8604, + "step": 1080 + }, + { + "epoch": 0.05555555555555555, + "grad_norm": 1.3175773620605469, + "learning_rate": 9.982900754887728e-06, + "loss": 0.9153, + "step": 1081 + }, + { + "epoch": 0.05560694829890019, + "grad_norm": 1.375179648399353, + "learning_rate": 9.982831915267479e-06, + "loss": 0.8324, + "step": 1082 + }, + { + "epoch": 0.05565834104224483, + "grad_norm": 1.2052053213119507, + "learning_rate": 9.982762937593522e-06, + "loss": 0.8503, + "step": 1083 + }, + { + "epoch": 0.05570973378558947, + "grad_norm": 0.7859538793563843, + "learning_rate": 9.982693821867765e-06, + "loss": 0.7321, + "step": 1084 + }, + { + "epoch": 0.05576112652893411, + "grad_norm": 1.315988540649414, + "learning_rate": 9.982624568092127e-06, + "loss": 0.9078, + "step": 1085 + }, + { + "epoch": 0.05581251927227875, + "grad_norm": 1.3059765100479126, + "learning_rate": 9.982555176268522e-06, + "loss": 0.8708, + "step": 1086 + }, + { + "epoch": 0.05586391201562339, + "grad_norm": 0.8186153769493103, + "learning_rate": 9.982485646398876e-06, + "loss": 0.6849, + "step": 1087 + }, + { + "epoch": 0.05591530475896803, + "grad_norm": 1.2100918292999268, + "learning_rate": 9.982415978485114e-06, + "loss": 0.8035, + "step": 1088 + }, + { + "epoch": 0.05596669750231267, + "grad_norm": 1.2082200050354004, + "learning_rate": 9.982346172529166e-06, + "loss": 0.8517, + "step": 1089 + }, + { + "epoch": 0.056018090245657313, + "grad_norm": 0.8391363024711609, + "learning_rate": 9.982276228532967e-06, + "loss": 0.7698, + "step": 1090 + }, + { + "epoch": 0.056069482989001954, + "grad_norm": 1.2486977577209473, + "learning_rate": 9.982206146498455e-06, + "loss": 0.8893, + "step": 1091 + }, + { + "epoch": 0.056120875732346594, + "grad_norm": 1.217722773551941, + "learning_rate": 9.98213592642757e-06, + "loss": 0.8639, + "step": 1092 + }, + { + "epoch": 0.056172268475691234, + "grad_norm": 1.2651174068450928, + "learning_rate": 9.982065568322258e-06, + "loss": 0.8814, + "step": 1093 + }, + { + "epoch": 0.056223661219035874, + "grad_norm": 1.2954434156417847, + "learning_rate": 9.98199507218447e-06, + "loss": 0.855, + "step": 1094 + }, + { + "epoch": 0.056275053962380514, + "grad_norm": 1.2195677757263184, + "learning_rate": 9.981924438016157e-06, + "loss": 0.8541, + "step": 1095 + }, + { + "epoch": 0.056326446705725154, + "grad_norm": 1.1401110887527466, + "learning_rate": 9.981853665819277e-06, + "loss": 0.7586, + "step": 1096 + }, + { + "epoch": 0.056377839449069794, + "grad_norm": 1.1970553398132324, + "learning_rate": 9.981782755595792e-06, + "loss": 0.8537, + "step": 1097 + }, + { + "epoch": 0.056429232192414434, + "grad_norm": 1.17902410030365, + "learning_rate": 9.981711707347666e-06, + "loss": 0.8833, + "step": 1098 + }, + { + "epoch": 0.05648062493575907, + "grad_norm": 0.8351032137870789, + "learning_rate": 9.981640521076865e-06, + "loss": 0.7286, + "step": 1099 + }, + { + "epoch": 0.05653201767910371, + "grad_norm": 1.3155903816223145, + "learning_rate": 9.981569196785364e-06, + "loss": 0.8347, + "step": 1100 + }, + { + "epoch": 0.05658341042244835, + "grad_norm": 0.9494535326957703, + "learning_rate": 9.981497734475138e-06, + "loss": 0.7345, + "step": 1101 + }, + { + "epoch": 0.05663480316579299, + "grad_norm": 1.194558024406433, + "learning_rate": 9.981426134148167e-06, + "loss": 0.904, + "step": 1102 + }, + { + "epoch": 0.05668619590913763, + "grad_norm": 1.1780834197998047, + "learning_rate": 9.981354395806434e-06, + "loss": 0.8438, + "step": 1103 + }, + { + "epoch": 0.05673758865248227, + "grad_norm": 1.2865723371505737, + "learning_rate": 9.981282519451931e-06, + "loss": 0.8595, + "step": 1104 + }, + { + "epoch": 0.05678898139582691, + "grad_norm": 0.8499127626419067, + "learning_rate": 9.981210505086644e-06, + "loss": 0.6971, + "step": 1105 + }, + { + "epoch": 0.05684037413917155, + "grad_norm": 1.257904052734375, + "learning_rate": 9.98113835271257e-06, + "loss": 0.8507, + "step": 1106 + }, + { + "epoch": 0.05689176688251619, + "grad_norm": 1.2113239765167236, + "learning_rate": 9.981066062331708e-06, + "loss": 0.8779, + "step": 1107 + }, + { + "epoch": 0.05694315962586083, + "grad_norm": 1.3840869665145874, + "learning_rate": 9.98099363394606e-06, + "loss": 0.9012, + "step": 1108 + }, + { + "epoch": 0.05699455236920547, + "grad_norm": 1.2059961557388306, + "learning_rate": 9.980921067557636e-06, + "loss": 0.9265, + "step": 1109 + }, + { + "epoch": 0.05704594511255011, + "grad_norm": 1.1659185886383057, + "learning_rate": 9.980848363168443e-06, + "loss": 0.8556, + "step": 1110 + }, + { + "epoch": 0.05709733785589475, + "grad_norm": 1.2384744882583618, + "learning_rate": 9.980775520780497e-06, + "loss": 0.8517, + "step": 1111 + }, + { + "epoch": 0.05714873059923939, + "grad_norm": 1.1806620359420776, + "learning_rate": 9.980702540395815e-06, + "loss": 0.8619, + "step": 1112 + }, + { + "epoch": 0.05720012334258403, + "grad_norm": 1.2017359733581543, + "learning_rate": 9.98062942201642e-06, + "loss": 0.8324, + "step": 1113 + }, + { + "epoch": 0.05725151608592867, + "grad_norm": 1.1942718029022217, + "learning_rate": 9.980556165644339e-06, + "loss": 0.8244, + "step": 1114 + }, + { + "epoch": 0.05730290882927331, + "grad_norm": 1.211240530014038, + "learning_rate": 9.9804827712816e-06, + "loss": 0.8238, + "step": 1115 + }, + { + "epoch": 0.05735430157261795, + "grad_norm": 1.1809043884277344, + "learning_rate": 9.980409238930235e-06, + "loss": 0.8169, + "step": 1116 + }, + { + "epoch": 0.05740569431596258, + "grad_norm": 1.2746447324752808, + "learning_rate": 9.980335568592285e-06, + "loss": 0.8177, + "step": 1117 + }, + { + "epoch": 0.05745708705930722, + "grad_norm": 1.225766658782959, + "learning_rate": 9.980261760269787e-06, + "loss": 0.8119, + "step": 1118 + }, + { + "epoch": 0.05750847980265186, + "grad_norm": 0.8702387809753418, + "learning_rate": 9.980187813964789e-06, + "loss": 0.7458, + "step": 1119 + }, + { + "epoch": 0.0575598725459965, + "grad_norm": 1.1936593055725098, + "learning_rate": 9.980113729679338e-06, + "loss": 0.8993, + "step": 1120 + }, + { + "epoch": 0.05761126528934114, + "grad_norm": 0.8612618446350098, + "learning_rate": 9.980039507415488e-06, + "loss": 0.7113, + "step": 1121 + }, + { + "epoch": 0.05766265803268578, + "grad_norm": 1.2471098899841309, + "learning_rate": 9.979965147175295e-06, + "loss": 0.8518, + "step": 1122 + }, + { + "epoch": 0.05771405077603042, + "grad_norm": 1.209656834602356, + "learning_rate": 9.979890648960816e-06, + "loss": 0.8756, + "step": 1123 + }, + { + "epoch": 0.05776544351937506, + "grad_norm": 1.309747576713562, + "learning_rate": 9.979816012774122e-06, + "loss": 0.8305, + "step": 1124 + }, + { + "epoch": 0.057816836262719704, + "grad_norm": 0.8830257654190063, + "learning_rate": 9.979741238617275e-06, + "loss": 0.7598, + "step": 1125 + }, + { + "epoch": 0.057868229006064344, + "grad_norm": 1.198681116104126, + "learning_rate": 9.979666326492348e-06, + "loss": 0.8055, + "step": 1126 + }, + { + "epoch": 0.057919621749408984, + "grad_norm": 0.9580893516540527, + "learning_rate": 9.979591276401417e-06, + "loss": 0.7818, + "step": 1127 + }, + { + "epoch": 0.057971014492753624, + "grad_norm": 1.212040901184082, + "learning_rate": 9.979516088346562e-06, + "loss": 0.8292, + "step": 1128 + }, + { + "epoch": 0.058022407236098264, + "grad_norm": 1.2667771577835083, + "learning_rate": 9.979440762329863e-06, + "loss": 0.8984, + "step": 1129 + }, + { + "epoch": 0.058073799979442904, + "grad_norm": 1.2194665670394897, + "learning_rate": 9.979365298353412e-06, + "loss": 0.8714, + "step": 1130 + }, + { + "epoch": 0.058125192722787544, + "grad_norm": 1.157158374786377, + "learning_rate": 9.979289696419298e-06, + "loss": 0.8365, + "step": 1131 + }, + { + "epoch": 0.058176585466132184, + "grad_norm": 1.224460244178772, + "learning_rate": 9.979213956529613e-06, + "loss": 0.8659, + "step": 1132 + }, + { + "epoch": 0.058227978209476824, + "grad_norm": 1.1096312999725342, + "learning_rate": 9.979138078686459e-06, + "loss": 0.7563, + "step": 1133 + }, + { + "epoch": 0.058279370952821465, + "grad_norm": 1.2722305059432983, + "learning_rate": 9.979062062891934e-06, + "loss": 0.8694, + "step": 1134 + }, + { + "epoch": 0.058330763696166105, + "grad_norm": 1.2927436828613281, + "learning_rate": 9.978985909148148e-06, + "loss": 0.8451, + "step": 1135 + }, + { + "epoch": 0.05838215643951074, + "grad_norm": 1.1044056415557861, + "learning_rate": 9.978909617457208e-06, + "loss": 0.8249, + "step": 1136 + }, + { + "epoch": 0.05843354918285538, + "grad_norm": 1.2341526746749878, + "learning_rate": 9.978833187821232e-06, + "loss": 0.9132, + "step": 1137 + }, + { + "epoch": 0.05848494192620002, + "grad_norm": 1.2127639055252075, + "learning_rate": 9.978756620242333e-06, + "loss": 0.864, + "step": 1138 + }, + { + "epoch": 0.05853633466954466, + "grad_norm": 1.2146912813186646, + "learning_rate": 9.978679914722636e-06, + "loss": 0.8296, + "step": 1139 + }, + { + "epoch": 0.0585877274128893, + "grad_norm": 1.2359148263931274, + "learning_rate": 9.978603071264263e-06, + "loss": 0.8225, + "step": 1140 + }, + { + "epoch": 0.05863912015623394, + "grad_norm": 1.1174594163894653, + "learning_rate": 9.978526089869344e-06, + "loss": 0.7333, + "step": 1141 + }, + { + "epoch": 0.05869051289957858, + "grad_norm": 0.9986005425453186, + "learning_rate": 9.978448970540013e-06, + "loss": 0.7699, + "step": 1142 + }, + { + "epoch": 0.05874190564292322, + "grad_norm": 1.2127052545547485, + "learning_rate": 9.978371713278405e-06, + "loss": 0.8283, + "step": 1143 + }, + { + "epoch": 0.05879329838626786, + "grad_norm": 1.301411509513855, + "learning_rate": 9.978294318086661e-06, + "loss": 0.9018, + "step": 1144 + }, + { + "epoch": 0.0588446911296125, + "grad_norm": 1.2672181129455566, + "learning_rate": 9.978216784966927e-06, + "loss": 0.863, + "step": 1145 + }, + { + "epoch": 0.05889608387295714, + "grad_norm": 2.0091192722320557, + "learning_rate": 9.97813911392135e-06, + "loss": 0.9362, + "step": 1146 + }, + { + "epoch": 0.05894747661630178, + "grad_norm": 1.2876921892166138, + "learning_rate": 9.97806130495208e-06, + "loss": 0.8502, + "step": 1147 + }, + { + "epoch": 0.05899886935964642, + "grad_norm": 1.1820954084396362, + "learning_rate": 9.977983358061276e-06, + "loss": 0.885, + "step": 1148 + }, + { + "epoch": 0.05905026210299106, + "grad_norm": 1.2279303073883057, + "learning_rate": 9.977905273251094e-06, + "loss": 0.8557, + "step": 1149 + }, + { + "epoch": 0.0591016548463357, + "grad_norm": 1.0473719835281372, + "learning_rate": 9.977827050523703e-06, + "loss": 0.7275, + "step": 1150 + }, + { + "epoch": 0.05915304758968034, + "grad_norm": 1.1875505447387695, + "learning_rate": 9.977748689881263e-06, + "loss": 0.8613, + "step": 1151 + }, + { + "epoch": 0.05920444033302498, + "grad_norm": 1.2748993635177612, + "learning_rate": 9.977670191325951e-06, + "loss": 0.8957, + "step": 1152 + }, + { + "epoch": 0.05925583307636962, + "grad_norm": 1.1879079341888428, + "learning_rate": 9.97759155485994e-06, + "loss": 0.8217, + "step": 1153 + }, + { + "epoch": 0.05930722581971425, + "grad_norm": 1.2057673931121826, + "learning_rate": 9.977512780485407e-06, + "loss": 0.8568, + "step": 1154 + }, + { + "epoch": 0.05935861856305889, + "grad_norm": 1.3518967628479004, + "learning_rate": 9.977433868204536e-06, + "loss": 0.8646, + "step": 1155 + }, + { + "epoch": 0.05941001130640353, + "grad_norm": 1.3617931604385376, + "learning_rate": 9.977354818019514e-06, + "loss": 0.8135, + "step": 1156 + }, + { + "epoch": 0.05946140404974817, + "grad_norm": 1.2480753660202026, + "learning_rate": 9.97727562993253e-06, + "loss": 0.8801, + "step": 1157 + }, + { + "epoch": 0.05951279679309281, + "grad_norm": 1.1186765432357788, + "learning_rate": 9.977196303945778e-06, + "loss": 0.7491, + "step": 1158 + }, + { + "epoch": 0.059564189536437454, + "grad_norm": 1.4840799570083618, + "learning_rate": 9.977116840061456e-06, + "loss": 0.8154, + "step": 1159 + }, + { + "epoch": 0.059615582279782094, + "grad_norm": 1.2440422773361206, + "learning_rate": 9.977037238281766e-06, + "loss": 0.8596, + "step": 1160 + }, + { + "epoch": 0.059666975023126734, + "grad_norm": 1.3036857843399048, + "learning_rate": 9.976957498608913e-06, + "loss": 0.7861, + "step": 1161 + }, + { + "epoch": 0.059718367766471374, + "grad_norm": 1.229193925857544, + "learning_rate": 9.976877621045106e-06, + "loss": 0.8558, + "step": 1162 + }, + { + "epoch": 0.059769760509816014, + "grad_norm": 1.1989786624908447, + "learning_rate": 9.97679760559256e-06, + "loss": 0.8448, + "step": 1163 + }, + { + "epoch": 0.059821153253160654, + "grad_norm": 1.264654278755188, + "learning_rate": 9.976717452253488e-06, + "loss": 0.8425, + "step": 1164 + }, + { + "epoch": 0.059872545996505294, + "grad_norm": 1.127537488937378, + "learning_rate": 9.976637161030114e-06, + "loss": 0.8465, + "step": 1165 + }, + { + "epoch": 0.059923938739849934, + "grad_norm": 1.2033878564834595, + "learning_rate": 9.976556731924663e-06, + "loss": 0.8519, + "step": 1166 + }, + { + "epoch": 0.059975331483194574, + "grad_norm": 0.886642575263977, + "learning_rate": 9.976476164939361e-06, + "loss": 0.6987, + "step": 1167 + }, + { + "epoch": 0.060026724226539215, + "grad_norm": 1.2378264665603638, + "learning_rate": 9.97639546007644e-06, + "loss": 0.8623, + "step": 1168 + }, + { + "epoch": 0.060078116969883855, + "grad_norm": 1.2321252822875977, + "learning_rate": 9.976314617338139e-06, + "loss": 0.8544, + "step": 1169 + }, + { + "epoch": 0.060129509713228495, + "grad_norm": 1.206283450126648, + "learning_rate": 9.976233636726696e-06, + "loss": 0.8491, + "step": 1170 + }, + { + "epoch": 0.060180902456573135, + "grad_norm": 1.1792248487472534, + "learning_rate": 9.976152518244352e-06, + "loss": 0.8634, + "step": 1171 + }, + { + "epoch": 0.060232295199917775, + "grad_norm": 1.2804940938949585, + "learning_rate": 9.976071261893361e-06, + "loss": 0.9686, + "step": 1172 + }, + { + "epoch": 0.06028368794326241, + "grad_norm": 1.231398344039917, + "learning_rate": 9.975989867675968e-06, + "loss": 0.8894, + "step": 1173 + }, + { + "epoch": 0.06033508068660705, + "grad_norm": 1.161859154701233, + "learning_rate": 9.97590833559443e-06, + "loss": 0.836, + "step": 1174 + }, + { + "epoch": 0.06038647342995169, + "grad_norm": 1.2021398544311523, + "learning_rate": 9.975826665651007e-06, + "loss": 0.8787, + "step": 1175 + }, + { + "epoch": 0.06043786617329633, + "grad_norm": 1.1408778429031372, + "learning_rate": 9.975744857847963e-06, + "loss": 0.8159, + "step": 1176 + }, + { + "epoch": 0.06048925891664097, + "grad_norm": 1.0622419118881226, + "learning_rate": 9.97566291218756e-06, + "loss": 0.7685, + "step": 1177 + }, + { + "epoch": 0.06054065165998561, + "grad_norm": 1.140544056892395, + "learning_rate": 9.975580828672072e-06, + "loss": 0.836, + "step": 1178 + }, + { + "epoch": 0.06059204440333025, + "grad_norm": 1.323164701461792, + "learning_rate": 9.975498607303772e-06, + "loss": 0.8441, + "step": 1179 + }, + { + "epoch": 0.06064343714667489, + "grad_norm": 0.9059852957725525, + "learning_rate": 9.975416248084939e-06, + "loss": 0.7244, + "step": 1180 + }, + { + "epoch": 0.06069482989001953, + "grad_norm": 1.1558243036270142, + "learning_rate": 9.975333751017856e-06, + "loss": 0.8689, + "step": 1181 + }, + { + "epoch": 0.06074622263336417, + "grad_norm": 1.23139488697052, + "learning_rate": 9.975251116104802e-06, + "loss": 0.8863, + "step": 1182 + }, + { + "epoch": 0.06079761537670881, + "grad_norm": 1.1719777584075928, + "learning_rate": 9.975168343348074e-06, + "loss": 0.8845, + "step": 1183 + }, + { + "epoch": 0.06084900812005345, + "grad_norm": 1.1954281330108643, + "learning_rate": 9.975085432749962e-06, + "loss": 0.8835, + "step": 1184 + }, + { + "epoch": 0.06090040086339809, + "grad_norm": 1.0291945934295654, + "learning_rate": 9.975002384312764e-06, + "loss": 0.758, + "step": 1185 + }, + { + "epoch": 0.06095179360674273, + "grad_norm": 1.1825456619262695, + "learning_rate": 9.974919198038782e-06, + "loss": 0.8585, + "step": 1186 + }, + { + "epoch": 0.06100318635008737, + "grad_norm": 1.1498017311096191, + "learning_rate": 9.974835873930316e-06, + "loss": 0.8437, + "step": 1187 + }, + { + "epoch": 0.06105457909343201, + "grad_norm": 1.1359291076660156, + "learning_rate": 9.97475241198968e-06, + "loss": 0.8942, + "step": 1188 + }, + { + "epoch": 0.06110597183677665, + "grad_norm": 1.1776137351989746, + "learning_rate": 9.974668812219184e-06, + "loss": 0.8171, + "step": 1189 + }, + { + "epoch": 0.06115736458012129, + "grad_norm": 1.1480019092559814, + "learning_rate": 9.974585074621143e-06, + "loss": 0.8753, + "step": 1190 + }, + { + "epoch": 0.06120875732346592, + "grad_norm": 0.8512030839920044, + "learning_rate": 9.974501199197882e-06, + "loss": 0.7772, + "step": 1191 + }, + { + "epoch": 0.06126015006681056, + "grad_norm": 0.8866260647773743, + "learning_rate": 9.974417185951718e-06, + "loss": 0.6972, + "step": 1192 + }, + { + "epoch": 0.0613115428101552, + "grad_norm": 1.0050370693206787, + "learning_rate": 9.974333034884983e-06, + "loss": 0.7133, + "step": 1193 + }, + { + "epoch": 0.061362935553499844, + "grad_norm": 1.2891030311584473, + "learning_rate": 9.974248746000007e-06, + "loss": 0.899, + "step": 1194 + }, + { + "epoch": 0.061414328296844484, + "grad_norm": 1.2006300687789917, + "learning_rate": 9.974164319299127e-06, + "loss": 0.8579, + "step": 1195 + }, + { + "epoch": 0.061465721040189124, + "grad_norm": 0.8573015928268433, + "learning_rate": 9.97407975478468e-06, + "loss": 0.759, + "step": 1196 + }, + { + "epoch": 0.061517113783533764, + "grad_norm": 0.7824249267578125, + "learning_rate": 9.97399505245901e-06, + "loss": 0.7519, + "step": 1197 + }, + { + "epoch": 0.061568506526878404, + "grad_norm": 1.1752259731292725, + "learning_rate": 9.973910212324463e-06, + "loss": 0.8405, + "step": 1198 + }, + { + "epoch": 0.061619899270223044, + "grad_norm": 0.8630449771881104, + "learning_rate": 9.973825234383392e-06, + "loss": 0.7313, + "step": 1199 + }, + { + "epoch": 0.061671292013567684, + "grad_norm": 1.2532098293304443, + "learning_rate": 9.973740118638147e-06, + "loss": 0.8543, + "step": 1200 + }, + { + "epoch": 0.061722684756912324, + "grad_norm": 1.2244421243667603, + "learning_rate": 9.97365486509109e-06, + "loss": 0.8644, + "step": 1201 + }, + { + "epoch": 0.061774077500256964, + "grad_norm": 1.1914186477661133, + "learning_rate": 9.973569473744583e-06, + "loss": 0.8245, + "step": 1202 + }, + { + "epoch": 0.061825470243601605, + "grad_norm": 1.2026646137237549, + "learning_rate": 9.973483944600987e-06, + "loss": 0.8265, + "step": 1203 + }, + { + "epoch": 0.061876862986946245, + "grad_norm": 1.1772708892822266, + "learning_rate": 9.973398277662678e-06, + "loss": 0.8637, + "step": 1204 + }, + { + "epoch": 0.061928255730290885, + "grad_norm": 1.220398187637329, + "learning_rate": 9.973312472932026e-06, + "loss": 0.8277, + "step": 1205 + }, + { + "epoch": 0.061979648473635525, + "grad_norm": 1.199230670928955, + "learning_rate": 9.97322653041141e-06, + "loss": 0.8668, + "step": 1206 + }, + { + "epoch": 0.062031041216980165, + "grad_norm": 1.1803576946258545, + "learning_rate": 9.973140450103209e-06, + "loss": 0.8534, + "step": 1207 + }, + { + "epoch": 0.062082433960324805, + "grad_norm": 1.1516332626342773, + "learning_rate": 9.97305423200981e-06, + "loss": 0.8641, + "step": 1208 + }, + { + "epoch": 0.062133826703669445, + "grad_norm": 1.1796621084213257, + "learning_rate": 9.972967876133602e-06, + "loss": 0.8675, + "step": 1209 + }, + { + "epoch": 0.06218521944701408, + "grad_norm": 1.1729705333709717, + "learning_rate": 9.972881382476974e-06, + "loss": 0.8338, + "step": 1210 + }, + { + "epoch": 0.06223661219035872, + "grad_norm": 1.1264246702194214, + "learning_rate": 9.972794751042324e-06, + "loss": 0.8247, + "step": 1211 + }, + { + "epoch": 0.06228800493370336, + "grad_norm": 1.191347360610962, + "learning_rate": 9.972707981832055e-06, + "loss": 0.8228, + "step": 1212 + }, + { + "epoch": 0.062339397677048, + "grad_norm": 0.9049070477485657, + "learning_rate": 9.972621074848569e-06, + "loss": 0.7365, + "step": 1213 + }, + { + "epoch": 0.06239079042039264, + "grad_norm": 1.1573753356933594, + "learning_rate": 9.972534030094272e-06, + "loss": 0.8444, + "step": 1214 + }, + { + "epoch": 0.06244218316373728, + "grad_norm": 1.2291463613510132, + "learning_rate": 9.97244684757158e-06, + "loss": 0.7692, + "step": 1215 + }, + { + "epoch": 0.06249357590708192, + "grad_norm": 1.3106968402862549, + "learning_rate": 9.972359527282905e-06, + "loss": 0.8194, + "step": 1216 + }, + { + "epoch": 0.06254496865042657, + "grad_norm": 1.1492574214935303, + "learning_rate": 9.972272069230667e-06, + "loss": 0.7946, + "step": 1217 + }, + { + "epoch": 0.0625963613937712, + "grad_norm": 1.2317800521850586, + "learning_rate": 9.972184473417287e-06, + "loss": 0.9376, + "step": 1218 + }, + { + "epoch": 0.06264775413711583, + "grad_norm": 1.166545033454895, + "learning_rate": 9.972096739845196e-06, + "loss": 0.8414, + "step": 1219 + }, + { + "epoch": 0.06269914688046048, + "grad_norm": 1.1994495391845703, + "learning_rate": 9.972008868516826e-06, + "loss": 0.8601, + "step": 1220 + }, + { + "epoch": 0.06275053962380511, + "grad_norm": 1.2833749055862427, + "learning_rate": 9.971920859434604e-06, + "loss": 0.8464, + "step": 1221 + }, + { + "epoch": 0.06280193236714976, + "grad_norm": 1.2128567695617676, + "learning_rate": 9.971832712600976e-06, + "loss": 0.9019, + "step": 1222 + }, + { + "epoch": 0.06285332511049439, + "grad_norm": 1.1879664659500122, + "learning_rate": 9.971744428018378e-06, + "loss": 0.8314, + "step": 1223 + }, + { + "epoch": 0.06290471785383904, + "grad_norm": 1.148087978363037, + "learning_rate": 9.97165600568926e-06, + "loss": 0.7985, + "step": 1224 + }, + { + "epoch": 0.06295611059718367, + "grad_norm": 1.1824296712875366, + "learning_rate": 9.971567445616072e-06, + "loss": 0.8649, + "step": 1225 + }, + { + "epoch": 0.06300750334052832, + "grad_norm": 0.8248127102851868, + "learning_rate": 9.971478747801268e-06, + "loss": 0.7213, + "step": 1226 + }, + { + "epoch": 0.06305889608387295, + "grad_norm": 1.2181551456451416, + "learning_rate": 9.971389912247302e-06, + "loss": 0.8074, + "step": 1227 + }, + { + "epoch": 0.0631102888272176, + "grad_norm": 0.7653465270996094, + "learning_rate": 9.971300938956638e-06, + "loss": 0.723, + "step": 1228 + }, + { + "epoch": 0.06316168157056223, + "grad_norm": 1.2489670515060425, + "learning_rate": 9.97121182793174e-06, + "loss": 0.8389, + "step": 1229 + }, + { + "epoch": 0.06321307431390688, + "grad_norm": 1.2638297080993652, + "learning_rate": 9.971122579175077e-06, + "loss": 0.8699, + "step": 1230 + }, + { + "epoch": 0.06326446705725151, + "grad_norm": 1.1709225177764893, + "learning_rate": 9.971033192689121e-06, + "loss": 0.8145, + "step": 1231 + }, + { + "epoch": 0.06331585980059616, + "grad_norm": 1.1956020593643188, + "learning_rate": 9.970943668476351e-06, + "loss": 0.8733, + "step": 1232 + }, + { + "epoch": 0.0633672525439408, + "grad_norm": 1.2909343242645264, + "learning_rate": 9.970854006539246e-06, + "loss": 0.8841, + "step": 1233 + }, + { + "epoch": 0.06341864528728544, + "grad_norm": 0.8098524212837219, + "learning_rate": 9.970764206880289e-06, + "loss": 0.7543, + "step": 1234 + }, + { + "epoch": 0.06347003803063007, + "grad_norm": 1.2524017095565796, + "learning_rate": 9.970674269501968e-06, + "loss": 0.8381, + "step": 1235 + }, + { + "epoch": 0.06352143077397472, + "grad_norm": 1.1908453702926636, + "learning_rate": 9.970584194406779e-06, + "loss": 0.8495, + "step": 1236 + }, + { + "epoch": 0.06357282351731935, + "grad_norm": 1.1576836109161377, + "learning_rate": 9.97049398159721e-06, + "loss": 0.8073, + "step": 1237 + }, + { + "epoch": 0.06362421626066399, + "grad_norm": 1.2045015096664429, + "learning_rate": 9.970403631075768e-06, + "loss": 0.8404, + "step": 1238 + }, + { + "epoch": 0.06367560900400863, + "grad_norm": 1.183829665184021, + "learning_rate": 9.970313142844951e-06, + "loss": 0.8123, + "step": 1239 + }, + { + "epoch": 0.06372700174735327, + "grad_norm": 1.152247428894043, + "learning_rate": 9.97022251690727e-06, + "loss": 0.8696, + "step": 1240 + }, + { + "epoch": 0.06377839449069791, + "grad_norm": 1.2260886430740356, + "learning_rate": 9.970131753265234e-06, + "loss": 0.8567, + "step": 1241 + }, + { + "epoch": 0.06382978723404255, + "grad_norm": 1.2068758010864258, + "learning_rate": 9.970040851921356e-06, + "loss": 0.8128, + "step": 1242 + }, + { + "epoch": 0.0638811799773872, + "grad_norm": 1.2278228998184204, + "learning_rate": 9.969949812878158e-06, + "loss": 0.7683, + "step": 1243 + }, + { + "epoch": 0.06393257272073183, + "grad_norm": 1.1791683435440063, + "learning_rate": 9.969858636138158e-06, + "loss": 0.8496, + "step": 1244 + }, + { + "epoch": 0.06398396546407648, + "grad_norm": 1.3437433242797852, + "learning_rate": 9.969767321703886e-06, + "loss": 0.8125, + "step": 1245 + }, + { + "epoch": 0.06403535820742111, + "grad_norm": 1.2271130084991455, + "learning_rate": 9.969675869577873e-06, + "loss": 0.8836, + "step": 1246 + }, + { + "epoch": 0.06408675095076576, + "grad_norm": 1.219010353088379, + "learning_rate": 9.969584279762648e-06, + "loss": 0.8354, + "step": 1247 + }, + { + "epoch": 0.06413814369411039, + "grad_norm": 0.8295942544937134, + "learning_rate": 9.96949255226075e-06, + "loss": 0.7467, + "step": 1248 + }, + { + "epoch": 0.06418953643745504, + "grad_norm": 1.1781601905822754, + "learning_rate": 9.969400687074722e-06, + "loss": 0.8542, + "step": 1249 + }, + { + "epoch": 0.06424092918079967, + "grad_norm": 1.2492057085037231, + "learning_rate": 9.969308684207109e-06, + "loss": 0.8038, + "step": 1250 + }, + { + "epoch": 0.06429232192414432, + "grad_norm": 1.21192467212677, + "learning_rate": 9.969216543660458e-06, + "loss": 0.8889, + "step": 1251 + }, + { + "epoch": 0.06434371466748895, + "grad_norm": 1.3369771242141724, + "learning_rate": 9.969124265437325e-06, + "loss": 0.8664, + "step": 1252 + }, + { + "epoch": 0.0643951074108336, + "grad_norm": 1.2625089883804321, + "learning_rate": 9.969031849540263e-06, + "loss": 0.8652, + "step": 1253 + }, + { + "epoch": 0.06444650015417823, + "grad_norm": 1.2198420763015747, + "learning_rate": 9.968939295971837e-06, + "loss": 0.8287, + "step": 1254 + }, + { + "epoch": 0.06449789289752288, + "grad_norm": 0.9190962910652161, + "learning_rate": 9.968846604734608e-06, + "loss": 0.7637, + "step": 1255 + }, + { + "epoch": 0.06454928564086751, + "grad_norm": 1.1250752210617065, + "learning_rate": 9.968753775831144e-06, + "loss": 0.8618, + "step": 1256 + }, + { + "epoch": 0.06460067838421214, + "grad_norm": 1.2384765148162842, + "learning_rate": 9.968660809264016e-06, + "loss": 0.8549, + "step": 1257 + }, + { + "epoch": 0.06465207112755679, + "grad_norm": 1.2538996934890747, + "learning_rate": 9.968567705035805e-06, + "loss": 0.8815, + "step": 1258 + }, + { + "epoch": 0.06470346387090142, + "grad_norm": 1.16378653049469, + "learning_rate": 9.968474463149084e-06, + "loss": 0.8147, + "step": 1259 + }, + { + "epoch": 0.06475485661424607, + "grad_norm": 1.2450807094573975, + "learning_rate": 9.968381083606442e-06, + "loss": 0.888, + "step": 1260 + }, + { + "epoch": 0.0648062493575907, + "grad_norm": 1.2954580783843994, + "learning_rate": 9.968287566410461e-06, + "loss": 0.8838, + "step": 1261 + }, + { + "epoch": 0.06485764210093535, + "grad_norm": 1.27425217628479, + "learning_rate": 9.968193911563735e-06, + "loss": 0.8145, + "step": 1262 + }, + { + "epoch": 0.06490903484427998, + "grad_norm": 1.2476718425750732, + "learning_rate": 9.96810011906886e-06, + "loss": 0.9031, + "step": 1263 + }, + { + "epoch": 0.06496042758762463, + "grad_norm": 0.7890223860740662, + "learning_rate": 9.968006188928432e-06, + "loss": 0.7331, + "step": 1264 + }, + { + "epoch": 0.06501182033096926, + "grad_norm": 0.8047551512718201, + "learning_rate": 9.967912121145054e-06, + "loss": 0.7191, + "step": 1265 + }, + { + "epoch": 0.06506321307431391, + "grad_norm": 0.8476386070251465, + "learning_rate": 9.967817915721332e-06, + "loss": 0.7867, + "step": 1266 + }, + { + "epoch": 0.06511460581765854, + "grad_norm": 1.2589093446731567, + "learning_rate": 9.967723572659876e-06, + "loss": 0.9136, + "step": 1267 + }, + { + "epoch": 0.06516599856100319, + "grad_norm": 1.2061235904693604, + "learning_rate": 9.967629091963302e-06, + "loss": 0.8366, + "step": 1268 + }, + { + "epoch": 0.06521739130434782, + "grad_norm": 1.2397112846374512, + "learning_rate": 9.967534473634224e-06, + "loss": 0.9452, + "step": 1269 + }, + { + "epoch": 0.06526878404769247, + "grad_norm": 1.2598036527633667, + "learning_rate": 9.967439717675269e-06, + "loss": 0.8465, + "step": 1270 + }, + { + "epoch": 0.0653201767910371, + "grad_norm": 1.2256470918655396, + "learning_rate": 9.967344824089056e-06, + "loss": 0.7987, + "step": 1271 + }, + { + "epoch": 0.06537156953438175, + "grad_norm": 1.2414723634719849, + "learning_rate": 9.967249792878217e-06, + "loss": 0.8307, + "step": 1272 + }, + { + "epoch": 0.06542296227772638, + "grad_norm": 1.166878581047058, + "learning_rate": 9.967154624045385e-06, + "loss": 0.8482, + "step": 1273 + }, + { + "epoch": 0.06547435502107102, + "grad_norm": 1.2057427167892456, + "learning_rate": 9.967059317593197e-06, + "loss": 0.9112, + "step": 1274 + }, + { + "epoch": 0.06552574776441566, + "grad_norm": 0.9161434173583984, + "learning_rate": 9.966963873524294e-06, + "loss": 0.7217, + "step": 1275 + }, + { + "epoch": 0.0655771405077603, + "grad_norm": 1.271059274673462, + "learning_rate": 9.966868291841318e-06, + "loss": 0.8414, + "step": 1276 + }, + { + "epoch": 0.06562853325110495, + "grad_norm": 1.275327205657959, + "learning_rate": 9.96677257254692e-06, + "loss": 0.8452, + "step": 1277 + }, + { + "epoch": 0.06567992599444958, + "grad_norm": 1.2687252759933472, + "learning_rate": 9.966676715643751e-06, + "loss": 0.8467, + "step": 1278 + }, + { + "epoch": 0.06573131873779423, + "grad_norm": 1.3421111106872559, + "learning_rate": 9.966580721134465e-06, + "loss": 0.793, + "step": 1279 + }, + { + "epoch": 0.06578271148113886, + "grad_norm": 1.2187787294387817, + "learning_rate": 9.966484589021724e-06, + "loss": 0.8333, + "step": 1280 + }, + { + "epoch": 0.0658341042244835, + "grad_norm": 1.2390424013137817, + "learning_rate": 9.966388319308191e-06, + "loss": 0.8797, + "step": 1281 + }, + { + "epoch": 0.06588549696782814, + "grad_norm": 1.1580936908721924, + "learning_rate": 9.96629191199653e-06, + "loss": 0.8206, + "step": 1282 + }, + { + "epoch": 0.06593688971117279, + "grad_norm": 1.034580945968628, + "learning_rate": 9.966195367089418e-06, + "loss": 0.7395, + "step": 1283 + }, + { + "epoch": 0.06598828245451742, + "grad_norm": 1.2569457292556763, + "learning_rate": 9.966098684589527e-06, + "loss": 0.8635, + "step": 1284 + }, + { + "epoch": 0.06603967519786207, + "grad_norm": 0.8791694641113281, + "learning_rate": 9.966001864499533e-06, + "loss": 0.7715, + "step": 1285 + }, + { + "epoch": 0.0660910679412067, + "grad_norm": 1.3085097074508667, + "learning_rate": 9.965904906822121e-06, + "loss": 0.9, + "step": 1286 + }, + { + "epoch": 0.06614246068455135, + "grad_norm": 0.828238308429718, + "learning_rate": 9.965807811559978e-06, + "loss": 0.7455, + "step": 1287 + }, + { + "epoch": 0.06619385342789598, + "grad_norm": 1.1314805746078491, + "learning_rate": 9.965710578715792e-06, + "loss": 0.8649, + "step": 1288 + }, + { + "epoch": 0.06624524617124063, + "grad_norm": 1.1361806392669678, + "learning_rate": 9.96561320829226e-06, + "loss": 0.806, + "step": 1289 + }, + { + "epoch": 0.06629663891458526, + "grad_norm": 1.2397502660751343, + "learning_rate": 9.965515700292076e-06, + "loss": 0.8575, + "step": 1290 + }, + { + "epoch": 0.0663480316579299, + "grad_norm": 1.1956923007965088, + "learning_rate": 9.965418054717944e-06, + "loss": 0.8262, + "step": 1291 + }, + { + "epoch": 0.06639942440127454, + "grad_norm": 1.2116965055465698, + "learning_rate": 9.96532027157257e-06, + "loss": 0.8852, + "step": 1292 + }, + { + "epoch": 0.06645081714461917, + "grad_norm": 1.07035231590271, + "learning_rate": 9.965222350858661e-06, + "loss": 0.7711, + "step": 1293 + }, + { + "epoch": 0.06650220988796382, + "grad_norm": 1.1506820917129517, + "learning_rate": 9.965124292578932e-06, + "loss": 0.8336, + "step": 1294 + }, + { + "epoch": 0.06655360263130845, + "grad_norm": 0.8365129828453064, + "learning_rate": 9.965026096736097e-06, + "loss": 0.7489, + "step": 1295 + }, + { + "epoch": 0.0666049953746531, + "grad_norm": 1.1129027605056763, + "learning_rate": 9.964927763332879e-06, + "loss": 0.8311, + "step": 1296 + }, + { + "epoch": 0.06665638811799773, + "grad_norm": 1.1906336545944214, + "learning_rate": 9.964829292372001e-06, + "loss": 0.8124, + "step": 1297 + }, + { + "epoch": 0.06670778086134238, + "grad_norm": 1.1677844524383545, + "learning_rate": 9.964730683856194e-06, + "loss": 0.8292, + "step": 1298 + }, + { + "epoch": 0.06675917360468701, + "grad_norm": 1.1898362636566162, + "learning_rate": 9.964631937788185e-06, + "loss": 0.8468, + "step": 1299 + }, + { + "epoch": 0.06681056634803166, + "grad_norm": 1.079712152481079, + "learning_rate": 9.964533054170716e-06, + "loss": 0.7879, + "step": 1300 + }, + { + "epoch": 0.0668619590913763, + "grad_norm": 0.9606438875198364, + "learning_rate": 9.96443403300652e-06, + "loss": 0.7172, + "step": 1301 + }, + { + "epoch": 0.06691335183472094, + "grad_norm": 1.185306429862976, + "learning_rate": 9.964334874298347e-06, + "loss": 0.8675, + "step": 1302 + }, + { + "epoch": 0.06696474457806557, + "grad_norm": 1.1964657306671143, + "learning_rate": 9.96423557804894e-06, + "loss": 0.8073, + "step": 1303 + }, + { + "epoch": 0.06701613732141022, + "grad_norm": 1.2034879922866821, + "learning_rate": 9.964136144261051e-06, + "loss": 0.8471, + "step": 1304 + }, + { + "epoch": 0.06706753006475485, + "grad_norm": 1.229433536529541, + "learning_rate": 9.964036572937436e-06, + "loss": 0.783, + "step": 1305 + }, + { + "epoch": 0.0671189228080995, + "grad_norm": 1.227113962173462, + "learning_rate": 9.963936864080854e-06, + "loss": 0.8878, + "step": 1306 + }, + { + "epoch": 0.06717031555144413, + "grad_norm": 1.1944153308868408, + "learning_rate": 9.963837017694065e-06, + "loss": 0.7389, + "step": 1307 + }, + { + "epoch": 0.06722170829478878, + "grad_norm": 1.3319650888442993, + "learning_rate": 9.963737033779837e-06, + "loss": 0.804, + "step": 1308 + }, + { + "epoch": 0.06727310103813341, + "grad_norm": 1.2119909524917603, + "learning_rate": 9.96363691234094e-06, + "loss": 0.8445, + "step": 1309 + }, + { + "epoch": 0.06732449378147806, + "grad_norm": 1.1178308725357056, + "learning_rate": 9.963536653380145e-06, + "loss": 0.8293, + "step": 1310 + }, + { + "epoch": 0.0673758865248227, + "grad_norm": 1.2156850099563599, + "learning_rate": 9.963436256900236e-06, + "loss": 0.8444, + "step": 1311 + }, + { + "epoch": 0.06742727926816733, + "grad_norm": 1.308240294456482, + "learning_rate": 9.963335722903991e-06, + "loss": 0.8515, + "step": 1312 + }, + { + "epoch": 0.06747867201151198, + "grad_norm": 1.3126437664031982, + "learning_rate": 9.963235051394196e-06, + "loss": 0.882, + "step": 1313 + }, + { + "epoch": 0.06753006475485661, + "grad_norm": 1.2073324918746948, + "learning_rate": 9.963134242373639e-06, + "loss": 0.7931, + "step": 1314 + }, + { + "epoch": 0.06758145749820126, + "grad_norm": 1.2467983961105347, + "learning_rate": 9.963033295845113e-06, + "loss": 0.8773, + "step": 1315 + }, + { + "epoch": 0.06763285024154589, + "grad_norm": 1.2667630910873413, + "learning_rate": 9.962932211811415e-06, + "loss": 0.8744, + "step": 1316 + }, + { + "epoch": 0.06768424298489054, + "grad_norm": 1.1797868013381958, + "learning_rate": 9.962830990275349e-06, + "loss": 0.8587, + "step": 1317 + }, + { + "epoch": 0.06773563572823517, + "grad_norm": 1.239017367362976, + "learning_rate": 9.962729631239716e-06, + "loss": 0.798, + "step": 1318 + }, + { + "epoch": 0.06778702847157982, + "grad_norm": 1.222000002861023, + "learning_rate": 9.962628134707324e-06, + "loss": 0.8645, + "step": 1319 + }, + { + "epoch": 0.06783842121492445, + "grad_norm": 1.1918072700500488, + "learning_rate": 9.962526500680986e-06, + "loss": 0.8859, + "step": 1320 + }, + { + "epoch": 0.0678898139582691, + "grad_norm": 1.2195672988891602, + "learning_rate": 9.962424729163517e-06, + "loss": 0.8535, + "step": 1321 + }, + { + "epoch": 0.06794120670161373, + "grad_norm": 1.247909665107727, + "learning_rate": 9.96232282015774e-06, + "loss": 0.8565, + "step": 1322 + }, + { + "epoch": 0.06799259944495838, + "grad_norm": 1.1036990880966187, + "learning_rate": 9.962220773666473e-06, + "loss": 0.8684, + "step": 1323 + }, + { + "epoch": 0.06804399218830301, + "grad_norm": 0.811055064201355, + "learning_rate": 9.962118589692547e-06, + "loss": 0.7471, + "step": 1324 + }, + { + "epoch": 0.06809538493164766, + "grad_norm": 1.156511902809143, + "learning_rate": 9.962016268238793e-06, + "loss": 0.8306, + "step": 1325 + }, + { + "epoch": 0.06814677767499229, + "grad_norm": 1.2628639936447144, + "learning_rate": 9.961913809308045e-06, + "loss": 0.9013, + "step": 1326 + }, + { + "epoch": 0.06819817041833694, + "grad_norm": 1.2208572626113892, + "learning_rate": 9.961811212903143e-06, + "loss": 0.8463, + "step": 1327 + }, + { + "epoch": 0.06824956316168157, + "grad_norm": 1.2117400169372559, + "learning_rate": 9.961708479026927e-06, + "loss": 0.8498, + "step": 1328 + }, + { + "epoch": 0.06830095590502622, + "grad_norm": 1.1851780414581299, + "learning_rate": 9.961605607682245e-06, + "loss": 0.8617, + "step": 1329 + }, + { + "epoch": 0.06835234864837085, + "grad_norm": 1.1723960638046265, + "learning_rate": 9.961502598871946e-06, + "loss": 0.8261, + "step": 1330 + }, + { + "epoch": 0.06840374139171548, + "grad_norm": 1.4972400665283203, + "learning_rate": 9.961399452598887e-06, + "loss": 0.8019, + "step": 1331 + }, + { + "epoch": 0.06845513413506013, + "grad_norm": 1.3451943397521973, + "learning_rate": 9.961296168865923e-06, + "loss": 0.8512, + "step": 1332 + }, + { + "epoch": 0.06850652687840476, + "grad_norm": 1.1305317878723145, + "learning_rate": 9.961192747675917e-06, + "loss": 0.8135, + "step": 1333 + }, + { + "epoch": 0.06855791962174941, + "grad_norm": 1.2738243341445923, + "learning_rate": 9.961089189031731e-06, + "loss": 0.8886, + "step": 1334 + }, + { + "epoch": 0.06860931236509404, + "grad_norm": 1.1738700866699219, + "learning_rate": 9.960985492936239e-06, + "loss": 0.8486, + "step": 1335 + }, + { + "epoch": 0.06866070510843869, + "grad_norm": 1.1840111017227173, + "learning_rate": 9.960881659392311e-06, + "loss": 0.8418, + "step": 1336 + }, + { + "epoch": 0.06871209785178332, + "grad_norm": 1.1579750776290894, + "learning_rate": 9.960777688402826e-06, + "loss": 0.8227, + "step": 1337 + }, + { + "epoch": 0.06876349059512797, + "grad_norm": 11.913105964660645, + "learning_rate": 9.960673579970661e-06, + "loss": 0.8345, + "step": 1338 + }, + { + "epoch": 0.0688148833384726, + "grad_norm": 1.1559432744979858, + "learning_rate": 9.960569334098705e-06, + "loss": 0.8506, + "step": 1339 + }, + { + "epoch": 0.06886627608181725, + "grad_norm": 1.2218172550201416, + "learning_rate": 9.96046495078984e-06, + "loss": 0.8251, + "step": 1340 + }, + { + "epoch": 0.06891766882516188, + "grad_norm": 1.2240543365478516, + "learning_rate": 9.960360430046965e-06, + "loss": 0.8498, + "step": 1341 + }, + { + "epoch": 0.06896906156850653, + "grad_norm": 1.1750335693359375, + "learning_rate": 9.960255771872972e-06, + "loss": 0.8708, + "step": 1342 + }, + { + "epoch": 0.06902045431185116, + "grad_norm": 1.1911288499832153, + "learning_rate": 9.96015097627076e-06, + "loss": 0.8281, + "step": 1343 + }, + { + "epoch": 0.06907184705519581, + "grad_norm": 1.249687671661377, + "learning_rate": 9.960046043243236e-06, + "loss": 0.845, + "step": 1344 + }, + { + "epoch": 0.06912323979854045, + "grad_norm": 1.124484658241272, + "learning_rate": 9.959940972793305e-06, + "loss": 0.8181, + "step": 1345 + }, + { + "epoch": 0.06917463254188509, + "grad_norm": 1.2790043354034424, + "learning_rate": 9.959835764923877e-06, + "loss": 0.8349, + "step": 1346 + }, + { + "epoch": 0.06922602528522973, + "grad_norm": 1.1683560609817505, + "learning_rate": 9.959730419637867e-06, + "loss": 0.8272, + "step": 1347 + }, + { + "epoch": 0.06927741802857436, + "grad_norm": 0.9273435473442078, + "learning_rate": 9.959624936938198e-06, + "loss": 0.766, + "step": 1348 + }, + { + "epoch": 0.069328810771919, + "grad_norm": 1.2485847473144531, + "learning_rate": 9.959519316827785e-06, + "loss": 0.9199, + "step": 1349 + }, + { + "epoch": 0.06938020351526364, + "grad_norm": 1.2011022567749023, + "learning_rate": 9.95941355930956e-06, + "loss": 0.8807, + "step": 1350 + }, + { + "epoch": 0.06943159625860829, + "grad_norm": 1.2683961391448975, + "learning_rate": 9.959307664386452e-06, + "loss": 0.8585, + "step": 1351 + }, + { + "epoch": 0.06948298900195292, + "grad_norm": 1.188191294670105, + "learning_rate": 9.959201632061395e-06, + "loss": 0.8632, + "step": 1352 + }, + { + "epoch": 0.06953438174529757, + "grad_norm": 1.1577982902526855, + "learning_rate": 9.959095462337326e-06, + "loss": 0.8558, + "step": 1353 + }, + { + "epoch": 0.0695857744886422, + "grad_norm": 1.255170464515686, + "learning_rate": 9.958989155217187e-06, + "loss": 0.7719, + "step": 1354 + }, + { + "epoch": 0.06963716723198685, + "grad_norm": 1.1004319190979004, + "learning_rate": 9.95888271070392e-06, + "loss": 0.703, + "step": 1355 + }, + { + "epoch": 0.06968855997533148, + "grad_norm": 1.1970584392547607, + "learning_rate": 9.95877612880048e-06, + "loss": 0.8844, + "step": 1356 + }, + { + "epoch": 0.06973995271867613, + "grad_norm": 1.2254855632781982, + "learning_rate": 9.958669409509818e-06, + "loss": 0.7915, + "step": 1357 + }, + { + "epoch": 0.06979134546202076, + "grad_norm": 1.1627140045166016, + "learning_rate": 9.958562552834889e-06, + "loss": 0.8252, + "step": 1358 + }, + { + "epoch": 0.0698427382053654, + "grad_norm": 0.7959070801734924, + "learning_rate": 9.958455558778653e-06, + "loss": 0.7079, + "step": 1359 + }, + { + "epoch": 0.06989413094871004, + "grad_norm": 1.1797618865966797, + "learning_rate": 9.958348427344077e-06, + "loss": 0.8108, + "step": 1360 + }, + { + "epoch": 0.06994552369205469, + "grad_norm": 1.248896837234497, + "learning_rate": 9.958241158534128e-06, + "loss": 0.8556, + "step": 1361 + }, + { + "epoch": 0.06999691643539932, + "grad_norm": 1.2889940738677979, + "learning_rate": 9.958133752351778e-06, + "loss": 0.8482, + "step": 1362 + }, + { + "epoch": 0.07004830917874397, + "grad_norm": 0.8384664058685303, + "learning_rate": 9.958026208800003e-06, + "loss": 0.7828, + "step": 1363 + }, + { + "epoch": 0.0700997019220886, + "grad_norm": 1.2159619331359863, + "learning_rate": 9.95791852788178e-06, + "loss": 0.8833, + "step": 1364 + }, + { + "epoch": 0.07015109466543325, + "grad_norm": 1.1174484491348267, + "learning_rate": 9.957810709600097e-06, + "loss": 0.7938, + "step": 1365 + }, + { + "epoch": 0.07020248740877788, + "grad_norm": 1.2247254848480225, + "learning_rate": 9.957702753957938e-06, + "loss": 0.8095, + "step": 1366 + }, + { + "epoch": 0.07025388015212251, + "grad_norm": 1.2365052700042725, + "learning_rate": 9.957594660958296e-06, + "loss": 0.8266, + "step": 1367 + }, + { + "epoch": 0.07030527289546716, + "grad_norm": 1.1571627855300903, + "learning_rate": 9.957486430604163e-06, + "loss": 0.8344, + "step": 1368 + }, + { + "epoch": 0.0703566656388118, + "grad_norm": 0.9697171449661255, + "learning_rate": 9.957378062898541e-06, + "loss": 0.7369, + "step": 1369 + }, + { + "epoch": 0.07040805838215644, + "grad_norm": 1.4132081270217896, + "learning_rate": 9.95726955784443e-06, + "loss": 0.8857, + "step": 1370 + }, + { + "epoch": 0.07045945112550107, + "grad_norm": 1.1720525026321411, + "learning_rate": 9.95716091544484e-06, + "loss": 0.8819, + "step": 1371 + }, + { + "epoch": 0.07051084386884572, + "grad_norm": 1.38770592212677, + "learning_rate": 9.957052135702774e-06, + "loss": 0.7999, + "step": 1372 + }, + { + "epoch": 0.07056223661219035, + "grad_norm": 1.1321516036987305, + "learning_rate": 9.95694321862125e-06, + "loss": 0.8554, + "step": 1373 + }, + { + "epoch": 0.070613629355535, + "grad_norm": 1.1667180061340332, + "learning_rate": 9.956834164203287e-06, + "loss": 0.7785, + "step": 1374 + }, + { + "epoch": 0.07066502209887963, + "grad_norm": 1.2644528150558472, + "learning_rate": 9.956724972451906e-06, + "loss": 0.8474, + "step": 1375 + }, + { + "epoch": 0.07071641484222428, + "grad_norm": 1.300689458847046, + "learning_rate": 9.95661564337013e-06, + "loss": 0.8833, + "step": 1376 + }, + { + "epoch": 0.07076780758556891, + "grad_norm": 1.3651669025421143, + "learning_rate": 9.956506176960987e-06, + "loss": 0.8129, + "step": 1377 + }, + { + "epoch": 0.07081920032891356, + "grad_norm": 1.2010197639465332, + "learning_rate": 9.956396573227517e-06, + "loss": 0.834, + "step": 1378 + }, + { + "epoch": 0.0708705930722582, + "grad_norm": 1.2028528451919556, + "learning_rate": 9.956286832172747e-06, + "loss": 0.8359, + "step": 1379 + }, + { + "epoch": 0.07092198581560284, + "grad_norm": 1.178982138633728, + "learning_rate": 9.956176953799725e-06, + "loss": 0.8358, + "step": 1380 + }, + { + "epoch": 0.07097337855894748, + "grad_norm": 1.200798511505127, + "learning_rate": 9.956066938111491e-06, + "loss": 0.8423, + "step": 1381 + }, + { + "epoch": 0.07102477130229212, + "grad_norm": 1.1842976808547974, + "learning_rate": 9.955956785111095e-06, + "loss": 0.783, + "step": 1382 + }, + { + "epoch": 0.07107616404563676, + "grad_norm": 1.1839478015899658, + "learning_rate": 9.955846494801589e-06, + "loss": 0.7479, + "step": 1383 + }, + { + "epoch": 0.0711275567889814, + "grad_norm": 0.8355082273483276, + "learning_rate": 9.955736067186029e-06, + "loss": 0.7837, + "step": 1384 + }, + { + "epoch": 0.07117894953232604, + "grad_norm": 1.1897263526916504, + "learning_rate": 9.955625502267471e-06, + "loss": 0.8656, + "step": 1385 + }, + { + "epoch": 0.07123034227567067, + "grad_norm": 1.1653515100479126, + "learning_rate": 9.955514800048985e-06, + "loss": 0.8606, + "step": 1386 + }, + { + "epoch": 0.07128173501901532, + "grad_norm": 0.7644600868225098, + "learning_rate": 9.955403960533632e-06, + "loss": 0.739, + "step": 1387 + }, + { + "epoch": 0.07133312776235995, + "grad_norm": 1.317298412322998, + "learning_rate": 9.955292983724484e-06, + "loss": 0.8196, + "step": 1388 + }, + { + "epoch": 0.0713845205057046, + "grad_norm": 1.2663854360580444, + "learning_rate": 9.955181869624618e-06, + "loss": 0.8669, + "step": 1389 + }, + { + "epoch": 0.07143591324904923, + "grad_norm": 0.7830958366394043, + "learning_rate": 9.95507061823711e-06, + "loss": 0.7528, + "step": 1390 + }, + { + "epoch": 0.07148730599239388, + "grad_norm": 0.7879834771156311, + "learning_rate": 9.954959229565044e-06, + "loss": 0.7409, + "step": 1391 + }, + { + "epoch": 0.07153869873573851, + "grad_norm": 1.1435881853103638, + "learning_rate": 9.954847703611506e-06, + "loss": 0.9095, + "step": 1392 + }, + { + "epoch": 0.07159009147908316, + "grad_norm": 1.270622968673706, + "learning_rate": 9.954736040379587e-06, + "loss": 0.8461, + "step": 1393 + }, + { + "epoch": 0.07164148422242779, + "grad_norm": 1.233609676361084, + "learning_rate": 9.954624239872377e-06, + "loss": 0.8715, + "step": 1394 + }, + { + "epoch": 0.07169287696577244, + "grad_norm": 1.1390472650527954, + "learning_rate": 9.954512302092976e-06, + "loss": 0.8315, + "step": 1395 + }, + { + "epoch": 0.07174426970911707, + "grad_norm": 0.8489704132080078, + "learning_rate": 9.954400227044487e-06, + "loss": 0.6961, + "step": 1396 + }, + { + "epoch": 0.07179566245246172, + "grad_norm": 1.161421775817871, + "learning_rate": 9.954288014730012e-06, + "loss": 0.8712, + "step": 1397 + }, + { + "epoch": 0.07184705519580635, + "grad_norm": 1.1996312141418457, + "learning_rate": 9.954175665152661e-06, + "loss": 0.8478, + "step": 1398 + }, + { + "epoch": 0.071898447939151, + "grad_norm": 0.852490246295929, + "learning_rate": 9.954063178315549e-06, + "loss": 0.7643, + "step": 1399 + }, + { + "epoch": 0.07194984068249563, + "grad_norm": 1.2015862464904785, + "learning_rate": 9.953950554221789e-06, + "loss": 0.8842, + "step": 1400 + }, + { + "epoch": 0.07200123342584028, + "grad_norm": 1.1348485946655273, + "learning_rate": 9.953837792874502e-06, + "loss": 0.7997, + "step": 1401 + }, + { + "epoch": 0.07205262616918491, + "grad_norm": 1.1355761289596558, + "learning_rate": 9.953724894276815e-06, + "loss": 0.8445, + "step": 1402 + }, + { + "epoch": 0.07210401891252956, + "grad_norm": 1.175842523574829, + "learning_rate": 9.953611858431852e-06, + "loss": 0.8716, + "step": 1403 + }, + { + "epoch": 0.07215541165587419, + "grad_norm": 1.194063663482666, + "learning_rate": 9.953498685342748e-06, + "loss": 0.8049, + "step": 1404 + }, + { + "epoch": 0.07220680439921882, + "grad_norm": 0.9623196125030518, + "learning_rate": 9.953385375012637e-06, + "loss": 0.7444, + "step": 1405 + }, + { + "epoch": 0.07225819714256347, + "grad_norm": 0.9269728660583496, + "learning_rate": 9.953271927444659e-06, + "loss": 0.7305, + "step": 1406 + }, + { + "epoch": 0.0723095898859081, + "grad_norm": 1.1395899057388306, + "learning_rate": 9.953158342641956e-06, + "loss": 0.7863, + "step": 1407 + }, + { + "epoch": 0.07236098262925275, + "grad_norm": 1.2366182804107666, + "learning_rate": 9.953044620607677e-06, + "loss": 0.8513, + "step": 1408 + }, + { + "epoch": 0.07241237537259738, + "grad_norm": 1.21642005443573, + "learning_rate": 9.95293076134497e-06, + "loss": 0.8399, + "step": 1409 + }, + { + "epoch": 0.07246376811594203, + "grad_norm": 1.2443125247955322, + "learning_rate": 9.952816764856992e-06, + "loss": 0.9065, + "step": 1410 + }, + { + "epoch": 0.07251516085928666, + "grad_norm": 1.131524682044983, + "learning_rate": 9.952702631146901e-06, + "loss": 0.7952, + "step": 1411 + }, + { + "epoch": 0.07256655360263131, + "grad_norm": 0.9441289901733398, + "learning_rate": 9.952588360217858e-06, + "loss": 0.7774, + "step": 1412 + }, + { + "epoch": 0.07261794634597594, + "grad_norm": 1.1668057441711426, + "learning_rate": 9.95247395207303e-06, + "loss": 0.8996, + "step": 1413 + }, + { + "epoch": 0.07266933908932059, + "grad_norm": 1.1729233264923096, + "learning_rate": 9.952359406715586e-06, + "loss": 0.791, + "step": 1414 + }, + { + "epoch": 0.07272073183266523, + "grad_norm": 1.1911845207214355, + "learning_rate": 9.9522447241487e-06, + "loss": 0.7958, + "step": 1415 + }, + { + "epoch": 0.07277212457600987, + "grad_norm": 1.1784542798995972, + "learning_rate": 9.95212990437555e-06, + "loss": 0.8568, + "step": 1416 + }, + { + "epoch": 0.0728235173193545, + "grad_norm": 1.310339093208313, + "learning_rate": 9.952014947399318e-06, + "loss": 0.9307, + "step": 1417 + }, + { + "epoch": 0.07287491006269915, + "grad_norm": 1.253079891204834, + "learning_rate": 9.951899853223185e-06, + "loss": 0.8781, + "step": 1418 + }, + { + "epoch": 0.07292630280604379, + "grad_norm": 1.1128742694854736, + "learning_rate": 9.951784621850344e-06, + "loss": 0.8193, + "step": 1419 + }, + { + "epoch": 0.07297769554938843, + "grad_norm": 0.8029535412788391, + "learning_rate": 9.951669253283985e-06, + "loss": 0.7202, + "step": 1420 + }, + { + "epoch": 0.07302908829273307, + "grad_norm": 1.2505106925964355, + "learning_rate": 9.951553747527306e-06, + "loss": 0.8311, + "step": 1421 + }, + { + "epoch": 0.0730804810360777, + "grad_norm": 1.2667174339294434, + "learning_rate": 9.951438104583505e-06, + "loss": 0.8703, + "step": 1422 + }, + { + "epoch": 0.07313187377942235, + "grad_norm": 0.7906657457351685, + "learning_rate": 9.951322324455788e-06, + "loss": 0.7101, + "step": 1423 + }, + { + "epoch": 0.07318326652276698, + "grad_norm": 1.15032160282135, + "learning_rate": 9.951206407147364e-06, + "loss": 0.8473, + "step": 1424 + }, + { + "epoch": 0.07323465926611163, + "grad_norm": 1.255661964416504, + "learning_rate": 9.95109035266144e-06, + "loss": 0.8276, + "step": 1425 + }, + { + "epoch": 0.07328605200945626, + "grad_norm": 1.1481667757034302, + "learning_rate": 9.950974161001237e-06, + "loss": 0.8088, + "step": 1426 + }, + { + "epoch": 0.0733374447528009, + "grad_norm": 1.1616981029510498, + "learning_rate": 9.950857832169971e-06, + "loss": 0.8489, + "step": 1427 + }, + { + "epoch": 0.07338883749614554, + "grad_norm": 1.1490039825439453, + "learning_rate": 9.950741366170864e-06, + "loss": 0.831, + "step": 1428 + }, + { + "epoch": 0.07344023023949019, + "grad_norm": 1.151681661605835, + "learning_rate": 9.950624763007147e-06, + "loss": 0.8816, + "step": 1429 + }, + { + "epoch": 0.07349162298283482, + "grad_norm": 1.1748698949813843, + "learning_rate": 9.950508022682044e-06, + "loss": 0.8589, + "step": 1430 + }, + { + "epoch": 0.07354301572617947, + "grad_norm": 1.2848342657089233, + "learning_rate": 9.950391145198795e-06, + "loss": 0.8749, + "step": 1431 + }, + { + "epoch": 0.0735944084695241, + "grad_norm": 1.223555088043213, + "learning_rate": 9.950274130560638e-06, + "loss": 0.8585, + "step": 1432 + }, + { + "epoch": 0.07364580121286875, + "grad_norm": 1.177095651626587, + "learning_rate": 9.95015697877081e-06, + "loss": 0.745, + "step": 1433 + }, + { + "epoch": 0.07369719395621338, + "grad_norm": 0.9290037155151367, + "learning_rate": 9.950039689832565e-06, + "loss": 0.7747, + "step": 1434 + }, + { + "epoch": 0.07374858669955803, + "grad_norm": 1.2345010042190552, + "learning_rate": 9.949922263749144e-06, + "loss": 0.8246, + "step": 1435 + }, + { + "epoch": 0.07379997944290266, + "grad_norm": 1.2172366380691528, + "learning_rate": 9.949804700523805e-06, + "loss": 0.8949, + "step": 1436 + }, + { + "epoch": 0.07385137218624731, + "grad_norm": 1.1880131959915161, + "learning_rate": 9.949687000159805e-06, + "loss": 0.8409, + "step": 1437 + }, + { + "epoch": 0.07390276492959194, + "grad_norm": 1.6032335758209229, + "learning_rate": 9.949569162660405e-06, + "loss": 0.8448, + "step": 1438 + }, + { + "epoch": 0.07395415767293659, + "grad_norm": 1.176414966583252, + "learning_rate": 9.949451188028867e-06, + "loss": 0.8391, + "step": 1439 + }, + { + "epoch": 0.07400555041628122, + "grad_norm": 1.1669632196426392, + "learning_rate": 9.949333076268464e-06, + "loss": 0.8002, + "step": 1440 + }, + { + "epoch": 0.07405694315962585, + "grad_norm": 1.3067156076431274, + "learning_rate": 9.949214827382463e-06, + "loss": 0.7976, + "step": 1441 + }, + { + "epoch": 0.0741083359029705, + "grad_norm": 0.8184141516685486, + "learning_rate": 9.949096441374146e-06, + "loss": 0.6819, + "step": 1442 + }, + { + "epoch": 0.07415972864631513, + "grad_norm": 1.1940512657165527, + "learning_rate": 9.948977918246791e-06, + "loss": 0.8145, + "step": 1443 + }, + { + "epoch": 0.07421112138965978, + "grad_norm": 0.8063547015190125, + "learning_rate": 9.94885925800368e-06, + "loss": 0.7396, + "step": 1444 + }, + { + "epoch": 0.07426251413300441, + "grad_norm": 1.213698387145996, + "learning_rate": 9.9487404606481e-06, + "loss": 0.8319, + "step": 1445 + }, + { + "epoch": 0.07431390687634906, + "grad_norm": 0.8747170567512512, + "learning_rate": 9.948621526183346e-06, + "loss": 0.7597, + "step": 1446 + }, + { + "epoch": 0.0743652996196937, + "grad_norm": 1.2030444145202637, + "learning_rate": 9.948502454612712e-06, + "loss": 0.8668, + "step": 1447 + }, + { + "epoch": 0.07441669236303834, + "grad_norm": 0.9899352788925171, + "learning_rate": 9.948383245939495e-06, + "loss": 0.7368, + "step": 1448 + }, + { + "epoch": 0.07446808510638298, + "grad_norm": 0.8577443957328796, + "learning_rate": 9.948263900166998e-06, + "loss": 0.7243, + "step": 1449 + }, + { + "epoch": 0.07451947784972762, + "grad_norm": 0.786955714225769, + "learning_rate": 9.948144417298531e-06, + "loss": 0.7095, + "step": 1450 + }, + { + "epoch": 0.07457087059307226, + "grad_norm": 1.2283196449279785, + "learning_rate": 9.948024797337402e-06, + "loss": 0.8284, + "step": 1451 + }, + { + "epoch": 0.0746222633364169, + "grad_norm": 1.1920987367630005, + "learning_rate": 9.947905040286922e-06, + "loss": 0.8245, + "step": 1452 + }, + { + "epoch": 0.07467365607976154, + "grad_norm": 1.3656609058380127, + "learning_rate": 9.947785146150414e-06, + "loss": 0.8164, + "step": 1453 + }, + { + "epoch": 0.07472504882310618, + "grad_norm": 1.1480259895324707, + "learning_rate": 9.9476651149312e-06, + "loss": 0.9128, + "step": 1454 + }, + { + "epoch": 0.07477644156645082, + "grad_norm": 0.9765171408653259, + "learning_rate": 9.9475449466326e-06, + "loss": 0.7796, + "step": 1455 + }, + { + "epoch": 0.07482783430979546, + "grad_norm": 1.1816232204437256, + "learning_rate": 9.947424641257948e-06, + "loss": 0.7744, + "step": 1456 + }, + { + "epoch": 0.0748792270531401, + "grad_norm": 0.8721120953559875, + "learning_rate": 9.947304198810577e-06, + "loss": 0.7367, + "step": 1457 + }, + { + "epoch": 0.07493061979648474, + "grad_norm": 1.2565195560455322, + "learning_rate": 9.947183619293822e-06, + "loss": 0.901, + "step": 1458 + }, + { + "epoch": 0.07498201253982938, + "grad_norm": 1.2259550094604492, + "learning_rate": 9.947062902711024e-06, + "loss": 0.8315, + "step": 1459 + }, + { + "epoch": 0.07503340528317401, + "grad_norm": 0.9153401255607605, + "learning_rate": 9.946942049065532e-06, + "loss": 0.7573, + "step": 1460 + }, + { + "epoch": 0.07508479802651866, + "grad_norm": 1.2016843557357788, + "learning_rate": 9.946821058360685e-06, + "loss": 0.8429, + "step": 1461 + }, + { + "epoch": 0.07513619076986329, + "grad_norm": 0.8889265656471252, + "learning_rate": 9.946699930599845e-06, + "loss": 0.6882, + "step": 1462 + }, + { + "epoch": 0.07518758351320794, + "grad_norm": 1.3222475051879883, + "learning_rate": 9.946578665786362e-06, + "loss": 0.8192, + "step": 1463 + }, + { + "epoch": 0.07523897625655257, + "grad_norm": 1.1569023132324219, + "learning_rate": 9.946457263923599e-06, + "loss": 0.8718, + "step": 1464 + }, + { + "epoch": 0.07529036899989722, + "grad_norm": 1.1867568492889404, + "learning_rate": 9.946335725014918e-06, + "loss": 0.8506, + "step": 1465 + }, + { + "epoch": 0.07534176174324185, + "grad_norm": 0.9825249910354614, + "learning_rate": 9.946214049063684e-06, + "loss": 0.724, + "step": 1466 + }, + { + "epoch": 0.0753931544865865, + "grad_norm": 0.8492066860198975, + "learning_rate": 9.946092236073272e-06, + "loss": 0.7231, + "step": 1467 + }, + { + "epoch": 0.07544454722993113, + "grad_norm": 1.2318816184997559, + "learning_rate": 9.945970286047055e-06, + "loss": 0.8728, + "step": 1468 + }, + { + "epoch": 0.07549593997327578, + "grad_norm": 1.2679754495620728, + "learning_rate": 9.945848198988412e-06, + "loss": 0.8697, + "step": 1469 + }, + { + "epoch": 0.07554733271662041, + "grad_norm": 1.0174459218978882, + "learning_rate": 9.945725974900728e-06, + "loss": 0.7369, + "step": 1470 + }, + { + "epoch": 0.07559872545996506, + "grad_norm": 1.165276050567627, + "learning_rate": 9.945603613787385e-06, + "loss": 0.7709, + "step": 1471 + }, + { + "epoch": 0.07565011820330969, + "grad_norm": 1.2141611576080322, + "learning_rate": 9.945481115651774e-06, + "loss": 0.883, + "step": 1472 + }, + { + "epoch": 0.07570151094665434, + "grad_norm": 1.182141900062561, + "learning_rate": 9.945358480497293e-06, + "loss": 0.88, + "step": 1473 + }, + { + "epoch": 0.07575290368999897, + "grad_norm": 1.215726613998413, + "learning_rate": 9.945235708327335e-06, + "loss": 0.8595, + "step": 1474 + }, + { + "epoch": 0.07580429643334362, + "grad_norm": 1.23809814453125, + "learning_rate": 9.945112799145305e-06, + "loss": 0.8469, + "step": 1475 + }, + { + "epoch": 0.07585568917668825, + "grad_norm": 1.179121494293213, + "learning_rate": 9.944989752954604e-06, + "loss": 0.8458, + "step": 1476 + }, + { + "epoch": 0.07590708192003288, + "grad_norm": 0.9221994876861572, + "learning_rate": 9.944866569758644e-06, + "loss": 0.7097, + "step": 1477 + }, + { + "epoch": 0.07595847466337753, + "grad_norm": 1.167385220527649, + "learning_rate": 9.94474324956084e-06, + "loss": 0.9073, + "step": 1478 + }, + { + "epoch": 0.07600986740672216, + "grad_norm": 0.7991660833358765, + "learning_rate": 9.944619792364606e-06, + "loss": 0.736, + "step": 1479 + }, + { + "epoch": 0.07606126015006681, + "grad_norm": 1.156245231628418, + "learning_rate": 9.94449619817336e-06, + "loss": 0.8384, + "step": 1480 + }, + { + "epoch": 0.07611265289341144, + "grad_norm": 0.8453945517539978, + "learning_rate": 9.94437246699053e-06, + "loss": 0.6775, + "step": 1481 + }, + { + "epoch": 0.07616404563675609, + "grad_norm": 1.1651897430419922, + "learning_rate": 9.944248598819546e-06, + "loss": 0.8419, + "step": 1482 + }, + { + "epoch": 0.07621543838010073, + "grad_norm": 1.2072960138320923, + "learning_rate": 9.944124593663835e-06, + "loss": 0.7924, + "step": 1483 + }, + { + "epoch": 0.07626683112344537, + "grad_norm": 1.234621524810791, + "learning_rate": 9.944000451526833e-06, + "loss": 0.8932, + "step": 1484 + }, + { + "epoch": 0.07631822386679, + "grad_norm": 1.410512089729309, + "learning_rate": 9.943876172411983e-06, + "loss": 0.8674, + "step": 1485 + }, + { + "epoch": 0.07636961661013465, + "grad_norm": 1.156535029411316, + "learning_rate": 9.943751756322727e-06, + "loss": 0.8117, + "step": 1486 + }, + { + "epoch": 0.07642100935347929, + "grad_norm": 1.265147089958191, + "learning_rate": 9.943627203262511e-06, + "loss": 0.8851, + "step": 1487 + }, + { + "epoch": 0.07647240209682393, + "grad_norm": 1.42330002784729, + "learning_rate": 9.943502513234786e-06, + "loss": 0.872, + "step": 1488 + }, + { + "epoch": 0.07652379484016857, + "grad_norm": 1.1698558330535889, + "learning_rate": 9.943377686243008e-06, + "loss": 0.8876, + "step": 1489 + }, + { + "epoch": 0.07657518758351321, + "grad_norm": 1.261244535446167, + "learning_rate": 9.943252722290633e-06, + "loss": 0.8417, + "step": 1490 + }, + { + "epoch": 0.07662658032685785, + "grad_norm": 1.14500093460083, + "learning_rate": 9.943127621381126e-06, + "loss": 0.8316, + "step": 1491 + }, + { + "epoch": 0.0766779730702025, + "grad_norm": 1.1792700290679932, + "learning_rate": 9.94300238351795e-06, + "loss": 0.814, + "step": 1492 + }, + { + "epoch": 0.07672936581354713, + "grad_norm": 1.2186336517333984, + "learning_rate": 9.942877008704576e-06, + "loss": 0.7863, + "step": 1493 + }, + { + "epoch": 0.07678075855689177, + "grad_norm": 1.179352045059204, + "learning_rate": 9.94275149694448e-06, + "loss": 0.839, + "step": 1494 + }, + { + "epoch": 0.0768321513002364, + "grad_norm": 1.1480205059051514, + "learning_rate": 9.942625848241138e-06, + "loss": 0.8481, + "step": 1495 + }, + { + "epoch": 0.07688354404358104, + "grad_norm": 1.1068614721298218, + "learning_rate": 9.94250006259803e-06, + "loss": 0.8276, + "step": 1496 + }, + { + "epoch": 0.07693493678692569, + "grad_norm": 1.2442585229873657, + "learning_rate": 9.942374140018641e-06, + "loss": 0.8391, + "step": 1497 + }, + { + "epoch": 0.07698632953027032, + "grad_norm": 1.207851529121399, + "learning_rate": 9.942248080506462e-06, + "loss": 0.824, + "step": 1498 + }, + { + "epoch": 0.07703772227361497, + "grad_norm": 1.221541166305542, + "learning_rate": 9.942121884064984e-06, + "loss": 0.8529, + "step": 1499 + }, + { + "epoch": 0.0770891150169596, + "grad_norm": 1.1786326169967651, + "learning_rate": 9.941995550697702e-06, + "loss": 0.8263, + "step": 1500 + }, + { + "epoch": 0.07714050776030425, + "grad_norm": 1.1345199346542358, + "learning_rate": 9.941869080408118e-06, + "loss": 0.8055, + "step": 1501 + }, + { + "epoch": 0.07719190050364888, + "grad_norm": 1.1585556268692017, + "learning_rate": 9.941742473199737e-06, + "loss": 0.8309, + "step": 1502 + }, + { + "epoch": 0.07724329324699353, + "grad_norm": 1.142100214958191, + "learning_rate": 9.941615729076063e-06, + "loss": 0.866, + "step": 1503 + }, + { + "epoch": 0.07729468599033816, + "grad_norm": 1.2723445892333984, + "learning_rate": 9.941488848040612e-06, + "loss": 0.8779, + "step": 1504 + }, + { + "epoch": 0.07734607873368281, + "grad_norm": 1.1520577669143677, + "learning_rate": 9.941361830096899e-06, + "loss": 0.7641, + "step": 1505 + }, + { + "epoch": 0.07739747147702744, + "grad_norm": 1.180328607559204, + "learning_rate": 9.941234675248438e-06, + "loss": 0.8326, + "step": 1506 + }, + { + "epoch": 0.07744886422037209, + "grad_norm": 1.1629438400268555, + "learning_rate": 9.941107383498757e-06, + "loss": 0.83, + "step": 1507 + }, + { + "epoch": 0.07750025696371672, + "grad_norm": 1.227476954460144, + "learning_rate": 9.940979954851383e-06, + "loss": 0.8818, + "step": 1508 + }, + { + "epoch": 0.07755164970706137, + "grad_norm": 1.2439676523208618, + "learning_rate": 9.940852389309841e-06, + "loss": 0.8443, + "step": 1509 + }, + { + "epoch": 0.077603042450406, + "grad_norm": 1.1380934715270996, + "learning_rate": 9.940724686877672e-06, + "loss": 0.851, + "step": 1510 + }, + { + "epoch": 0.07765443519375065, + "grad_norm": 1.1234251260757446, + "learning_rate": 9.94059684755841e-06, + "loss": 0.8157, + "step": 1511 + }, + { + "epoch": 0.07770582793709528, + "grad_norm": 1.1356112957000732, + "learning_rate": 9.940468871355598e-06, + "loss": 0.859, + "step": 1512 + }, + { + "epoch": 0.07775722068043993, + "grad_norm": 0.8068326115608215, + "learning_rate": 9.94034075827278e-06, + "loss": 0.7219, + "step": 1513 + }, + { + "epoch": 0.07780861342378456, + "grad_norm": 1.1665666103363037, + "learning_rate": 9.940212508313509e-06, + "loss": 0.8223, + "step": 1514 + }, + { + "epoch": 0.0778600061671292, + "grad_norm": 0.8754160404205322, + "learning_rate": 9.940084121481336e-06, + "loss": 0.7071, + "step": 1515 + }, + { + "epoch": 0.07791139891047384, + "grad_norm": 1.2813318967819214, + "learning_rate": 9.93995559777982e-06, + "loss": 0.8728, + "step": 1516 + }, + { + "epoch": 0.07796279165381848, + "grad_norm": 1.2071044445037842, + "learning_rate": 9.939826937212517e-06, + "loss": 0.832, + "step": 1517 + }, + { + "epoch": 0.07801418439716312, + "grad_norm": 1.2397701740264893, + "learning_rate": 9.939698139782998e-06, + "loss": 0.8633, + "step": 1518 + }, + { + "epoch": 0.07806557714050776, + "grad_norm": 1.1649987697601318, + "learning_rate": 9.939569205494829e-06, + "loss": 0.831, + "step": 1519 + }, + { + "epoch": 0.0781169698838524, + "grad_norm": 1.0392868518829346, + "learning_rate": 9.939440134351578e-06, + "loss": 0.7676, + "step": 1520 + }, + { + "epoch": 0.07816836262719704, + "grad_norm": 1.2819868326187134, + "learning_rate": 9.939310926356828e-06, + "loss": 0.8081, + "step": 1521 + }, + { + "epoch": 0.07821975537054168, + "grad_norm": 1.20846426486969, + "learning_rate": 9.939181581514155e-06, + "loss": 0.8264, + "step": 1522 + }, + { + "epoch": 0.07827114811388632, + "grad_norm": 0.8298874497413635, + "learning_rate": 9.939052099827142e-06, + "loss": 0.7511, + "step": 1523 + }, + { + "epoch": 0.07832254085723096, + "grad_norm": 1.191690444946289, + "learning_rate": 9.938922481299378e-06, + "loss": 0.8948, + "step": 1524 + }, + { + "epoch": 0.0783739336005756, + "grad_norm": 1.1529649496078491, + "learning_rate": 9.938792725934455e-06, + "loss": 0.7952, + "step": 1525 + }, + { + "epoch": 0.07842532634392024, + "grad_norm": 1.1194936037063599, + "learning_rate": 9.938662833735966e-06, + "loss": 0.843, + "step": 1526 + }, + { + "epoch": 0.07847671908726488, + "grad_norm": 1.1901437044143677, + "learning_rate": 9.93853280470751e-06, + "loss": 0.8535, + "step": 1527 + }, + { + "epoch": 0.07852811183060952, + "grad_norm": 1.533829689025879, + "learning_rate": 9.93840263885269e-06, + "loss": 0.8294, + "step": 1528 + }, + { + "epoch": 0.07857950457395416, + "grad_norm": 0.9010263085365295, + "learning_rate": 9.938272336175113e-06, + "loss": 0.7387, + "step": 1529 + }, + { + "epoch": 0.0786308973172988, + "grad_norm": 0.8380365371704102, + "learning_rate": 9.938141896678388e-06, + "loss": 0.6974, + "step": 1530 + }, + { + "epoch": 0.07868229006064344, + "grad_norm": 1.2068496942520142, + "learning_rate": 9.93801132036613e-06, + "loss": 0.8268, + "step": 1531 + }, + { + "epoch": 0.07873368280398808, + "grad_norm": 1.2153056859970093, + "learning_rate": 9.937880607241956e-06, + "loss": 0.8109, + "step": 1532 + }, + { + "epoch": 0.07878507554733272, + "grad_norm": 1.1627013683319092, + "learning_rate": 9.937749757309489e-06, + "loss": 0.8535, + "step": 1533 + }, + { + "epoch": 0.07883646829067735, + "grad_norm": 1.1460394859313965, + "learning_rate": 9.937618770572352e-06, + "loss": 0.8462, + "step": 1534 + }, + { + "epoch": 0.078887861034022, + "grad_norm": 1.1696171760559082, + "learning_rate": 9.937487647034176e-06, + "loss": 0.8693, + "step": 1535 + }, + { + "epoch": 0.07893925377736663, + "grad_norm": 0.8339906930923462, + "learning_rate": 9.937356386698593e-06, + "loss": 0.7087, + "step": 1536 + }, + { + "epoch": 0.07899064652071128, + "grad_norm": 1.1579349040985107, + "learning_rate": 9.937224989569239e-06, + "loss": 0.822, + "step": 1537 + }, + { + "epoch": 0.07904203926405591, + "grad_norm": 1.2062667608261108, + "learning_rate": 9.937093455649755e-06, + "loss": 0.888, + "step": 1538 + }, + { + "epoch": 0.07909343200740056, + "grad_norm": 1.3188236951828003, + "learning_rate": 9.936961784943785e-06, + "loss": 0.856, + "step": 1539 + }, + { + "epoch": 0.07914482475074519, + "grad_norm": 1.4719436168670654, + "learning_rate": 9.93682997745498e-06, + "loss": 0.8293, + "step": 1540 + }, + { + "epoch": 0.07919621749408984, + "grad_norm": 1.2409130334854126, + "learning_rate": 9.936698033186988e-06, + "loss": 0.8802, + "step": 1541 + }, + { + "epoch": 0.07924761023743447, + "grad_norm": 1.1431233882904053, + "learning_rate": 9.936565952143466e-06, + "loss": 0.8779, + "step": 1542 + }, + { + "epoch": 0.07929900298077912, + "grad_norm": 1.2298637628555298, + "learning_rate": 9.936433734328075e-06, + "loss": 0.8227, + "step": 1543 + }, + { + "epoch": 0.07935039572412375, + "grad_norm": 1.142795205116272, + "learning_rate": 9.936301379744475e-06, + "loss": 0.8651, + "step": 1544 + }, + { + "epoch": 0.0794017884674684, + "grad_norm": 1.1544638872146606, + "learning_rate": 9.936168888396337e-06, + "loss": 0.7847, + "step": 1545 + }, + { + "epoch": 0.07945318121081303, + "grad_norm": 1.1904575824737549, + "learning_rate": 9.936036260287325e-06, + "loss": 0.8296, + "step": 1546 + }, + { + "epoch": 0.07950457395415768, + "grad_norm": 1.2933287620544434, + "learning_rate": 9.935903495421122e-06, + "loss": 0.8829, + "step": 1547 + }, + { + "epoch": 0.07955596669750231, + "grad_norm": 1.2042230367660522, + "learning_rate": 9.935770593801401e-06, + "loss": 0.8435, + "step": 1548 + }, + { + "epoch": 0.07960735944084696, + "grad_norm": 1.0343199968338013, + "learning_rate": 9.935637555431845e-06, + "loss": 0.7205, + "step": 1549 + }, + { + "epoch": 0.07965875218419159, + "grad_norm": 1.2809213399887085, + "learning_rate": 9.935504380316141e-06, + "loss": 0.8765, + "step": 1550 + }, + { + "epoch": 0.07971014492753623, + "grad_norm": 1.2070891857147217, + "learning_rate": 9.935371068457979e-06, + "loss": 0.8762, + "step": 1551 + }, + { + "epoch": 0.07976153767088087, + "grad_norm": 1.2169498205184937, + "learning_rate": 9.935237619861051e-06, + "loss": 0.7749, + "step": 1552 + }, + { + "epoch": 0.0798129304142255, + "grad_norm": 1.2012264728546143, + "learning_rate": 9.935104034529054e-06, + "loss": 0.8357, + "step": 1553 + }, + { + "epoch": 0.07986432315757015, + "grad_norm": 1.1824877262115479, + "learning_rate": 9.934970312465692e-06, + "loss": 0.8241, + "step": 1554 + }, + { + "epoch": 0.07991571590091479, + "grad_norm": 1.1611387729644775, + "learning_rate": 9.934836453674667e-06, + "loss": 0.8175, + "step": 1555 + }, + { + "epoch": 0.07996710864425943, + "grad_norm": 1.2355875968933105, + "learning_rate": 9.93470245815969e-06, + "loss": 0.8149, + "step": 1556 + }, + { + "epoch": 0.08001850138760407, + "grad_norm": 0.7743059396743774, + "learning_rate": 9.934568325924472e-06, + "loss": 0.7496, + "step": 1557 + }, + { + "epoch": 0.08006989413094871, + "grad_norm": 1.317503809928894, + "learning_rate": 9.934434056972728e-06, + "loss": 0.8467, + "step": 1558 + }, + { + "epoch": 0.08012128687429335, + "grad_norm": 0.771838366985321, + "learning_rate": 9.93429965130818e-06, + "loss": 0.731, + "step": 1559 + }, + { + "epoch": 0.08017267961763799, + "grad_norm": 1.1630659103393555, + "learning_rate": 9.934165108934552e-06, + "loss": 0.8292, + "step": 1560 + }, + { + "epoch": 0.08022407236098263, + "grad_norm": 1.182859182357788, + "learning_rate": 9.93403042985557e-06, + "loss": 0.8685, + "step": 1561 + }, + { + "epoch": 0.08027546510432727, + "grad_norm": 1.1724762916564941, + "learning_rate": 9.933895614074966e-06, + "loss": 0.7261, + "step": 1562 + }, + { + "epoch": 0.0803268578476719, + "grad_norm": 0.739230751991272, + "learning_rate": 9.933760661596478e-06, + "loss": 0.7088, + "step": 1563 + }, + { + "epoch": 0.08037825059101655, + "grad_norm": 1.2899434566497803, + "learning_rate": 9.93362557242384e-06, + "loss": 0.8419, + "step": 1564 + }, + { + "epoch": 0.08042964333436119, + "grad_norm": 1.2055768966674805, + "learning_rate": 9.9334903465608e-06, + "loss": 0.8959, + "step": 1565 + }, + { + "epoch": 0.08048103607770583, + "grad_norm": 1.1167572736740112, + "learning_rate": 9.933354984011098e-06, + "loss": 0.8142, + "step": 1566 + }, + { + "epoch": 0.08053242882105047, + "grad_norm": 1.0762931108474731, + "learning_rate": 9.933219484778491e-06, + "loss": 0.8056, + "step": 1567 + }, + { + "epoch": 0.08058382156439511, + "grad_norm": 1.1156624555587769, + "learning_rate": 9.93308384886673e-06, + "loss": 0.8121, + "step": 1568 + }, + { + "epoch": 0.08063521430773975, + "grad_norm": 0.8716696500778198, + "learning_rate": 9.932948076279571e-06, + "loss": 0.7291, + "step": 1569 + }, + { + "epoch": 0.08068660705108438, + "grad_norm": 1.1160387992858887, + "learning_rate": 9.93281216702078e-06, + "loss": 0.8296, + "step": 1570 + }, + { + "epoch": 0.08073799979442903, + "grad_norm": 1.1406720876693726, + "learning_rate": 9.93267612109412e-06, + "loss": 0.8575, + "step": 1571 + }, + { + "epoch": 0.08078939253777366, + "grad_norm": 1.2666432857513428, + "learning_rate": 9.932539938503361e-06, + "loss": 0.892, + "step": 1572 + }, + { + "epoch": 0.08084078528111831, + "grad_norm": 1.1326005458831787, + "learning_rate": 9.932403619252274e-06, + "loss": 0.7921, + "step": 1573 + }, + { + "epoch": 0.08089217802446294, + "grad_norm": 0.9215224385261536, + "learning_rate": 9.93226716334464e-06, + "loss": 0.7327, + "step": 1574 + }, + { + "epoch": 0.08094357076780759, + "grad_norm": 1.1820156574249268, + "learning_rate": 9.932130570784236e-06, + "loss": 0.9139, + "step": 1575 + }, + { + "epoch": 0.08099496351115222, + "grad_norm": 1.2150022983551025, + "learning_rate": 9.931993841574846e-06, + "loss": 0.8315, + "step": 1576 + }, + { + "epoch": 0.08104635625449687, + "grad_norm": 1.1447468996047974, + "learning_rate": 9.931856975720263e-06, + "loss": 0.782, + "step": 1577 + }, + { + "epoch": 0.0810977489978415, + "grad_norm": 1.1437690258026123, + "learning_rate": 9.931719973224272e-06, + "loss": 0.8255, + "step": 1578 + }, + { + "epoch": 0.08114914174118615, + "grad_norm": 1.1846719980239868, + "learning_rate": 9.931582834090676e-06, + "loss": 0.8004, + "step": 1579 + }, + { + "epoch": 0.08120053448453078, + "grad_norm": 0.7766371369361877, + "learning_rate": 9.931445558323269e-06, + "loss": 0.6986, + "step": 1580 + }, + { + "epoch": 0.08125192722787543, + "grad_norm": 1.184993863105774, + "learning_rate": 9.931308145925858e-06, + "loss": 0.8207, + "step": 1581 + }, + { + "epoch": 0.08130331997122006, + "grad_norm": 1.2104731798171997, + "learning_rate": 9.931170596902246e-06, + "loss": 0.8031, + "step": 1582 + }, + { + "epoch": 0.08135471271456471, + "grad_norm": 1.093277096748352, + "learning_rate": 9.931032911256249e-06, + "loss": 0.9028, + "step": 1583 + }, + { + "epoch": 0.08140610545790934, + "grad_norm": 1.2365262508392334, + "learning_rate": 9.930895088991678e-06, + "loss": 0.8832, + "step": 1584 + }, + { + "epoch": 0.08145749820125399, + "grad_norm": 1.2132134437561035, + "learning_rate": 9.930757130112354e-06, + "loss": 0.8222, + "step": 1585 + }, + { + "epoch": 0.08150889094459862, + "grad_norm": 1.1220144033432007, + "learning_rate": 9.930619034622095e-06, + "loss": 0.8661, + "step": 1586 + }, + { + "epoch": 0.08156028368794327, + "grad_norm": 1.2653635740280151, + "learning_rate": 9.930480802524733e-06, + "loss": 0.8175, + "step": 1587 + }, + { + "epoch": 0.0816116764312879, + "grad_norm": 1.1704076528549194, + "learning_rate": 9.930342433824094e-06, + "loss": 0.8977, + "step": 1588 + }, + { + "epoch": 0.08166306917463254, + "grad_norm": 1.1403098106384277, + "learning_rate": 9.930203928524012e-06, + "loss": 0.8181, + "step": 1589 + }, + { + "epoch": 0.08171446191797718, + "grad_norm": 1.1767399311065674, + "learning_rate": 9.930065286628325e-06, + "loss": 0.8518, + "step": 1590 + }, + { + "epoch": 0.08176585466132182, + "grad_norm": 0.8117778301239014, + "learning_rate": 9.929926508140875e-06, + "loss": 0.745, + "step": 1591 + }, + { + "epoch": 0.08181724740466646, + "grad_norm": 0.7984206080436707, + "learning_rate": 9.929787593065506e-06, + "loss": 0.6961, + "step": 1592 + }, + { + "epoch": 0.0818686401480111, + "grad_norm": 1.2913763523101807, + "learning_rate": 9.929648541406065e-06, + "loss": 0.8533, + "step": 1593 + }, + { + "epoch": 0.08192003289135574, + "grad_norm": 1.2602064609527588, + "learning_rate": 9.929509353166408e-06, + "loss": 0.8624, + "step": 1594 + }, + { + "epoch": 0.08197142563470038, + "grad_norm": 1.1494656801223755, + "learning_rate": 9.929370028350389e-06, + "loss": 0.8225, + "step": 1595 + }, + { + "epoch": 0.08202281837804502, + "grad_norm": 1.1617251634597778, + "learning_rate": 9.92923056696187e-06, + "loss": 0.8628, + "step": 1596 + }, + { + "epoch": 0.08207421112138966, + "grad_norm": 1.2061105966567993, + "learning_rate": 9.929090969004713e-06, + "loss": 0.8629, + "step": 1597 + }, + { + "epoch": 0.0821256038647343, + "grad_norm": 1.1750879287719727, + "learning_rate": 9.928951234482786e-06, + "loss": 0.8912, + "step": 1598 + }, + { + "epoch": 0.08217699660807894, + "grad_norm": 1.14617121219635, + "learning_rate": 9.928811363399961e-06, + "loss": 0.8423, + "step": 1599 + }, + { + "epoch": 0.08222838935142358, + "grad_norm": 1.1267099380493164, + "learning_rate": 9.928671355760114e-06, + "loss": 0.8629, + "step": 1600 + }, + { + "epoch": 0.08227978209476822, + "grad_norm": 1.2203190326690674, + "learning_rate": 9.928531211567122e-06, + "loss": 0.8128, + "step": 1601 + }, + { + "epoch": 0.08233117483811286, + "grad_norm": 1.1688432693481445, + "learning_rate": 9.928390930824869e-06, + "loss": 0.8511, + "step": 1602 + }, + { + "epoch": 0.0823825675814575, + "grad_norm": 1.2543197870254517, + "learning_rate": 9.928250513537242e-06, + "loss": 0.8191, + "step": 1603 + }, + { + "epoch": 0.08243396032480214, + "grad_norm": 0.9396727085113525, + "learning_rate": 9.928109959708131e-06, + "loss": 0.7219, + "step": 1604 + }, + { + "epoch": 0.08248535306814678, + "grad_norm": 1.206816554069519, + "learning_rate": 9.927969269341431e-06, + "loss": 0.8721, + "step": 1605 + }, + { + "epoch": 0.08253674581149142, + "grad_norm": 1.2839499711990356, + "learning_rate": 9.927828442441038e-06, + "loss": 0.8268, + "step": 1606 + }, + { + "epoch": 0.08258813855483606, + "grad_norm": 1.5705254077911377, + "learning_rate": 9.927687479010855e-06, + "loss": 0.8246, + "step": 1607 + }, + { + "epoch": 0.08263953129818069, + "grad_norm": 1.1614164113998413, + "learning_rate": 9.927546379054786e-06, + "loss": 0.8608, + "step": 1608 + }, + { + "epoch": 0.08269092404152534, + "grad_norm": 1.188598394393921, + "learning_rate": 9.927405142576744e-06, + "loss": 0.8613, + "step": 1609 + }, + { + "epoch": 0.08274231678486997, + "grad_norm": 1.1738187074661255, + "learning_rate": 9.927263769580639e-06, + "loss": 0.8375, + "step": 1610 + }, + { + "epoch": 0.08279370952821462, + "grad_norm": 0.9251877665519714, + "learning_rate": 9.927122260070388e-06, + "loss": 0.7336, + "step": 1611 + }, + { + "epoch": 0.08284510227155925, + "grad_norm": 1.1685748100280762, + "learning_rate": 9.926980614049913e-06, + "loss": 0.7769, + "step": 1612 + }, + { + "epoch": 0.0828964950149039, + "grad_norm": 1.2056996822357178, + "learning_rate": 9.926838831523136e-06, + "loss": 0.8557, + "step": 1613 + }, + { + "epoch": 0.08294788775824853, + "grad_norm": 1.1196720600128174, + "learning_rate": 9.926696912493988e-06, + "loss": 0.8099, + "step": 1614 + }, + { + "epoch": 0.08299928050159318, + "grad_norm": 1.269811987876892, + "learning_rate": 9.926554856966399e-06, + "loss": 0.8729, + "step": 1615 + }, + { + "epoch": 0.08305067324493781, + "grad_norm": 1.1423460245132446, + "learning_rate": 9.926412664944308e-06, + "loss": 0.7912, + "step": 1616 + }, + { + "epoch": 0.08310206598828246, + "grad_norm": 1.2107304334640503, + "learning_rate": 9.926270336431649e-06, + "loss": 0.8389, + "step": 1617 + }, + { + "epoch": 0.08315345873162709, + "grad_norm": 1.3321670293807983, + "learning_rate": 9.92612787143237e-06, + "loss": 0.8208, + "step": 1618 + }, + { + "epoch": 0.08320485147497174, + "grad_norm": 1.2418450117111206, + "learning_rate": 9.925985269950417e-06, + "loss": 0.8776, + "step": 1619 + }, + { + "epoch": 0.08325624421831637, + "grad_norm": 1.39900541305542, + "learning_rate": 9.92584253198974e-06, + "loss": 0.8069, + "step": 1620 + }, + { + "epoch": 0.08330763696166102, + "grad_norm": 1.2392438650131226, + "learning_rate": 9.925699657554294e-06, + "loss": 0.8143, + "step": 1621 + }, + { + "epoch": 0.08335902970500565, + "grad_norm": 1.18338942527771, + "learning_rate": 9.925556646648039e-06, + "loss": 0.8686, + "step": 1622 + }, + { + "epoch": 0.0834104224483503, + "grad_norm": 0.9409334063529968, + "learning_rate": 9.925413499274935e-06, + "loss": 0.6862, + "step": 1623 + }, + { + "epoch": 0.08346181519169493, + "grad_norm": 1.302871823310852, + "learning_rate": 9.925270215438947e-06, + "loss": 0.8698, + "step": 1624 + }, + { + "epoch": 0.08351320793503957, + "grad_norm": 1.3412436246871948, + "learning_rate": 9.925126795144048e-06, + "loss": 0.8131, + "step": 1625 + }, + { + "epoch": 0.08356460067838421, + "grad_norm": 1.3268060684204102, + "learning_rate": 9.924983238394212e-06, + "loss": 0.7826, + "step": 1626 + }, + { + "epoch": 0.08361599342172885, + "grad_norm": 1.24637770652771, + "learning_rate": 9.924839545193412e-06, + "loss": 0.8274, + "step": 1627 + }, + { + "epoch": 0.08366738616507349, + "grad_norm": 1.3227349519729614, + "learning_rate": 9.924695715545634e-06, + "loss": 0.7981, + "step": 1628 + }, + { + "epoch": 0.08371877890841813, + "grad_norm": 1.1521252393722534, + "learning_rate": 9.924551749454861e-06, + "loss": 0.7988, + "step": 1629 + }, + { + "epoch": 0.08377017165176277, + "grad_norm": 1.182499885559082, + "learning_rate": 9.92440764692508e-06, + "loss": 0.84, + "step": 1630 + }, + { + "epoch": 0.0838215643951074, + "grad_norm": 1.210735559463501, + "learning_rate": 9.924263407960285e-06, + "loss": 0.8569, + "step": 1631 + }, + { + "epoch": 0.08387295713845205, + "grad_norm": 1.194484829902649, + "learning_rate": 9.924119032564472e-06, + "loss": 0.8735, + "step": 1632 + }, + { + "epoch": 0.08392434988179669, + "grad_norm": 1.1902440786361694, + "learning_rate": 9.923974520741642e-06, + "loss": 0.8325, + "step": 1633 + }, + { + "epoch": 0.08397574262514133, + "grad_norm": 0.9878230690956116, + "learning_rate": 9.923829872495799e-06, + "loss": 0.7547, + "step": 1634 + }, + { + "epoch": 0.08402713536848597, + "grad_norm": 1.0768442153930664, + "learning_rate": 9.923685087830947e-06, + "loss": 0.7315, + "step": 1635 + }, + { + "epoch": 0.08407852811183061, + "grad_norm": 1.1201190948486328, + "learning_rate": 9.923540166751102e-06, + "loss": 0.8132, + "step": 1636 + }, + { + "epoch": 0.08412992085517525, + "grad_norm": 1.207828402519226, + "learning_rate": 9.923395109260276e-06, + "loss": 0.8567, + "step": 1637 + }, + { + "epoch": 0.0841813135985199, + "grad_norm": 1.174572229385376, + "learning_rate": 9.923249915362491e-06, + "loss": 0.8372, + "step": 1638 + }, + { + "epoch": 0.08423270634186453, + "grad_norm": 1.1701222658157349, + "learning_rate": 9.923104585061766e-06, + "loss": 0.8142, + "step": 1639 + }, + { + "epoch": 0.08428409908520917, + "grad_norm": 1.159571647644043, + "learning_rate": 9.922959118362132e-06, + "loss": 0.796, + "step": 1640 + }, + { + "epoch": 0.08433549182855381, + "grad_norm": 1.1724703311920166, + "learning_rate": 9.922813515267614e-06, + "loss": 0.9127, + "step": 1641 + }, + { + "epoch": 0.08438688457189845, + "grad_norm": 1.1685751676559448, + "learning_rate": 9.92266777578225e-06, + "loss": 0.8545, + "step": 1642 + }, + { + "epoch": 0.08443827731524309, + "grad_norm": 1.173614740371704, + "learning_rate": 9.922521899910076e-06, + "loss": 0.8735, + "step": 1643 + }, + { + "epoch": 0.08448967005858772, + "grad_norm": 1.1921416521072388, + "learning_rate": 9.922375887655136e-06, + "loss": 0.8827, + "step": 1644 + }, + { + "epoch": 0.08454106280193237, + "grad_norm": 1.0121287107467651, + "learning_rate": 9.922229739021471e-06, + "loss": 0.7882, + "step": 1645 + }, + { + "epoch": 0.084592455545277, + "grad_norm": 1.1683837175369263, + "learning_rate": 9.922083454013134e-06, + "loss": 0.8126, + "step": 1646 + }, + { + "epoch": 0.08464384828862165, + "grad_norm": 1.148938536643982, + "learning_rate": 9.921937032634177e-06, + "loss": 0.8313, + "step": 1647 + }, + { + "epoch": 0.08469524103196628, + "grad_norm": 0.9767423868179321, + "learning_rate": 9.921790474888656e-06, + "loss": 0.715, + "step": 1648 + }, + { + "epoch": 0.08474663377531093, + "grad_norm": 1.1744794845581055, + "learning_rate": 9.921643780780631e-06, + "loss": 0.81, + "step": 1649 + }, + { + "epoch": 0.08479802651865556, + "grad_norm": 1.0476856231689453, + "learning_rate": 9.921496950314169e-06, + "loss": 0.7134, + "step": 1650 + }, + { + "epoch": 0.08484941926200021, + "grad_norm": 1.0916850566864014, + "learning_rate": 9.921349983493336e-06, + "loss": 0.7874, + "step": 1651 + }, + { + "epoch": 0.08490081200534484, + "grad_norm": 1.165315866470337, + "learning_rate": 9.921202880322204e-06, + "loss": 0.82, + "step": 1652 + }, + { + "epoch": 0.08495220474868949, + "grad_norm": 1.2108068466186523, + "learning_rate": 9.921055640804849e-06, + "loss": 0.8478, + "step": 1653 + }, + { + "epoch": 0.08500359749203412, + "grad_norm": 1.081020712852478, + "learning_rate": 9.920908264945349e-06, + "loss": 0.8261, + "step": 1654 + }, + { + "epoch": 0.08505499023537877, + "grad_norm": 1.456040859222412, + "learning_rate": 9.92076075274779e-06, + "loss": 0.7596, + "step": 1655 + }, + { + "epoch": 0.0851063829787234, + "grad_norm": 1.2016627788543701, + "learning_rate": 9.920613104216256e-06, + "loss": 0.8583, + "step": 1656 + }, + { + "epoch": 0.08515777572206805, + "grad_norm": 1.2699116468429565, + "learning_rate": 9.92046531935484e-06, + "loss": 0.8366, + "step": 1657 + }, + { + "epoch": 0.08520916846541268, + "grad_norm": 1.1259067058563232, + "learning_rate": 9.920317398167634e-06, + "loss": 0.8305, + "step": 1658 + }, + { + "epoch": 0.08526056120875733, + "grad_norm": 1.1446977853775024, + "learning_rate": 9.920169340658739e-06, + "loss": 0.8855, + "step": 1659 + }, + { + "epoch": 0.08531195395210196, + "grad_norm": 1.245345115661621, + "learning_rate": 9.920021146832256e-06, + "loss": 0.8594, + "step": 1660 + }, + { + "epoch": 0.08536334669544661, + "grad_norm": 1.22573721408844, + "learning_rate": 9.919872816692291e-06, + "loss": 0.8671, + "step": 1661 + }, + { + "epoch": 0.08541473943879124, + "grad_norm": 1.1051931381225586, + "learning_rate": 9.919724350242953e-06, + "loss": 0.7381, + "step": 1662 + }, + { + "epoch": 0.08546613218213588, + "grad_norm": 1.1745229959487915, + "learning_rate": 9.919575747488355e-06, + "loss": 0.8987, + "step": 1663 + }, + { + "epoch": 0.08551752492548052, + "grad_norm": 1.2365570068359375, + "learning_rate": 9.919427008432615e-06, + "loss": 0.9219, + "step": 1664 + }, + { + "epoch": 0.08556891766882516, + "grad_norm": 0.9980310201644897, + "learning_rate": 9.919278133079855e-06, + "loss": 0.7436, + "step": 1665 + }, + { + "epoch": 0.0856203104121698, + "grad_norm": 1.1453474760055542, + "learning_rate": 9.919129121434198e-06, + "loss": 0.7722, + "step": 1666 + }, + { + "epoch": 0.08567170315551444, + "grad_norm": 1.1426036357879639, + "learning_rate": 9.918979973499774e-06, + "loss": 0.8587, + "step": 1667 + }, + { + "epoch": 0.08572309589885908, + "grad_norm": 1.136110782623291, + "learning_rate": 9.918830689280714e-06, + "loss": 0.827, + "step": 1668 + }, + { + "epoch": 0.08577448864220372, + "grad_norm": 1.167225956916809, + "learning_rate": 9.918681268781154e-06, + "loss": 0.8335, + "step": 1669 + }, + { + "epoch": 0.08582588138554836, + "grad_norm": 1.209023356437683, + "learning_rate": 9.918531712005234e-06, + "loss": 0.8774, + "step": 1670 + }, + { + "epoch": 0.085877274128893, + "grad_norm": 1.1121746301651, + "learning_rate": 9.918382018957098e-06, + "loss": 0.8603, + "step": 1671 + }, + { + "epoch": 0.08592866687223764, + "grad_norm": 1.30025053024292, + "learning_rate": 9.918232189640892e-06, + "loss": 0.8565, + "step": 1672 + }, + { + "epoch": 0.08598005961558228, + "grad_norm": 1.1765111684799194, + "learning_rate": 9.91808222406077e-06, + "loss": 0.8444, + "step": 1673 + }, + { + "epoch": 0.08603145235892692, + "grad_norm": 1.1734223365783691, + "learning_rate": 9.917932122220886e-06, + "loss": 0.8015, + "step": 1674 + }, + { + "epoch": 0.08608284510227156, + "grad_norm": 1.1578689813613892, + "learning_rate": 9.917781884125397e-06, + "loss": 0.8256, + "step": 1675 + }, + { + "epoch": 0.0861342378456162, + "grad_norm": 0.8297823071479797, + "learning_rate": 9.917631509778468e-06, + "loss": 0.751, + "step": 1676 + }, + { + "epoch": 0.08618563058896084, + "grad_norm": 1.2077951431274414, + "learning_rate": 9.91748099918426e-06, + "loss": 0.8859, + "step": 1677 + }, + { + "epoch": 0.08623702333230548, + "grad_norm": 1.190809965133667, + "learning_rate": 9.91733035234695e-06, + "loss": 0.8647, + "step": 1678 + }, + { + "epoch": 0.08628841607565012, + "grad_norm": 1.2281314134597778, + "learning_rate": 9.917179569270708e-06, + "loss": 0.804, + "step": 1679 + }, + { + "epoch": 0.08633980881899475, + "grad_norm": 1.2048341035842896, + "learning_rate": 9.91702864995971e-06, + "loss": 0.9118, + "step": 1680 + }, + { + "epoch": 0.0863912015623394, + "grad_norm": 1.1584261655807495, + "learning_rate": 9.916877594418141e-06, + "loss": 0.8589, + "step": 1681 + }, + { + "epoch": 0.08644259430568403, + "grad_norm": 1.0403865575790405, + "learning_rate": 9.916726402650185e-06, + "loss": 0.77, + "step": 1682 + }, + { + "epoch": 0.08649398704902868, + "grad_norm": 1.1586493253707886, + "learning_rate": 9.916575074660031e-06, + "loss": 0.8347, + "step": 1683 + }, + { + "epoch": 0.08654537979237331, + "grad_norm": 1.1266425848007202, + "learning_rate": 9.916423610451871e-06, + "loss": 0.8409, + "step": 1684 + }, + { + "epoch": 0.08659677253571796, + "grad_norm": 1.1396063566207886, + "learning_rate": 9.9162720100299e-06, + "loss": 0.7928, + "step": 1685 + }, + { + "epoch": 0.08664816527906259, + "grad_norm": 0.7864115834236145, + "learning_rate": 9.916120273398321e-06, + "loss": 0.7434, + "step": 1686 + }, + { + "epoch": 0.08669955802240724, + "grad_norm": 1.137732982635498, + "learning_rate": 9.915968400561337e-06, + "loss": 0.8163, + "step": 1687 + }, + { + "epoch": 0.08675095076575187, + "grad_norm": 1.2246334552764893, + "learning_rate": 9.915816391523156e-06, + "loss": 0.8392, + "step": 1688 + }, + { + "epoch": 0.08680234350909652, + "grad_norm": 1.1159659624099731, + "learning_rate": 9.915664246287988e-06, + "loss": 0.8277, + "step": 1689 + }, + { + "epoch": 0.08685373625244115, + "grad_norm": 1.1441612243652344, + "learning_rate": 9.91551196486005e-06, + "loss": 0.8295, + "step": 1690 + }, + { + "epoch": 0.0869051289957858, + "grad_norm": 1.1027507781982422, + "learning_rate": 9.915359547243562e-06, + "loss": 0.8074, + "step": 1691 + }, + { + "epoch": 0.08695652173913043, + "grad_norm": 1.2078737020492554, + "learning_rate": 9.915206993442742e-06, + "loss": 0.8981, + "step": 1692 + }, + { + "epoch": 0.08700791448247508, + "grad_norm": 1.1267225742340088, + "learning_rate": 9.915054303461824e-06, + "loss": 0.8265, + "step": 1693 + }, + { + "epoch": 0.08705930722581971, + "grad_norm": 1.1323634386062622, + "learning_rate": 9.914901477305033e-06, + "loss": 0.8489, + "step": 1694 + }, + { + "epoch": 0.08711069996916436, + "grad_norm": 1.1748361587524414, + "learning_rate": 9.914748514976602e-06, + "loss": 0.8685, + "step": 1695 + }, + { + "epoch": 0.08716209271250899, + "grad_norm": 1.2105607986450195, + "learning_rate": 9.914595416480775e-06, + "loss": 0.8914, + "step": 1696 + }, + { + "epoch": 0.08721348545585364, + "grad_norm": 0.9802582263946533, + "learning_rate": 9.91444218182179e-06, + "loss": 0.7275, + "step": 1697 + }, + { + "epoch": 0.08726487819919827, + "grad_norm": 1.2489691972732544, + "learning_rate": 9.91428881100389e-06, + "loss": 0.8424, + "step": 1698 + }, + { + "epoch": 0.0873162709425429, + "grad_norm": 1.191060185432434, + "learning_rate": 9.91413530403133e-06, + "loss": 0.8359, + "step": 1699 + }, + { + "epoch": 0.08736766368588755, + "grad_norm": 0.8627701997756958, + "learning_rate": 9.91398166090836e-06, + "loss": 0.7629, + "step": 1700 + }, + { + "epoch": 0.08741905642923219, + "grad_norm": 1.1759506464004517, + "learning_rate": 9.913827881639238e-06, + "loss": 0.8113, + "step": 1701 + }, + { + "epoch": 0.08747044917257683, + "grad_norm": 1.2642565965652466, + "learning_rate": 9.91367396622822e-06, + "loss": 0.8866, + "step": 1702 + }, + { + "epoch": 0.08752184191592147, + "grad_norm": 1.1577627658843994, + "learning_rate": 9.913519914679576e-06, + "loss": 0.8489, + "step": 1703 + }, + { + "epoch": 0.08757323465926611, + "grad_norm": 1.1268728971481323, + "learning_rate": 9.913365726997572e-06, + "loss": 0.8065, + "step": 1704 + }, + { + "epoch": 0.08762462740261075, + "grad_norm": 1.2316921949386597, + "learning_rate": 9.91321140318648e-06, + "loss": 0.8748, + "step": 1705 + }, + { + "epoch": 0.0876760201459554, + "grad_norm": 0.920242428779602, + "learning_rate": 9.913056943250577e-06, + "loss": 0.7523, + "step": 1706 + }, + { + "epoch": 0.08772741288930003, + "grad_norm": 0.8819411993026733, + "learning_rate": 9.912902347194138e-06, + "loss": 0.7034, + "step": 1707 + }, + { + "epoch": 0.08777880563264467, + "grad_norm": 1.2137534618377686, + "learning_rate": 9.912747615021452e-06, + "loss": 0.8567, + "step": 1708 + }, + { + "epoch": 0.08783019837598931, + "grad_norm": 1.1915690898895264, + "learning_rate": 9.912592746736803e-06, + "loss": 0.819, + "step": 1709 + }, + { + "epoch": 0.08788159111933395, + "grad_norm": 1.2644426822662354, + "learning_rate": 9.91243774234448e-06, + "loss": 0.8375, + "step": 1710 + }, + { + "epoch": 0.08793298386267859, + "grad_norm": 1.1931955814361572, + "learning_rate": 9.912282601848782e-06, + "loss": 0.8204, + "step": 1711 + }, + { + "epoch": 0.08798437660602323, + "grad_norm": 0.8790128231048584, + "learning_rate": 9.912127325254003e-06, + "loss": 0.6827, + "step": 1712 + }, + { + "epoch": 0.08803576934936787, + "grad_norm": 1.1609959602355957, + "learning_rate": 9.911971912564449e-06, + "loss": 0.8488, + "step": 1713 + }, + { + "epoch": 0.08808716209271251, + "grad_norm": 0.8543535470962524, + "learning_rate": 9.911816363784421e-06, + "loss": 0.7388, + "step": 1714 + }, + { + "epoch": 0.08813855483605715, + "grad_norm": 1.1180012226104736, + "learning_rate": 9.911660678918233e-06, + "loss": 0.8656, + "step": 1715 + }, + { + "epoch": 0.0881899475794018, + "grad_norm": 1.1848636865615845, + "learning_rate": 9.911504857970198e-06, + "loss": 0.8759, + "step": 1716 + }, + { + "epoch": 0.08824134032274643, + "grad_norm": 1.1929327249526978, + "learning_rate": 9.91134890094463e-06, + "loss": 0.8569, + "step": 1717 + }, + { + "epoch": 0.08829273306609106, + "grad_norm": 1.4534296989440918, + "learning_rate": 9.911192807845853e-06, + "loss": 0.8385, + "step": 1718 + }, + { + "epoch": 0.08834412580943571, + "grad_norm": 1.1895525455474854, + "learning_rate": 9.91103657867819e-06, + "loss": 0.8231, + "step": 1719 + }, + { + "epoch": 0.08839551855278034, + "grad_norm": 1.6340380907058716, + "learning_rate": 9.910880213445971e-06, + "loss": 0.8758, + "step": 1720 + }, + { + "epoch": 0.08844691129612499, + "grad_norm": 1.146881341934204, + "learning_rate": 9.910723712153526e-06, + "loss": 0.8813, + "step": 1721 + }, + { + "epoch": 0.08849830403946962, + "grad_norm": 1.2568217515945435, + "learning_rate": 9.910567074805192e-06, + "loss": 0.8494, + "step": 1722 + }, + { + "epoch": 0.08854969678281427, + "grad_norm": 1.1822679042816162, + "learning_rate": 9.91041030140531e-06, + "loss": 0.8024, + "step": 1723 + }, + { + "epoch": 0.0886010895261589, + "grad_norm": 1.5603710412979126, + "learning_rate": 9.910253391958224e-06, + "loss": 0.7578, + "step": 1724 + }, + { + "epoch": 0.08865248226950355, + "grad_norm": 1.0265038013458252, + "learning_rate": 9.910096346468279e-06, + "loss": 0.7669, + "step": 1725 + }, + { + "epoch": 0.08870387501284818, + "grad_norm": 1.3067086935043335, + "learning_rate": 9.909939164939825e-06, + "loss": 0.8291, + "step": 1726 + }, + { + "epoch": 0.08875526775619283, + "grad_norm": 0.7920119166374207, + "learning_rate": 9.909781847377223e-06, + "loss": 0.7212, + "step": 1727 + }, + { + "epoch": 0.08880666049953746, + "grad_norm": 1.2579916715621948, + "learning_rate": 9.909624393784824e-06, + "loss": 0.8196, + "step": 1728 + }, + { + "epoch": 0.08885805324288211, + "grad_norm": 1.1902517080307007, + "learning_rate": 9.909466804166994e-06, + "loss": 0.8208, + "step": 1729 + }, + { + "epoch": 0.08890944598622674, + "grad_norm": 1.1936190128326416, + "learning_rate": 9.909309078528099e-06, + "loss": 0.883, + "step": 1730 + }, + { + "epoch": 0.08896083872957139, + "grad_norm": 1.1592642068862915, + "learning_rate": 9.90915121687251e-06, + "loss": 0.8637, + "step": 1731 + }, + { + "epoch": 0.08901223147291602, + "grad_norm": 1.1445428133010864, + "learning_rate": 9.9089932192046e-06, + "loss": 0.8371, + "step": 1732 + }, + { + "epoch": 0.08906362421626067, + "grad_norm": 1.2663488388061523, + "learning_rate": 9.908835085528745e-06, + "loss": 0.7851, + "step": 1733 + }, + { + "epoch": 0.0891150169596053, + "grad_norm": 1.2600806951522827, + "learning_rate": 9.90867681584933e-06, + "loss": 0.8628, + "step": 1734 + }, + { + "epoch": 0.08916640970294995, + "grad_norm": 1.2068052291870117, + "learning_rate": 9.908518410170734e-06, + "loss": 0.8098, + "step": 1735 + }, + { + "epoch": 0.08921780244629458, + "grad_norm": 1.3630139827728271, + "learning_rate": 9.908359868497351e-06, + "loss": 0.8433, + "step": 1736 + }, + { + "epoch": 0.08926919518963922, + "grad_norm": 1.229305386543274, + "learning_rate": 9.90820119083357e-06, + "loss": 0.8874, + "step": 1737 + }, + { + "epoch": 0.08932058793298386, + "grad_norm": 0.8338720202445984, + "learning_rate": 9.90804237718379e-06, + "loss": 0.7063, + "step": 1738 + }, + { + "epoch": 0.0893719806763285, + "grad_norm": 1.1197295188903809, + "learning_rate": 9.90788342755241e-06, + "loss": 0.8625, + "step": 1739 + }, + { + "epoch": 0.08942337341967314, + "grad_norm": 1.1867824792861938, + "learning_rate": 9.907724341943834e-06, + "loss": 0.9239, + "step": 1740 + }, + { + "epoch": 0.08947476616301778, + "grad_norm": 1.0972654819488525, + "learning_rate": 9.90756512036247e-06, + "loss": 0.827, + "step": 1741 + }, + { + "epoch": 0.08952615890636242, + "grad_norm": 1.1490797996520996, + "learning_rate": 9.907405762812727e-06, + "loss": 0.7919, + "step": 1742 + }, + { + "epoch": 0.08957755164970706, + "grad_norm": 1.1660102605819702, + "learning_rate": 9.907246269299024e-06, + "loss": 0.8564, + "step": 1743 + }, + { + "epoch": 0.0896289443930517, + "grad_norm": 1.116220235824585, + "learning_rate": 9.907086639825777e-06, + "loss": 0.782, + "step": 1744 + }, + { + "epoch": 0.08968033713639634, + "grad_norm": 1.1477996110916138, + "learning_rate": 9.906926874397408e-06, + "loss": 0.8486, + "step": 1745 + }, + { + "epoch": 0.08973172987974098, + "grad_norm": 1.1711969375610352, + "learning_rate": 9.906766973018348e-06, + "loss": 0.7998, + "step": 1746 + }, + { + "epoch": 0.08978312262308562, + "grad_norm": 1.1594072580337524, + "learning_rate": 9.906606935693023e-06, + "loss": 0.8742, + "step": 1747 + }, + { + "epoch": 0.08983451536643026, + "grad_norm": 1.085246205329895, + "learning_rate": 9.906446762425867e-06, + "loss": 0.8875, + "step": 1748 + }, + { + "epoch": 0.0898859081097749, + "grad_norm": 1.1919771432876587, + "learning_rate": 9.906286453221321e-06, + "loss": 0.8565, + "step": 1749 + }, + { + "epoch": 0.08993730085311955, + "grad_norm": 1.129128336906433, + "learning_rate": 9.906126008083823e-06, + "loss": 0.8062, + "step": 1750 + }, + { + "epoch": 0.08998869359646418, + "grad_norm": 1.1392971277236938, + "learning_rate": 9.90596542701782e-06, + "loss": 0.8189, + "step": 1751 + }, + { + "epoch": 0.09004008633980883, + "grad_norm": 1.1751765012741089, + "learning_rate": 9.90580471002776e-06, + "loss": 0.7489, + "step": 1752 + }, + { + "epoch": 0.09009147908315346, + "grad_norm": 1.247114896774292, + "learning_rate": 9.905643857118097e-06, + "loss": 0.8858, + "step": 1753 + }, + { + "epoch": 0.09014287182649809, + "grad_norm": 1.1875725984573364, + "learning_rate": 9.905482868293287e-06, + "loss": 0.7805, + "step": 1754 + }, + { + "epoch": 0.09019426456984274, + "grad_norm": 1.1310409307479858, + "learning_rate": 9.905321743557792e-06, + "loss": 0.8283, + "step": 1755 + }, + { + "epoch": 0.09024565731318737, + "grad_norm": 1.1902302503585815, + "learning_rate": 9.905160482916074e-06, + "loss": 0.7898, + "step": 1756 + }, + { + "epoch": 0.09029705005653202, + "grad_norm": 1.1485869884490967, + "learning_rate": 9.904999086372602e-06, + "loss": 0.7992, + "step": 1757 + }, + { + "epoch": 0.09034844279987665, + "grad_norm": 1.1292792558670044, + "learning_rate": 9.904837553931846e-06, + "loss": 0.7609, + "step": 1758 + }, + { + "epoch": 0.0903998355432213, + "grad_norm": 1.2959582805633545, + "learning_rate": 9.904675885598281e-06, + "loss": 0.7961, + "step": 1759 + }, + { + "epoch": 0.09045122828656593, + "grad_norm": 1.1334614753723145, + "learning_rate": 9.904514081376388e-06, + "loss": 0.8255, + "step": 1760 + }, + { + "epoch": 0.09050262102991058, + "grad_norm": 1.2229472398757935, + "learning_rate": 9.904352141270652e-06, + "loss": 0.8224, + "step": 1761 + }, + { + "epoch": 0.09055401377325521, + "grad_norm": 1.148348331451416, + "learning_rate": 9.904190065285554e-06, + "loss": 0.8531, + "step": 1762 + }, + { + "epoch": 0.09060540651659986, + "grad_norm": 1.174596905708313, + "learning_rate": 9.90402785342559e-06, + "loss": 0.8217, + "step": 1763 + }, + { + "epoch": 0.09065679925994449, + "grad_norm": 1.14903724193573, + "learning_rate": 9.903865505695252e-06, + "loss": 0.81, + "step": 1764 + }, + { + "epoch": 0.09070819200328914, + "grad_norm": 1.234180212020874, + "learning_rate": 9.903703022099037e-06, + "loss": 0.8435, + "step": 1765 + }, + { + "epoch": 0.09075958474663377, + "grad_norm": 1.1840254068374634, + "learning_rate": 9.903540402641449e-06, + "loss": 0.8851, + "step": 1766 + }, + { + "epoch": 0.09081097748997842, + "grad_norm": 1.1155592203140259, + "learning_rate": 9.903377647326991e-06, + "loss": 0.8184, + "step": 1767 + }, + { + "epoch": 0.09086237023332305, + "grad_norm": 1.1460785865783691, + "learning_rate": 9.903214756160173e-06, + "loss": 0.8425, + "step": 1768 + }, + { + "epoch": 0.0909137629766677, + "grad_norm": 1.1041440963745117, + "learning_rate": 9.903051729145508e-06, + "loss": 0.8766, + "step": 1769 + }, + { + "epoch": 0.09096515572001233, + "grad_norm": 1.1446219682693481, + "learning_rate": 9.902888566287516e-06, + "loss": 0.8132, + "step": 1770 + }, + { + "epoch": 0.09101654846335698, + "grad_norm": 1.140639305114746, + "learning_rate": 9.902725267590711e-06, + "loss": 0.8347, + "step": 1771 + }, + { + "epoch": 0.09106794120670161, + "grad_norm": 1.3522577285766602, + "learning_rate": 9.902561833059625e-06, + "loss": 0.9177, + "step": 1772 + }, + { + "epoch": 0.09111933395004625, + "grad_norm": 1.1369584798812866, + "learning_rate": 9.90239826269878e-06, + "loss": 0.8455, + "step": 1773 + }, + { + "epoch": 0.0911707266933909, + "grad_norm": 1.206945776939392, + "learning_rate": 9.902234556512711e-06, + "loss": 0.8595, + "step": 1774 + }, + { + "epoch": 0.09122211943673553, + "grad_norm": 1.1400874853134155, + "learning_rate": 9.902070714505951e-06, + "loss": 0.8575, + "step": 1775 + }, + { + "epoch": 0.09127351218008017, + "grad_norm": 1.124389410018921, + "learning_rate": 9.901906736683044e-06, + "loss": 0.8419, + "step": 1776 + }, + { + "epoch": 0.09132490492342481, + "grad_norm": 1.1665382385253906, + "learning_rate": 9.901742623048529e-06, + "loss": 0.8846, + "step": 1777 + }, + { + "epoch": 0.09137629766676945, + "grad_norm": 0.8407975435256958, + "learning_rate": 9.901578373606953e-06, + "loss": 0.7765, + "step": 1778 + }, + { + "epoch": 0.09142769041011409, + "grad_norm": 1.2934582233428955, + "learning_rate": 9.90141398836287e-06, + "loss": 0.8469, + "step": 1779 + }, + { + "epoch": 0.09147908315345873, + "grad_norm": 1.211266279220581, + "learning_rate": 9.901249467320832e-06, + "loss": 0.8657, + "step": 1780 + }, + { + "epoch": 0.09153047589680337, + "grad_norm": 1.145798921585083, + "learning_rate": 9.901084810485397e-06, + "loss": 0.8086, + "step": 1781 + }, + { + "epoch": 0.09158186864014801, + "grad_norm": 1.1913820505142212, + "learning_rate": 9.900920017861126e-06, + "loss": 0.8779, + "step": 1782 + }, + { + "epoch": 0.09163326138349265, + "grad_norm": 1.1218396425247192, + "learning_rate": 9.900755089452589e-06, + "loss": 0.8289, + "step": 1783 + }, + { + "epoch": 0.0916846541268373, + "grad_norm": 1.14694082736969, + "learning_rate": 9.90059002526435e-06, + "loss": 0.8032, + "step": 1784 + }, + { + "epoch": 0.09173604687018193, + "grad_norm": 1.1361756324768066, + "learning_rate": 9.900424825300987e-06, + "loss": 0.8574, + "step": 1785 + }, + { + "epoch": 0.09178743961352658, + "grad_norm": 1.3079322576522827, + "learning_rate": 9.900259489567075e-06, + "loss": 0.882, + "step": 1786 + }, + { + "epoch": 0.09183883235687121, + "grad_norm": 1.211051344871521, + "learning_rate": 9.900094018067193e-06, + "loss": 0.8617, + "step": 1787 + }, + { + "epoch": 0.09189022510021586, + "grad_norm": 1.2048643827438354, + "learning_rate": 9.899928410805928e-06, + "loss": 0.8829, + "step": 1788 + }, + { + "epoch": 0.09194161784356049, + "grad_norm": 1.3317652940750122, + "learning_rate": 9.899762667787868e-06, + "loss": 0.8882, + "step": 1789 + }, + { + "epoch": 0.09199301058690514, + "grad_norm": 0.8494691252708435, + "learning_rate": 9.899596789017604e-06, + "loss": 0.6927, + "step": 1790 + }, + { + "epoch": 0.09204440333024977, + "grad_norm": 1.14299476146698, + "learning_rate": 9.899430774499731e-06, + "loss": 0.832, + "step": 1791 + }, + { + "epoch": 0.0920957960735944, + "grad_norm": 1.1811003684997559, + "learning_rate": 9.899264624238854e-06, + "loss": 0.7885, + "step": 1792 + }, + { + "epoch": 0.09214718881693905, + "grad_norm": 1.1288859844207764, + "learning_rate": 9.89909833823957e-06, + "loss": 0.8618, + "step": 1793 + }, + { + "epoch": 0.09219858156028368, + "grad_norm": 1.1633673906326294, + "learning_rate": 9.898931916506487e-06, + "loss": 0.8386, + "step": 1794 + }, + { + "epoch": 0.09224997430362833, + "grad_norm": 1.2165002822875977, + "learning_rate": 9.89876535904422e-06, + "loss": 0.8657, + "step": 1795 + }, + { + "epoch": 0.09230136704697296, + "grad_norm": 0.8270774483680725, + "learning_rate": 9.89859866585738e-06, + "loss": 0.75, + "step": 1796 + }, + { + "epoch": 0.09235275979031761, + "grad_norm": 1.130789875984192, + "learning_rate": 9.898431836950585e-06, + "loss": 0.8534, + "step": 1797 + }, + { + "epoch": 0.09240415253366224, + "grad_norm": 1.1629869937896729, + "learning_rate": 9.898264872328461e-06, + "loss": 0.8008, + "step": 1798 + }, + { + "epoch": 0.09245554527700689, + "grad_norm": 1.1575783491134644, + "learning_rate": 9.898097771995628e-06, + "loss": 0.7962, + "step": 1799 + }, + { + "epoch": 0.09250693802035152, + "grad_norm": 1.1942816972732544, + "learning_rate": 9.897930535956722e-06, + "loss": 0.7994, + "step": 1800 + }, + { + "epoch": 0.09255833076369617, + "grad_norm": 1.1875114440917969, + "learning_rate": 9.897763164216372e-06, + "loss": 0.7908, + "step": 1801 + }, + { + "epoch": 0.0926097235070408, + "grad_norm": 1.0235319137573242, + "learning_rate": 9.897595656779215e-06, + "loss": 0.7046, + "step": 1802 + }, + { + "epoch": 0.09266111625038545, + "grad_norm": 1.1574156284332275, + "learning_rate": 9.897428013649896e-06, + "loss": 0.8396, + "step": 1803 + }, + { + "epoch": 0.09271250899373008, + "grad_norm": 0.8634977340698242, + "learning_rate": 9.897260234833057e-06, + "loss": 0.6808, + "step": 1804 + }, + { + "epoch": 0.09276390173707473, + "grad_norm": 1.2188562154769897, + "learning_rate": 9.897092320333346e-06, + "loss": 0.8992, + "step": 1805 + }, + { + "epoch": 0.09281529448041936, + "grad_norm": 1.130176067352295, + "learning_rate": 9.896924270155416e-06, + "loss": 0.842, + "step": 1806 + }, + { + "epoch": 0.09286668722376401, + "grad_norm": 1.2023639678955078, + "learning_rate": 9.896756084303922e-06, + "loss": 0.8234, + "step": 1807 + }, + { + "epoch": 0.09291807996710864, + "grad_norm": 1.1483135223388672, + "learning_rate": 9.896587762783527e-06, + "loss": 0.7868, + "step": 1808 + }, + { + "epoch": 0.09296947271045329, + "grad_norm": 1.1314959526062012, + "learning_rate": 9.89641930559889e-06, + "loss": 0.8395, + "step": 1809 + }, + { + "epoch": 0.09302086545379792, + "grad_norm": 1.1751201152801514, + "learning_rate": 9.896250712754681e-06, + "loss": 0.8244, + "step": 1810 + }, + { + "epoch": 0.09307225819714256, + "grad_norm": 1.2030655145645142, + "learning_rate": 9.896081984255571e-06, + "loss": 0.832, + "step": 1811 + }, + { + "epoch": 0.0931236509404872, + "grad_norm": 1.1651982069015503, + "learning_rate": 9.895913120106233e-06, + "loss": 0.7897, + "step": 1812 + }, + { + "epoch": 0.09317504368383184, + "grad_norm": 0.9788298606872559, + "learning_rate": 9.895744120311348e-06, + "loss": 0.7899, + "step": 1813 + }, + { + "epoch": 0.09322643642717648, + "grad_norm": 0.8545430302619934, + "learning_rate": 9.895574984875595e-06, + "loss": 0.7375, + "step": 1814 + }, + { + "epoch": 0.09327782917052112, + "grad_norm": 1.247603416442871, + "learning_rate": 9.895405713803666e-06, + "loss": 0.9005, + "step": 1815 + }, + { + "epoch": 0.09332922191386576, + "grad_norm": 1.1607825756072998, + "learning_rate": 9.895236307100242e-06, + "loss": 0.8445, + "step": 1816 + }, + { + "epoch": 0.0933806146572104, + "grad_norm": 1.216247320175171, + "learning_rate": 9.895066764770025e-06, + "loss": 0.8295, + "step": 1817 + }, + { + "epoch": 0.09343200740055504, + "grad_norm": 1.1355317831039429, + "learning_rate": 9.894897086817707e-06, + "loss": 0.816, + "step": 1818 + }, + { + "epoch": 0.09348340014389968, + "grad_norm": 1.2093919515609741, + "learning_rate": 9.89472727324799e-06, + "loss": 0.824, + "step": 1819 + }, + { + "epoch": 0.09353479288724433, + "grad_norm": 1.2408522367477417, + "learning_rate": 9.89455732406558e-06, + "loss": 0.8274, + "step": 1820 + }, + { + "epoch": 0.09358618563058896, + "grad_norm": 1.2961534261703491, + "learning_rate": 9.894387239275187e-06, + "loss": 0.7489, + "step": 1821 + }, + { + "epoch": 0.0936375783739336, + "grad_norm": 1.069570779800415, + "learning_rate": 9.89421701888152e-06, + "loss": 0.8274, + "step": 1822 + }, + { + "epoch": 0.09368897111727824, + "grad_norm": 1.0915688276290894, + "learning_rate": 9.894046662889297e-06, + "loss": 0.8225, + "step": 1823 + }, + { + "epoch": 0.09374036386062289, + "grad_norm": 1.4259387254714966, + "learning_rate": 9.893876171303238e-06, + "loss": 0.8274, + "step": 1824 + }, + { + "epoch": 0.09379175660396752, + "grad_norm": 1.2512353658676147, + "learning_rate": 9.893705544128065e-06, + "loss": 0.8534, + "step": 1825 + }, + { + "epoch": 0.09384314934731217, + "grad_norm": 1.18647038936615, + "learning_rate": 9.893534781368508e-06, + "loss": 0.8277, + "step": 1826 + }, + { + "epoch": 0.0938945420906568, + "grad_norm": 1.1751434803009033, + "learning_rate": 9.893363883029294e-06, + "loss": 0.8766, + "step": 1827 + }, + { + "epoch": 0.09394593483400143, + "grad_norm": 1.0925143957138062, + "learning_rate": 9.893192849115163e-06, + "loss": 0.7637, + "step": 1828 + }, + { + "epoch": 0.09399732757734608, + "grad_norm": 1.1270004510879517, + "learning_rate": 9.89302167963085e-06, + "loss": 0.8157, + "step": 1829 + }, + { + "epoch": 0.09404872032069071, + "grad_norm": 1.2134119272232056, + "learning_rate": 9.8928503745811e-06, + "loss": 0.7248, + "step": 1830 + }, + { + "epoch": 0.09410011306403536, + "grad_norm": 1.255647897720337, + "learning_rate": 9.892678933970656e-06, + "loss": 0.874, + "step": 1831 + }, + { + "epoch": 0.09415150580737999, + "grad_norm": 1.166581153869629, + "learning_rate": 9.89250735780427e-06, + "loss": 0.8473, + "step": 1832 + }, + { + "epoch": 0.09420289855072464, + "grad_norm": 0.7590866684913635, + "learning_rate": 9.892335646086697e-06, + "loss": 0.6881, + "step": 1833 + }, + { + "epoch": 0.09425429129406927, + "grad_norm": 1.1782327890396118, + "learning_rate": 9.892163798822692e-06, + "loss": 0.8666, + "step": 1834 + }, + { + "epoch": 0.09430568403741392, + "grad_norm": 1.4020658731460571, + "learning_rate": 9.891991816017015e-06, + "loss": 0.7937, + "step": 1835 + }, + { + "epoch": 0.09435707678075855, + "grad_norm": 1.110379695892334, + "learning_rate": 9.891819697674434e-06, + "loss": 0.7729, + "step": 1836 + }, + { + "epoch": 0.0944084695241032, + "grad_norm": 0.9698451161384583, + "learning_rate": 9.891647443799717e-06, + "loss": 0.7335, + "step": 1837 + }, + { + "epoch": 0.09445986226744783, + "grad_norm": 1.130768060684204, + "learning_rate": 9.891475054397635e-06, + "loss": 0.8299, + "step": 1838 + }, + { + "epoch": 0.09451125501079248, + "grad_norm": 1.1199626922607422, + "learning_rate": 9.891302529472965e-06, + "loss": 0.8047, + "step": 1839 + }, + { + "epoch": 0.09456264775413711, + "grad_norm": 1.087475299835205, + "learning_rate": 9.89112986903049e-06, + "loss": 0.8214, + "step": 1840 + }, + { + "epoch": 0.09461404049748176, + "grad_norm": 1.3037495613098145, + "learning_rate": 9.890957073074989e-06, + "loss": 0.8659, + "step": 1841 + }, + { + "epoch": 0.0946654332408264, + "grad_norm": 1.2137969732284546, + "learning_rate": 9.890784141611249e-06, + "loss": 0.8113, + "step": 1842 + }, + { + "epoch": 0.09471682598417104, + "grad_norm": 1.1723822355270386, + "learning_rate": 9.890611074644067e-06, + "loss": 0.7764, + "step": 1843 + }, + { + "epoch": 0.09476821872751567, + "grad_norm": 1.0850350856781006, + "learning_rate": 9.890437872178232e-06, + "loss": 0.7877, + "step": 1844 + }, + { + "epoch": 0.09481961147086032, + "grad_norm": 1.0865315198898315, + "learning_rate": 9.890264534218546e-06, + "loss": 0.7854, + "step": 1845 + }, + { + "epoch": 0.09487100421420495, + "grad_norm": 0.8876744508743286, + "learning_rate": 9.890091060769812e-06, + "loss": 0.7321, + "step": 1846 + }, + { + "epoch": 0.09492239695754959, + "grad_norm": 1.307947039604187, + "learning_rate": 9.889917451836834e-06, + "loss": 0.8251, + "step": 1847 + }, + { + "epoch": 0.09497378970089423, + "grad_norm": 0.837812066078186, + "learning_rate": 9.889743707424422e-06, + "loss": 0.7255, + "step": 1848 + }, + { + "epoch": 0.09502518244423887, + "grad_norm": 0.83307945728302, + "learning_rate": 9.889569827537392e-06, + "loss": 0.7435, + "step": 1849 + }, + { + "epoch": 0.09507657518758351, + "grad_norm": 0.8142296075820923, + "learning_rate": 9.88939581218056e-06, + "loss": 0.7143, + "step": 1850 + }, + { + "epoch": 0.09512796793092815, + "grad_norm": 1.2148126363754272, + "learning_rate": 9.889221661358745e-06, + "loss": 0.8502, + "step": 1851 + }, + { + "epoch": 0.0951793606742728, + "grad_norm": 1.2017245292663574, + "learning_rate": 9.889047375076777e-06, + "loss": 0.8508, + "step": 1852 + }, + { + "epoch": 0.09523075341761743, + "grad_norm": 1.1075420379638672, + "learning_rate": 9.888872953339481e-06, + "loss": 0.7604, + "step": 1853 + }, + { + "epoch": 0.09528214616096208, + "grad_norm": 1.1481331586837769, + "learning_rate": 9.888698396151692e-06, + "loss": 0.8114, + "step": 1854 + }, + { + "epoch": 0.09533353890430671, + "grad_norm": 1.1724259853363037, + "learning_rate": 9.888523703518244e-06, + "loss": 0.8095, + "step": 1855 + }, + { + "epoch": 0.09538493164765136, + "grad_norm": 1.4335479736328125, + "learning_rate": 9.888348875443978e-06, + "loss": 0.7941, + "step": 1856 + }, + { + "epoch": 0.09543632439099599, + "grad_norm": 1.1842377185821533, + "learning_rate": 9.888173911933739e-06, + "loss": 0.9032, + "step": 1857 + }, + { + "epoch": 0.09548771713434064, + "grad_norm": 1.1997178792953491, + "learning_rate": 9.88799881299237e-06, + "loss": 0.8193, + "step": 1858 + }, + { + "epoch": 0.09553910987768527, + "grad_norm": 1.2129074335098267, + "learning_rate": 9.887823578624729e-06, + "loss": 0.8391, + "step": 1859 + }, + { + "epoch": 0.09559050262102992, + "grad_norm": 1.2300792932510376, + "learning_rate": 9.887648208835664e-06, + "loss": 0.8749, + "step": 1860 + }, + { + "epoch": 0.09564189536437455, + "grad_norm": 1.2000662088394165, + "learning_rate": 9.887472703630039e-06, + "loss": 0.8414, + "step": 1861 + }, + { + "epoch": 0.0956932881077192, + "grad_norm": 1.178249716758728, + "learning_rate": 9.887297063012715e-06, + "loss": 0.894, + "step": 1862 + }, + { + "epoch": 0.09574468085106383, + "grad_norm": 1.5113506317138672, + "learning_rate": 9.887121286988559e-06, + "loss": 0.8001, + "step": 1863 + }, + { + "epoch": 0.09579607359440848, + "grad_norm": 1.1608010530471802, + "learning_rate": 9.886945375562438e-06, + "loss": 0.817, + "step": 1864 + }, + { + "epoch": 0.09584746633775311, + "grad_norm": 1.1100857257843018, + "learning_rate": 9.88676932873923e-06, + "loss": 0.8306, + "step": 1865 + }, + { + "epoch": 0.09589885908109774, + "grad_norm": 1.210470199584961, + "learning_rate": 9.886593146523808e-06, + "loss": 0.8615, + "step": 1866 + }, + { + "epoch": 0.09595025182444239, + "grad_norm": 1.1187340021133423, + "learning_rate": 9.886416828921056e-06, + "loss": 0.7577, + "step": 1867 + }, + { + "epoch": 0.09600164456778702, + "grad_norm": 1.1965348720550537, + "learning_rate": 9.886240375935861e-06, + "loss": 0.8123, + "step": 1868 + }, + { + "epoch": 0.09605303731113167, + "grad_norm": 1.2064812183380127, + "learning_rate": 9.886063787573109e-06, + "loss": 0.8437, + "step": 1869 + }, + { + "epoch": 0.0961044300544763, + "grad_norm": 1.1664938926696777, + "learning_rate": 9.885887063837691e-06, + "loss": 0.8504, + "step": 1870 + }, + { + "epoch": 0.09615582279782095, + "grad_norm": 1.1598814725875854, + "learning_rate": 9.885710204734507e-06, + "loss": 0.8659, + "step": 1871 + }, + { + "epoch": 0.09620721554116558, + "grad_norm": 1.1873193979263306, + "learning_rate": 9.885533210268456e-06, + "loss": 0.8842, + "step": 1872 + }, + { + "epoch": 0.09625860828451023, + "grad_norm": 1.0674279928207397, + "learning_rate": 9.88535608044444e-06, + "loss": 0.8393, + "step": 1873 + }, + { + "epoch": 0.09631000102785486, + "grad_norm": 1.0970643758773804, + "learning_rate": 9.885178815267367e-06, + "loss": 0.84, + "step": 1874 + }, + { + "epoch": 0.09636139377119951, + "grad_norm": 1.1926662921905518, + "learning_rate": 9.88500141474215e-06, + "loss": 0.8434, + "step": 1875 + }, + { + "epoch": 0.09641278651454414, + "grad_norm": 1.189504623413086, + "learning_rate": 9.884823878873702e-06, + "loss": 0.8493, + "step": 1876 + }, + { + "epoch": 0.09646417925788879, + "grad_norm": 1.3385924100875854, + "learning_rate": 9.884646207666943e-06, + "loss": 0.8196, + "step": 1877 + }, + { + "epoch": 0.09651557200123342, + "grad_norm": 1.108610987663269, + "learning_rate": 9.884468401126797e-06, + "loss": 0.8258, + "step": 1878 + }, + { + "epoch": 0.09656696474457807, + "grad_norm": 1.1384812593460083, + "learning_rate": 9.884290459258188e-06, + "loss": 0.8437, + "step": 1879 + }, + { + "epoch": 0.0966183574879227, + "grad_norm": 1.2351970672607422, + "learning_rate": 9.884112382066048e-06, + "loss": 0.8752, + "step": 1880 + }, + { + "epoch": 0.09666975023126735, + "grad_norm": 1.0431514978408813, + "learning_rate": 9.883934169555305e-06, + "loss": 0.8127, + "step": 1881 + }, + { + "epoch": 0.09672114297461198, + "grad_norm": 1.2366681098937988, + "learning_rate": 9.883755821730905e-06, + "loss": 0.8233, + "step": 1882 + }, + { + "epoch": 0.09677253571795663, + "grad_norm": 1.3336654901504517, + "learning_rate": 9.883577338597784e-06, + "loss": 0.7904, + "step": 1883 + }, + { + "epoch": 0.09682392846130126, + "grad_norm": 1.1166678667068481, + "learning_rate": 9.883398720160887e-06, + "loss": 0.7452, + "step": 1884 + }, + { + "epoch": 0.0968753212046459, + "grad_norm": 1.1957266330718994, + "learning_rate": 9.883219966425164e-06, + "loss": 0.8569, + "step": 1885 + }, + { + "epoch": 0.09692671394799054, + "grad_norm": 1.2624551057815552, + "learning_rate": 9.88304107739557e-06, + "loss": 0.8346, + "step": 1886 + }, + { + "epoch": 0.09697810669133518, + "grad_norm": 1.1623643636703491, + "learning_rate": 9.882862053077057e-06, + "loss": 0.8348, + "step": 1887 + }, + { + "epoch": 0.09702949943467983, + "grad_norm": 1.1269875764846802, + "learning_rate": 9.882682893474588e-06, + "loss": 0.8452, + "step": 1888 + }, + { + "epoch": 0.09708089217802446, + "grad_norm": 1.1543737649917603, + "learning_rate": 9.882503598593124e-06, + "loss": 0.8776, + "step": 1889 + }, + { + "epoch": 0.0971322849213691, + "grad_norm": 0.9897733926773071, + "learning_rate": 9.882324168437635e-06, + "loss": 0.7387, + "step": 1890 + }, + { + "epoch": 0.09718367766471374, + "grad_norm": 0.9477784037590027, + "learning_rate": 9.882144603013093e-06, + "loss": 0.7292, + "step": 1891 + }, + { + "epoch": 0.09723507040805839, + "grad_norm": 1.1817985773086548, + "learning_rate": 9.88196490232447e-06, + "loss": 0.8236, + "step": 1892 + }, + { + "epoch": 0.09728646315140302, + "grad_norm": 1.196481466293335, + "learning_rate": 9.881785066376747e-06, + "loss": 0.8813, + "step": 1893 + }, + { + "epoch": 0.09733785589474767, + "grad_norm": 0.7876906394958496, + "learning_rate": 9.881605095174905e-06, + "loss": 0.7382, + "step": 1894 + }, + { + "epoch": 0.0973892486380923, + "grad_norm": 1.1125801801681519, + "learning_rate": 9.881424988723931e-06, + "loss": 0.755, + "step": 1895 + }, + { + "epoch": 0.09744064138143695, + "grad_norm": 0.7514511346817017, + "learning_rate": 9.881244747028815e-06, + "loss": 0.6809, + "step": 1896 + }, + { + "epoch": 0.09749203412478158, + "grad_norm": 1.162706971168518, + "learning_rate": 9.881064370094552e-06, + "loss": 0.7938, + "step": 1897 + }, + { + "epoch": 0.09754342686812623, + "grad_norm": 1.2461947202682495, + "learning_rate": 9.880883857926137e-06, + "loss": 0.7983, + "step": 1898 + }, + { + "epoch": 0.09759481961147086, + "grad_norm": 1.1399445533752441, + "learning_rate": 9.880703210528572e-06, + "loss": 0.8359, + "step": 1899 + }, + { + "epoch": 0.0976462123548155, + "grad_norm": 1.1532084941864014, + "learning_rate": 9.880522427906864e-06, + "loss": 0.9289, + "step": 1900 + }, + { + "epoch": 0.09769760509816014, + "grad_norm": 0.8654432892799377, + "learning_rate": 9.88034151006602e-06, + "loss": 0.7471, + "step": 1901 + }, + { + "epoch": 0.09774899784150477, + "grad_norm": 1.268162727355957, + "learning_rate": 9.880160457011053e-06, + "loss": 0.8543, + "step": 1902 + }, + { + "epoch": 0.09780039058484942, + "grad_norm": 1.1969913244247437, + "learning_rate": 9.879979268746977e-06, + "loss": 0.8709, + "step": 1903 + }, + { + "epoch": 0.09785178332819405, + "grad_norm": 1.166963815689087, + "learning_rate": 9.879797945278816e-06, + "loss": 0.7749, + "step": 1904 + }, + { + "epoch": 0.0979031760715387, + "grad_norm": 1.1033501625061035, + "learning_rate": 9.87961648661159e-06, + "loss": 0.836, + "step": 1905 + }, + { + "epoch": 0.09795456881488333, + "grad_norm": 0.8506897687911987, + "learning_rate": 9.87943489275033e-06, + "loss": 0.7521, + "step": 1906 + }, + { + "epoch": 0.09800596155822798, + "grad_norm": 1.1612355709075928, + "learning_rate": 9.879253163700064e-06, + "loss": 0.919, + "step": 1907 + }, + { + "epoch": 0.09805735430157261, + "grad_norm": 1.1377818584442139, + "learning_rate": 9.87907129946583e-06, + "loss": 0.8544, + "step": 1908 + }, + { + "epoch": 0.09810874704491726, + "grad_norm": 1.1257753372192383, + "learning_rate": 9.878889300052663e-06, + "loss": 0.7727, + "step": 1909 + }, + { + "epoch": 0.0981601397882619, + "grad_norm": 1.11044442653656, + "learning_rate": 9.87870716546561e-06, + "loss": 0.7919, + "step": 1910 + }, + { + "epoch": 0.09821153253160654, + "grad_norm": 1.1394280195236206, + "learning_rate": 9.878524895709711e-06, + "loss": 0.7915, + "step": 1911 + }, + { + "epoch": 0.09826292527495117, + "grad_norm": 1.3458809852600098, + "learning_rate": 9.878342490790022e-06, + "loss": 0.904, + "step": 1912 + }, + { + "epoch": 0.09831431801829582, + "grad_norm": 1.203850269317627, + "learning_rate": 9.878159950711594e-06, + "loss": 0.8187, + "step": 1913 + }, + { + "epoch": 0.09836571076164045, + "grad_norm": 1.1976758241653442, + "learning_rate": 9.877977275479485e-06, + "loss": 0.8431, + "step": 1914 + }, + { + "epoch": 0.0984171035049851, + "grad_norm": 1.2261943817138672, + "learning_rate": 9.877794465098755e-06, + "loss": 0.8407, + "step": 1915 + }, + { + "epoch": 0.09846849624832973, + "grad_norm": 1.1451129913330078, + "learning_rate": 9.87761151957447e-06, + "loss": 0.8086, + "step": 1916 + }, + { + "epoch": 0.09851988899167438, + "grad_norm": 1.1906285285949707, + "learning_rate": 9.877428438911699e-06, + "loss": 0.8376, + "step": 1917 + }, + { + "epoch": 0.09857128173501901, + "grad_norm": 1.135897159576416, + "learning_rate": 9.877245223115514e-06, + "loss": 0.8919, + "step": 1918 + }, + { + "epoch": 0.09862267447836366, + "grad_norm": 1.0655922889709473, + "learning_rate": 9.87706187219099e-06, + "loss": 0.7791, + "step": 1919 + }, + { + "epoch": 0.0986740672217083, + "grad_norm": 1.0949722528457642, + "learning_rate": 9.87687838614321e-06, + "loss": 0.8655, + "step": 1920 + }, + { + "epoch": 0.09872545996505293, + "grad_norm": 1.1466466188430786, + "learning_rate": 9.876694764977256e-06, + "loss": 0.7745, + "step": 1921 + }, + { + "epoch": 0.09877685270839758, + "grad_norm": 1.4071240425109863, + "learning_rate": 9.876511008698211e-06, + "loss": 0.8552, + "step": 1922 + }, + { + "epoch": 0.09882824545174221, + "grad_norm": 1.2041629552841187, + "learning_rate": 9.876327117311173e-06, + "loss": 0.7922, + "step": 1923 + }, + { + "epoch": 0.09887963819508686, + "grad_norm": 1.1415711641311646, + "learning_rate": 9.876143090821234e-06, + "loss": 0.7956, + "step": 1924 + }, + { + "epoch": 0.09893103093843149, + "grad_norm": 1.1186227798461914, + "learning_rate": 9.875958929233492e-06, + "loss": 0.8175, + "step": 1925 + }, + { + "epoch": 0.09898242368177614, + "grad_norm": 1.3107589483261108, + "learning_rate": 9.87577463255305e-06, + "loss": 0.831, + "step": 1926 + }, + { + "epoch": 0.09903381642512077, + "grad_norm": 0.7583712339401245, + "learning_rate": 9.875590200785015e-06, + "loss": 0.7013, + "step": 1927 + }, + { + "epoch": 0.09908520916846542, + "grad_norm": 0.8107669353485107, + "learning_rate": 9.875405633934493e-06, + "loss": 0.7445, + "step": 1928 + }, + { + "epoch": 0.09913660191181005, + "grad_norm": 1.2696977853775024, + "learning_rate": 9.875220932006604e-06, + "loss": 0.8216, + "step": 1929 + }, + { + "epoch": 0.0991879946551547, + "grad_norm": 1.1522916555404663, + "learning_rate": 9.87503609500646e-06, + "loss": 0.9029, + "step": 1930 + }, + { + "epoch": 0.09923938739849933, + "grad_norm": 1.2198549509048462, + "learning_rate": 9.874851122939184e-06, + "loss": 0.8317, + "step": 1931 + }, + { + "epoch": 0.09929078014184398, + "grad_norm": 1.1317845582962036, + "learning_rate": 9.874666015809901e-06, + "loss": 0.8284, + "step": 1932 + }, + { + "epoch": 0.09934217288518861, + "grad_norm": 1.1639400720596313, + "learning_rate": 9.87448077362374e-06, + "loss": 0.8315, + "step": 1933 + }, + { + "epoch": 0.09939356562853326, + "grad_norm": 1.1315388679504395, + "learning_rate": 9.874295396385831e-06, + "loss": 0.8096, + "step": 1934 + }, + { + "epoch": 0.09944495837187789, + "grad_norm": 1.3996027708053589, + "learning_rate": 9.874109884101314e-06, + "loss": 0.7659, + "step": 1935 + }, + { + "epoch": 0.09949635111522254, + "grad_norm": 1.1562831401824951, + "learning_rate": 9.873924236775324e-06, + "loss": 0.8235, + "step": 1936 + }, + { + "epoch": 0.09954774385856717, + "grad_norm": 1.2070807218551636, + "learning_rate": 9.873738454413007e-06, + "loss": 0.7747, + "step": 1937 + }, + { + "epoch": 0.09959913660191182, + "grad_norm": 1.109803318977356, + "learning_rate": 9.873552537019512e-06, + "loss": 0.8314, + "step": 1938 + }, + { + "epoch": 0.09965052934525645, + "grad_norm": 0.9041234254837036, + "learning_rate": 9.873366484599987e-06, + "loss": 0.7461, + "step": 1939 + }, + { + "epoch": 0.09970192208860108, + "grad_norm": 1.2265452146530151, + "learning_rate": 9.873180297159588e-06, + "loss": 0.8244, + "step": 1940 + }, + { + "epoch": 0.09975331483194573, + "grad_norm": 0.9118494987487793, + "learning_rate": 9.872993974703473e-06, + "loss": 0.7349, + "step": 1941 + }, + { + "epoch": 0.09980470757529036, + "grad_norm": 1.1241427659988403, + "learning_rate": 9.872807517236804e-06, + "loss": 0.8263, + "step": 1942 + }, + { + "epoch": 0.09985610031863501, + "grad_norm": 1.2760899066925049, + "learning_rate": 9.87262092476475e-06, + "loss": 0.7973, + "step": 1943 + }, + { + "epoch": 0.09990749306197964, + "grad_norm": 1.1521601676940918, + "learning_rate": 9.872434197292476e-06, + "loss": 0.7692, + "step": 1944 + }, + { + "epoch": 0.09995888580532429, + "grad_norm": 1.2208954095840454, + "learning_rate": 9.872247334825158e-06, + "loss": 0.7998, + "step": 1945 + }, + { + "epoch": 0.10001027854866892, + "grad_norm": 1.2851568460464478, + "learning_rate": 9.872060337367975e-06, + "loss": 0.8112, + "step": 1946 + }, + { + "epoch": 0.10006167129201357, + "grad_norm": 1.1856228113174438, + "learning_rate": 9.871873204926104e-06, + "loss": 0.7819, + "step": 1947 + }, + { + "epoch": 0.1001130640353582, + "grad_norm": 1.1499347686767578, + "learning_rate": 9.87168593750473e-06, + "loss": 0.84, + "step": 1948 + }, + { + "epoch": 0.10016445677870285, + "grad_norm": 1.152206540107727, + "learning_rate": 9.871498535109046e-06, + "loss": 0.8534, + "step": 1949 + }, + { + "epoch": 0.10021584952204748, + "grad_norm": 1.1630617380142212, + "learning_rate": 9.871310997744241e-06, + "loss": 0.8407, + "step": 1950 + }, + { + "epoch": 0.10026724226539213, + "grad_norm": 1.1810673475265503, + "learning_rate": 9.871123325415509e-06, + "loss": 0.8423, + "step": 1951 + }, + { + "epoch": 0.10031863500873676, + "grad_norm": 1.157597303390503, + "learning_rate": 9.870935518128053e-06, + "loss": 0.8525, + "step": 1952 + }, + { + "epoch": 0.10037002775208141, + "grad_norm": 1.0883376598358154, + "learning_rate": 9.870747575887074e-06, + "loss": 0.8238, + "step": 1953 + }, + { + "epoch": 0.10042142049542604, + "grad_norm": 0.8618469834327698, + "learning_rate": 9.870559498697781e-06, + "loss": 0.7389, + "step": 1954 + }, + { + "epoch": 0.10047281323877069, + "grad_norm": 1.4844752550125122, + "learning_rate": 9.870371286565383e-06, + "loss": 0.7735, + "step": 1955 + }, + { + "epoch": 0.10052420598211532, + "grad_norm": 0.7890344262123108, + "learning_rate": 9.870182939495096e-06, + "loss": 0.7169, + "step": 1956 + }, + { + "epoch": 0.10057559872545996, + "grad_norm": 1.1457399129867554, + "learning_rate": 9.869994457492138e-06, + "loss": 0.8318, + "step": 1957 + }, + { + "epoch": 0.1006269914688046, + "grad_norm": 1.0957731008529663, + "learning_rate": 9.869805840561731e-06, + "loss": 0.7761, + "step": 1958 + }, + { + "epoch": 0.10067838421214924, + "grad_norm": 0.7526283264160156, + "learning_rate": 9.869617088709101e-06, + "loss": 0.7239, + "step": 1959 + }, + { + "epoch": 0.10072977695549389, + "grad_norm": 0.9006901979446411, + "learning_rate": 9.869428201939476e-06, + "loss": 0.6991, + "step": 1960 + }, + { + "epoch": 0.10078116969883852, + "grad_norm": 1.2227797508239746, + "learning_rate": 9.86923918025809e-06, + "loss": 0.8145, + "step": 1961 + }, + { + "epoch": 0.10083256244218317, + "grad_norm": 1.2286690473556519, + "learning_rate": 9.869050023670182e-06, + "loss": 0.8341, + "step": 1962 + }, + { + "epoch": 0.1008839551855278, + "grad_norm": 0.7617394328117371, + "learning_rate": 9.868860732180989e-06, + "loss": 0.7317, + "step": 1963 + }, + { + "epoch": 0.10093534792887245, + "grad_norm": 1.212906837463379, + "learning_rate": 9.86867130579576e-06, + "loss": 0.844, + "step": 1964 + }, + { + "epoch": 0.10098674067221708, + "grad_norm": 1.2200617790222168, + "learning_rate": 9.86848174451974e-06, + "loss": 0.8441, + "step": 1965 + }, + { + "epoch": 0.10103813341556173, + "grad_norm": 1.1495684385299683, + "learning_rate": 9.868292048358183e-06, + "loss": 0.8589, + "step": 1966 + }, + { + "epoch": 0.10108952615890636, + "grad_norm": 1.1524317264556885, + "learning_rate": 9.868102217316342e-06, + "loss": 0.8106, + "step": 1967 + }, + { + "epoch": 0.101140918902251, + "grad_norm": 1.1042249202728271, + "learning_rate": 9.867912251399479e-06, + "loss": 0.8159, + "step": 1968 + }, + { + "epoch": 0.10119231164559564, + "grad_norm": 1.0767053365707397, + "learning_rate": 9.867722150612855e-06, + "loss": 0.7984, + "step": 1969 + }, + { + "epoch": 0.10124370438894029, + "grad_norm": 0.9872498512268066, + "learning_rate": 9.86753191496174e-06, + "loss": 0.734, + "step": 1970 + }, + { + "epoch": 0.10129509713228492, + "grad_norm": 1.1424567699432373, + "learning_rate": 9.867341544451401e-06, + "loss": 0.8342, + "step": 1971 + }, + { + "epoch": 0.10134648987562957, + "grad_norm": 0.9847910404205322, + "learning_rate": 9.867151039087115e-06, + "loss": 0.7457, + "step": 1972 + }, + { + "epoch": 0.1013978826189742, + "grad_norm": 1.1396901607513428, + "learning_rate": 9.866960398874159e-06, + "loss": 0.8373, + "step": 1973 + }, + { + "epoch": 0.10144927536231885, + "grad_norm": 1.1302088499069214, + "learning_rate": 9.866769623817816e-06, + "loss": 0.9205, + "step": 1974 + }, + { + "epoch": 0.10150066810566348, + "grad_norm": 1.1684679985046387, + "learning_rate": 9.866578713923369e-06, + "loss": 0.8282, + "step": 1975 + }, + { + "epoch": 0.10155206084900811, + "grad_norm": 1.1139990091323853, + "learning_rate": 9.866387669196112e-06, + "loss": 0.8158, + "step": 1976 + }, + { + "epoch": 0.10160345359235276, + "grad_norm": 1.0582804679870605, + "learning_rate": 9.866196489641332e-06, + "loss": 0.7916, + "step": 1977 + }, + { + "epoch": 0.1016548463356974, + "grad_norm": 1.1517270803451538, + "learning_rate": 9.866005175264331e-06, + "loss": 0.8431, + "step": 1978 + }, + { + "epoch": 0.10170623907904204, + "grad_norm": 1.0896328687667847, + "learning_rate": 9.865813726070405e-06, + "loss": 0.805, + "step": 1979 + }, + { + "epoch": 0.10175763182238667, + "grad_norm": 1.091223955154419, + "learning_rate": 9.865622142064863e-06, + "loss": 0.819, + "step": 1980 + }, + { + "epoch": 0.10180902456573132, + "grad_norm": 1.0777031183242798, + "learning_rate": 9.86543042325301e-06, + "loss": 0.8607, + "step": 1981 + }, + { + "epoch": 0.10186041730907595, + "grad_norm": 1.3352848291397095, + "learning_rate": 9.865238569640157e-06, + "loss": 0.8373, + "step": 1982 + }, + { + "epoch": 0.1019118100524206, + "grad_norm": 1.1215145587921143, + "learning_rate": 9.865046581231624e-06, + "loss": 0.8096, + "step": 1983 + }, + { + "epoch": 0.10196320279576523, + "grad_norm": 1.3076443672180176, + "learning_rate": 9.864854458032724e-06, + "loss": 0.8428, + "step": 1984 + }, + { + "epoch": 0.10201459553910988, + "grad_norm": 1.0836284160614014, + "learning_rate": 9.864662200048784e-06, + "loss": 0.8413, + "step": 1985 + }, + { + "epoch": 0.10206598828245451, + "grad_norm": 1.2135984897613525, + "learning_rate": 9.86446980728513e-06, + "loss": 0.8324, + "step": 1986 + }, + { + "epoch": 0.10211738102579916, + "grad_norm": 1.1524406671524048, + "learning_rate": 9.864277279747092e-06, + "loss": 0.8309, + "step": 1987 + }, + { + "epoch": 0.1021687737691438, + "grad_norm": 1.0766704082489014, + "learning_rate": 9.864084617440004e-06, + "loss": 0.829, + "step": 1988 + }, + { + "epoch": 0.10222016651248844, + "grad_norm": 0.995010495185852, + "learning_rate": 9.863891820369205e-06, + "loss": 0.7795, + "step": 1989 + }, + { + "epoch": 0.10227155925583307, + "grad_norm": 1.2125451564788818, + "learning_rate": 9.863698888540035e-06, + "loss": 0.8997, + "step": 1990 + }, + { + "epoch": 0.10232295199917772, + "grad_norm": 0.8472604751586914, + "learning_rate": 9.86350582195784e-06, + "loss": 0.7164, + "step": 1991 + }, + { + "epoch": 0.10237434474252236, + "grad_norm": 1.2747083902359009, + "learning_rate": 9.86331262062797e-06, + "loss": 0.8774, + "step": 1992 + }, + { + "epoch": 0.102425737485867, + "grad_norm": 1.1559460163116455, + "learning_rate": 9.863119284555776e-06, + "loss": 0.8064, + "step": 1993 + }, + { + "epoch": 0.10247713022921164, + "grad_norm": 1.1248106956481934, + "learning_rate": 9.862925813746616e-06, + "loss": 0.8005, + "step": 1994 + }, + { + "epoch": 0.10252852297255627, + "grad_norm": 1.1622852087020874, + "learning_rate": 9.862732208205849e-06, + "loss": 0.8304, + "step": 1995 + }, + { + "epoch": 0.10257991571590092, + "grad_norm": 1.2017699480056763, + "learning_rate": 9.862538467938842e-06, + "loss": 0.8492, + "step": 1996 + }, + { + "epoch": 0.10263130845924555, + "grad_norm": 1.2489099502563477, + "learning_rate": 9.862344592950958e-06, + "loss": 0.8329, + "step": 1997 + }, + { + "epoch": 0.1026827012025902, + "grad_norm": 1.201389193534851, + "learning_rate": 9.862150583247574e-06, + "loss": 0.8639, + "step": 1998 + }, + { + "epoch": 0.10273409394593483, + "grad_norm": 1.2007253170013428, + "learning_rate": 9.86195643883406e-06, + "loss": 0.8994, + "step": 1999 + }, + { + "epoch": 0.10278548668927948, + "grad_norm": 1.121754765510559, + "learning_rate": 9.861762159715798e-06, + "loss": 0.8269, + "step": 2000 + }, + { + "epoch": 0.10283687943262411, + "grad_norm": 1.2858860492706299, + "learning_rate": 9.861567745898169e-06, + "loss": 0.8733, + "step": 2001 + }, + { + "epoch": 0.10288827217596876, + "grad_norm": 1.1056945323944092, + "learning_rate": 9.86137319738656e-06, + "loss": 0.8241, + "step": 2002 + }, + { + "epoch": 0.10293966491931339, + "grad_norm": 0.9627084732055664, + "learning_rate": 9.861178514186363e-06, + "loss": 0.7263, + "step": 2003 + }, + { + "epoch": 0.10299105766265804, + "grad_norm": 1.185849666595459, + "learning_rate": 9.86098369630297e-06, + "loss": 0.803, + "step": 2004 + }, + { + "epoch": 0.10304245040600267, + "grad_norm": 1.2022292613983154, + "learning_rate": 9.860788743741778e-06, + "loss": 0.8392, + "step": 2005 + }, + { + "epoch": 0.10309384314934732, + "grad_norm": 1.1016179323196411, + "learning_rate": 9.860593656508188e-06, + "loss": 0.8508, + "step": 2006 + }, + { + "epoch": 0.10314523589269195, + "grad_norm": 1.3060848712921143, + "learning_rate": 9.860398434607609e-06, + "loss": 0.8402, + "step": 2007 + }, + { + "epoch": 0.1031966286360366, + "grad_norm": 0.9066895842552185, + "learning_rate": 9.860203078045445e-06, + "loss": 0.7333, + "step": 2008 + }, + { + "epoch": 0.10324802137938123, + "grad_norm": 1.6079376935958862, + "learning_rate": 9.860007586827112e-06, + "loss": 0.8228, + "step": 2009 + }, + { + "epoch": 0.10329941412272588, + "grad_norm": 1.2383357286453247, + "learning_rate": 9.859811960958025e-06, + "loss": 0.8628, + "step": 2010 + }, + { + "epoch": 0.10335080686607051, + "grad_norm": 1.1892626285552979, + "learning_rate": 9.859616200443603e-06, + "loss": 0.8122, + "step": 2011 + }, + { + "epoch": 0.10340219960941516, + "grad_norm": 1.1847467422485352, + "learning_rate": 9.85942030528927e-06, + "loss": 0.8447, + "step": 2012 + }, + { + "epoch": 0.10345359235275979, + "grad_norm": 1.1552761793136597, + "learning_rate": 9.859224275500454e-06, + "loss": 0.785, + "step": 2013 + }, + { + "epoch": 0.10350498509610442, + "grad_norm": 1.2984205484390259, + "learning_rate": 9.859028111082587e-06, + "loss": 0.8166, + "step": 2014 + }, + { + "epoch": 0.10355637783944907, + "grad_norm": 1.0715218782424927, + "learning_rate": 9.858831812041102e-06, + "loss": 0.7129, + "step": 2015 + }, + { + "epoch": 0.1036077705827937, + "grad_norm": 0.9487645030021667, + "learning_rate": 9.85863537838144e-06, + "loss": 0.6786, + "step": 2016 + }, + { + "epoch": 0.10365916332613835, + "grad_norm": 1.3574559688568115, + "learning_rate": 9.858438810109044e-06, + "loss": 0.8436, + "step": 2017 + }, + { + "epoch": 0.10371055606948298, + "grad_norm": 1.1608316898345947, + "learning_rate": 9.858242107229355e-06, + "loss": 0.8035, + "step": 2018 + }, + { + "epoch": 0.10376194881282763, + "grad_norm": 1.112083077430725, + "learning_rate": 9.858045269747826e-06, + "loss": 0.8425, + "step": 2019 + }, + { + "epoch": 0.10381334155617226, + "grad_norm": 1.3320881128311157, + "learning_rate": 9.857848297669912e-06, + "loss": 0.8981, + "step": 2020 + }, + { + "epoch": 0.10386473429951691, + "grad_norm": 1.1864501237869263, + "learning_rate": 9.857651191001067e-06, + "loss": 0.8221, + "step": 2021 + }, + { + "epoch": 0.10391612704286154, + "grad_norm": 1.2660017013549805, + "learning_rate": 9.857453949746756e-06, + "loss": 0.8604, + "step": 2022 + }, + { + "epoch": 0.10396751978620619, + "grad_norm": 1.1579972505569458, + "learning_rate": 9.857256573912441e-06, + "loss": 0.8257, + "step": 2023 + }, + { + "epoch": 0.10401891252955082, + "grad_norm": 1.1935635805130005, + "learning_rate": 9.85705906350359e-06, + "loss": 0.8171, + "step": 2024 + }, + { + "epoch": 0.10407030527289547, + "grad_norm": 1.2708181142807007, + "learning_rate": 9.856861418525678e-06, + "loss": 0.8117, + "step": 2025 + }, + { + "epoch": 0.1041216980162401, + "grad_norm": 0.8358376622200012, + "learning_rate": 9.856663638984177e-06, + "loss": 0.6917, + "step": 2026 + }, + { + "epoch": 0.10417309075958475, + "grad_norm": 1.15700364112854, + "learning_rate": 9.85646572488457e-06, + "loss": 0.8215, + "step": 2027 + }, + { + "epoch": 0.10422448350292939, + "grad_norm": 1.273124098777771, + "learning_rate": 9.856267676232339e-06, + "loss": 0.8325, + "step": 2028 + }, + { + "epoch": 0.10427587624627403, + "grad_norm": 1.1909046173095703, + "learning_rate": 9.856069493032971e-06, + "loss": 0.822, + "step": 2029 + }, + { + "epoch": 0.10432726898961867, + "grad_norm": 1.1918940544128418, + "learning_rate": 9.855871175291958e-06, + "loss": 0.87, + "step": 2030 + }, + { + "epoch": 0.1043786617329633, + "grad_norm": 1.2000300884246826, + "learning_rate": 9.855672723014792e-06, + "loss": 0.8185, + "step": 2031 + }, + { + "epoch": 0.10443005447630795, + "grad_norm": 1.0824081897735596, + "learning_rate": 9.855474136206975e-06, + "loss": 0.8173, + "step": 2032 + }, + { + "epoch": 0.10448144721965258, + "grad_norm": 1.2731975317001343, + "learning_rate": 9.855275414874007e-06, + "loss": 0.8249, + "step": 2033 + }, + { + "epoch": 0.10453283996299723, + "grad_norm": 1.1852995157241821, + "learning_rate": 9.855076559021392e-06, + "loss": 0.809, + "step": 2034 + }, + { + "epoch": 0.10458423270634186, + "grad_norm": 0.9092479348182678, + "learning_rate": 9.854877568654644e-06, + "loss": 0.7449, + "step": 2035 + }, + { + "epoch": 0.1046356254496865, + "grad_norm": 1.1946287155151367, + "learning_rate": 9.854678443779273e-06, + "loss": 0.8475, + "step": 2036 + }, + { + "epoch": 0.10468701819303114, + "grad_norm": 1.1364498138427734, + "learning_rate": 9.854479184400793e-06, + "loss": 0.8348, + "step": 2037 + }, + { + "epoch": 0.10473841093637579, + "grad_norm": 1.2741285562515259, + "learning_rate": 9.85427979052473e-06, + "loss": 0.8109, + "step": 2038 + }, + { + "epoch": 0.10478980367972042, + "grad_norm": 1.1620416641235352, + "learning_rate": 9.854080262156609e-06, + "loss": 0.7829, + "step": 2039 + }, + { + "epoch": 0.10484119642306507, + "grad_norm": 1.1623344421386719, + "learning_rate": 9.853880599301952e-06, + "loss": 0.7821, + "step": 2040 + }, + { + "epoch": 0.1048925891664097, + "grad_norm": 0.7803221940994263, + "learning_rate": 9.853680801966297e-06, + "loss": 0.7291, + "step": 2041 + }, + { + "epoch": 0.10494398190975435, + "grad_norm": 1.1708322763442993, + "learning_rate": 9.853480870155175e-06, + "loss": 0.8614, + "step": 2042 + }, + { + "epoch": 0.10499537465309898, + "grad_norm": 1.155012607574463, + "learning_rate": 9.853280803874128e-06, + "loss": 0.8111, + "step": 2043 + }, + { + "epoch": 0.10504676739644363, + "grad_norm": 1.2395075559616089, + "learning_rate": 9.853080603128698e-06, + "loss": 0.8636, + "step": 2044 + }, + { + "epoch": 0.10509816013978826, + "grad_norm": 1.3967252969741821, + "learning_rate": 9.852880267924431e-06, + "loss": 0.8555, + "step": 2045 + }, + { + "epoch": 0.10514955288313291, + "grad_norm": 1.384650468826294, + "learning_rate": 9.85267979826688e-06, + "loss": 0.8256, + "step": 2046 + }, + { + "epoch": 0.10520094562647754, + "grad_norm": 1.2941588163375854, + "learning_rate": 9.852479194161597e-06, + "loss": 0.8777, + "step": 2047 + }, + { + "epoch": 0.10525233836982219, + "grad_norm": 0.86928391456604, + "learning_rate": 9.852278455614142e-06, + "loss": 0.7763, + "step": 2048 + }, + { + "epoch": 0.10530373111316682, + "grad_norm": 1.1695020198822021, + "learning_rate": 9.852077582630073e-06, + "loss": 0.8226, + "step": 2049 + }, + { + "epoch": 0.10535512385651145, + "grad_norm": 1.0998610258102417, + "learning_rate": 9.851876575214957e-06, + "loss": 0.7926, + "step": 2050 + }, + { + "epoch": 0.1054065165998561, + "grad_norm": 0.8325785994529724, + "learning_rate": 9.851675433374366e-06, + "loss": 0.7115, + "step": 2051 + }, + { + "epoch": 0.10545790934320073, + "grad_norm": 1.0745104551315308, + "learning_rate": 9.851474157113869e-06, + "loss": 0.8372, + "step": 2052 + }, + { + "epoch": 0.10550930208654538, + "grad_norm": 1.3948298692703247, + "learning_rate": 9.851272746439045e-06, + "loss": 0.8342, + "step": 2053 + }, + { + "epoch": 0.10556069482989001, + "grad_norm": 0.7847484350204468, + "learning_rate": 9.851071201355473e-06, + "loss": 0.7027, + "step": 2054 + }, + { + "epoch": 0.10561208757323466, + "grad_norm": 0.8258352279663086, + "learning_rate": 9.850869521868736e-06, + "loss": 0.6856, + "step": 2055 + }, + { + "epoch": 0.1056634803165793, + "grad_norm": 1.1569632291793823, + "learning_rate": 9.850667707984425e-06, + "loss": 0.7977, + "step": 2056 + }, + { + "epoch": 0.10571487305992394, + "grad_norm": 0.8674617409706116, + "learning_rate": 9.850465759708127e-06, + "loss": 0.7323, + "step": 2057 + }, + { + "epoch": 0.10576626580326857, + "grad_norm": 1.0897079706192017, + "learning_rate": 9.85026367704544e-06, + "loss": 0.8463, + "step": 2058 + }, + { + "epoch": 0.10581765854661322, + "grad_norm": 1.0594137907028198, + "learning_rate": 9.850061460001963e-06, + "loss": 0.7503, + "step": 2059 + }, + { + "epoch": 0.10586905128995786, + "grad_norm": 1.1581857204437256, + "learning_rate": 9.849859108583298e-06, + "loss": 0.8204, + "step": 2060 + }, + { + "epoch": 0.1059204440333025, + "grad_norm": 0.85575270652771, + "learning_rate": 9.849656622795052e-06, + "loss": 0.7616, + "step": 2061 + }, + { + "epoch": 0.10597183677664714, + "grad_norm": 1.1002634763717651, + "learning_rate": 9.849454002642833e-06, + "loss": 0.8263, + "step": 2062 + }, + { + "epoch": 0.10602322951999178, + "grad_norm": 1.199106216430664, + "learning_rate": 9.849251248132257e-06, + "loss": 0.8019, + "step": 2063 + }, + { + "epoch": 0.10607462226333642, + "grad_norm": 1.1747106313705444, + "learning_rate": 9.84904835926894e-06, + "loss": 0.8138, + "step": 2064 + }, + { + "epoch": 0.10612601500668106, + "grad_norm": 0.9194386601448059, + "learning_rate": 9.848845336058503e-06, + "loss": 0.7092, + "step": 2065 + }, + { + "epoch": 0.1061774077500257, + "grad_norm": 0.8934338688850403, + "learning_rate": 9.848642178506573e-06, + "loss": 0.7293, + "step": 2066 + }, + { + "epoch": 0.10622880049337034, + "grad_norm": 1.2011280059814453, + "learning_rate": 9.848438886618777e-06, + "loss": 0.8039, + "step": 2067 + }, + { + "epoch": 0.10628019323671498, + "grad_norm": 1.1254558563232422, + "learning_rate": 9.848235460400748e-06, + "loss": 0.86, + "step": 2068 + }, + { + "epoch": 0.10633158598005961, + "grad_norm": 1.160234808921814, + "learning_rate": 9.84803189985812e-06, + "loss": 0.8012, + "step": 2069 + }, + { + "epoch": 0.10638297872340426, + "grad_norm": 1.2093859910964966, + "learning_rate": 9.84782820499654e-06, + "loss": 0.8424, + "step": 2070 + }, + { + "epoch": 0.10643437146674889, + "grad_norm": 1.153065800666809, + "learning_rate": 9.84762437582164e-06, + "loss": 0.8216, + "step": 2071 + }, + { + "epoch": 0.10648576421009354, + "grad_norm": 1.0969511270523071, + "learning_rate": 9.847420412339077e-06, + "loss": 0.843, + "step": 2072 + }, + { + "epoch": 0.10653715695343817, + "grad_norm": 1.0607304573059082, + "learning_rate": 9.847216314554497e-06, + "loss": 0.7702, + "step": 2073 + }, + { + "epoch": 0.10658854969678282, + "grad_norm": 1.1696754693984985, + "learning_rate": 9.847012082473559e-06, + "loss": 0.877, + "step": 2074 + }, + { + "epoch": 0.10663994244012745, + "grad_norm": 1.0082323551177979, + "learning_rate": 9.846807716101916e-06, + "loss": 0.7426, + "step": 2075 + }, + { + "epoch": 0.1066913351834721, + "grad_norm": 1.187567114830017, + "learning_rate": 9.846603215445232e-06, + "loss": 0.8129, + "step": 2076 + }, + { + "epoch": 0.10674272792681673, + "grad_norm": 1.1597225666046143, + "learning_rate": 9.846398580509176e-06, + "loss": 0.8249, + "step": 2077 + }, + { + "epoch": 0.10679412067016138, + "grad_norm": 1.1469331979751587, + "learning_rate": 9.846193811299414e-06, + "loss": 0.7991, + "step": 2078 + }, + { + "epoch": 0.10684551341350601, + "grad_norm": 1.0819371938705444, + "learning_rate": 9.845988907821621e-06, + "loss": 0.8229, + "step": 2079 + }, + { + "epoch": 0.10689690615685066, + "grad_norm": 1.178691029548645, + "learning_rate": 9.845783870081473e-06, + "loss": 0.8177, + "step": 2080 + }, + { + "epoch": 0.10694829890019529, + "grad_norm": 1.094375729560852, + "learning_rate": 9.845578698084652e-06, + "loss": 0.8094, + "step": 2081 + }, + { + "epoch": 0.10699969164353994, + "grad_norm": 1.1425831317901611, + "learning_rate": 9.845373391836842e-06, + "loss": 0.8226, + "step": 2082 + }, + { + "epoch": 0.10705108438688457, + "grad_norm": 0.844132661819458, + "learning_rate": 9.84516795134373e-06, + "loss": 0.7712, + "step": 2083 + }, + { + "epoch": 0.10710247713022922, + "grad_norm": 1.2742677927017212, + "learning_rate": 9.844962376611009e-06, + "loss": 0.862, + "step": 2084 + }, + { + "epoch": 0.10715386987357385, + "grad_norm": 1.2135865688323975, + "learning_rate": 9.844756667644375e-06, + "loss": 0.8326, + "step": 2085 + }, + { + "epoch": 0.1072052626169185, + "grad_norm": 1.1784766912460327, + "learning_rate": 9.844550824449526e-06, + "loss": 0.8758, + "step": 2086 + }, + { + "epoch": 0.10725665536026313, + "grad_norm": 1.184257984161377, + "learning_rate": 9.844344847032168e-06, + "loss": 0.8139, + "step": 2087 + }, + { + "epoch": 0.10730804810360776, + "grad_norm": 0.8778985142707825, + "learning_rate": 9.844138735398003e-06, + "loss": 0.7399, + "step": 2088 + }, + { + "epoch": 0.10735944084695241, + "grad_norm": 1.1916102170944214, + "learning_rate": 9.843932489552746e-06, + "loss": 0.8393, + "step": 2089 + }, + { + "epoch": 0.10741083359029704, + "grad_norm": 1.190514326095581, + "learning_rate": 9.84372610950211e-06, + "loss": 0.7854, + "step": 2090 + }, + { + "epoch": 0.10746222633364169, + "grad_norm": 1.189003825187683, + "learning_rate": 9.843519595251811e-06, + "loss": 0.852, + "step": 2091 + }, + { + "epoch": 0.10751361907698632, + "grad_norm": 1.1399040222167969, + "learning_rate": 9.843312946807573e-06, + "loss": 0.8597, + "step": 2092 + }, + { + "epoch": 0.10756501182033097, + "grad_norm": 1.2766233682632446, + "learning_rate": 9.84310616417512e-06, + "loss": 0.8448, + "step": 2093 + }, + { + "epoch": 0.1076164045636756, + "grad_norm": 1.0877881050109863, + "learning_rate": 9.842899247360181e-06, + "loss": 0.766, + "step": 2094 + }, + { + "epoch": 0.10766779730702025, + "grad_norm": 1.15092933177948, + "learning_rate": 9.84269219636849e-06, + "loss": 0.8475, + "step": 2095 + }, + { + "epoch": 0.10771919005036489, + "grad_norm": 1.1373077630996704, + "learning_rate": 9.842485011205782e-06, + "loss": 0.8456, + "step": 2096 + }, + { + "epoch": 0.10777058279370953, + "grad_norm": 1.1603868007659912, + "learning_rate": 9.8422776918778e-06, + "loss": 0.8172, + "step": 2097 + }, + { + "epoch": 0.10782197553705417, + "grad_norm": 1.1840358972549438, + "learning_rate": 9.842070238390284e-06, + "loss": 0.8801, + "step": 2098 + }, + { + "epoch": 0.10787336828039881, + "grad_norm": 1.1693062782287598, + "learning_rate": 9.841862650748983e-06, + "loss": 0.8315, + "step": 2099 + }, + { + "epoch": 0.10792476102374345, + "grad_norm": 1.1618939638137817, + "learning_rate": 9.841654928959651e-06, + "loss": 0.8554, + "step": 2100 + }, + { + "epoch": 0.10797615376708809, + "grad_norm": 1.1679095029830933, + "learning_rate": 9.84144707302804e-06, + "loss": 0.8003, + "step": 2101 + }, + { + "epoch": 0.10802754651043273, + "grad_norm": 1.2202725410461426, + "learning_rate": 9.841239082959913e-06, + "loss": 0.8577, + "step": 2102 + }, + { + "epoch": 0.10807893925377737, + "grad_norm": 1.1412358283996582, + "learning_rate": 9.841030958761026e-06, + "loss": 0.8348, + "step": 2103 + }, + { + "epoch": 0.108130331997122, + "grad_norm": 1.2195676565170288, + "learning_rate": 9.84082270043715e-06, + "loss": 0.7994, + "step": 2104 + }, + { + "epoch": 0.10818172474046664, + "grad_norm": 1.3252432346343994, + "learning_rate": 9.840614307994056e-06, + "loss": 0.8217, + "step": 2105 + }, + { + "epoch": 0.10823311748381129, + "grad_norm": 1.162333607673645, + "learning_rate": 9.840405781437515e-06, + "loss": 0.7966, + "step": 2106 + }, + { + "epoch": 0.10828451022715592, + "grad_norm": 1.1192104816436768, + "learning_rate": 9.840197120773303e-06, + "loss": 0.8182, + "step": 2107 + }, + { + "epoch": 0.10833590297050057, + "grad_norm": 1.1943600177764893, + "learning_rate": 9.839988326007204e-06, + "loss": 0.8539, + "step": 2108 + }, + { + "epoch": 0.1083872957138452, + "grad_norm": 1.1747912168502808, + "learning_rate": 9.839779397145002e-06, + "loss": 0.8416, + "step": 2109 + }, + { + "epoch": 0.10843868845718985, + "grad_norm": 1.2193492650985718, + "learning_rate": 9.839570334192485e-06, + "loss": 0.8389, + "step": 2110 + }, + { + "epoch": 0.10849008120053448, + "grad_norm": 1.1557869911193848, + "learning_rate": 9.839361137155445e-06, + "loss": 0.8047, + "step": 2111 + }, + { + "epoch": 0.10854147394387913, + "grad_norm": 1.0687378644943237, + "learning_rate": 9.839151806039681e-06, + "loss": 0.8312, + "step": 2112 + }, + { + "epoch": 0.10859286668722376, + "grad_norm": 0.7824373841285706, + "learning_rate": 9.83894234085099e-06, + "loss": 0.7271, + "step": 2113 + }, + { + "epoch": 0.10864425943056841, + "grad_norm": 1.1830910444259644, + "learning_rate": 9.838732741595174e-06, + "loss": 0.7912, + "step": 2114 + }, + { + "epoch": 0.10869565217391304, + "grad_norm": 1.0808295011520386, + "learning_rate": 9.838523008278043e-06, + "loss": 0.8084, + "step": 2115 + }, + { + "epoch": 0.10874704491725769, + "grad_norm": 1.2928946018218994, + "learning_rate": 9.838313140905407e-06, + "loss": 0.7243, + "step": 2116 + }, + { + "epoch": 0.10879843766060232, + "grad_norm": 1.1614266633987427, + "learning_rate": 9.838103139483082e-06, + "loss": 0.7778, + "step": 2117 + }, + { + "epoch": 0.10884983040394697, + "grad_norm": 1.1963202953338623, + "learning_rate": 9.837893004016883e-06, + "loss": 0.8136, + "step": 2118 + }, + { + "epoch": 0.1089012231472916, + "grad_norm": 1.7935479879379272, + "learning_rate": 9.837682734512633e-06, + "loss": 0.7559, + "step": 2119 + }, + { + "epoch": 0.10895261589063625, + "grad_norm": 0.8835573196411133, + "learning_rate": 9.83747233097616e-06, + "loss": 0.7271, + "step": 2120 + }, + { + "epoch": 0.10900400863398088, + "grad_norm": 1.2589309215545654, + "learning_rate": 9.837261793413292e-06, + "loss": 0.8552, + "step": 2121 + }, + { + "epoch": 0.10905540137732553, + "grad_norm": 1.185909390449524, + "learning_rate": 9.837051121829859e-06, + "loss": 0.8122, + "step": 2122 + }, + { + "epoch": 0.10910679412067016, + "grad_norm": 1.11640202999115, + "learning_rate": 9.836840316231704e-06, + "loss": 0.8339, + "step": 2123 + }, + { + "epoch": 0.1091581868640148, + "grad_norm": 1.1984152793884277, + "learning_rate": 9.836629376624663e-06, + "loss": 0.8202, + "step": 2124 + }, + { + "epoch": 0.10920957960735944, + "grad_norm": 1.2150204181671143, + "learning_rate": 9.83641830301458e-06, + "loss": 0.8028, + "step": 2125 + }, + { + "epoch": 0.10926097235070407, + "grad_norm": 1.1551257371902466, + "learning_rate": 9.836207095407306e-06, + "loss": 0.7802, + "step": 2126 + }, + { + "epoch": 0.10931236509404872, + "grad_norm": 1.1546604633331299, + "learning_rate": 9.83599575380869e-06, + "loss": 0.823, + "step": 2127 + }, + { + "epoch": 0.10936375783739335, + "grad_norm": 0.8745397329330444, + "learning_rate": 9.835784278224591e-06, + "loss": 0.7263, + "step": 2128 + }, + { + "epoch": 0.109415150580738, + "grad_norm": 1.2139627933502197, + "learning_rate": 9.835572668660866e-06, + "loss": 0.8662, + "step": 2129 + }, + { + "epoch": 0.10946654332408264, + "grad_norm": 1.140381097793579, + "learning_rate": 9.835360925123376e-06, + "loss": 0.8385, + "step": 2130 + }, + { + "epoch": 0.10951793606742728, + "grad_norm": 0.7668219208717346, + "learning_rate": 9.835149047617989e-06, + "loss": 0.6953, + "step": 2131 + }, + { + "epoch": 0.10956932881077192, + "grad_norm": 0.8458096981048584, + "learning_rate": 9.834937036150576e-06, + "loss": 0.6732, + "step": 2132 + }, + { + "epoch": 0.10962072155411656, + "grad_norm": 0.8098699450492859, + "learning_rate": 9.83472489072701e-06, + "loss": 0.7035, + "step": 2133 + }, + { + "epoch": 0.1096721142974612, + "grad_norm": 0.9325520992279053, + "learning_rate": 9.834512611353168e-06, + "loss": 0.6944, + "step": 2134 + }, + { + "epoch": 0.10972350704080584, + "grad_norm": 1.1876887083053589, + "learning_rate": 9.834300198034934e-06, + "loss": 0.8423, + "step": 2135 + }, + { + "epoch": 0.10977489978415048, + "grad_norm": 0.8546350002288818, + "learning_rate": 9.83408765077819e-06, + "loss": 0.7267, + "step": 2136 + }, + { + "epoch": 0.10982629252749512, + "grad_norm": 1.1998388767242432, + "learning_rate": 9.833874969588828e-06, + "loss": 0.792, + "step": 2137 + }, + { + "epoch": 0.10987768527083976, + "grad_norm": 1.2354494333267212, + "learning_rate": 9.833662154472738e-06, + "loss": 0.7892, + "step": 2138 + }, + { + "epoch": 0.1099290780141844, + "grad_norm": 1.1702477931976318, + "learning_rate": 9.833449205435817e-06, + "loss": 0.8115, + "step": 2139 + }, + { + "epoch": 0.10998047075752904, + "grad_norm": 1.1714764833450317, + "learning_rate": 9.833236122483967e-06, + "loss": 0.8342, + "step": 2140 + }, + { + "epoch": 0.11003186350087368, + "grad_norm": 1.1763198375701904, + "learning_rate": 9.833022905623086e-06, + "loss": 0.8534, + "step": 2141 + }, + { + "epoch": 0.11008325624421832, + "grad_norm": 1.2342931032180786, + "learning_rate": 9.832809554859088e-06, + "loss": 0.923, + "step": 2142 + }, + { + "epoch": 0.11013464898756295, + "grad_norm": 1.1907038688659668, + "learning_rate": 9.83259607019788e-06, + "loss": 0.7705, + "step": 2143 + }, + { + "epoch": 0.1101860417309076, + "grad_norm": 1.1392539739608765, + "learning_rate": 9.832382451645377e-06, + "loss": 0.8351, + "step": 2144 + }, + { + "epoch": 0.11023743447425223, + "grad_norm": 1.198627233505249, + "learning_rate": 9.832168699207498e-06, + "loss": 0.8773, + "step": 2145 + }, + { + "epoch": 0.11028882721759688, + "grad_norm": 1.2492464780807495, + "learning_rate": 9.831954812890168e-06, + "loss": 0.8655, + "step": 2146 + }, + { + "epoch": 0.11034021996094151, + "grad_norm": 1.2627941370010376, + "learning_rate": 9.83174079269931e-06, + "loss": 0.7434, + "step": 2147 + }, + { + "epoch": 0.11039161270428616, + "grad_norm": 1.1658904552459717, + "learning_rate": 9.831526638640852e-06, + "loss": 0.8412, + "step": 2148 + }, + { + "epoch": 0.11044300544763079, + "grad_norm": 1.7002475261688232, + "learning_rate": 9.83131235072073e-06, + "loss": 0.8496, + "step": 2149 + }, + { + "epoch": 0.11049439819097544, + "grad_norm": 1.151473879814148, + "learning_rate": 9.83109792894488e-06, + "loss": 0.7836, + "step": 2150 + }, + { + "epoch": 0.11054579093432007, + "grad_norm": 1.1377246379852295, + "learning_rate": 9.830883373319244e-06, + "loss": 0.8052, + "step": 2151 + }, + { + "epoch": 0.11059718367766472, + "grad_norm": 1.029439926147461, + "learning_rate": 9.830668683849766e-06, + "loss": 0.7213, + "step": 2152 + }, + { + "epoch": 0.11064857642100935, + "grad_norm": 1.1953693628311157, + "learning_rate": 9.830453860542393e-06, + "loss": 0.8036, + "step": 2153 + }, + { + "epoch": 0.110699969164354, + "grad_norm": 1.1742392778396606, + "learning_rate": 9.830238903403078e-06, + "loss": 0.7974, + "step": 2154 + }, + { + "epoch": 0.11075136190769863, + "grad_norm": 1.1791337728500366, + "learning_rate": 9.830023812437777e-06, + "loss": 0.8304, + "step": 2155 + }, + { + "epoch": 0.11080275465104328, + "grad_norm": 1.1293309926986694, + "learning_rate": 9.829808587652447e-06, + "loss": 0.8188, + "step": 2156 + }, + { + "epoch": 0.11085414739438791, + "grad_norm": 1.1872262954711914, + "learning_rate": 9.829593229053055e-06, + "loss": 0.832, + "step": 2157 + }, + { + "epoch": 0.11090554013773256, + "grad_norm": 1.202713966369629, + "learning_rate": 9.829377736645562e-06, + "loss": 0.8391, + "step": 2158 + }, + { + "epoch": 0.11095693288107719, + "grad_norm": 1.1206715106964111, + "learning_rate": 9.82916211043594e-06, + "loss": 0.7979, + "step": 2159 + }, + { + "epoch": 0.11100832562442182, + "grad_norm": 1.1883658170700073, + "learning_rate": 9.828946350430169e-06, + "loss": 0.8654, + "step": 2160 + }, + { + "epoch": 0.11105971836776647, + "grad_norm": 1.1700845956802368, + "learning_rate": 9.828730456634222e-06, + "loss": 0.8531, + "step": 2161 + }, + { + "epoch": 0.1111111111111111, + "grad_norm": 0.9313642978668213, + "learning_rate": 9.82851442905408e-06, + "loss": 0.75, + "step": 2162 + }, + { + "epoch": 0.11116250385445575, + "grad_norm": 1.217764973640442, + "learning_rate": 9.82829826769573e-06, + "loss": 0.8306, + "step": 2163 + }, + { + "epoch": 0.11121389659780039, + "grad_norm": 1.1495599746704102, + "learning_rate": 9.82808197256516e-06, + "loss": 0.7775, + "step": 2164 + }, + { + "epoch": 0.11126528934114503, + "grad_norm": 1.1596499681472778, + "learning_rate": 9.827865543668362e-06, + "loss": 0.8938, + "step": 2165 + }, + { + "epoch": 0.11131668208448967, + "grad_norm": 1.2563353776931763, + "learning_rate": 9.827648981011334e-06, + "loss": 0.8334, + "step": 2166 + }, + { + "epoch": 0.11136807482783431, + "grad_norm": 0.8503729701042175, + "learning_rate": 9.827432284600073e-06, + "loss": 0.7069, + "step": 2167 + }, + { + "epoch": 0.11141946757117895, + "grad_norm": 1.1692421436309814, + "learning_rate": 9.827215454440588e-06, + "loss": 0.7936, + "step": 2168 + }, + { + "epoch": 0.11147086031452359, + "grad_norm": 0.8492381572723389, + "learning_rate": 9.826998490538883e-06, + "loss": 0.7473, + "step": 2169 + }, + { + "epoch": 0.11152225305786823, + "grad_norm": 1.0849483013153076, + "learning_rate": 9.826781392900968e-06, + "loss": 0.7947, + "step": 2170 + }, + { + "epoch": 0.11157364580121287, + "grad_norm": 0.8289567232131958, + "learning_rate": 9.82656416153286e-06, + "loss": 0.7413, + "step": 2171 + }, + { + "epoch": 0.1116250385445575, + "grad_norm": 1.1089200973510742, + "learning_rate": 9.82634679644058e-06, + "loss": 0.8241, + "step": 2172 + }, + { + "epoch": 0.11167643128790215, + "grad_norm": 1.1167725324630737, + "learning_rate": 9.826129297630146e-06, + "loss": 0.7698, + "step": 2173 + }, + { + "epoch": 0.11172782403124679, + "grad_norm": 1.1596695184707642, + "learning_rate": 9.825911665107584e-06, + "loss": 0.8354, + "step": 2174 + }, + { + "epoch": 0.11177921677459143, + "grad_norm": 1.1653627157211304, + "learning_rate": 9.825693898878925e-06, + "loss": 0.8043, + "step": 2175 + }, + { + "epoch": 0.11183060951793607, + "grad_norm": 1.160455584526062, + "learning_rate": 9.825475998950203e-06, + "loss": 0.8086, + "step": 2176 + }, + { + "epoch": 0.11188200226128071, + "grad_norm": 1.2420039176940918, + "learning_rate": 9.825257965327454e-06, + "loss": 0.8526, + "step": 2177 + }, + { + "epoch": 0.11193339500462535, + "grad_norm": 0.8005742430686951, + "learning_rate": 9.825039798016723e-06, + "loss": 0.7035, + "step": 2178 + }, + { + "epoch": 0.11198478774796998, + "grad_norm": 0.8854387402534485, + "learning_rate": 9.824821497024046e-06, + "loss": 0.7325, + "step": 2179 + }, + { + "epoch": 0.11203618049131463, + "grad_norm": 0.7694113850593567, + "learning_rate": 9.82460306235548e-06, + "loss": 0.6782, + "step": 2180 + }, + { + "epoch": 0.11208757323465926, + "grad_norm": 0.7485276460647583, + "learning_rate": 9.824384494017072e-06, + "loss": 0.7378, + "step": 2181 + }, + { + "epoch": 0.11213896597800391, + "grad_norm": 0.7948840260505676, + "learning_rate": 9.824165792014877e-06, + "loss": 0.7209, + "step": 2182 + }, + { + "epoch": 0.11219035872134854, + "grad_norm": 1.305915117263794, + "learning_rate": 9.823946956354958e-06, + "loss": 0.8635, + "step": 2183 + }, + { + "epoch": 0.11224175146469319, + "grad_norm": 0.7420186996459961, + "learning_rate": 9.823727987043376e-06, + "loss": 0.6824, + "step": 2184 + }, + { + "epoch": 0.11229314420803782, + "grad_norm": 0.7992856502532959, + "learning_rate": 9.823508884086197e-06, + "loss": 0.7047, + "step": 2185 + }, + { + "epoch": 0.11234453695138247, + "grad_norm": 1.1752492189407349, + "learning_rate": 9.823289647489493e-06, + "loss": 0.8568, + "step": 2186 + }, + { + "epoch": 0.1123959296947271, + "grad_norm": 1.4357744455337524, + "learning_rate": 9.823070277259337e-06, + "loss": 0.8649, + "step": 2187 + }, + { + "epoch": 0.11244732243807175, + "grad_norm": 1.105179786682129, + "learning_rate": 9.822850773401807e-06, + "loss": 0.798, + "step": 2188 + }, + { + "epoch": 0.11249871518141638, + "grad_norm": 1.189935564994812, + "learning_rate": 9.822631135922984e-06, + "loss": 0.8346, + "step": 2189 + }, + { + "epoch": 0.11255010792476103, + "grad_norm": 1.2357518672943115, + "learning_rate": 9.822411364828957e-06, + "loss": 0.8323, + "step": 2190 + }, + { + "epoch": 0.11260150066810566, + "grad_norm": 1.1284202337265015, + "learning_rate": 9.82219146012581e-06, + "loss": 0.825, + "step": 2191 + }, + { + "epoch": 0.11265289341145031, + "grad_norm": 1.198906421661377, + "learning_rate": 9.821971421819637e-06, + "loss": 0.8671, + "step": 2192 + }, + { + "epoch": 0.11270428615479494, + "grad_norm": 1.0011494159698486, + "learning_rate": 9.821751249916536e-06, + "loss": 0.7101, + "step": 2193 + }, + { + "epoch": 0.11275567889813959, + "grad_norm": 1.1242879629135132, + "learning_rate": 9.821530944422606e-06, + "loss": 0.824, + "step": 2194 + }, + { + "epoch": 0.11280707164148422, + "grad_norm": 1.192854404449463, + "learning_rate": 9.82131050534395e-06, + "loss": 0.8145, + "step": 2195 + }, + { + "epoch": 0.11285846438482887, + "grad_norm": 0.8611165285110474, + "learning_rate": 9.821089932686677e-06, + "loss": 0.7311, + "step": 2196 + }, + { + "epoch": 0.1129098571281735, + "grad_norm": 1.0399948358535767, + "learning_rate": 9.820869226456897e-06, + "loss": 0.8317, + "step": 2197 + }, + { + "epoch": 0.11296124987151814, + "grad_norm": 1.1548027992248535, + "learning_rate": 9.820648386660727e-06, + "loss": 0.8093, + "step": 2198 + }, + { + "epoch": 0.11301264261486278, + "grad_norm": 1.1734575033187866, + "learning_rate": 9.820427413304282e-06, + "loss": 0.7881, + "step": 2199 + }, + { + "epoch": 0.11306403535820742, + "grad_norm": 1.1255524158477783, + "learning_rate": 9.820206306393687e-06, + "loss": 0.8569, + "step": 2200 + }, + { + "epoch": 0.11311542810155206, + "grad_norm": 1.189300775527954, + "learning_rate": 9.819985065935065e-06, + "loss": 0.865, + "step": 2201 + }, + { + "epoch": 0.1131668208448967, + "grad_norm": 0.842091977596283, + "learning_rate": 9.819763691934551e-06, + "loss": 0.6655, + "step": 2202 + }, + { + "epoch": 0.11321821358824134, + "grad_norm": 1.22659432888031, + "learning_rate": 9.819542184398273e-06, + "loss": 0.8303, + "step": 2203 + }, + { + "epoch": 0.11326960633158598, + "grad_norm": 1.1401138305664062, + "learning_rate": 9.819320543332371e-06, + "loss": 0.8408, + "step": 2204 + }, + { + "epoch": 0.11332099907493062, + "grad_norm": 0.8609177470207214, + "learning_rate": 9.819098768742985e-06, + "loss": 0.7204, + "step": 2205 + }, + { + "epoch": 0.11337239181827526, + "grad_norm": 1.1957910060882568, + "learning_rate": 9.818876860636259e-06, + "loss": 0.8133, + "step": 2206 + }, + { + "epoch": 0.1134237845616199, + "grad_norm": 1.171829342842102, + "learning_rate": 9.818654819018341e-06, + "loss": 0.7967, + "step": 2207 + }, + { + "epoch": 0.11347517730496454, + "grad_norm": 0.8800224661827087, + "learning_rate": 9.818432643895383e-06, + "loss": 0.6815, + "step": 2208 + }, + { + "epoch": 0.11352657004830918, + "grad_norm": 1.1448153257369995, + "learning_rate": 9.818210335273541e-06, + "loss": 0.8125, + "step": 2209 + }, + { + "epoch": 0.11357796279165382, + "grad_norm": 1.3789615631103516, + "learning_rate": 9.817987893158976e-06, + "loss": 0.8499, + "step": 2210 + }, + { + "epoch": 0.11362935553499846, + "grad_norm": 0.8859899044036865, + "learning_rate": 9.817765317557848e-06, + "loss": 0.7424, + "step": 2211 + }, + { + "epoch": 0.1136807482783431, + "grad_norm": 1.135510802268982, + "learning_rate": 9.817542608476325e-06, + "loss": 0.8468, + "step": 2212 + }, + { + "epoch": 0.11373214102168774, + "grad_norm": 1.1696711778640747, + "learning_rate": 9.817319765920576e-06, + "loss": 0.8337, + "step": 2213 + }, + { + "epoch": 0.11378353376503238, + "grad_norm": 1.391337275505066, + "learning_rate": 9.817096789896779e-06, + "loss": 0.8101, + "step": 2214 + }, + { + "epoch": 0.11383492650837702, + "grad_norm": 0.8232397437095642, + "learning_rate": 9.816873680411106e-06, + "loss": 0.7946, + "step": 2215 + }, + { + "epoch": 0.11388631925172166, + "grad_norm": 1.0893553495407104, + "learning_rate": 9.816650437469744e-06, + "loss": 0.8166, + "step": 2216 + }, + { + "epoch": 0.11393771199506629, + "grad_norm": 1.0865901708602905, + "learning_rate": 9.816427061078872e-06, + "loss": 0.7737, + "step": 2217 + }, + { + "epoch": 0.11398910473841094, + "grad_norm": 1.1675657033920288, + "learning_rate": 9.816203551244686e-06, + "loss": 0.8598, + "step": 2218 + }, + { + "epoch": 0.11404049748175557, + "grad_norm": 1.155751347541809, + "learning_rate": 9.815979907973373e-06, + "loss": 0.8249, + "step": 2219 + }, + { + "epoch": 0.11409189022510022, + "grad_norm": 1.0168051719665527, + "learning_rate": 9.81575613127113e-06, + "loss": 0.7335, + "step": 2220 + }, + { + "epoch": 0.11414328296844485, + "grad_norm": 1.156654715538025, + "learning_rate": 9.81553222114416e-06, + "loss": 0.8244, + "step": 2221 + }, + { + "epoch": 0.1141946757117895, + "grad_norm": 1.1916625499725342, + "learning_rate": 9.815308177598664e-06, + "loss": 0.8108, + "step": 2222 + }, + { + "epoch": 0.11424606845513413, + "grad_norm": 1.0490589141845703, + "learning_rate": 9.815084000640851e-06, + "loss": 0.8074, + "step": 2223 + }, + { + "epoch": 0.11429746119847878, + "grad_norm": 1.127611517906189, + "learning_rate": 9.81485969027693e-06, + "loss": 0.7945, + "step": 2224 + }, + { + "epoch": 0.11434885394182341, + "grad_norm": 1.0956743955612183, + "learning_rate": 9.814635246513117e-06, + "loss": 0.7805, + "step": 2225 + }, + { + "epoch": 0.11440024668516806, + "grad_norm": 0.8723738789558411, + "learning_rate": 9.81441066935563e-06, + "loss": 0.7058, + "step": 2226 + }, + { + "epoch": 0.11445163942851269, + "grad_norm": 1.197574257850647, + "learning_rate": 9.814185958810692e-06, + "loss": 0.804, + "step": 2227 + }, + { + "epoch": 0.11450303217185734, + "grad_norm": 1.2121386528015137, + "learning_rate": 9.813961114884527e-06, + "loss": 0.8049, + "step": 2228 + }, + { + "epoch": 0.11455442491520197, + "grad_norm": 1.231401801109314, + "learning_rate": 9.813736137583366e-06, + "loss": 0.848, + "step": 2229 + }, + { + "epoch": 0.11460581765854662, + "grad_norm": 0.7662510871887207, + "learning_rate": 9.813511026913442e-06, + "loss": 0.7384, + "step": 2230 + }, + { + "epoch": 0.11465721040189125, + "grad_norm": 0.8181769251823425, + "learning_rate": 9.81328578288099e-06, + "loss": 0.7442, + "step": 2231 + }, + { + "epoch": 0.1147086031452359, + "grad_norm": 1.1366883516311646, + "learning_rate": 9.813060405492254e-06, + "loss": 0.8171, + "step": 2232 + }, + { + "epoch": 0.11475999588858053, + "grad_norm": 1.1009812355041504, + "learning_rate": 9.812834894753476e-06, + "loss": 0.7749, + "step": 2233 + }, + { + "epoch": 0.11481138863192517, + "grad_norm": 1.388122320175171, + "learning_rate": 9.812609250670904e-06, + "loss": 0.7959, + "step": 2234 + }, + { + "epoch": 0.11486278137526981, + "grad_norm": 1.1714955568313599, + "learning_rate": 9.81238347325079e-06, + "loss": 0.855, + "step": 2235 + }, + { + "epoch": 0.11491417411861445, + "grad_norm": 1.2079038619995117, + "learning_rate": 9.812157562499391e-06, + "loss": 0.8214, + "step": 2236 + }, + { + "epoch": 0.11496556686195909, + "grad_norm": 1.1966966390609741, + "learning_rate": 9.811931518422963e-06, + "loss": 0.8083, + "step": 2237 + }, + { + "epoch": 0.11501695960530373, + "grad_norm": 1.0508400201797485, + "learning_rate": 9.811705341027772e-06, + "loss": 0.7889, + "step": 2238 + }, + { + "epoch": 0.11506835234864837, + "grad_norm": 1.2941479682922363, + "learning_rate": 9.811479030320081e-06, + "loss": 0.7083, + "step": 2239 + }, + { + "epoch": 0.115119745091993, + "grad_norm": 0.8416644334793091, + "learning_rate": 9.811252586306164e-06, + "loss": 0.7267, + "step": 2240 + }, + { + "epoch": 0.11517113783533765, + "grad_norm": 1.208182692527771, + "learning_rate": 9.81102600899229e-06, + "loss": 0.8419, + "step": 2241 + }, + { + "epoch": 0.11522253057868229, + "grad_norm": 0.7682667374610901, + "learning_rate": 9.810799298384742e-06, + "loss": 0.7507, + "step": 2242 + }, + { + "epoch": 0.11527392332202693, + "grad_norm": 1.1417720317840576, + "learning_rate": 9.810572454489796e-06, + "loss": 0.7738, + "step": 2243 + }, + { + "epoch": 0.11532531606537157, + "grad_norm": 1.1823337078094482, + "learning_rate": 9.81034547731374e-06, + "loss": 0.8585, + "step": 2244 + }, + { + "epoch": 0.11537670880871621, + "grad_norm": 0.9936612844467163, + "learning_rate": 9.810118366862862e-06, + "loss": 0.7301, + "step": 2245 + }, + { + "epoch": 0.11542810155206085, + "grad_norm": 1.2431813478469849, + "learning_rate": 9.809891123143455e-06, + "loss": 0.8515, + "step": 2246 + }, + { + "epoch": 0.1154794942954055, + "grad_norm": 1.1989020109176636, + "learning_rate": 9.809663746161812e-06, + "loss": 0.8486, + "step": 2247 + }, + { + "epoch": 0.11553088703875013, + "grad_norm": 1.1301652193069458, + "learning_rate": 9.809436235924237e-06, + "loss": 0.8482, + "step": 2248 + }, + { + "epoch": 0.11558227978209477, + "grad_norm": 1.1326934099197388, + "learning_rate": 9.809208592437032e-06, + "loss": 0.8063, + "step": 2249 + }, + { + "epoch": 0.11563367252543941, + "grad_norm": 0.7685805559158325, + "learning_rate": 9.808980815706502e-06, + "loss": 0.7051, + "step": 2250 + }, + { + "epoch": 0.11568506526878405, + "grad_norm": 1.1498092412948608, + "learning_rate": 9.808752905738958e-06, + "loss": 0.7975, + "step": 2251 + }, + { + "epoch": 0.11573645801212869, + "grad_norm": 1.1571431159973145, + "learning_rate": 9.808524862540718e-06, + "loss": 0.8442, + "step": 2252 + }, + { + "epoch": 0.11578785075547332, + "grad_norm": 1.2044833898544312, + "learning_rate": 9.808296686118097e-06, + "loss": 0.8168, + "step": 2253 + }, + { + "epoch": 0.11583924349881797, + "grad_norm": 1.1793773174285889, + "learning_rate": 9.808068376477415e-06, + "loss": 0.8131, + "step": 2254 + }, + { + "epoch": 0.1158906362421626, + "grad_norm": 1.0460063219070435, + "learning_rate": 9.807839933625003e-06, + "loss": 0.7302, + "step": 2255 + }, + { + "epoch": 0.11594202898550725, + "grad_norm": 1.1322252750396729, + "learning_rate": 9.807611357567185e-06, + "loss": 0.8225, + "step": 2256 + }, + { + "epoch": 0.11599342172885188, + "grad_norm": 1.0627758502960205, + "learning_rate": 9.807382648310298e-06, + "loss": 0.7657, + "step": 2257 + }, + { + "epoch": 0.11604481447219653, + "grad_norm": 1.160506248474121, + "learning_rate": 9.807153805860676e-06, + "loss": 0.7815, + "step": 2258 + }, + { + "epoch": 0.11609620721554116, + "grad_norm": 1.1657752990722656, + "learning_rate": 9.80692483022466e-06, + "loss": 0.8051, + "step": 2259 + }, + { + "epoch": 0.11614759995888581, + "grad_norm": 1.2565639019012451, + "learning_rate": 9.806695721408595e-06, + "loss": 0.8148, + "step": 2260 + }, + { + "epoch": 0.11619899270223044, + "grad_norm": 1.1335959434509277, + "learning_rate": 9.806466479418826e-06, + "loss": 0.8249, + "step": 2261 + }, + { + "epoch": 0.11625038544557509, + "grad_norm": 1.3181891441345215, + "learning_rate": 9.806237104261706e-06, + "loss": 0.877, + "step": 2262 + }, + { + "epoch": 0.11630177818891972, + "grad_norm": 1.1931564807891846, + "learning_rate": 9.806007595943593e-06, + "loss": 0.8316, + "step": 2263 + }, + { + "epoch": 0.11635317093226437, + "grad_norm": 0.9158135652542114, + "learning_rate": 9.805777954470839e-06, + "loss": 0.7474, + "step": 2264 + }, + { + "epoch": 0.116404563675609, + "grad_norm": 1.092392921447754, + "learning_rate": 9.80554817984981e-06, + "loss": 0.7904, + "step": 2265 + }, + { + "epoch": 0.11645595641895365, + "grad_norm": 1.1873093843460083, + "learning_rate": 9.805318272086874e-06, + "loss": 0.8432, + "step": 2266 + }, + { + "epoch": 0.11650734916229828, + "grad_norm": 1.1701726913452148, + "learning_rate": 9.8050882311884e-06, + "loss": 0.8385, + "step": 2267 + }, + { + "epoch": 0.11655874190564293, + "grad_norm": 1.1609998941421509, + "learning_rate": 9.804858057160758e-06, + "loss": 0.7997, + "step": 2268 + }, + { + "epoch": 0.11661013464898756, + "grad_norm": 1.1385737657546997, + "learning_rate": 9.804627750010329e-06, + "loss": 0.7694, + "step": 2269 + }, + { + "epoch": 0.11666152739233221, + "grad_norm": 1.19200599193573, + "learning_rate": 9.804397309743493e-06, + "loss": 0.8275, + "step": 2270 + }, + { + "epoch": 0.11671292013567684, + "grad_norm": 1.3303487300872803, + "learning_rate": 9.804166736366635e-06, + "loss": 0.8326, + "step": 2271 + }, + { + "epoch": 0.11676431287902148, + "grad_norm": 0.9244875311851501, + "learning_rate": 9.803936029886141e-06, + "loss": 0.7688, + "step": 2272 + }, + { + "epoch": 0.11681570562236612, + "grad_norm": 1.2007381916046143, + "learning_rate": 9.803705190308404e-06, + "loss": 0.8164, + "step": 2273 + }, + { + "epoch": 0.11686709836571076, + "grad_norm": 1.1963053941726685, + "learning_rate": 9.803474217639821e-06, + "loss": 0.8646, + "step": 2274 + }, + { + "epoch": 0.1169184911090554, + "grad_norm": 1.2735464572906494, + "learning_rate": 9.803243111886788e-06, + "loss": 0.8183, + "step": 2275 + }, + { + "epoch": 0.11696988385240004, + "grad_norm": 1.1631420850753784, + "learning_rate": 9.803011873055713e-06, + "loss": 0.8074, + "step": 2276 + }, + { + "epoch": 0.11702127659574468, + "grad_norm": 1.1905298233032227, + "learning_rate": 9.802780501152997e-06, + "loss": 0.8607, + "step": 2277 + }, + { + "epoch": 0.11707266933908932, + "grad_norm": 0.8752067685127258, + "learning_rate": 9.802548996185056e-06, + "loss": 0.7716, + "step": 2278 + }, + { + "epoch": 0.11712406208243396, + "grad_norm": 1.2684789896011353, + "learning_rate": 9.8023173581583e-06, + "loss": 0.8314, + "step": 2279 + }, + { + "epoch": 0.1171754548257786, + "grad_norm": 1.1342294216156006, + "learning_rate": 9.802085587079147e-06, + "loss": 0.749, + "step": 2280 + }, + { + "epoch": 0.11722684756912324, + "grad_norm": 1.1198534965515137, + "learning_rate": 9.80185368295402e-06, + "loss": 0.7954, + "step": 2281 + }, + { + "epoch": 0.11727824031246788, + "grad_norm": 1.0954395532608032, + "learning_rate": 9.801621645789346e-06, + "loss": 0.7939, + "step": 2282 + }, + { + "epoch": 0.11732963305581252, + "grad_norm": 1.151352882385254, + "learning_rate": 9.801389475591548e-06, + "loss": 0.8916, + "step": 2283 + }, + { + "epoch": 0.11738102579915716, + "grad_norm": 1.222489356994629, + "learning_rate": 9.801157172367064e-06, + "loss": 0.811, + "step": 2284 + }, + { + "epoch": 0.1174324185425018, + "grad_norm": 1.1428749561309814, + "learning_rate": 9.800924736122326e-06, + "loss": 0.8499, + "step": 2285 + }, + { + "epoch": 0.11748381128584644, + "grad_norm": 1.2226756811141968, + "learning_rate": 9.800692166863777e-06, + "loss": 0.8123, + "step": 2286 + }, + { + "epoch": 0.11753520402919108, + "grad_norm": 0.7957601547241211, + "learning_rate": 9.80045946459786e-06, + "loss": 0.7308, + "step": 2287 + }, + { + "epoch": 0.11758659677253572, + "grad_norm": 1.1141963005065918, + "learning_rate": 9.80022662933102e-06, + "loss": 0.7744, + "step": 2288 + }, + { + "epoch": 0.11763798951588036, + "grad_norm": 1.1813395023345947, + "learning_rate": 9.799993661069712e-06, + "loss": 0.7754, + "step": 2289 + }, + { + "epoch": 0.117689382259225, + "grad_norm": 1.1548686027526855, + "learning_rate": 9.799760559820385e-06, + "loss": 0.8755, + "step": 2290 + }, + { + "epoch": 0.11774077500256963, + "grad_norm": 0.8712412118911743, + "learning_rate": 9.799527325589503e-06, + "loss": 0.7138, + "step": 2291 + }, + { + "epoch": 0.11779216774591428, + "grad_norm": 1.1727826595306396, + "learning_rate": 9.799293958383523e-06, + "loss": 0.8311, + "step": 2292 + }, + { + "epoch": 0.11784356048925891, + "grad_norm": 1.1654831171035767, + "learning_rate": 9.799060458208913e-06, + "loss": 0.8075, + "step": 2293 + }, + { + "epoch": 0.11789495323260356, + "grad_norm": 1.1513378620147705, + "learning_rate": 9.798826825072144e-06, + "loss": 0.8108, + "step": 2294 + }, + { + "epoch": 0.11794634597594819, + "grad_norm": 1.213681936264038, + "learning_rate": 9.798593058979686e-06, + "loss": 0.9134, + "step": 2295 + }, + { + "epoch": 0.11799773871929284, + "grad_norm": 1.1846833229064941, + "learning_rate": 9.798359159938017e-06, + "loss": 0.7895, + "step": 2296 + }, + { + "epoch": 0.11804913146263747, + "grad_norm": 1.0933934450149536, + "learning_rate": 9.798125127953617e-06, + "loss": 0.836, + "step": 2297 + }, + { + "epoch": 0.11810052420598212, + "grad_norm": 1.159136414527893, + "learning_rate": 9.797890963032972e-06, + "loss": 0.8006, + "step": 2298 + }, + { + "epoch": 0.11815191694932675, + "grad_norm": 0.8138665556907654, + "learning_rate": 9.797656665182567e-06, + "loss": 0.7267, + "step": 2299 + }, + { + "epoch": 0.1182033096926714, + "grad_norm": 1.2907710075378418, + "learning_rate": 9.797422234408896e-06, + "loss": 0.7974, + "step": 2300 + }, + { + "epoch": 0.11825470243601603, + "grad_norm": 1.0957804918289185, + "learning_rate": 9.79718767071845e-06, + "loss": 0.8965, + "step": 2301 + }, + { + "epoch": 0.11830609517936068, + "grad_norm": 1.2075046300888062, + "learning_rate": 9.796952974117732e-06, + "loss": 0.8354, + "step": 2302 + }, + { + "epoch": 0.11835748792270531, + "grad_norm": 1.2988404035568237, + "learning_rate": 9.796718144613242e-06, + "loss": 0.8473, + "step": 2303 + }, + { + "epoch": 0.11840888066604996, + "grad_norm": 1.2331697940826416, + "learning_rate": 9.796483182211488e-06, + "loss": 0.8433, + "step": 2304 + }, + { + "epoch": 0.11846027340939459, + "grad_norm": 0.8854478001594543, + "learning_rate": 9.79624808691898e-06, + "loss": 0.7321, + "step": 2305 + }, + { + "epoch": 0.11851166615273924, + "grad_norm": 1.151158332824707, + "learning_rate": 9.79601285874223e-06, + "loss": 0.791, + "step": 2306 + }, + { + "epoch": 0.11856305889608387, + "grad_norm": 1.1608421802520752, + "learning_rate": 9.795777497687755e-06, + "loss": 0.8245, + "step": 2307 + }, + { + "epoch": 0.1186144516394285, + "grad_norm": 1.1874397993087769, + "learning_rate": 9.795542003762076e-06, + "loss": 0.7834, + "step": 2308 + }, + { + "epoch": 0.11866584438277315, + "grad_norm": 0.7717701196670532, + "learning_rate": 9.795306376971719e-06, + "loss": 0.7241, + "step": 2309 + }, + { + "epoch": 0.11871723712611779, + "grad_norm": 1.1602261066436768, + "learning_rate": 9.795070617323211e-06, + "loss": 0.8437, + "step": 2310 + }, + { + "epoch": 0.11876862986946243, + "grad_norm": 1.1960617303848267, + "learning_rate": 9.794834724823084e-06, + "loss": 0.8495, + "step": 2311 + }, + { + "epoch": 0.11882002261280707, + "grad_norm": 1.1137713193893433, + "learning_rate": 9.794598699477874e-06, + "loss": 0.7788, + "step": 2312 + }, + { + "epoch": 0.11887141535615171, + "grad_norm": 0.7985857129096985, + "learning_rate": 9.79436254129412e-06, + "loss": 0.6837, + "step": 2313 + }, + { + "epoch": 0.11892280809949635, + "grad_norm": 1.001046061515808, + "learning_rate": 9.794126250278366e-06, + "loss": 0.7176, + "step": 2314 + }, + { + "epoch": 0.118974200842841, + "grad_norm": 0.8436415791511536, + "learning_rate": 9.793889826437158e-06, + "loss": 0.7105, + "step": 2315 + }, + { + "epoch": 0.11902559358618563, + "grad_norm": 1.1825486421585083, + "learning_rate": 9.793653269777043e-06, + "loss": 0.8236, + "step": 2316 + }, + { + "epoch": 0.11907698632953027, + "grad_norm": 1.1977462768554688, + "learning_rate": 9.793416580304582e-06, + "loss": 0.8774, + "step": 2317 + }, + { + "epoch": 0.11912837907287491, + "grad_norm": 0.8693056702613831, + "learning_rate": 9.793179758026328e-06, + "loss": 0.6691, + "step": 2318 + }, + { + "epoch": 0.11917977181621955, + "grad_norm": 0.8027037978172302, + "learning_rate": 9.792942802948842e-06, + "loss": 0.6605, + "step": 2319 + }, + { + "epoch": 0.11923116455956419, + "grad_norm": 1.1929094791412354, + "learning_rate": 9.792705715078691e-06, + "loss": 0.7963, + "step": 2320 + }, + { + "epoch": 0.11928255730290883, + "grad_norm": 1.2470293045043945, + "learning_rate": 9.792468494422443e-06, + "loss": 0.8188, + "step": 2321 + }, + { + "epoch": 0.11933395004625347, + "grad_norm": 1.083120584487915, + "learning_rate": 9.79223114098667e-06, + "loss": 0.8888, + "step": 2322 + }, + { + "epoch": 0.11938534278959811, + "grad_norm": 0.7924959659576416, + "learning_rate": 9.791993654777949e-06, + "loss": 0.7239, + "step": 2323 + }, + { + "epoch": 0.11943673553294275, + "grad_norm": 1.1215131282806396, + "learning_rate": 9.791756035802858e-06, + "loss": 0.786, + "step": 2324 + }, + { + "epoch": 0.1194881282762874, + "grad_norm": 1.283737063407898, + "learning_rate": 9.791518284067983e-06, + "loss": 0.79, + "step": 2325 + }, + { + "epoch": 0.11953952101963203, + "grad_norm": 1.1455661058425903, + "learning_rate": 9.79128039957991e-06, + "loss": 0.8132, + "step": 2326 + }, + { + "epoch": 0.11959091376297666, + "grad_norm": 1.1607595682144165, + "learning_rate": 9.791042382345227e-06, + "loss": 0.7933, + "step": 2327 + }, + { + "epoch": 0.11964230650632131, + "grad_norm": 1.1484507322311401, + "learning_rate": 9.790804232370533e-06, + "loss": 0.7774, + "step": 2328 + }, + { + "epoch": 0.11969369924966594, + "grad_norm": 1.0853508710861206, + "learning_rate": 9.790565949662425e-06, + "loss": 0.7915, + "step": 2329 + }, + { + "epoch": 0.11974509199301059, + "grad_norm": 0.9143819212913513, + "learning_rate": 9.790327534227502e-06, + "loss": 0.7331, + "step": 2330 + }, + { + "epoch": 0.11979648473635522, + "grad_norm": 1.2360515594482422, + "learning_rate": 9.790088986072372e-06, + "loss": 0.7699, + "step": 2331 + }, + { + "epoch": 0.11984787747969987, + "grad_norm": 1.1892566680908203, + "learning_rate": 9.789850305203646e-06, + "loss": 0.8275, + "step": 2332 + }, + { + "epoch": 0.1198992702230445, + "grad_norm": 1.2155404090881348, + "learning_rate": 9.789611491627933e-06, + "loss": 0.7942, + "step": 2333 + }, + { + "epoch": 0.11995066296638915, + "grad_norm": 1.1557867527008057, + "learning_rate": 9.789372545351851e-06, + "loss": 0.8055, + "step": 2334 + }, + { + "epoch": 0.12000205570973378, + "grad_norm": 1.1441162824630737, + "learning_rate": 9.78913346638202e-06, + "loss": 0.8614, + "step": 2335 + }, + { + "epoch": 0.12005344845307843, + "grad_norm": 1.1550605297088623, + "learning_rate": 9.788894254725065e-06, + "loss": 0.799, + "step": 2336 + }, + { + "epoch": 0.12010484119642306, + "grad_norm": 1.201387643814087, + "learning_rate": 9.788654910387611e-06, + "loss": 0.8131, + "step": 2337 + }, + { + "epoch": 0.12015623393976771, + "grad_norm": 1.1856136322021484, + "learning_rate": 9.788415433376293e-06, + "loss": 0.8414, + "step": 2338 + }, + { + "epoch": 0.12020762668311234, + "grad_norm": 1.193785309791565, + "learning_rate": 9.788175823697744e-06, + "loss": 0.8332, + "step": 2339 + }, + { + "epoch": 0.12025901942645699, + "grad_norm": 0.8861750960350037, + "learning_rate": 9.787936081358602e-06, + "loss": 0.7092, + "step": 2340 + }, + { + "epoch": 0.12031041216980162, + "grad_norm": 0.7914197444915771, + "learning_rate": 9.78769620636551e-06, + "loss": 0.6724, + "step": 2341 + }, + { + "epoch": 0.12036180491314627, + "grad_norm": 1.2197389602661133, + "learning_rate": 9.787456198725114e-06, + "loss": 0.8405, + "step": 2342 + }, + { + "epoch": 0.1204131976564909, + "grad_norm": 1.2777796983718872, + "learning_rate": 9.787216058444063e-06, + "loss": 0.8338, + "step": 2343 + }, + { + "epoch": 0.12046459039983555, + "grad_norm": 1.1930240392684937, + "learning_rate": 9.78697578552901e-06, + "loss": 0.8247, + "step": 2344 + }, + { + "epoch": 0.12051598314318018, + "grad_norm": 1.2151107788085938, + "learning_rate": 9.786735379986613e-06, + "loss": 0.8665, + "step": 2345 + }, + { + "epoch": 0.12056737588652482, + "grad_norm": 0.7986428141593933, + "learning_rate": 9.786494841823534e-06, + "loss": 0.7414, + "step": 2346 + }, + { + "epoch": 0.12061876862986946, + "grad_norm": 1.2018343210220337, + "learning_rate": 9.786254171046434e-06, + "loss": 0.829, + "step": 2347 + }, + { + "epoch": 0.1206701613732141, + "grad_norm": 1.1624906063079834, + "learning_rate": 9.786013367661982e-06, + "loss": 0.8097, + "step": 2348 + }, + { + "epoch": 0.12072155411655874, + "grad_norm": 1.1820101737976074, + "learning_rate": 9.785772431676852e-06, + "loss": 0.8393, + "step": 2349 + }, + { + "epoch": 0.12077294685990338, + "grad_norm": 1.219596028327942, + "learning_rate": 9.785531363097718e-06, + "loss": 0.8454, + "step": 2350 + }, + { + "epoch": 0.12082433960324802, + "grad_norm": 1.1194829940795898, + "learning_rate": 9.785290161931256e-06, + "loss": 0.8882, + "step": 2351 + }, + { + "epoch": 0.12087573234659266, + "grad_norm": 1.1620408296585083, + "learning_rate": 9.785048828184153e-06, + "loss": 0.849, + "step": 2352 + }, + { + "epoch": 0.1209271250899373, + "grad_norm": 1.1587157249450684, + "learning_rate": 9.784807361863094e-06, + "loss": 0.8306, + "step": 2353 + }, + { + "epoch": 0.12097851783328194, + "grad_norm": 1.1986109018325806, + "learning_rate": 9.784565762974768e-06, + "loss": 0.7667, + "step": 2354 + }, + { + "epoch": 0.12102991057662658, + "grad_norm": 1.1149792671203613, + "learning_rate": 9.78432403152587e-06, + "loss": 0.7846, + "step": 2355 + }, + { + "epoch": 0.12108130331997122, + "grad_norm": 1.1920340061187744, + "learning_rate": 9.784082167523097e-06, + "loss": 0.8115, + "step": 2356 + }, + { + "epoch": 0.12113269606331586, + "grad_norm": 1.2083313465118408, + "learning_rate": 9.783840170973149e-06, + "loss": 0.8229, + "step": 2357 + }, + { + "epoch": 0.1211840888066605, + "grad_norm": 0.8737711310386658, + "learning_rate": 9.783598041882732e-06, + "loss": 0.7323, + "step": 2358 + }, + { + "epoch": 0.12123548155000514, + "grad_norm": 1.077492594718933, + "learning_rate": 9.783355780258554e-06, + "loss": 0.779, + "step": 2359 + }, + { + "epoch": 0.12128687429334978, + "grad_norm": 0.8473538756370544, + "learning_rate": 9.783113386107328e-06, + "loss": 0.7365, + "step": 2360 + }, + { + "epoch": 0.12133826703669442, + "grad_norm": 1.2189054489135742, + "learning_rate": 9.782870859435768e-06, + "loss": 0.8042, + "step": 2361 + }, + { + "epoch": 0.12138965978003906, + "grad_norm": 1.0938615798950195, + "learning_rate": 9.782628200250595e-06, + "loss": 0.8019, + "step": 2362 + }, + { + "epoch": 0.1214410525233837, + "grad_norm": 1.0917021036148071, + "learning_rate": 9.78238540855853e-06, + "loss": 0.773, + "step": 2363 + }, + { + "epoch": 0.12149244526672834, + "grad_norm": 1.108105182647705, + "learning_rate": 9.782142484366301e-06, + "loss": 0.8194, + "step": 2364 + }, + { + "epoch": 0.12154383801007297, + "grad_norm": 1.1767961978912354, + "learning_rate": 9.78189942768064e-06, + "loss": 0.8348, + "step": 2365 + }, + { + "epoch": 0.12159523075341762, + "grad_norm": 1.179188847541809, + "learning_rate": 9.781656238508279e-06, + "loss": 0.8172, + "step": 2366 + }, + { + "epoch": 0.12164662349676225, + "grad_norm": 1.1754181385040283, + "learning_rate": 9.781412916855954e-06, + "loss": 0.8896, + "step": 2367 + }, + { + "epoch": 0.1216980162401069, + "grad_norm": 1.1560802459716797, + "learning_rate": 9.781169462730412e-06, + "loss": 0.7877, + "step": 2368 + }, + { + "epoch": 0.12174940898345153, + "grad_norm": 1.171472191810608, + "learning_rate": 9.780925876138393e-06, + "loss": 0.8424, + "step": 2369 + }, + { + "epoch": 0.12180080172679618, + "grad_norm": 1.1786916255950928, + "learning_rate": 9.78068215708665e-06, + "loss": 0.8786, + "step": 2370 + }, + { + "epoch": 0.12185219447014081, + "grad_norm": 0.7980872392654419, + "learning_rate": 9.780438305581931e-06, + "loss": 0.7392, + "step": 2371 + }, + { + "epoch": 0.12190358721348546, + "grad_norm": 1.3483103513717651, + "learning_rate": 9.780194321630996e-06, + "loss": 0.7793, + "step": 2372 + }, + { + "epoch": 0.12195497995683009, + "grad_norm": 0.9097517132759094, + "learning_rate": 9.7799502052406e-06, + "loss": 0.7196, + "step": 2373 + }, + { + "epoch": 0.12200637270017474, + "grad_norm": 0.7508184313774109, + "learning_rate": 9.77970595641751e-06, + "loss": 0.7666, + "step": 2374 + }, + { + "epoch": 0.12205776544351937, + "grad_norm": 1.1788885593414307, + "learning_rate": 9.779461575168497e-06, + "loss": 0.7951, + "step": 2375 + }, + { + "epoch": 0.12210915818686402, + "grad_norm": 1.1702628135681152, + "learning_rate": 9.779217061500324e-06, + "loss": 0.831, + "step": 2376 + }, + { + "epoch": 0.12216055093020865, + "grad_norm": 0.8389960527420044, + "learning_rate": 9.778972415419768e-06, + "loss": 0.7138, + "step": 2377 + }, + { + "epoch": 0.1222119436735533, + "grad_norm": 1.1207269430160522, + "learning_rate": 9.77872763693361e-06, + "loss": 0.7974, + "step": 2378 + }, + { + "epoch": 0.12226333641689793, + "grad_norm": 0.7351743578910828, + "learning_rate": 9.77848272604863e-06, + "loss": 0.7194, + "step": 2379 + }, + { + "epoch": 0.12231472916024258, + "grad_norm": 1.4855936765670776, + "learning_rate": 9.778237682771612e-06, + "loss": 0.7872, + "step": 2380 + }, + { + "epoch": 0.12236612190358721, + "grad_norm": 1.1282931566238403, + "learning_rate": 9.777992507109345e-06, + "loss": 0.8238, + "step": 2381 + }, + { + "epoch": 0.12241751464693185, + "grad_norm": 1.102705717086792, + "learning_rate": 9.777747199068626e-06, + "loss": 0.8485, + "step": 2382 + }, + { + "epoch": 0.1224689073902765, + "grad_norm": 1.090747594833374, + "learning_rate": 9.777501758656249e-06, + "loss": 0.781, + "step": 2383 + }, + { + "epoch": 0.12252030013362113, + "grad_norm": 1.1472095251083374, + "learning_rate": 9.777256185879012e-06, + "loss": 0.7966, + "step": 2384 + }, + { + "epoch": 0.12257169287696577, + "grad_norm": 1.0857065916061401, + "learning_rate": 9.77701048074372e-06, + "loss": 0.7938, + "step": 2385 + }, + { + "epoch": 0.1226230856203104, + "grad_norm": 1.1367560625076294, + "learning_rate": 9.776764643257184e-06, + "loss": 0.802, + "step": 2386 + }, + { + "epoch": 0.12267447836365505, + "grad_norm": 1.0472407341003418, + "learning_rate": 9.77651867342621e-06, + "loss": 0.825, + "step": 2387 + }, + { + "epoch": 0.12272587110699969, + "grad_norm": 1.086228609085083, + "learning_rate": 9.776272571257617e-06, + "loss": 0.8402, + "step": 2388 + }, + { + "epoch": 0.12277726385034433, + "grad_norm": 1.2209358215332031, + "learning_rate": 9.77602633675822e-06, + "loss": 0.8282, + "step": 2389 + }, + { + "epoch": 0.12282865659368897, + "grad_norm": 1.1555324792861938, + "learning_rate": 9.775779969934842e-06, + "loss": 0.8823, + "step": 2390 + }, + { + "epoch": 0.12288004933703361, + "grad_norm": 1.0583807229995728, + "learning_rate": 9.775533470794312e-06, + "loss": 0.6895, + "step": 2391 + }, + { + "epoch": 0.12293144208037825, + "grad_norm": 1.1415427923202515, + "learning_rate": 9.775286839343456e-06, + "loss": 0.7884, + "step": 2392 + }, + { + "epoch": 0.1229828348237229, + "grad_norm": 1.2491494417190552, + "learning_rate": 9.775040075589107e-06, + "loss": 0.8348, + "step": 2393 + }, + { + "epoch": 0.12303422756706753, + "grad_norm": 1.210504174232483, + "learning_rate": 9.774793179538104e-06, + "loss": 0.9094, + "step": 2394 + }, + { + "epoch": 0.12308562031041217, + "grad_norm": 1.1285442113876343, + "learning_rate": 9.774546151197285e-06, + "loss": 0.7987, + "step": 2395 + }, + { + "epoch": 0.12313701305375681, + "grad_norm": 1.1042815446853638, + "learning_rate": 9.774298990573499e-06, + "loss": 0.7701, + "step": 2396 + }, + { + "epoch": 0.12318840579710146, + "grad_norm": 1.1584590673446655, + "learning_rate": 9.774051697673587e-06, + "loss": 0.8272, + "step": 2397 + }, + { + "epoch": 0.12323979854044609, + "grad_norm": 1.1461942195892334, + "learning_rate": 9.773804272504404e-06, + "loss": 0.8311, + "step": 2398 + }, + { + "epoch": 0.12329119128379074, + "grad_norm": 1.1121602058410645, + "learning_rate": 9.773556715072806e-06, + "loss": 0.8718, + "step": 2399 + }, + { + "epoch": 0.12334258402713537, + "grad_norm": 0.8707857728004456, + "learning_rate": 9.773309025385652e-06, + "loss": 0.7157, + "step": 2400 + }, + { + "epoch": 0.12339397677048, + "grad_norm": 1.0973265171051025, + "learning_rate": 9.7730612034498e-06, + "loss": 0.8048, + "step": 2401 + }, + { + "epoch": 0.12344536951382465, + "grad_norm": 1.1713505983352661, + "learning_rate": 9.772813249272121e-06, + "loss": 0.8723, + "step": 2402 + }, + { + "epoch": 0.12349676225716928, + "grad_norm": 1.1715407371520996, + "learning_rate": 9.772565162859483e-06, + "loss": 0.8259, + "step": 2403 + }, + { + "epoch": 0.12354815500051393, + "grad_norm": 1.0965425968170166, + "learning_rate": 9.77231694421876e-06, + "loss": 0.8943, + "step": 2404 + }, + { + "epoch": 0.12359954774385856, + "grad_norm": 1.1724035739898682, + "learning_rate": 9.772068593356829e-06, + "loss": 0.8144, + "step": 2405 + }, + { + "epoch": 0.12365094048720321, + "grad_norm": 1.1090623140335083, + "learning_rate": 9.77182011028057e-06, + "loss": 0.8161, + "step": 2406 + }, + { + "epoch": 0.12370233323054784, + "grad_norm": 1.2380086183547974, + "learning_rate": 9.771571494996866e-06, + "loss": 0.9177, + "step": 2407 + }, + { + "epoch": 0.12375372597389249, + "grad_norm": 1.1686373949050903, + "learning_rate": 9.77132274751261e-06, + "loss": 0.8012, + "step": 2408 + }, + { + "epoch": 0.12380511871723712, + "grad_norm": 1.174485206604004, + "learning_rate": 9.771073867834689e-06, + "loss": 0.7706, + "step": 2409 + }, + { + "epoch": 0.12385651146058177, + "grad_norm": 1.1513538360595703, + "learning_rate": 9.77082485597e-06, + "loss": 0.8249, + "step": 2410 + }, + { + "epoch": 0.1239079042039264, + "grad_norm": 1.2050405740737915, + "learning_rate": 9.770575711925443e-06, + "loss": 0.8024, + "step": 2411 + }, + { + "epoch": 0.12395929694727105, + "grad_norm": 1.0723958015441895, + "learning_rate": 9.77032643570792e-06, + "loss": 0.821, + "step": 2412 + }, + { + "epoch": 0.12401068969061568, + "grad_norm": 1.1408740282058716, + "learning_rate": 9.770077027324338e-06, + "loss": 0.8116, + "step": 2413 + }, + { + "epoch": 0.12406208243396033, + "grad_norm": 1.134290099143982, + "learning_rate": 9.769827486781605e-06, + "loss": 0.7669, + "step": 2414 + }, + { + "epoch": 0.12411347517730496, + "grad_norm": 1.1239155530929565, + "learning_rate": 9.769577814086638e-06, + "loss": 0.8213, + "step": 2415 + }, + { + "epoch": 0.12416486792064961, + "grad_norm": 1.100677728652954, + "learning_rate": 9.76932800924635e-06, + "loss": 0.8276, + "step": 2416 + }, + { + "epoch": 0.12421626066399424, + "grad_norm": 0.9586683511734009, + "learning_rate": 9.769078072267668e-06, + "loss": 0.6942, + "step": 2417 + }, + { + "epoch": 0.12426765340733889, + "grad_norm": 1.1630403995513916, + "learning_rate": 9.76882800315751e-06, + "loss": 0.8387, + "step": 2418 + }, + { + "epoch": 0.12431904615068352, + "grad_norm": 1.1736809015274048, + "learning_rate": 9.76857780192281e-06, + "loss": 0.849, + "step": 2419 + }, + { + "epoch": 0.12437043889402816, + "grad_norm": 1.1257340908050537, + "learning_rate": 9.768327468570498e-06, + "loss": 0.8389, + "step": 2420 + }, + { + "epoch": 0.1244218316373728, + "grad_norm": 0.8188894391059875, + "learning_rate": 9.76807700310751e-06, + "loss": 0.6977, + "step": 2421 + }, + { + "epoch": 0.12447322438071744, + "grad_norm": 1.1124860048294067, + "learning_rate": 9.767826405540784e-06, + "loss": 0.8248, + "step": 2422 + }, + { + "epoch": 0.12452461712406208, + "grad_norm": 1.0898975133895874, + "learning_rate": 9.767575675877263e-06, + "loss": 0.7911, + "step": 2423 + }, + { + "epoch": 0.12457600986740672, + "grad_norm": 1.0893003940582275, + "learning_rate": 9.767324814123896e-06, + "loss": 0.8442, + "step": 2424 + }, + { + "epoch": 0.12462740261075136, + "grad_norm": 1.3539092540740967, + "learning_rate": 9.76707382028763e-06, + "loss": 0.8346, + "step": 2425 + }, + { + "epoch": 0.124678795354096, + "grad_norm": 1.19283926486969, + "learning_rate": 9.766822694375424e-06, + "loss": 0.8191, + "step": 2426 + }, + { + "epoch": 0.12473018809744064, + "grad_norm": 1.1377184391021729, + "learning_rate": 9.766571436394232e-06, + "loss": 0.8725, + "step": 2427 + }, + { + "epoch": 0.12478158084078528, + "grad_norm": 1.1395691633224487, + "learning_rate": 9.766320046351015e-06, + "loss": 0.8504, + "step": 2428 + }, + { + "epoch": 0.12483297358412992, + "grad_norm": 1.0948843955993652, + "learning_rate": 9.766068524252739e-06, + "loss": 0.8183, + "step": 2429 + }, + { + "epoch": 0.12488436632747456, + "grad_norm": 1.196698546409607, + "learning_rate": 9.765816870106373e-06, + "loss": 0.8508, + "step": 2430 + }, + { + "epoch": 0.1249357590708192, + "grad_norm": 1.1633023023605347, + "learning_rate": 9.765565083918889e-06, + "loss": 0.8811, + "step": 2431 + }, + { + "epoch": 0.12498715181416384, + "grad_norm": 0.9639090299606323, + "learning_rate": 9.765313165697263e-06, + "loss": 0.6867, + "step": 2432 + }, + { + "epoch": 0.12503854455750849, + "grad_norm": 1.1229802370071411, + "learning_rate": 9.765061115448474e-06, + "loss": 0.8122, + "step": 2433 + }, + { + "epoch": 0.12508993730085313, + "grad_norm": 0.7699605226516724, + "learning_rate": 9.764808933179505e-06, + "loss": 0.7129, + "step": 2434 + }, + { + "epoch": 0.12514133004419775, + "grad_norm": 1.1316336393356323, + "learning_rate": 9.764556618897345e-06, + "loss": 0.8039, + "step": 2435 + }, + { + "epoch": 0.1251927227875424, + "grad_norm": 0.7841988205909729, + "learning_rate": 9.764304172608982e-06, + "loss": 0.7109, + "step": 2436 + }, + { + "epoch": 0.12524411553088705, + "grad_norm": 1.1598159074783325, + "learning_rate": 9.764051594321412e-06, + "loss": 0.8732, + "step": 2437 + }, + { + "epoch": 0.12529550827423167, + "grad_norm": 1.162996768951416, + "learning_rate": 9.763798884041634e-06, + "loss": 0.7986, + "step": 2438 + }, + { + "epoch": 0.1253469010175763, + "grad_norm": 1.1298846006393433, + "learning_rate": 9.763546041776646e-06, + "loss": 0.7717, + "step": 2439 + }, + { + "epoch": 0.12539829376092096, + "grad_norm": 1.1655163764953613, + "learning_rate": 9.763293067533455e-06, + "loss": 0.7986, + "step": 2440 + }, + { + "epoch": 0.1254496865042656, + "grad_norm": 1.1782069206237793, + "learning_rate": 9.763039961319072e-06, + "loss": 0.7931, + "step": 2441 + }, + { + "epoch": 0.12550107924761023, + "grad_norm": 1.1473352909088135, + "learning_rate": 9.762786723140505e-06, + "loss": 0.7973, + "step": 2442 + }, + { + "epoch": 0.12555247199095487, + "grad_norm": 0.9067972898483276, + "learning_rate": 9.762533353004774e-06, + "loss": 0.6754, + "step": 2443 + }, + { + "epoch": 0.12560386473429952, + "grad_norm": 0.966131329536438, + "learning_rate": 9.7622798509189e-06, + "loss": 0.7291, + "step": 2444 + }, + { + "epoch": 0.12565525747764417, + "grad_norm": 1.2590121030807495, + "learning_rate": 9.7620262168899e-06, + "loss": 0.8821, + "step": 2445 + }, + { + "epoch": 0.12570665022098879, + "grad_norm": 1.1836737394332886, + "learning_rate": 9.761772450924807e-06, + "loss": 0.7902, + "step": 2446 + }, + { + "epoch": 0.12575804296433343, + "grad_norm": 1.1875131130218506, + "learning_rate": 9.761518553030651e-06, + "loss": 0.7986, + "step": 2447 + }, + { + "epoch": 0.12580943570767808, + "grad_norm": 0.8529171943664551, + "learning_rate": 9.761264523214465e-06, + "loss": 0.6867, + "step": 2448 + }, + { + "epoch": 0.12586082845102273, + "grad_norm": 1.3119860887527466, + "learning_rate": 9.761010361483287e-06, + "loss": 0.8208, + "step": 2449 + }, + { + "epoch": 0.12591222119436735, + "grad_norm": 1.1417913436889648, + "learning_rate": 9.760756067844159e-06, + "loss": 0.8314, + "step": 2450 + }, + { + "epoch": 0.125963613937712, + "grad_norm": 1.2178484201431274, + "learning_rate": 9.760501642304127e-06, + "loss": 0.8432, + "step": 2451 + }, + { + "epoch": 0.12601500668105664, + "grad_norm": 1.1822431087493896, + "learning_rate": 9.760247084870242e-06, + "loss": 0.8576, + "step": 2452 + }, + { + "epoch": 0.1260663994244013, + "grad_norm": 1.1628656387329102, + "learning_rate": 9.759992395549553e-06, + "loss": 0.8169, + "step": 2453 + }, + { + "epoch": 0.1261177921677459, + "grad_norm": 1.1693353652954102, + "learning_rate": 9.759737574349118e-06, + "loss": 0.8416, + "step": 2454 + }, + { + "epoch": 0.12616918491109055, + "grad_norm": 0.8510347008705139, + "learning_rate": 9.759482621275998e-06, + "loss": 0.7307, + "step": 2455 + }, + { + "epoch": 0.1262205776544352, + "grad_norm": 1.1376551389694214, + "learning_rate": 9.759227536337254e-06, + "loss": 0.8301, + "step": 2456 + }, + { + "epoch": 0.12627197039777982, + "grad_norm": 1.2361568212509155, + "learning_rate": 9.758972319539957e-06, + "loss": 0.7843, + "step": 2457 + }, + { + "epoch": 0.12632336314112447, + "grad_norm": 0.7573777437210083, + "learning_rate": 9.758716970891174e-06, + "loss": 0.7186, + "step": 2458 + }, + { + "epoch": 0.12637475588446911, + "grad_norm": 0.7261207103729248, + "learning_rate": 9.758461490397983e-06, + "loss": 0.7094, + "step": 2459 + }, + { + "epoch": 0.12642614862781376, + "grad_norm": 1.0854101181030273, + "learning_rate": 9.758205878067461e-06, + "loss": 0.8166, + "step": 2460 + }, + { + "epoch": 0.12647754137115838, + "grad_norm": 1.09835684299469, + "learning_rate": 9.75795013390669e-06, + "loss": 0.8159, + "step": 2461 + }, + { + "epoch": 0.12652893411450303, + "grad_norm": 0.9098944067955017, + "learning_rate": 9.757694257922757e-06, + "loss": 0.7334, + "step": 2462 + }, + { + "epoch": 0.12658032685784767, + "grad_norm": 0.9366123080253601, + "learning_rate": 9.757438250122749e-06, + "loss": 0.7116, + "step": 2463 + }, + { + "epoch": 0.12663171960119232, + "grad_norm": 1.224448561668396, + "learning_rate": 9.75718211051376e-06, + "loss": 0.8677, + "step": 2464 + }, + { + "epoch": 0.12668311234453694, + "grad_norm": 1.0930780172348022, + "learning_rate": 9.756925839102886e-06, + "loss": 0.7819, + "step": 2465 + }, + { + "epoch": 0.1267345050878816, + "grad_norm": 1.092489242553711, + "learning_rate": 9.756669435897228e-06, + "loss": 0.7784, + "step": 2466 + }, + { + "epoch": 0.12678589783122624, + "grad_norm": 1.129995584487915, + "learning_rate": 9.75641290090389e-06, + "loss": 0.8004, + "step": 2467 + }, + { + "epoch": 0.12683729057457088, + "grad_norm": 1.194913387298584, + "learning_rate": 9.756156234129979e-06, + "loss": 0.8121, + "step": 2468 + }, + { + "epoch": 0.1268886833179155, + "grad_norm": 1.1954747438430786, + "learning_rate": 9.755899435582605e-06, + "loss": 0.8519, + "step": 2469 + }, + { + "epoch": 0.12694007606126015, + "grad_norm": 1.1264718770980835, + "learning_rate": 9.755642505268885e-06, + "loss": 0.8498, + "step": 2470 + }, + { + "epoch": 0.1269914688046048, + "grad_norm": 1.1738253831863403, + "learning_rate": 9.755385443195938e-06, + "loss": 0.8364, + "step": 2471 + }, + { + "epoch": 0.12704286154794944, + "grad_norm": 1.0835410356521606, + "learning_rate": 9.755128249370881e-06, + "loss": 0.7762, + "step": 2472 + }, + { + "epoch": 0.12709425429129406, + "grad_norm": 1.0523349046707153, + "learning_rate": 9.754870923800846e-06, + "loss": 0.7709, + "step": 2473 + }, + { + "epoch": 0.1271456470346387, + "grad_norm": 1.088438630104065, + "learning_rate": 9.75461346649296e-06, + "loss": 0.7829, + "step": 2474 + }, + { + "epoch": 0.12719703977798336, + "grad_norm": 1.112038493156433, + "learning_rate": 9.754355877454355e-06, + "loss": 0.7755, + "step": 2475 + }, + { + "epoch": 0.12724843252132798, + "grad_norm": 1.238457441329956, + "learning_rate": 9.75409815669217e-06, + "loss": 0.8486, + "step": 2476 + }, + { + "epoch": 0.12729982526467262, + "grad_norm": 1.1355949640274048, + "learning_rate": 9.753840304213544e-06, + "loss": 0.8167, + "step": 2477 + }, + { + "epoch": 0.12735121800801727, + "grad_norm": 1.1541801691055298, + "learning_rate": 9.753582320025619e-06, + "loss": 0.786, + "step": 2478 + }, + { + "epoch": 0.12740261075136192, + "grad_norm": 1.1330422163009644, + "learning_rate": 9.753324204135548e-06, + "loss": 0.8133, + "step": 2479 + }, + { + "epoch": 0.12745400349470654, + "grad_norm": 1.1421079635620117, + "learning_rate": 9.753065956550476e-06, + "loss": 0.8388, + "step": 2480 + }, + { + "epoch": 0.12750539623805118, + "grad_norm": 1.1337547302246094, + "learning_rate": 9.752807577277563e-06, + "loss": 0.8134, + "step": 2481 + }, + { + "epoch": 0.12755678898139583, + "grad_norm": 1.1110343933105469, + "learning_rate": 9.752549066323965e-06, + "loss": 0.8118, + "step": 2482 + }, + { + "epoch": 0.12760818172474048, + "grad_norm": 1.16692316532135, + "learning_rate": 9.752290423696843e-06, + "loss": 0.8378, + "step": 2483 + }, + { + "epoch": 0.1276595744680851, + "grad_norm": 1.1433643102645874, + "learning_rate": 9.752031649403368e-06, + "loss": 0.8659, + "step": 2484 + }, + { + "epoch": 0.12771096721142974, + "grad_norm": 1.1853506565093994, + "learning_rate": 9.751772743450707e-06, + "loss": 0.8415, + "step": 2485 + }, + { + "epoch": 0.1277623599547744, + "grad_norm": 1.1135592460632324, + "learning_rate": 9.751513705846032e-06, + "loss": 0.8629, + "step": 2486 + }, + { + "epoch": 0.12781375269811904, + "grad_norm": 1.139089822769165, + "learning_rate": 9.751254536596519e-06, + "loss": 0.8387, + "step": 2487 + }, + { + "epoch": 0.12786514544146366, + "grad_norm": 1.2364814281463623, + "learning_rate": 9.75099523570935e-06, + "loss": 0.8424, + "step": 2488 + }, + { + "epoch": 0.1279165381848083, + "grad_norm": 1.1059569120407104, + "learning_rate": 9.75073580319171e-06, + "loss": 0.8593, + "step": 2489 + }, + { + "epoch": 0.12796793092815295, + "grad_norm": 0.875663697719574, + "learning_rate": 9.750476239050786e-06, + "loss": 0.6782, + "step": 2490 + }, + { + "epoch": 0.1280193236714976, + "grad_norm": 1.1016995906829834, + "learning_rate": 9.750216543293769e-06, + "loss": 0.8821, + "step": 2491 + }, + { + "epoch": 0.12807071641484222, + "grad_norm": 1.1005722284317017, + "learning_rate": 9.749956715927856e-06, + "loss": 0.8115, + "step": 2492 + }, + { + "epoch": 0.12812210915818686, + "grad_norm": 1.2815628051757812, + "learning_rate": 9.749696756960243e-06, + "loss": 0.8868, + "step": 2493 + }, + { + "epoch": 0.1281735019015315, + "grad_norm": 1.095913290977478, + "learning_rate": 9.749436666398135e-06, + "loss": 0.8262, + "step": 2494 + }, + { + "epoch": 0.12822489464487613, + "grad_norm": 1.1588845252990723, + "learning_rate": 9.749176444248734e-06, + "loss": 0.8031, + "step": 2495 + }, + { + "epoch": 0.12827628738822078, + "grad_norm": 1.123256802558899, + "learning_rate": 9.748916090519256e-06, + "loss": 0.7933, + "step": 2496 + }, + { + "epoch": 0.12832768013156542, + "grad_norm": 1.1677677631378174, + "learning_rate": 9.748655605216908e-06, + "loss": 0.8397, + "step": 2497 + }, + { + "epoch": 0.12837907287491007, + "grad_norm": 1.1567836999893188, + "learning_rate": 9.74839498834891e-06, + "loss": 0.8054, + "step": 2498 + }, + { + "epoch": 0.1284304656182547, + "grad_norm": 1.0221271514892578, + "learning_rate": 9.748134239922484e-06, + "loss": 0.801, + "step": 2499 + }, + { + "epoch": 0.12848185836159934, + "grad_norm": 1.0962082147598267, + "learning_rate": 9.747873359944852e-06, + "loss": 0.786, + "step": 2500 + }, + { + "epoch": 0.12853325110494399, + "grad_norm": 1.1158311367034912, + "learning_rate": 9.747612348423241e-06, + "loss": 0.8168, + "step": 2501 + }, + { + "epoch": 0.12858464384828863, + "grad_norm": 1.304885983467102, + "learning_rate": 9.747351205364885e-06, + "loss": 0.7765, + "step": 2502 + }, + { + "epoch": 0.12863603659163325, + "grad_norm": 1.1309278011322021, + "learning_rate": 9.74708993077702e-06, + "loss": 0.8041, + "step": 2503 + }, + { + "epoch": 0.1286874293349779, + "grad_norm": 0.8903225064277649, + "learning_rate": 9.746828524666881e-06, + "loss": 0.7539, + "step": 2504 + }, + { + "epoch": 0.12873882207832255, + "grad_norm": 1.125013828277588, + "learning_rate": 9.746566987041713e-06, + "loss": 0.8249, + "step": 2505 + }, + { + "epoch": 0.1287902148216672, + "grad_norm": 0.802054762840271, + "learning_rate": 9.746305317908762e-06, + "loss": 0.6945, + "step": 2506 + }, + { + "epoch": 0.1288416075650118, + "grad_norm": 1.1441078186035156, + "learning_rate": 9.746043517275278e-06, + "loss": 0.8347, + "step": 2507 + }, + { + "epoch": 0.12889300030835646, + "grad_norm": 1.0917900800704956, + "learning_rate": 9.745781585148512e-06, + "loss": 0.8145, + "step": 2508 + }, + { + "epoch": 0.1289443930517011, + "grad_norm": 1.1471260786056519, + "learning_rate": 9.745519521535725e-06, + "loss": 0.8185, + "step": 2509 + }, + { + "epoch": 0.12899578579504575, + "grad_norm": 1.1653029918670654, + "learning_rate": 9.745257326444176e-06, + "loss": 0.8946, + "step": 2510 + }, + { + "epoch": 0.12904717853839037, + "grad_norm": 1.110302209854126, + "learning_rate": 9.744994999881128e-06, + "loss": 0.8134, + "step": 2511 + }, + { + "epoch": 0.12909857128173502, + "grad_norm": 8.557238578796387, + "learning_rate": 9.74473254185385e-06, + "loss": 0.8255, + "step": 2512 + }, + { + "epoch": 0.12914996402507967, + "grad_norm": 1.25217866897583, + "learning_rate": 9.744469952369614e-06, + "loss": 0.8471, + "step": 2513 + }, + { + "epoch": 0.12920135676842429, + "grad_norm": 1.1042636632919312, + "learning_rate": 9.744207231435695e-06, + "loss": 0.7905, + "step": 2514 + }, + { + "epoch": 0.12925274951176893, + "grad_norm": 0.9154908061027527, + "learning_rate": 9.743944379059369e-06, + "loss": 0.728, + "step": 2515 + }, + { + "epoch": 0.12930414225511358, + "grad_norm": 1.1178827285766602, + "learning_rate": 9.743681395247924e-06, + "loss": 0.8358, + "step": 2516 + }, + { + "epoch": 0.12935553499845823, + "grad_norm": 1.1692816019058228, + "learning_rate": 9.743418280008643e-06, + "loss": 0.894, + "step": 2517 + }, + { + "epoch": 0.12940692774180285, + "grad_norm": 1.2138783931732178, + "learning_rate": 9.743155033348817e-06, + "loss": 0.8147, + "step": 2518 + }, + { + "epoch": 0.1294583204851475, + "grad_norm": 0.7936961650848389, + "learning_rate": 9.742891655275738e-06, + "loss": 0.7215, + "step": 2519 + }, + { + "epoch": 0.12950971322849214, + "grad_norm": 1.1221630573272705, + "learning_rate": 9.742628145796704e-06, + "loss": 0.8303, + "step": 2520 + }, + { + "epoch": 0.1295611059718368, + "grad_norm": 1.1911793947219849, + "learning_rate": 9.742364504919018e-06, + "loss": 0.8082, + "step": 2521 + }, + { + "epoch": 0.1296124987151814, + "grad_norm": 1.143926739692688, + "learning_rate": 9.74210073264998e-06, + "loss": 0.7922, + "step": 2522 + }, + { + "epoch": 0.12966389145852605, + "grad_norm": 1.122007131576538, + "learning_rate": 9.7418368289969e-06, + "loss": 0.8126, + "step": 2523 + }, + { + "epoch": 0.1297152842018707, + "grad_norm": 10.368847846984863, + "learning_rate": 9.74157279396709e-06, + "loss": 1.077, + "step": 2524 + }, + { + "epoch": 0.12976667694521535, + "grad_norm": 1.2167574167251587, + "learning_rate": 9.741308627567866e-06, + "loss": 0.8361, + "step": 2525 + }, + { + "epoch": 0.12981806968855997, + "grad_norm": 1.0986183881759644, + "learning_rate": 9.741044329806546e-06, + "loss": 0.7848, + "step": 2526 + }, + { + "epoch": 0.12986946243190461, + "grad_norm": 1.1548548936843872, + "learning_rate": 9.74077990069045e-06, + "loss": 0.8285, + "step": 2527 + }, + { + "epoch": 0.12992085517524926, + "grad_norm": 1.1838788986206055, + "learning_rate": 9.740515340226911e-06, + "loss": 0.8044, + "step": 2528 + }, + { + "epoch": 0.1299722479185939, + "grad_norm": 1.1177500486373901, + "learning_rate": 9.740250648423252e-06, + "loss": 0.7872, + "step": 2529 + }, + { + "epoch": 0.13002364066193853, + "grad_norm": 1.1367467641830444, + "learning_rate": 9.739985825286813e-06, + "loss": 0.6893, + "step": 2530 + }, + { + "epoch": 0.13007503340528317, + "grad_norm": 1.612687587738037, + "learning_rate": 9.739720870824924e-06, + "loss": 0.8172, + "step": 2531 + }, + { + "epoch": 0.13012642614862782, + "grad_norm": 1.122273564338684, + "learning_rate": 9.73945578504493e-06, + "loss": 0.783, + "step": 2532 + }, + { + "epoch": 0.13017781889197244, + "grad_norm": 0.9774260520935059, + "learning_rate": 9.739190567954176e-06, + "loss": 0.701, + "step": 2533 + }, + { + "epoch": 0.1302292116353171, + "grad_norm": 1.2303053140640259, + "learning_rate": 9.73892521956001e-06, + "loss": 0.8618, + "step": 2534 + }, + { + "epoch": 0.13028060437866174, + "grad_norm": 0.7637819051742554, + "learning_rate": 9.73865973986978e-06, + "loss": 0.6732, + "step": 2535 + }, + { + "epoch": 0.13033199712200638, + "grad_norm": 1.1514431238174438, + "learning_rate": 9.738394128890845e-06, + "loss": 0.7937, + "step": 2536 + }, + { + "epoch": 0.130383389865351, + "grad_norm": 1.1339715719223022, + "learning_rate": 9.738128386630561e-06, + "loss": 0.8662, + "step": 2537 + }, + { + "epoch": 0.13043478260869565, + "grad_norm": 1.2402563095092773, + "learning_rate": 9.737862513096294e-06, + "loss": 0.7785, + "step": 2538 + }, + { + "epoch": 0.1304861753520403, + "grad_norm": 1.165157437324524, + "learning_rate": 9.73759650829541e-06, + "loss": 0.832, + "step": 2539 + }, + { + "epoch": 0.13053756809538494, + "grad_norm": 1.2463504076004028, + "learning_rate": 9.737330372235276e-06, + "loss": 0.861, + "step": 2540 + }, + { + "epoch": 0.13058896083872956, + "grad_norm": 1.1279863119125366, + "learning_rate": 9.737064104923269e-06, + "loss": 0.8407, + "step": 2541 + }, + { + "epoch": 0.1306403535820742, + "grad_norm": 1.2176536321640015, + "learning_rate": 9.736797706366763e-06, + "loss": 0.79, + "step": 2542 + }, + { + "epoch": 0.13069174632541886, + "grad_norm": 1.3712241649627686, + "learning_rate": 9.73653117657314e-06, + "loss": 0.8002, + "step": 2543 + }, + { + "epoch": 0.1307431390687635, + "grad_norm": 1.1009495258331299, + "learning_rate": 9.736264515549785e-06, + "loss": 0.8095, + "step": 2544 + }, + { + "epoch": 0.13079453181210812, + "grad_norm": 1.1206634044647217, + "learning_rate": 9.735997723304085e-06, + "loss": 0.7942, + "step": 2545 + }, + { + "epoch": 0.13084592455545277, + "grad_norm": 1.1654514074325562, + "learning_rate": 9.735730799843433e-06, + "loss": 0.7751, + "step": 2546 + }, + { + "epoch": 0.13089731729879742, + "grad_norm": 1.1726800203323364, + "learning_rate": 9.735463745175223e-06, + "loss": 0.8371, + "step": 2547 + }, + { + "epoch": 0.13094871004214204, + "grad_norm": 1.1220122575759888, + "learning_rate": 9.735196559306857e-06, + "loss": 0.7997, + "step": 2548 + }, + { + "epoch": 0.13100010278548668, + "grad_norm": 1.14466392993927, + "learning_rate": 9.734929242245732e-06, + "loss": 0.8208, + "step": 2549 + }, + { + "epoch": 0.13105149552883133, + "grad_norm": 1.3103368282318115, + "learning_rate": 9.73466179399926e-06, + "loss": 0.7657, + "step": 2550 + }, + { + "epoch": 0.13110288827217598, + "grad_norm": 0.8653972744941711, + "learning_rate": 9.734394214574848e-06, + "loss": 0.694, + "step": 2551 + }, + { + "epoch": 0.1311542810155206, + "grad_norm": 0.9475825428962708, + "learning_rate": 9.734126503979911e-06, + "loss": 0.7256, + "step": 2552 + }, + { + "epoch": 0.13120567375886524, + "grad_norm": 1.0775402784347534, + "learning_rate": 9.733858662221863e-06, + "loss": 0.7769, + "step": 2553 + }, + { + "epoch": 0.1312570665022099, + "grad_norm": 1.1888377666473389, + "learning_rate": 9.733590689308128e-06, + "loss": 0.8184, + "step": 2554 + }, + { + "epoch": 0.13130845924555454, + "grad_norm": 1.19166100025177, + "learning_rate": 9.73332258524613e-06, + "loss": 0.8029, + "step": 2555 + }, + { + "epoch": 0.13135985198889916, + "grad_norm": 1.056988000869751, + "learning_rate": 9.733054350043295e-06, + "loss": 0.8581, + "step": 2556 + }, + { + "epoch": 0.1314112447322438, + "grad_norm": 0.7992630004882812, + "learning_rate": 9.732785983707056e-06, + "loss": 0.6592, + "step": 2557 + }, + { + "epoch": 0.13146263747558845, + "grad_norm": 1.1102385520935059, + "learning_rate": 9.73251748624485e-06, + "loss": 0.7871, + "step": 2558 + }, + { + "epoch": 0.1315140302189331, + "grad_norm": 1.1597158908843994, + "learning_rate": 9.732248857664115e-06, + "loss": 0.8326, + "step": 2559 + }, + { + "epoch": 0.13156542296227772, + "grad_norm": 1.1040688753128052, + "learning_rate": 9.73198009797229e-06, + "loss": 0.798, + "step": 2560 + }, + { + "epoch": 0.13161681570562236, + "grad_norm": 1.1124264001846313, + "learning_rate": 9.731711207176826e-06, + "loss": 0.7974, + "step": 2561 + }, + { + "epoch": 0.131668208448967, + "grad_norm": 1.1696454286575317, + "learning_rate": 9.731442185285172e-06, + "loss": 0.8378, + "step": 2562 + }, + { + "epoch": 0.13171960119231166, + "grad_norm": 1.261734127998352, + "learning_rate": 9.73117303230478e-06, + "loss": 0.8198, + "step": 2563 + }, + { + "epoch": 0.13177099393565628, + "grad_norm": 1.1441841125488281, + "learning_rate": 9.730903748243107e-06, + "loss": 0.8282, + "step": 2564 + }, + { + "epoch": 0.13182238667900092, + "grad_norm": 1.1437608003616333, + "learning_rate": 9.730634333107613e-06, + "loss": 0.8515, + "step": 2565 + }, + { + "epoch": 0.13187377942234557, + "grad_norm": 1.264147400856018, + "learning_rate": 9.730364786905765e-06, + "loss": 0.8741, + "step": 2566 + }, + { + "epoch": 0.1319251721656902, + "grad_norm": 1.188440203666687, + "learning_rate": 9.730095109645032e-06, + "loss": 0.8423, + "step": 2567 + }, + { + "epoch": 0.13197656490903484, + "grad_norm": 1.1565133333206177, + "learning_rate": 9.729825301332882e-06, + "loss": 0.7745, + "step": 2568 + }, + { + "epoch": 0.13202795765237949, + "grad_norm": 1.1575068235397339, + "learning_rate": 9.729555361976792e-06, + "loss": 0.8232, + "step": 2569 + }, + { + "epoch": 0.13207935039572413, + "grad_norm": 1.13129460811615, + "learning_rate": 9.729285291584239e-06, + "loss": 0.8257, + "step": 2570 + }, + { + "epoch": 0.13213074313906875, + "grad_norm": 1.1367108821868896, + "learning_rate": 9.729015090162709e-06, + "loss": 0.7524, + "step": 2571 + }, + { + "epoch": 0.1321821358824134, + "grad_norm": 1.1301205158233643, + "learning_rate": 9.728744757719685e-06, + "loss": 0.7987, + "step": 2572 + }, + { + "epoch": 0.13223352862575805, + "grad_norm": 1.1188976764678955, + "learning_rate": 9.72847429426266e-06, + "loss": 0.784, + "step": 2573 + }, + { + "epoch": 0.1322849213691027, + "grad_norm": 1.3200082778930664, + "learning_rate": 9.728203699799123e-06, + "loss": 0.7682, + "step": 2574 + }, + { + "epoch": 0.1323363141124473, + "grad_norm": 0.8083642721176147, + "learning_rate": 9.727932974336576e-06, + "loss": 0.7082, + "step": 2575 + }, + { + "epoch": 0.13238770685579196, + "grad_norm": 1.0618683099746704, + "learning_rate": 9.727662117882517e-06, + "loss": 0.7546, + "step": 2576 + }, + { + "epoch": 0.1324390995991366, + "grad_norm": 1.1620287895202637, + "learning_rate": 9.72739113044445e-06, + "loss": 0.8559, + "step": 2577 + }, + { + "epoch": 0.13249049234248125, + "grad_norm": 1.1489086151123047, + "learning_rate": 9.727120012029882e-06, + "loss": 0.8371, + "step": 2578 + }, + { + "epoch": 0.13254188508582587, + "grad_norm": 1.2074640989303589, + "learning_rate": 9.726848762646329e-06, + "loss": 0.8346, + "step": 2579 + }, + { + "epoch": 0.13259327782917052, + "grad_norm": 1.392053484916687, + "learning_rate": 9.7265773823013e-06, + "loss": 0.8141, + "step": 2580 + }, + { + "epoch": 0.13264467057251517, + "grad_norm": 1.184875249862671, + "learning_rate": 9.72630587100232e-06, + "loss": 0.7666, + "step": 2581 + }, + { + "epoch": 0.1326960633158598, + "grad_norm": 1.1245810985565186, + "learning_rate": 9.726034228756908e-06, + "loss": 0.817, + "step": 2582 + }, + { + "epoch": 0.13274745605920443, + "grad_norm": 1.1560232639312744, + "learning_rate": 9.72576245557259e-06, + "loss": 0.8116, + "step": 2583 + }, + { + "epoch": 0.13279884880254908, + "grad_norm": 1.1937674283981323, + "learning_rate": 9.725490551456897e-06, + "loss": 0.8234, + "step": 2584 + }, + { + "epoch": 0.13285024154589373, + "grad_norm": 0.839627206325531, + "learning_rate": 9.725218516417362e-06, + "loss": 0.7255, + "step": 2585 + }, + { + "epoch": 0.13290163428923835, + "grad_norm": 1.1455011367797852, + "learning_rate": 9.724946350461522e-06, + "loss": 0.7953, + "step": 2586 + }, + { + "epoch": 0.132953027032583, + "grad_norm": 1.1295292377471924, + "learning_rate": 9.724674053596917e-06, + "loss": 0.7461, + "step": 2587 + }, + { + "epoch": 0.13300441977592764, + "grad_norm": 1.1307677030563354, + "learning_rate": 9.72440162583109e-06, + "loss": 0.7985, + "step": 2588 + }, + { + "epoch": 0.1330558125192723, + "grad_norm": 1.2196446657180786, + "learning_rate": 9.724129067171593e-06, + "loss": 0.8421, + "step": 2589 + }, + { + "epoch": 0.1331072052626169, + "grad_norm": 0.965071439743042, + "learning_rate": 9.723856377625973e-06, + "loss": 0.7361, + "step": 2590 + }, + { + "epoch": 0.13315859800596155, + "grad_norm": 1.1619025468826294, + "learning_rate": 9.723583557201788e-06, + "loss": 0.8407, + "step": 2591 + }, + { + "epoch": 0.1332099907493062, + "grad_norm": 1.1729711294174194, + "learning_rate": 9.723310605906595e-06, + "loss": 0.819, + "step": 2592 + }, + { + "epoch": 0.13326138349265085, + "grad_norm": 0.7274436950683594, + "learning_rate": 9.723037523747957e-06, + "loss": 0.6828, + "step": 2593 + }, + { + "epoch": 0.13331277623599547, + "grad_norm": 0.8518123030662537, + "learning_rate": 9.72276431073344e-06, + "loss": 0.706, + "step": 2594 + }, + { + "epoch": 0.13336416897934011, + "grad_norm": 1.1291074752807617, + "learning_rate": 9.722490966870614e-06, + "loss": 0.7829, + "step": 2595 + }, + { + "epoch": 0.13341556172268476, + "grad_norm": 1.0963987112045288, + "learning_rate": 9.722217492167052e-06, + "loss": 0.8332, + "step": 2596 + }, + { + "epoch": 0.1334669544660294, + "grad_norm": 1.0919013023376465, + "learning_rate": 9.72194388663033e-06, + "loss": 0.7639, + "step": 2597 + }, + { + "epoch": 0.13351834720937403, + "grad_norm": 1.046149492263794, + "learning_rate": 9.721670150268029e-06, + "loss": 0.7833, + "step": 2598 + }, + { + "epoch": 0.13356973995271867, + "grad_norm": 1.1089986562728882, + "learning_rate": 9.721396283087736e-06, + "loss": 0.8237, + "step": 2599 + }, + { + "epoch": 0.13362113269606332, + "grad_norm": 1.1304103136062622, + "learning_rate": 9.721122285097034e-06, + "loss": 0.7458, + "step": 2600 + }, + { + "epoch": 0.13367252543940797, + "grad_norm": 1.2912743091583252, + "learning_rate": 9.720848156303518e-06, + "loss": 0.8359, + "step": 2601 + }, + { + "epoch": 0.1337239181827526, + "grad_norm": 0.8447766304016113, + "learning_rate": 9.720573896714778e-06, + "loss": 0.708, + "step": 2602 + }, + { + "epoch": 0.13377531092609724, + "grad_norm": 0.9680198431015015, + "learning_rate": 9.720299506338418e-06, + "loss": 0.7942, + "step": 2603 + }, + { + "epoch": 0.13382670366944188, + "grad_norm": 1.139566421508789, + "learning_rate": 9.72002498518204e-06, + "loss": 0.8131, + "step": 2604 + }, + { + "epoch": 0.1338780964127865, + "grad_norm": 1.2419695854187012, + "learning_rate": 9.719750333253246e-06, + "loss": 0.8127, + "step": 2605 + }, + { + "epoch": 0.13392948915613115, + "grad_norm": 1.2263543605804443, + "learning_rate": 9.719475550559648e-06, + "loss": 0.8517, + "step": 2606 + }, + { + "epoch": 0.1339808818994758, + "grad_norm": 1.1253172159194946, + "learning_rate": 9.719200637108857e-06, + "loss": 0.7928, + "step": 2607 + }, + { + "epoch": 0.13403227464282044, + "grad_norm": 1.1045161485671997, + "learning_rate": 9.718925592908492e-06, + "loss": 0.8264, + "step": 2608 + }, + { + "epoch": 0.13408366738616506, + "grad_norm": 1.313133716583252, + "learning_rate": 9.718650417966174e-06, + "loss": 0.8472, + "step": 2609 + }, + { + "epoch": 0.1341350601295097, + "grad_norm": 4.287288665771484, + "learning_rate": 9.718375112289525e-06, + "loss": 0.8429, + "step": 2610 + }, + { + "epoch": 0.13418645287285436, + "grad_norm": 1.1689093112945557, + "learning_rate": 9.718099675886173e-06, + "loss": 0.8386, + "step": 2611 + }, + { + "epoch": 0.134237845616199, + "grad_norm": 1.1137447357177734, + "learning_rate": 9.71782410876375e-06, + "loss": 0.8598, + "step": 2612 + }, + { + "epoch": 0.13428923835954362, + "grad_norm": 0.8552294373512268, + "learning_rate": 9.71754841092989e-06, + "loss": 0.7216, + "step": 2613 + }, + { + "epoch": 0.13434063110288827, + "grad_norm": 0.9169567823410034, + "learning_rate": 9.717272582392232e-06, + "loss": 0.7178, + "step": 2614 + }, + { + "epoch": 0.13439202384623292, + "grad_norm": 1.226951003074646, + "learning_rate": 9.716996623158417e-06, + "loss": 0.8317, + "step": 2615 + }, + { + "epoch": 0.13444341658957756, + "grad_norm": 1.2023383378982544, + "learning_rate": 9.716720533236093e-06, + "loss": 0.8035, + "step": 2616 + }, + { + "epoch": 0.13449480933292218, + "grad_norm": 1.1587753295898438, + "learning_rate": 9.716444312632906e-06, + "loss": 0.8164, + "step": 2617 + }, + { + "epoch": 0.13454620207626683, + "grad_norm": 1.2885563373565674, + "learning_rate": 9.716167961356511e-06, + "loss": 0.8863, + "step": 2618 + }, + { + "epoch": 0.13459759481961148, + "grad_norm": 1.1696535348892212, + "learning_rate": 9.715891479414565e-06, + "loss": 0.7684, + "step": 2619 + }, + { + "epoch": 0.13464898756295612, + "grad_norm": 1.0802528858184814, + "learning_rate": 9.715614866814726e-06, + "loss": 0.7732, + "step": 2620 + }, + { + "epoch": 0.13470038030630074, + "grad_norm": 0.9993689060211182, + "learning_rate": 9.715338123564662e-06, + "loss": 0.73, + "step": 2621 + }, + { + "epoch": 0.1347517730496454, + "grad_norm": 1.3452314138412476, + "learning_rate": 9.715061249672036e-06, + "loss": 0.8114, + "step": 2622 + }, + { + "epoch": 0.13480316579299004, + "grad_norm": 1.230893850326538, + "learning_rate": 9.71478424514452e-06, + "loss": 0.8117, + "step": 2623 + }, + { + "epoch": 0.13485455853633466, + "grad_norm": 1.2168797254562378, + "learning_rate": 9.71450710998979e-06, + "loss": 0.8102, + "step": 2624 + }, + { + "epoch": 0.1349059512796793, + "grad_norm": 1.2299240827560425, + "learning_rate": 9.714229844215523e-06, + "loss": 0.8305, + "step": 2625 + }, + { + "epoch": 0.13495734402302395, + "grad_norm": 1.1167078018188477, + "learning_rate": 9.713952447829402e-06, + "loss": 0.7424, + "step": 2626 + }, + { + "epoch": 0.1350087367663686, + "grad_norm": 1.1739639043807983, + "learning_rate": 9.713674920839113e-06, + "loss": 0.7812, + "step": 2627 + }, + { + "epoch": 0.13506012950971322, + "grad_norm": 1.1659791469573975, + "learning_rate": 9.713397263252342e-06, + "loss": 0.7917, + "step": 2628 + }, + { + "epoch": 0.13511152225305786, + "grad_norm": 0.8265565633773804, + "learning_rate": 9.713119475076786e-06, + "loss": 0.7352, + "step": 2629 + }, + { + "epoch": 0.1351629149964025, + "grad_norm": 0.8502550721168518, + "learning_rate": 9.712841556320139e-06, + "loss": 0.7484, + "step": 2630 + }, + { + "epoch": 0.13521430773974716, + "grad_norm": 1.1613914966583252, + "learning_rate": 9.7125635069901e-06, + "loss": 0.7909, + "step": 2631 + }, + { + "epoch": 0.13526570048309178, + "grad_norm": 1.1277912855148315, + "learning_rate": 9.712285327094374e-06, + "loss": 0.8384, + "step": 2632 + }, + { + "epoch": 0.13531709322643642, + "grad_norm": 1.0719398260116577, + "learning_rate": 9.712007016640667e-06, + "loss": 0.7837, + "step": 2633 + }, + { + "epoch": 0.13536848596978107, + "grad_norm": 1.222659707069397, + "learning_rate": 9.711728575636695e-06, + "loss": 0.8223, + "step": 2634 + }, + { + "epoch": 0.13541987871312572, + "grad_norm": 1.183280348777771, + "learning_rate": 9.711450004090165e-06, + "loss": 0.819, + "step": 2635 + }, + { + "epoch": 0.13547127145647034, + "grad_norm": 1.1620908975601196, + "learning_rate": 9.711171302008798e-06, + "loss": 0.8713, + "step": 2636 + }, + { + "epoch": 0.13552266419981499, + "grad_norm": 1.1609777212142944, + "learning_rate": 9.710892469400316e-06, + "loss": 0.8787, + "step": 2637 + }, + { + "epoch": 0.13557405694315963, + "grad_norm": 1.138584017753601, + "learning_rate": 9.710613506272447e-06, + "loss": 0.8551, + "step": 2638 + }, + { + "epoch": 0.13562544968650428, + "grad_norm": 1.2348012924194336, + "learning_rate": 9.710334412632916e-06, + "loss": 0.8032, + "step": 2639 + }, + { + "epoch": 0.1356768424298489, + "grad_norm": 1.1296855211257935, + "learning_rate": 9.710055188489456e-06, + "loss": 0.7954, + "step": 2640 + }, + { + "epoch": 0.13572823517319355, + "grad_norm": 0.8864043951034546, + "learning_rate": 9.709775833849804e-06, + "loss": 0.7179, + "step": 2641 + }, + { + "epoch": 0.1357796279165382, + "grad_norm": 1.190123200416565, + "learning_rate": 9.7094963487217e-06, + "loss": 0.7965, + "step": 2642 + }, + { + "epoch": 0.1358310206598828, + "grad_norm": 1.1864268779754639, + "learning_rate": 9.709216733112888e-06, + "loss": 0.8471, + "step": 2643 + }, + { + "epoch": 0.13588241340322746, + "grad_norm": 0.8380422592163086, + "learning_rate": 9.708936987031115e-06, + "loss": 0.6969, + "step": 2644 + }, + { + "epoch": 0.1359338061465721, + "grad_norm": 1.3133583068847656, + "learning_rate": 9.708657110484128e-06, + "loss": 0.813, + "step": 2645 + }, + { + "epoch": 0.13598519888991675, + "grad_norm": 1.2705795764923096, + "learning_rate": 9.708377103479685e-06, + "loss": 0.876, + "step": 2646 + }, + { + "epoch": 0.13603659163326137, + "grad_norm": 1.1146432161331177, + "learning_rate": 9.708096966025544e-06, + "loss": 0.8495, + "step": 2647 + }, + { + "epoch": 0.13608798437660602, + "grad_norm": 0.8538182973861694, + "learning_rate": 9.707816698129464e-06, + "loss": 0.7761, + "step": 2648 + }, + { + "epoch": 0.13613937711995067, + "grad_norm": 1.1765365600585938, + "learning_rate": 9.707536299799212e-06, + "loss": 0.8806, + "step": 2649 + }, + { + "epoch": 0.1361907698632953, + "grad_norm": 1.085740327835083, + "learning_rate": 9.707255771042555e-06, + "loss": 0.8055, + "step": 2650 + }, + { + "epoch": 0.13624216260663993, + "grad_norm": 1.1011942625045776, + "learning_rate": 9.706975111867267e-06, + "loss": 0.7637, + "step": 2651 + }, + { + "epoch": 0.13629355534998458, + "grad_norm": 1.204447627067566, + "learning_rate": 9.706694322281124e-06, + "loss": 0.8038, + "step": 2652 + }, + { + "epoch": 0.13634494809332923, + "grad_norm": 0.7580429315567017, + "learning_rate": 9.706413402291903e-06, + "loss": 0.7305, + "step": 2653 + }, + { + "epoch": 0.13639634083667387, + "grad_norm": 1.1226884126663208, + "learning_rate": 9.70613235190739e-06, + "loss": 0.8551, + "step": 2654 + }, + { + "epoch": 0.1364477335800185, + "grad_norm": 1.0983165502548218, + "learning_rate": 9.705851171135369e-06, + "loss": 0.8058, + "step": 2655 + }, + { + "epoch": 0.13649912632336314, + "grad_norm": 0.8131149411201477, + "learning_rate": 9.705569859983633e-06, + "loss": 0.7156, + "step": 2656 + }, + { + "epoch": 0.1365505190667078, + "grad_norm": 1.0995423793792725, + "learning_rate": 9.705288418459976e-06, + "loss": 0.7811, + "step": 2657 + }, + { + "epoch": 0.13660191181005243, + "grad_norm": 1.2127082347869873, + "learning_rate": 9.705006846572194e-06, + "loss": 0.8417, + "step": 2658 + }, + { + "epoch": 0.13665330455339705, + "grad_norm": 0.8489097952842712, + "learning_rate": 9.704725144328087e-06, + "loss": 0.7049, + "step": 2659 + }, + { + "epoch": 0.1367046972967417, + "grad_norm": 1.19580078125, + "learning_rate": 9.704443311735462e-06, + "loss": 0.8299, + "step": 2660 + }, + { + "epoch": 0.13675609004008635, + "grad_norm": 1.1455217599868774, + "learning_rate": 9.704161348802128e-06, + "loss": 0.8167, + "step": 2661 + }, + { + "epoch": 0.13680748278343097, + "grad_norm": 1.10663902759552, + "learning_rate": 9.703879255535896e-06, + "loss": 0.7779, + "step": 2662 + }, + { + "epoch": 0.13685887552677561, + "grad_norm": 1.3961073160171509, + "learning_rate": 9.703597031944582e-06, + "loss": 0.7755, + "step": 2663 + }, + { + "epoch": 0.13691026827012026, + "grad_norm": 1.087077260017395, + "learning_rate": 9.703314678036004e-06, + "loss": 0.8376, + "step": 2664 + }, + { + "epoch": 0.1369616610134649, + "grad_norm": 1.17799711227417, + "learning_rate": 9.703032193817985e-06, + "loss": 0.827, + "step": 2665 + }, + { + "epoch": 0.13701305375680953, + "grad_norm": 1.8682605028152466, + "learning_rate": 9.702749579298354e-06, + "loss": 0.6878, + "step": 2666 + }, + { + "epoch": 0.13706444650015417, + "grad_norm": 1.2004543542861938, + "learning_rate": 9.702466834484938e-06, + "loss": 0.8131, + "step": 2667 + }, + { + "epoch": 0.13711583924349882, + "grad_norm": 1.1433489322662354, + "learning_rate": 9.702183959385571e-06, + "loss": 0.8016, + "step": 2668 + }, + { + "epoch": 0.13716723198684347, + "grad_norm": 1.1702873706817627, + "learning_rate": 9.701900954008092e-06, + "loss": 0.8072, + "step": 2669 + }, + { + "epoch": 0.1372186247301881, + "grad_norm": 1.1314847469329834, + "learning_rate": 9.701617818360342e-06, + "loss": 0.7812, + "step": 2670 + }, + { + "epoch": 0.13727001747353273, + "grad_norm": 1.1351122856140137, + "learning_rate": 9.701334552450165e-06, + "loss": 0.8049, + "step": 2671 + }, + { + "epoch": 0.13732141021687738, + "grad_norm": 1.1694854497909546, + "learning_rate": 9.701051156285407e-06, + "loss": 0.8187, + "step": 2672 + }, + { + "epoch": 0.13737280296022203, + "grad_norm": 0.8477898240089417, + "learning_rate": 9.700767629873923e-06, + "loss": 0.7354, + "step": 2673 + }, + { + "epoch": 0.13742419570356665, + "grad_norm": 1.1977671384811401, + "learning_rate": 9.700483973223567e-06, + "loss": 0.8588, + "step": 2674 + }, + { + "epoch": 0.1374755884469113, + "grad_norm": 1.2428123950958252, + "learning_rate": 9.700200186342197e-06, + "loss": 0.7964, + "step": 2675 + }, + { + "epoch": 0.13752698119025594, + "grad_norm": 0.7044335007667542, + "learning_rate": 9.699916269237676e-06, + "loss": 0.6562, + "step": 2676 + }, + { + "epoch": 0.13757837393360056, + "grad_norm": 1.1356942653656006, + "learning_rate": 9.699632221917872e-06, + "loss": 0.8361, + "step": 2677 + }, + { + "epoch": 0.1376297666769452, + "grad_norm": 1.1374279260635376, + "learning_rate": 9.699348044390652e-06, + "loss": 0.7868, + "step": 2678 + }, + { + "epoch": 0.13768115942028986, + "grad_norm": 1.1640079021453857, + "learning_rate": 9.699063736663892e-06, + "loss": 0.8378, + "step": 2679 + }, + { + "epoch": 0.1377325521636345, + "grad_norm": 1.0794360637664795, + "learning_rate": 9.698779298745468e-06, + "loss": 0.7853, + "step": 2680 + }, + { + "epoch": 0.13778394490697912, + "grad_norm": 0.941981315612793, + "learning_rate": 9.69849473064326e-06, + "loss": 0.7393, + "step": 2681 + }, + { + "epoch": 0.13783533765032377, + "grad_norm": 1.1368045806884766, + "learning_rate": 9.698210032365151e-06, + "loss": 0.7606, + "step": 2682 + }, + { + "epoch": 0.13788673039366842, + "grad_norm": 1.1727396249771118, + "learning_rate": 9.697925203919032e-06, + "loss": 0.79, + "step": 2683 + }, + { + "epoch": 0.13793812313701306, + "grad_norm": 1.1475847959518433, + "learning_rate": 9.697640245312793e-06, + "loss": 0.7926, + "step": 2684 + }, + { + "epoch": 0.13798951588035768, + "grad_norm": 1.1517375707626343, + "learning_rate": 9.697355156554328e-06, + "loss": 0.7962, + "step": 2685 + }, + { + "epoch": 0.13804090862370233, + "grad_norm": 1.2805520296096802, + "learning_rate": 9.697069937651535e-06, + "loss": 0.8869, + "step": 2686 + }, + { + "epoch": 0.13809230136704698, + "grad_norm": 0.978940486907959, + "learning_rate": 9.69678458861232e-06, + "loss": 0.7031, + "step": 2687 + }, + { + "epoch": 0.13814369411039162, + "grad_norm": 1.1227670907974243, + "learning_rate": 9.696499109444587e-06, + "loss": 0.8085, + "step": 2688 + }, + { + "epoch": 0.13819508685373624, + "grad_norm": 1.090261459350586, + "learning_rate": 9.696213500156243e-06, + "loss": 0.8353, + "step": 2689 + }, + { + "epoch": 0.1382464795970809, + "grad_norm": 1.196114182472229, + "learning_rate": 9.695927760755203e-06, + "loss": 0.7922, + "step": 2690 + }, + { + "epoch": 0.13829787234042554, + "grad_norm": 1.1394280195236206, + "learning_rate": 9.695641891249385e-06, + "loss": 0.8398, + "step": 2691 + }, + { + "epoch": 0.13834926508377018, + "grad_norm": 1.1485434770584106, + "learning_rate": 9.695355891646707e-06, + "loss": 0.7382, + "step": 2692 + }, + { + "epoch": 0.1384006578271148, + "grad_norm": 1.180327296257019, + "learning_rate": 9.695069761955096e-06, + "loss": 0.8092, + "step": 2693 + }, + { + "epoch": 0.13845205057045945, + "grad_norm": 1.0974851846694946, + "learning_rate": 9.694783502182474e-06, + "loss": 0.8088, + "step": 2694 + }, + { + "epoch": 0.1385034433138041, + "grad_norm": 1.173720359802246, + "learning_rate": 9.694497112336778e-06, + "loss": 0.8444, + "step": 2695 + }, + { + "epoch": 0.13855483605714872, + "grad_norm": 1.2134408950805664, + "learning_rate": 9.69421059242594e-06, + "loss": 0.7904, + "step": 2696 + }, + { + "epoch": 0.13860622880049336, + "grad_norm": 1.0976135730743408, + "learning_rate": 9.693923942457897e-06, + "loss": 0.7804, + "step": 2697 + }, + { + "epoch": 0.138657621543838, + "grad_norm": 1.1164802312850952, + "learning_rate": 9.693637162440592e-06, + "loss": 0.7936, + "step": 2698 + }, + { + "epoch": 0.13870901428718266, + "grad_norm": 0.8723376393318176, + "learning_rate": 9.693350252381972e-06, + "loss": 0.7768, + "step": 2699 + }, + { + "epoch": 0.13876040703052728, + "grad_norm": 1.2291243076324463, + "learning_rate": 9.693063212289983e-06, + "loss": 0.8261, + "step": 2700 + }, + { + "epoch": 0.13881179977387192, + "grad_norm": 1.1501260995864868, + "learning_rate": 9.692776042172582e-06, + "loss": 0.8239, + "step": 2701 + }, + { + "epoch": 0.13886319251721657, + "grad_norm": 1.1100144386291504, + "learning_rate": 9.692488742037721e-06, + "loss": 0.8396, + "step": 2702 + }, + { + "epoch": 0.13891458526056122, + "grad_norm": 0.7832348346710205, + "learning_rate": 9.692201311893362e-06, + "loss": 0.7374, + "step": 2703 + }, + { + "epoch": 0.13896597800390584, + "grad_norm": 1.0825269222259521, + "learning_rate": 9.691913751747468e-06, + "loss": 0.8237, + "step": 2704 + }, + { + "epoch": 0.13901737074725048, + "grad_norm": 1.1884174346923828, + "learning_rate": 9.691626061608008e-06, + "loss": 0.8466, + "step": 2705 + }, + { + "epoch": 0.13906876349059513, + "grad_norm": 1.1264417171478271, + "learning_rate": 9.69133824148295e-06, + "loss": 0.8076, + "step": 2706 + }, + { + "epoch": 0.13912015623393978, + "grad_norm": 1.14151930809021, + "learning_rate": 9.691050291380268e-06, + "loss": 0.8075, + "step": 2707 + }, + { + "epoch": 0.1391715489772844, + "grad_norm": 1.1592979431152344, + "learning_rate": 9.690762211307942e-06, + "loss": 0.8255, + "step": 2708 + }, + { + "epoch": 0.13922294172062905, + "grad_norm": 1.1559135913848877, + "learning_rate": 9.690474001273953e-06, + "loss": 0.8356, + "step": 2709 + }, + { + "epoch": 0.1392743344639737, + "grad_norm": 1.0876551866531372, + "learning_rate": 9.690185661286287e-06, + "loss": 0.7496, + "step": 2710 + }, + { + "epoch": 0.13932572720731834, + "grad_norm": 1.1137487888336182, + "learning_rate": 9.68989719135293e-06, + "loss": 0.8275, + "step": 2711 + }, + { + "epoch": 0.13937711995066296, + "grad_norm": 1.1706751585006714, + "learning_rate": 9.689608591481877e-06, + "loss": 0.7768, + "step": 2712 + }, + { + "epoch": 0.1394285126940076, + "grad_norm": 1.1375924348831177, + "learning_rate": 9.689319861681123e-06, + "loss": 0.7991, + "step": 2713 + }, + { + "epoch": 0.13947990543735225, + "grad_norm": 1.1523921489715576, + "learning_rate": 9.689031001958667e-06, + "loss": 0.8537, + "step": 2714 + }, + { + "epoch": 0.13953129818069687, + "grad_norm": 1.2321090698242188, + "learning_rate": 9.688742012322512e-06, + "loss": 0.8445, + "step": 2715 + }, + { + "epoch": 0.13958269092404152, + "grad_norm": 1.1780710220336914, + "learning_rate": 9.688452892780664e-06, + "loss": 0.8086, + "step": 2716 + }, + { + "epoch": 0.13963408366738617, + "grad_norm": 1.0607858896255493, + "learning_rate": 9.688163643341136e-06, + "loss": 0.8115, + "step": 2717 + }, + { + "epoch": 0.1396854764107308, + "grad_norm": 1.1239253282546997, + "learning_rate": 9.687874264011941e-06, + "loss": 0.8133, + "step": 2718 + }, + { + "epoch": 0.13973686915407543, + "grad_norm": 1.1768728494644165, + "learning_rate": 9.687584754801093e-06, + "loss": 0.869, + "step": 2719 + }, + { + "epoch": 0.13978826189742008, + "grad_norm": 1.099822759628296, + "learning_rate": 9.68729511571662e-06, + "loss": 0.8125, + "step": 2720 + }, + { + "epoch": 0.13983965464076473, + "grad_norm": 1.0882219076156616, + "learning_rate": 9.68700534676654e-06, + "loss": 0.8603, + "step": 2721 + }, + { + "epoch": 0.13989104738410937, + "grad_norm": 0.8649500012397766, + "learning_rate": 9.686715447958883e-06, + "loss": 0.6943, + "step": 2722 + }, + { + "epoch": 0.139942440127454, + "grad_norm": 1.1535301208496094, + "learning_rate": 9.686425419301684e-06, + "loss": 0.815, + "step": 2723 + }, + { + "epoch": 0.13999383287079864, + "grad_norm": 1.101647973060608, + "learning_rate": 9.686135260802975e-06, + "loss": 0.8147, + "step": 2724 + }, + { + "epoch": 0.1400452256141433, + "grad_norm": 1.1324268579483032, + "learning_rate": 9.685844972470797e-06, + "loss": 0.8328, + "step": 2725 + }, + { + "epoch": 0.14009661835748793, + "grad_norm": 2.122354745864868, + "learning_rate": 9.685554554313192e-06, + "loss": 0.7859, + "step": 2726 + }, + { + "epoch": 0.14014801110083255, + "grad_norm": 1.1310182809829712, + "learning_rate": 9.685264006338207e-06, + "loss": 0.8524, + "step": 2727 + }, + { + "epoch": 0.1401994038441772, + "grad_norm": 1.206246256828308, + "learning_rate": 9.684973328553893e-06, + "loss": 0.7713, + "step": 2728 + }, + { + "epoch": 0.14025079658752185, + "grad_norm": 1.043457269668579, + "learning_rate": 9.684682520968298e-06, + "loss": 0.7698, + "step": 2729 + }, + { + "epoch": 0.1403021893308665, + "grad_norm": 1.1174529790878296, + "learning_rate": 9.684391583589485e-06, + "loss": 0.8197, + "step": 2730 + }, + { + "epoch": 0.14035358207421111, + "grad_norm": 1.1224461793899536, + "learning_rate": 9.684100516425513e-06, + "loss": 0.8176, + "step": 2731 + }, + { + "epoch": 0.14040497481755576, + "grad_norm": 1.1051719188690186, + "learning_rate": 9.683809319484444e-06, + "loss": 0.7758, + "step": 2732 + }, + { + "epoch": 0.1404563675609004, + "grad_norm": 1.0629916191101074, + "learning_rate": 9.683517992774349e-06, + "loss": 0.7951, + "step": 2733 + }, + { + "epoch": 0.14050776030424503, + "grad_norm": 4.309171676635742, + "learning_rate": 9.683226536303298e-06, + "loss": 0.7924, + "step": 2734 + }, + { + "epoch": 0.14055915304758967, + "grad_norm": 1.1143549680709839, + "learning_rate": 9.682934950079367e-06, + "loss": 0.8384, + "step": 2735 + }, + { + "epoch": 0.14061054579093432, + "grad_norm": 1.2448540925979614, + "learning_rate": 9.682643234110634e-06, + "loss": 0.8823, + "step": 2736 + }, + { + "epoch": 0.14066193853427897, + "grad_norm": 1.675135612487793, + "learning_rate": 9.68235138840518e-06, + "loss": 0.7369, + "step": 2737 + }, + { + "epoch": 0.1407133312776236, + "grad_norm": 0.7427315711975098, + "learning_rate": 9.682059412971093e-06, + "loss": 0.6501, + "step": 2738 + }, + { + "epoch": 0.14076472402096823, + "grad_norm": 1.2052559852600098, + "learning_rate": 9.681767307816458e-06, + "loss": 0.8602, + "step": 2739 + }, + { + "epoch": 0.14081611676431288, + "grad_norm": 1.0685110092163086, + "learning_rate": 9.681475072949376e-06, + "loss": 0.787, + "step": 2740 + }, + { + "epoch": 0.14086750950765753, + "grad_norm": 1.1438913345336914, + "learning_rate": 9.681182708377937e-06, + "loss": 0.8531, + "step": 2741 + }, + { + "epoch": 0.14091890225100215, + "grad_norm": 1.123216986656189, + "learning_rate": 9.680890214110242e-06, + "loss": 0.8001, + "step": 2742 + }, + { + "epoch": 0.1409702949943468, + "grad_norm": 1.1277879476547241, + "learning_rate": 9.680597590154397e-06, + "loss": 0.8331, + "step": 2743 + }, + { + "epoch": 0.14102168773769144, + "grad_norm": 1.1109282970428467, + "learning_rate": 9.68030483651851e-06, + "loss": 0.8317, + "step": 2744 + }, + { + "epoch": 0.1410730804810361, + "grad_norm": 1.1681088209152222, + "learning_rate": 9.680011953210688e-06, + "loss": 0.8023, + "step": 2745 + }, + { + "epoch": 0.1411244732243807, + "grad_norm": 1.1539644002914429, + "learning_rate": 9.679718940239049e-06, + "loss": 0.8713, + "step": 2746 + }, + { + "epoch": 0.14117586596772536, + "grad_norm": 1.1707487106323242, + "learning_rate": 9.679425797611711e-06, + "loss": 0.7797, + "step": 2747 + }, + { + "epoch": 0.14122725871107, + "grad_norm": 1.1454941034317017, + "learning_rate": 9.679132525336794e-06, + "loss": 0.7339, + "step": 2748 + }, + { + "epoch": 0.14127865145441465, + "grad_norm": 1.1407099962234497, + "learning_rate": 9.678839123422426e-06, + "loss": 0.7414, + "step": 2749 + }, + { + "epoch": 0.14133004419775927, + "grad_norm": 1.17057204246521, + "learning_rate": 9.678545591876734e-06, + "loss": 0.8506, + "step": 2750 + }, + { + "epoch": 0.14138143694110392, + "grad_norm": 1.0912845134735107, + "learning_rate": 9.678251930707849e-06, + "loss": 0.8705, + "step": 2751 + }, + { + "epoch": 0.14143282968444856, + "grad_norm": 0.8528375625610352, + "learning_rate": 9.67795813992391e-06, + "loss": 0.7135, + "step": 2752 + }, + { + "epoch": 0.14148422242779318, + "grad_norm": 1.000312089920044, + "learning_rate": 9.677664219533056e-06, + "loss": 0.7643, + "step": 2753 + }, + { + "epoch": 0.14153561517113783, + "grad_norm": 1.283801555633545, + "learning_rate": 9.67737016954343e-06, + "loss": 0.7829, + "step": 2754 + }, + { + "epoch": 0.14158700791448248, + "grad_norm": 1.1663647890090942, + "learning_rate": 9.677075989963178e-06, + "loss": 0.8347, + "step": 2755 + }, + { + "epoch": 0.14163840065782712, + "grad_norm": 1.2120170593261719, + "learning_rate": 9.676781680800453e-06, + "loss": 0.7752, + "step": 2756 + }, + { + "epoch": 0.14168979340117174, + "grad_norm": 1.063825011253357, + "learning_rate": 9.676487242063406e-06, + "loss": 0.8276, + "step": 2757 + }, + { + "epoch": 0.1417411861445164, + "grad_norm": 1.1821837425231934, + "learning_rate": 9.676192673760197e-06, + "loss": 0.8856, + "step": 2758 + }, + { + "epoch": 0.14179257888786104, + "grad_norm": 1.1495966911315918, + "learning_rate": 9.675897975898986e-06, + "loss": 0.7856, + "step": 2759 + }, + { + "epoch": 0.14184397163120568, + "grad_norm": 1.194322109222412, + "learning_rate": 9.67560314848794e-06, + "loss": 0.8319, + "step": 2760 + }, + { + "epoch": 0.1418953643745503, + "grad_norm": 1.0979180335998535, + "learning_rate": 9.675308191535224e-06, + "loss": 0.7608, + "step": 2761 + }, + { + "epoch": 0.14194675711789495, + "grad_norm": 1.1436878442764282, + "learning_rate": 9.675013105049015e-06, + "loss": 0.7815, + "step": 2762 + }, + { + "epoch": 0.1419981498612396, + "grad_norm": 1.2227396965026855, + "learning_rate": 9.674717889037481e-06, + "loss": 0.8219, + "step": 2763 + }, + { + "epoch": 0.14204954260458424, + "grad_norm": 1.1543980836868286, + "learning_rate": 9.674422543508808e-06, + "loss": 0.7778, + "step": 2764 + }, + { + "epoch": 0.14210093534792886, + "grad_norm": 1.187705159187317, + "learning_rate": 9.674127068471177e-06, + "loss": 0.8468, + "step": 2765 + }, + { + "epoch": 0.1421523280912735, + "grad_norm": 1.1667296886444092, + "learning_rate": 9.673831463932773e-06, + "loss": 0.7672, + "step": 2766 + }, + { + "epoch": 0.14220372083461816, + "grad_norm": 1.107059121131897, + "learning_rate": 9.673535729901788e-06, + "loss": 0.7684, + "step": 2767 + }, + { + "epoch": 0.1422551135779628, + "grad_norm": 1.1269954442977905, + "learning_rate": 9.673239866386414e-06, + "loss": 0.8404, + "step": 2768 + }, + { + "epoch": 0.14230650632130742, + "grad_norm": 1.0767295360565186, + "learning_rate": 9.672943873394848e-06, + "loss": 0.8103, + "step": 2769 + }, + { + "epoch": 0.14235789906465207, + "grad_norm": 1.2439650297164917, + "learning_rate": 9.67264775093529e-06, + "loss": 0.7964, + "step": 2770 + }, + { + "epoch": 0.14240929180799672, + "grad_norm": 1.1509159803390503, + "learning_rate": 9.672351499015949e-06, + "loss": 0.7949, + "step": 2771 + }, + { + "epoch": 0.14246068455134134, + "grad_norm": 1.0878055095672607, + "learning_rate": 9.672055117645026e-06, + "loss": 0.8597, + "step": 2772 + }, + { + "epoch": 0.14251207729468598, + "grad_norm": 1.0522780418395996, + "learning_rate": 9.67175860683074e-06, + "loss": 0.7897, + "step": 2773 + }, + { + "epoch": 0.14256347003803063, + "grad_norm": 1.168533444404602, + "learning_rate": 9.671461966581301e-06, + "loss": 0.8491, + "step": 2774 + }, + { + "epoch": 0.14261486278137528, + "grad_norm": 1.242834210395813, + "learning_rate": 9.671165196904929e-06, + "loss": 0.8567, + "step": 2775 + }, + { + "epoch": 0.1426662555247199, + "grad_norm": 1.1433464288711548, + "learning_rate": 9.670868297809844e-06, + "loss": 0.8429, + "step": 2776 + }, + { + "epoch": 0.14271764826806455, + "grad_norm": 1.2627933025360107, + "learning_rate": 9.670571269304275e-06, + "loss": 0.8065, + "step": 2777 + }, + { + "epoch": 0.1427690410114092, + "grad_norm": 0.7583116888999939, + "learning_rate": 9.670274111396452e-06, + "loss": 0.7, + "step": 2778 + }, + { + "epoch": 0.14282043375475384, + "grad_norm": 1.1929150819778442, + "learning_rate": 9.669976824094604e-06, + "loss": 0.8413, + "step": 2779 + }, + { + "epoch": 0.14287182649809846, + "grad_norm": 1.1292308568954468, + "learning_rate": 9.669679407406972e-06, + "loss": 0.7978, + "step": 2780 + }, + { + "epoch": 0.1429232192414431, + "grad_norm": 1.1096967458724976, + "learning_rate": 9.669381861341794e-06, + "loss": 0.792, + "step": 2781 + }, + { + "epoch": 0.14297461198478775, + "grad_norm": 1.1445257663726807, + "learning_rate": 9.669084185907313e-06, + "loss": 0.811, + "step": 2782 + }, + { + "epoch": 0.1430260047281324, + "grad_norm": 1.0826023817062378, + "learning_rate": 9.668786381111778e-06, + "loss": 0.8169, + "step": 2783 + }, + { + "epoch": 0.14307739747147702, + "grad_norm": 1.127459168434143, + "learning_rate": 9.66848844696344e-06, + "loss": 0.8182, + "step": 2784 + }, + { + "epoch": 0.14312879021482167, + "grad_norm": 1.1308761835098267, + "learning_rate": 9.668190383470551e-06, + "loss": 0.8125, + "step": 2785 + }, + { + "epoch": 0.1431801829581663, + "grad_norm": 1.1512395143508911, + "learning_rate": 9.667892190641373e-06, + "loss": 0.7987, + "step": 2786 + }, + { + "epoch": 0.14323157570151096, + "grad_norm": 1.1589457988739014, + "learning_rate": 9.667593868484165e-06, + "loss": 0.8709, + "step": 2787 + }, + { + "epoch": 0.14328296844485558, + "grad_norm": 1.1947205066680908, + "learning_rate": 9.667295417007193e-06, + "loss": 0.8611, + "step": 2788 + }, + { + "epoch": 0.14333436118820023, + "grad_norm": 0.894476592540741, + "learning_rate": 9.666996836218725e-06, + "loss": 0.702, + "step": 2789 + }, + { + "epoch": 0.14338575393154487, + "grad_norm": 1.144202709197998, + "learning_rate": 9.666698126127034e-06, + "loss": 0.821, + "step": 2790 + }, + { + "epoch": 0.1434371466748895, + "grad_norm": 1.075032114982605, + "learning_rate": 9.666399286740397e-06, + "loss": 0.8255, + "step": 2791 + }, + { + "epoch": 0.14348853941823414, + "grad_norm": 0.8268353939056396, + "learning_rate": 9.666100318067093e-06, + "loss": 0.7171, + "step": 2792 + }, + { + "epoch": 0.1435399321615788, + "grad_norm": 1.151853084564209, + "learning_rate": 9.665801220115405e-06, + "loss": 0.8292, + "step": 2793 + }, + { + "epoch": 0.14359132490492343, + "grad_norm": 1.117978811264038, + "learning_rate": 9.66550199289362e-06, + "loss": 0.8696, + "step": 2794 + }, + { + "epoch": 0.14364271764826805, + "grad_norm": 1.0856496095657349, + "learning_rate": 9.665202636410028e-06, + "loss": 0.7751, + "step": 2795 + }, + { + "epoch": 0.1436941103916127, + "grad_norm": 1.2467879056930542, + "learning_rate": 9.664903150672922e-06, + "loss": 0.7809, + "step": 2796 + }, + { + "epoch": 0.14374550313495735, + "grad_norm": 0.7887930274009705, + "learning_rate": 9.664603535690602e-06, + "loss": 0.7006, + "step": 2797 + }, + { + "epoch": 0.143796895878302, + "grad_norm": 1.1251088380813599, + "learning_rate": 9.664303791471369e-06, + "loss": 0.7756, + "step": 2798 + }, + { + "epoch": 0.14384828862164661, + "grad_norm": 1.1136263608932495, + "learning_rate": 9.664003918023523e-06, + "loss": 0.8232, + "step": 2799 + }, + { + "epoch": 0.14389968136499126, + "grad_norm": 1.254871129989624, + "learning_rate": 9.663703915355379e-06, + "loss": 0.7936, + "step": 2800 + }, + { + "epoch": 0.1439510741083359, + "grad_norm": 1.244874119758606, + "learning_rate": 9.663403783475242e-06, + "loss": 0.8016, + "step": 2801 + }, + { + "epoch": 0.14400246685168056, + "grad_norm": 1.1680164337158203, + "learning_rate": 9.663103522391433e-06, + "loss": 0.7817, + "step": 2802 + }, + { + "epoch": 0.14405385959502517, + "grad_norm": 0.869336724281311, + "learning_rate": 9.662803132112269e-06, + "loss": 0.7005, + "step": 2803 + }, + { + "epoch": 0.14410525233836982, + "grad_norm": 0.7678632736206055, + "learning_rate": 9.662502612646073e-06, + "loss": 0.7529, + "step": 2804 + }, + { + "epoch": 0.14415664508171447, + "grad_norm": 1.0485777854919434, + "learning_rate": 9.662201964001168e-06, + "loss": 0.7963, + "step": 2805 + }, + { + "epoch": 0.14420803782505912, + "grad_norm": 1.1833264827728271, + "learning_rate": 9.66190118618589e-06, + "loss": 0.8831, + "step": 2806 + }, + { + "epoch": 0.14425943056840373, + "grad_norm": 1.1319341659545898, + "learning_rate": 9.661600279208566e-06, + "loss": 0.8706, + "step": 2807 + }, + { + "epoch": 0.14431082331174838, + "grad_norm": 0.7123062610626221, + "learning_rate": 9.661299243077535e-06, + "loss": 0.679, + "step": 2808 + }, + { + "epoch": 0.14436221605509303, + "grad_norm": 1.1030851602554321, + "learning_rate": 9.66099807780114e-06, + "loss": 0.7907, + "step": 2809 + }, + { + "epoch": 0.14441360879843765, + "grad_norm": 1.1934466361999512, + "learning_rate": 9.660696783387721e-06, + "loss": 0.8396, + "step": 2810 + }, + { + "epoch": 0.1444650015417823, + "grad_norm": 1.2162672281265259, + "learning_rate": 9.66039535984563e-06, + "loss": 0.7848, + "step": 2811 + }, + { + "epoch": 0.14451639428512694, + "grad_norm": 1.2180185317993164, + "learning_rate": 9.660093807183215e-06, + "loss": 0.8073, + "step": 2812 + }, + { + "epoch": 0.1445677870284716, + "grad_norm": 1.6599971055984497, + "learning_rate": 9.659792125408832e-06, + "loss": 0.8632, + "step": 2813 + }, + { + "epoch": 0.1446191797718162, + "grad_norm": 1.1207119226455688, + "learning_rate": 9.659490314530837e-06, + "loss": 0.8681, + "step": 2814 + }, + { + "epoch": 0.14467057251516086, + "grad_norm": 1.0741400718688965, + "learning_rate": 9.659188374557596e-06, + "loss": 0.8764, + "step": 2815 + }, + { + "epoch": 0.1447219652585055, + "grad_norm": 1.111618995666504, + "learning_rate": 9.658886305497472e-06, + "loss": 0.7852, + "step": 2816 + }, + { + "epoch": 0.14477335800185015, + "grad_norm": 0.8580141067504883, + "learning_rate": 9.658584107358835e-06, + "loss": 0.7013, + "step": 2817 + }, + { + "epoch": 0.14482475074519477, + "grad_norm": 1.1124556064605713, + "learning_rate": 9.658281780150057e-06, + "loss": 0.7501, + "step": 2818 + }, + { + "epoch": 0.14487614348853942, + "grad_norm": 0.8735184073448181, + "learning_rate": 9.657979323879514e-06, + "loss": 0.743, + "step": 2819 + }, + { + "epoch": 0.14492753623188406, + "grad_norm": 0.7720156908035278, + "learning_rate": 9.657676738555588e-06, + "loss": 0.7585, + "step": 2820 + }, + { + "epoch": 0.1449789289752287, + "grad_norm": 1.124157428741455, + "learning_rate": 9.657374024186659e-06, + "loss": 0.797, + "step": 2821 + }, + { + "epoch": 0.14503032171857333, + "grad_norm": 1.1096609830856323, + "learning_rate": 9.657071180781114e-06, + "loss": 0.827, + "step": 2822 + }, + { + "epoch": 0.14508171446191798, + "grad_norm": 0.9002783894538879, + "learning_rate": 9.656768208347348e-06, + "loss": 0.7156, + "step": 2823 + }, + { + "epoch": 0.14513310720526262, + "grad_norm": 0.8159273266792297, + "learning_rate": 9.656465106893751e-06, + "loss": 0.7424, + "step": 2824 + }, + { + "epoch": 0.14518449994860724, + "grad_norm": 0.9551854729652405, + "learning_rate": 9.656161876428722e-06, + "loss": 0.7377, + "step": 2825 + }, + { + "epoch": 0.1452358926919519, + "grad_norm": 1.2128301858901978, + "learning_rate": 9.655858516960664e-06, + "loss": 0.8025, + "step": 2826 + }, + { + "epoch": 0.14528728543529654, + "grad_norm": 1.1419256925582886, + "learning_rate": 9.655555028497978e-06, + "loss": 0.7618, + "step": 2827 + }, + { + "epoch": 0.14533867817864118, + "grad_norm": 1.1777626276016235, + "learning_rate": 9.655251411049075e-06, + "loss": 0.8568, + "step": 2828 + }, + { + "epoch": 0.1453900709219858, + "grad_norm": 0.789156973361969, + "learning_rate": 9.654947664622367e-06, + "loss": 0.7059, + "step": 2829 + }, + { + "epoch": 0.14544146366533045, + "grad_norm": 0.7554160952568054, + "learning_rate": 9.654643789226267e-06, + "loss": 0.752, + "step": 2830 + }, + { + "epoch": 0.1454928564086751, + "grad_norm": 0.7973955273628235, + "learning_rate": 9.6543397848692e-06, + "loss": 0.7005, + "step": 2831 + }, + { + "epoch": 0.14554424915201974, + "grad_norm": 1.152632236480713, + "learning_rate": 9.654035651559583e-06, + "loss": 0.8085, + "step": 2832 + }, + { + "epoch": 0.14559564189536436, + "grad_norm": 1.182613730430603, + "learning_rate": 9.653731389305843e-06, + "loss": 0.8355, + "step": 2833 + }, + { + "epoch": 0.145647034638709, + "grad_norm": 1.088126540184021, + "learning_rate": 9.653426998116412e-06, + "loss": 0.7957, + "step": 2834 + }, + { + "epoch": 0.14569842738205366, + "grad_norm": 1.1100579500198364, + "learning_rate": 9.653122477999724e-06, + "loss": 0.8552, + "step": 2835 + }, + { + "epoch": 0.1457498201253983, + "grad_norm": 1.121160626411438, + "learning_rate": 9.652817828964212e-06, + "loss": 0.7724, + "step": 2836 + }, + { + "epoch": 0.14580121286874292, + "grad_norm": 1.111227035522461, + "learning_rate": 9.652513051018319e-06, + "loss": 0.7403, + "step": 2837 + }, + { + "epoch": 0.14585260561208757, + "grad_norm": 1.0959700345993042, + "learning_rate": 9.65220814417049e-06, + "loss": 0.8049, + "step": 2838 + }, + { + "epoch": 0.14590399835543222, + "grad_norm": 1.1258735656738281, + "learning_rate": 9.651903108429172e-06, + "loss": 0.7874, + "step": 2839 + }, + { + "epoch": 0.14595539109877687, + "grad_norm": 1.0866092443466187, + "learning_rate": 9.651597943802817e-06, + "loss": 0.8416, + "step": 2840 + }, + { + "epoch": 0.14600678384212148, + "grad_norm": 0.8451864719390869, + "learning_rate": 9.651292650299877e-06, + "loss": 0.7084, + "step": 2841 + }, + { + "epoch": 0.14605817658546613, + "grad_norm": 1.16310453414917, + "learning_rate": 9.650987227928815e-06, + "loss": 0.8453, + "step": 2842 + }, + { + "epoch": 0.14610956932881078, + "grad_norm": 1.3342370986938477, + "learning_rate": 9.650681676698088e-06, + "loss": 0.7605, + "step": 2843 + }, + { + "epoch": 0.1461609620721554, + "grad_norm": 1.4599759578704834, + "learning_rate": 9.650375996616164e-06, + "loss": 0.8379, + "step": 2844 + }, + { + "epoch": 0.14621235481550005, + "grad_norm": 1.0545005798339844, + "learning_rate": 9.650070187691514e-06, + "loss": 0.8314, + "step": 2845 + }, + { + "epoch": 0.1462637475588447, + "grad_norm": 1.0731251239776611, + "learning_rate": 9.649764249932608e-06, + "loss": 0.839, + "step": 2846 + }, + { + "epoch": 0.14631514030218934, + "grad_norm": 1.3592246770858765, + "learning_rate": 9.649458183347921e-06, + "loss": 0.7921, + "step": 2847 + }, + { + "epoch": 0.14636653304553396, + "grad_norm": 1.1174054145812988, + "learning_rate": 9.649151987945938e-06, + "loss": 0.8272, + "step": 2848 + }, + { + "epoch": 0.1464179257888786, + "grad_norm": 1.0916825532913208, + "learning_rate": 9.648845663735137e-06, + "loss": 0.752, + "step": 2849 + }, + { + "epoch": 0.14646931853222325, + "grad_norm": 1.0872632265090942, + "learning_rate": 9.648539210724009e-06, + "loss": 0.8718, + "step": 2850 + }, + { + "epoch": 0.1465207112755679, + "grad_norm": 1.1224303245544434, + "learning_rate": 9.648232628921041e-06, + "loss": 0.7933, + "step": 2851 + }, + { + "epoch": 0.14657210401891252, + "grad_norm": 1.1784203052520752, + "learning_rate": 9.64792591833473e-06, + "loss": 0.8249, + "step": 2852 + }, + { + "epoch": 0.14662349676225717, + "grad_norm": 1.1208337545394897, + "learning_rate": 9.647619078973573e-06, + "loss": 0.8253, + "step": 2853 + }, + { + "epoch": 0.1466748895056018, + "grad_norm": 1.0951341390609741, + "learning_rate": 9.647312110846072e-06, + "loss": 0.8206, + "step": 2854 + }, + { + "epoch": 0.14672628224894646, + "grad_norm": 0.9326683282852173, + "learning_rate": 9.647005013960729e-06, + "loss": 0.7517, + "step": 2855 + }, + { + "epoch": 0.14677767499229108, + "grad_norm": 1.1209354400634766, + "learning_rate": 9.646697788326053e-06, + "loss": 0.7804, + "step": 2856 + }, + { + "epoch": 0.14682906773563573, + "grad_norm": 1.1009749174118042, + "learning_rate": 9.64639043395056e-06, + "loss": 0.7645, + "step": 2857 + }, + { + "epoch": 0.14688046047898037, + "grad_norm": 0.6894964575767517, + "learning_rate": 9.64608295084276e-06, + "loss": 0.6925, + "step": 2858 + }, + { + "epoch": 0.14693185322232502, + "grad_norm": 1.1445415019989014, + "learning_rate": 9.645775339011178e-06, + "loss": 0.8574, + "step": 2859 + }, + { + "epoch": 0.14698324596566964, + "grad_norm": 1.1482607126235962, + "learning_rate": 9.645467598464331e-06, + "loss": 0.8881, + "step": 2860 + }, + { + "epoch": 0.1470346387090143, + "grad_norm": 0.7924277782440186, + "learning_rate": 9.645159729210747e-06, + "loss": 0.7235, + "step": 2861 + }, + { + "epoch": 0.14708603145235893, + "grad_norm": 1.0638656616210938, + "learning_rate": 9.644851731258957e-06, + "loss": 0.7991, + "step": 2862 + }, + { + "epoch": 0.14713742419570355, + "grad_norm": 1.1228902339935303, + "learning_rate": 9.644543604617495e-06, + "loss": 0.8015, + "step": 2863 + }, + { + "epoch": 0.1471888169390482, + "grad_norm": 1.2089815139770508, + "learning_rate": 9.644235349294896e-06, + "loss": 0.8241, + "step": 2864 + }, + { + "epoch": 0.14724020968239285, + "grad_norm": 1.0557582378387451, + "learning_rate": 9.6439269652997e-06, + "loss": 0.8076, + "step": 2865 + }, + { + "epoch": 0.1472916024257375, + "grad_norm": 1.1455074548721313, + "learning_rate": 9.643618452640453e-06, + "loss": 0.8003, + "step": 2866 + }, + { + "epoch": 0.1473429951690821, + "grad_norm": 1.1890294551849365, + "learning_rate": 9.6433098113257e-06, + "loss": 0.8004, + "step": 2867 + }, + { + "epoch": 0.14739438791242676, + "grad_norm": 1.0783731937408447, + "learning_rate": 9.643001041363997e-06, + "loss": 0.7807, + "step": 2868 + }, + { + "epoch": 0.1474457806557714, + "grad_norm": 1.0693098306655884, + "learning_rate": 9.642692142763893e-06, + "loss": 0.7815, + "step": 2869 + }, + { + "epoch": 0.14749717339911605, + "grad_norm": 2.2871763706207275, + "learning_rate": 9.64238311553395e-06, + "loss": 0.8495, + "step": 2870 + }, + { + "epoch": 0.14754856614246067, + "grad_norm": 1.0887948274612427, + "learning_rate": 9.64207395968273e-06, + "loss": 0.7897, + "step": 2871 + }, + { + "epoch": 0.14759995888580532, + "grad_norm": 1.0784099102020264, + "learning_rate": 9.641764675218795e-06, + "loss": 0.8009, + "step": 2872 + }, + { + "epoch": 0.14765135162914997, + "grad_norm": 1.1485944986343384, + "learning_rate": 9.641455262150718e-06, + "loss": 0.8332, + "step": 2873 + }, + { + "epoch": 0.14770274437249462, + "grad_norm": 1.1570560932159424, + "learning_rate": 9.64114572048707e-06, + "loss": 0.8024, + "step": 2874 + }, + { + "epoch": 0.14775413711583923, + "grad_norm": 1.1131924390792847, + "learning_rate": 9.640836050236426e-06, + "loss": 0.7483, + "step": 2875 + }, + { + "epoch": 0.14780552985918388, + "grad_norm": 1.2897237539291382, + "learning_rate": 9.640526251407368e-06, + "loss": 0.7805, + "step": 2876 + }, + { + "epoch": 0.14785692260252853, + "grad_norm": 1.109590768814087, + "learning_rate": 9.640216324008476e-06, + "loss": 0.8036, + "step": 2877 + }, + { + "epoch": 0.14790831534587318, + "grad_norm": 1.0632981061935425, + "learning_rate": 9.639906268048339e-06, + "loss": 0.7816, + "step": 2878 + }, + { + "epoch": 0.1479597080892178, + "grad_norm": 0.9479115009307861, + "learning_rate": 9.639596083535547e-06, + "loss": 0.7536, + "step": 2879 + }, + { + "epoch": 0.14801110083256244, + "grad_norm": 1.1946831941604614, + "learning_rate": 9.639285770478696e-06, + "loss": 0.8872, + "step": 2880 + }, + { + "epoch": 0.1480624935759071, + "grad_norm": 1.0937085151672363, + "learning_rate": 9.63897532888638e-06, + "loss": 0.7513, + "step": 2881 + }, + { + "epoch": 0.1481138863192517, + "grad_norm": 1.1250693798065186, + "learning_rate": 9.638664758767203e-06, + "loss": 0.8877, + "step": 2882 + }, + { + "epoch": 0.14816527906259636, + "grad_norm": 1.1539740562438965, + "learning_rate": 9.638354060129766e-06, + "loss": 0.8129, + "step": 2883 + }, + { + "epoch": 0.148216671805941, + "grad_norm": 0.8243728280067444, + "learning_rate": 9.63804323298268e-06, + "loss": 0.7646, + "step": 2884 + }, + { + "epoch": 0.14826806454928565, + "grad_norm": 0.8005802035331726, + "learning_rate": 9.637732277334555e-06, + "loss": 0.6997, + "step": 2885 + }, + { + "epoch": 0.14831945729263027, + "grad_norm": 1.1559743881225586, + "learning_rate": 9.63742119319401e-06, + "loss": 0.8377, + "step": 2886 + }, + { + "epoch": 0.14837085003597492, + "grad_norm": 1.1165852546691895, + "learning_rate": 9.637109980569659e-06, + "loss": 0.8116, + "step": 2887 + }, + { + "epoch": 0.14842224277931956, + "grad_norm": 2.090949296951294, + "learning_rate": 9.63679863947013e-06, + "loss": 0.7226, + "step": 2888 + }, + { + "epoch": 0.1484736355226642, + "grad_norm": 1.0865733623504639, + "learning_rate": 9.636487169904041e-06, + "loss": 0.8042, + "step": 2889 + }, + { + "epoch": 0.14852502826600883, + "grad_norm": 1.357225775718689, + "learning_rate": 9.636175571880028e-06, + "loss": 0.8223, + "step": 2890 + }, + { + "epoch": 0.14857642100935348, + "grad_norm": 1.160138726234436, + "learning_rate": 9.635863845406723e-06, + "loss": 0.8265, + "step": 2891 + }, + { + "epoch": 0.14862781375269812, + "grad_norm": 1.0842944383621216, + "learning_rate": 9.635551990492762e-06, + "loss": 0.8019, + "step": 2892 + }, + { + "epoch": 0.14867920649604277, + "grad_norm": 1.135393738746643, + "learning_rate": 9.635240007146785e-06, + "loss": 0.7571, + "step": 2893 + }, + { + "epoch": 0.1487305992393874, + "grad_norm": 1.138669729232788, + "learning_rate": 9.634927895377434e-06, + "loss": 0.8391, + "step": 2894 + }, + { + "epoch": 0.14878199198273204, + "grad_norm": 1.0314048528671265, + "learning_rate": 9.634615655193363e-06, + "loss": 0.7413, + "step": 2895 + }, + { + "epoch": 0.14883338472607668, + "grad_norm": 1.0841628313064575, + "learning_rate": 9.634303286603213e-06, + "loss": 0.8175, + "step": 2896 + }, + { + "epoch": 0.14888477746942133, + "grad_norm": 1.4417554140090942, + "learning_rate": 9.633990789615646e-06, + "loss": 0.786, + "step": 2897 + }, + { + "epoch": 0.14893617021276595, + "grad_norm": 1.5736850500106812, + "learning_rate": 9.633678164239318e-06, + "loss": 0.8303, + "step": 2898 + }, + { + "epoch": 0.1489875629561106, + "grad_norm": 1.1179144382476807, + "learning_rate": 9.633365410482889e-06, + "loss": 0.8605, + "step": 2899 + }, + { + "epoch": 0.14903895569945524, + "grad_norm": 1.1698509454727173, + "learning_rate": 9.633052528355025e-06, + "loss": 0.7788, + "step": 2900 + }, + { + "epoch": 0.14909034844279986, + "grad_norm": 1.2293646335601807, + "learning_rate": 9.632739517864396e-06, + "loss": 0.7936, + "step": 2901 + }, + { + "epoch": 0.1491417411861445, + "grad_norm": 1.1333012580871582, + "learning_rate": 9.632426379019672e-06, + "loss": 0.824, + "step": 2902 + }, + { + "epoch": 0.14919313392948916, + "grad_norm": 1.195748209953308, + "learning_rate": 9.632113111829531e-06, + "loss": 0.8131, + "step": 2903 + }, + { + "epoch": 0.1492445266728338, + "grad_norm": 1.111189365386963, + "learning_rate": 9.63179971630265e-06, + "loss": 0.7834, + "step": 2904 + }, + { + "epoch": 0.14929591941617842, + "grad_norm": 1.1388803720474243, + "learning_rate": 9.631486192447714e-06, + "loss": 0.8012, + "step": 2905 + }, + { + "epoch": 0.14934731215952307, + "grad_norm": 1.2618441581726074, + "learning_rate": 9.63117254027341e-06, + "loss": 0.7955, + "step": 2906 + }, + { + "epoch": 0.14939870490286772, + "grad_norm": 1.4290024042129517, + "learning_rate": 9.630858759788425e-06, + "loss": 0.8325, + "step": 2907 + }, + { + "epoch": 0.14945009764621237, + "grad_norm": 1.197076678276062, + "learning_rate": 9.630544851001453e-06, + "loss": 0.8305, + "step": 2908 + }, + { + "epoch": 0.14950149038955698, + "grad_norm": 1.087079644203186, + "learning_rate": 9.630230813921194e-06, + "loss": 0.7274, + "step": 2909 + }, + { + "epoch": 0.14955288313290163, + "grad_norm": 1.129672646522522, + "learning_rate": 9.629916648556347e-06, + "loss": 0.7828, + "step": 2910 + }, + { + "epoch": 0.14960427587624628, + "grad_norm": 1.167987585067749, + "learning_rate": 9.629602354915616e-06, + "loss": 0.8351, + "step": 2911 + }, + { + "epoch": 0.14965566861959093, + "grad_norm": 0.9685371518135071, + "learning_rate": 9.62928793300771e-06, + "loss": 0.7573, + "step": 2912 + }, + { + "epoch": 0.14970706136293555, + "grad_norm": 0.8648548126220703, + "learning_rate": 9.628973382841338e-06, + "loss": 0.72, + "step": 2913 + }, + { + "epoch": 0.1497584541062802, + "grad_norm": 1.0924067497253418, + "learning_rate": 9.628658704425215e-06, + "loss": 0.8289, + "step": 2914 + }, + { + "epoch": 0.14980984684962484, + "grad_norm": 1.190320372581482, + "learning_rate": 9.628343897768062e-06, + "loss": 0.8264, + "step": 2915 + }, + { + "epoch": 0.1498612395929695, + "grad_norm": 1.1206905841827393, + "learning_rate": 9.6280289628786e-06, + "loss": 0.8305, + "step": 2916 + }, + { + "epoch": 0.1499126323363141, + "grad_norm": 1.1106091737747192, + "learning_rate": 9.627713899765554e-06, + "loss": 0.8469, + "step": 2917 + }, + { + "epoch": 0.14996402507965875, + "grad_norm": 1.0479516983032227, + "learning_rate": 9.627398708437654e-06, + "loss": 0.7908, + "step": 2918 + }, + { + "epoch": 0.1500154178230034, + "grad_norm": 1.098765254020691, + "learning_rate": 9.62708338890363e-06, + "loss": 0.7931, + "step": 2919 + }, + { + "epoch": 0.15006681056634802, + "grad_norm": 1.0796483755111694, + "learning_rate": 9.626767941172222e-06, + "loss": 0.8246, + "step": 2920 + }, + { + "epoch": 0.15011820330969267, + "grad_norm": 1.1554334163665771, + "learning_rate": 9.626452365252168e-06, + "loss": 0.8163, + "step": 2921 + }, + { + "epoch": 0.1501695960530373, + "grad_norm": 1.1507536172866821, + "learning_rate": 9.62613666115221e-06, + "loss": 0.8234, + "step": 2922 + }, + { + "epoch": 0.15022098879638196, + "grad_norm": 1.1683820486068726, + "learning_rate": 9.625820828881096e-06, + "loss": 0.7735, + "step": 2923 + }, + { + "epoch": 0.15027238153972658, + "grad_norm": 1.120169997215271, + "learning_rate": 9.625504868447577e-06, + "loss": 0.834, + "step": 2924 + }, + { + "epoch": 0.15032377428307123, + "grad_norm": 1.137853980064392, + "learning_rate": 9.625188779860407e-06, + "loss": 0.7956, + "step": 2925 + }, + { + "epoch": 0.15037516702641587, + "grad_norm": 1.113329291343689, + "learning_rate": 9.624872563128342e-06, + "loss": 0.7993, + "step": 2926 + }, + { + "epoch": 0.15042655976976052, + "grad_norm": 0.8806728720664978, + "learning_rate": 9.624556218260144e-06, + "loss": 0.6828, + "step": 2927 + }, + { + "epoch": 0.15047795251310514, + "grad_norm": 1.130362868309021, + "learning_rate": 9.624239745264578e-06, + "loss": 0.7646, + "step": 2928 + }, + { + "epoch": 0.1505293452564498, + "grad_norm": 0.9131379723548889, + "learning_rate": 9.62392314415041e-06, + "loss": 0.7036, + "step": 2929 + }, + { + "epoch": 0.15058073799979443, + "grad_norm": 1.1984407901763916, + "learning_rate": 9.623606414926416e-06, + "loss": 0.8113, + "step": 2930 + }, + { + "epoch": 0.15063213074313908, + "grad_norm": 1.1617956161499023, + "learning_rate": 9.623289557601369e-06, + "loss": 0.7972, + "step": 2931 + }, + { + "epoch": 0.1506835234864837, + "grad_norm": 1.1492894887924194, + "learning_rate": 9.622972572184047e-06, + "loss": 0.8038, + "step": 2932 + }, + { + "epoch": 0.15073491622982835, + "grad_norm": 1.0828661918640137, + "learning_rate": 9.62265545868323e-06, + "loss": 0.8241, + "step": 2933 + }, + { + "epoch": 0.150786308973173, + "grad_norm": 0.8112282752990723, + "learning_rate": 9.62233821710771e-06, + "loss": 0.6988, + "step": 2934 + }, + { + "epoch": 0.15083770171651764, + "grad_norm": 1.1404075622558594, + "learning_rate": 9.622020847466274e-06, + "loss": 0.8171, + "step": 2935 + }, + { + "epoch": 0.15088909445986226, + "grad_norm": 0.8881917595863342, + "learning_rate": 9.621703349767712e-06, + "loss": 0.7569, + "step": 2936 + }, + { + "epoch": 0.1509404872032069, + "grad_norm": 1.2118170261383057, + "learning_rate": 9.621385724020824e-06, + "loss": 0.9396, + "step": 2937 + }, + { + "epoch": 0.15099187994655155, + "grad_norm": 1.148276448249817, + "learning_rate": 9.621067970234408e-06, + "loss": 0.835, + "step": 2938 + }, + { + "epoch": 0.15104327268989617, + "grad_norm": 1.1282833814620972, + "learning_rate": 9.620750088417268e-06, + "loss": 0.8175, + "step": 2939 + }, + { + "epoch": 0.15109466543324082, + "grad_norm": 1.109713077545166, + "learning_rate": 9.620432078578213e-06, + "loss": 0.7927, + "step": 2940 + }, + { + "epoch": 0.15114605817658547, + "grad_norm": 0.7900930643081665, + "learning_rate": 9.620113940726052e-06, + "loss": 0.7464, + "step": 2941 + }, + { + "epoch": 0.15119745091993012, + "grad_norm": 1.152547001838684, + "learning_rate": 9.619795674869601e-06, + "loss": 0.7936, + "step": 2942 + }, + { + "epoch": 0.15124884366327473, + "grad_norm": 1.09788978099823, + "learning_rate": 9.619477281017675e-06, + "loss": 0.8573, + "step": 2943 + }, + { + "epoch": 0.15130023640661938, + "grad_norm": 1.0922348499298096, + "learning_rate": 9.619158759179098e-06, + "loss": 0.8245, + "step": 2944 + }, + { + "epoch": 0.15135162914996403, + "grad_norm": 1.1092678308486938, + "learning_rate": 9.618840109362693e-06, + "loss": 0.7579, + "step": 2945 + }, + { + "epoch": 0.15140302189330868, + "grad_norm": 1.141142725944519, + "learning_rate": 9.61852133157729e-06, + "loss": 0.8134, + "step": 2946 + }, + { + "epoch": 0.1514544146366533, + "grad_norm": 0.8310251235961914, + "learning_rate": 9.61820242583172e-06, + "loss": 0.7005, + "step": 2947 + }, + { + "epoch": 0.15150580737999794, + "grad_norm": 0.7588998675346375, + "learning_rate": 9.61788339213482e-06, + "loss": 0.7357, + "step": 2948 + }, + { + "epoch": 0.1515572001233426, + "grad_norm": 1.4933528900146484, + "learning_rate": 9.617564230495428e-06, + "loss": 0.8218, + "step": 2949 + }, + { + "epoch": 0.15160859286668724, + "grad_norm": 0.803726315498352, + "learning_rate": 9.617244940922386e-06, + "loss": 0.6845, + "step": 2950 + }, + { + "epoch": 0.15165998561003186, + "grad_norm": 1.274340271949768, + "learning_rate": 9.616925523424541e-06, + "loss": 0.9059, + "step": 2951 + }, + { + "epoch": 0.1517113783533765, + "grad_norm": 1.2259368896484375, + "learning_rate": 9.616605978010742e-06, + "loss": 0.7815, + "step": 2952 + }, + { + "epoch": 0.15176277109672115, + "grad_norm": 0.7884451746940613, + "learning_rate": 9.616286304689843e-06, + "loss": 0.7158, + "step": 2953 + }, + { + "epoch": 0.15181416384006577, + "grad_norm": 1.1206575632095337, + "learning_rate": 9.615966503470703e-06, + "loss": 0.8044, + "step": 2954 + }, + { + "epoch": 0.15186555658341042, + "grad_norm": 1.2555440664291382, + "learning_rate": 9.615646574362178e-06, + "loss": 0.8495, + "step": 2955 + }, + { + "epoch": 0.15191694932675506, + "grad_norm": 1.056719422340393, + "learning_rate": 9.615326517373135e-06, + "loss": 0.8072, + "step": 2956 + }, + { + "epoch": 0.1519683420700997, + "grad_norm": 0.8035044074058533, + "learning_rate": 9.61500633251244e-06, + "loss": 0.7283, + "step": 2957 + }, + { + "epoch": 0.15201973481344433, + "grad_norm": 1.2515276670455933, + "learning_rate": 9.614686019788966e-06, + "loss": 0.8739, + "step": 2958 + }, + { + "epoch": 0.15207112755678898, + "grad_norm": 1.1463450193405151, + "learning_rate": 9.614365579211585e-06, + "loss": 0.8513, + "step": 2959 + }, + { + "epoch": 0.15212252030013362, + "grad_norm": 1.116761565208435, + "learning_rate": 9.614045010789175e-06, + "loss": 0.8273, + "step": 2960 + }, + { + "epoch": 0.15217391304347827, + "grad_norm": 1.1044739484786987, + "learning_rate": 9.613724314530621e-06, + "loss": 0.7587, + "step": 2961 + }, + { + "epoch": 0.1522253057868229, + "grad_norm": 1.0965312719345093, + "learning_rate": 9.613403490444804e-06, + "loss": 0.7869, + "step": 2962 + }, + { + "epoch": 0.15227669853016754, + "grad_norm": 1.139046549797058, + "learning_rate": 9.613082538540614e-06, + "loss": 0.7942, + "step": 2963 + }, + { + "epoch": 0.15232809127351218, + "grad_norm": 0.8312227129936218, + "learning_rate": 9.612761458826947e-06, + "loss": 0.7246, + "step": 2964 + }, + { + "epoch": 0.15237948401685683, + "grad_norm": 1.1537731885910034, + "learning_rate": 9.612440251312694e-06, + "loss": 0.807, + "step": 2965 + }, + { + "epoch": 0.15243087676020145, + "grad_norm": 1.0862990617752075, + "learning_rate": 9.612118916006757e-06, + "loss": 0.7842, + "step": 2966 + }, + { + "epoch": 0.1524822695035461, + "grad_norm": 1.2529340982437134, + "learning_rate": 9.611797452918038e-06, + "loss": 0.7649, + "step": 2967 + }, + { + "epoch": 0.15253366224689074, + "grad_norm": 0.8279370069503784, + "learning_rate": 9.61147586205544e-06, + "loss": 0.7283, + "step": 2968 + }, + { + "epoch": 0.1525850549902354, + "grad_norm": 0.8124724626541138, + "learning_rate": 9.611154143427878e-06, + "loss": 0.7181, + "step": 2969 + }, + { + "epoch": 0.15263644773358, + "grad_norm": 1.1039282083511353, + "learning_rate": 9.610832297044265e-06, + "loss": 0.8086, + "step": 2970 + }, + { + "epoch": 0.15268784047692466, + "grad_norm": 1.0751821994781494, + "learning_rate": 9.610510322913518e-06, + "loss": 0.7618, + "step": 2971 + }, + { + "epoch": 0.1527392332202693, + "grad_norm": 1.1291072368621826, + "learning_rate": 9.610188221044554e-06, + "loss": 0.8286, + "step": 2972 + }, + { + "epoch": 0.15279062596361392, + "grad_norm": 1.1156890392303467, + "learning_rate": 9.609865991446302e-06, + "loss": 0.7895, + "step": 2973 + }, + { + "epoch": 0.15284201870695857, + "grad_norm": 1.1015522480010986, + "learning_rate": 9.609543634127683e-06, + "loss": 0.8147, + "step": 2974 + }, + { + "epoch": 0.15289341145030322, + "grad_norm": 1.1526223421096802, + "learning_rate": 9.609221149097636e-06, + "loss": 0.8246, + "step": 2975 + }, + { + "epoch": 0.15294480419364787, + "grad_norm": 1.1723006963729858, + "learning_rate": 9.60889853636509e-06, + "loss": 0.7787, + "step": 2976 + }, + { + "epoch": 0.15299619693699248, + "grad_norm": 1.1209646463394165, + "learning_rate": 9.608575795938986e-06, + "loss": 0.7952, + "step": 2977 + }, + { + "epoch": 0.15304758968033713, + "grad_norm": 1.1622693538665771, + "learning_rate": 9.608252927828267e-06, + "loss": 0.8188, + "step": 2978 + }, + { + "epoch": 0.15309898242368178, + "grad_norm": 1.3454382419586182, + "learning_rate": 9.607929932041874e-06, + "loss": 0.8023, + "step": 2979 + }, + { + "epoch": 0.15315037516702643, + "grad_norm": 1.1904152631759644, + "learning_rate": 9.60760680858876e-06, + "loss": 0.7886, + "step": 2980 + }, + { + "epoch": 0.15320176791037105, + "grad_norm": 1.2229959964752197, + "learning_rate": 9.607283557477876e-06, + "loss": 0.8834, + "step": 2981 + }, + { + "epoch": 0.1532531606537157, + "grad_norm": 0.9498704671859741, + "learning_rate": 9.606960178718175e-06, + "loss": 0.7274, + "step": 2982 + }, + { + "epoch": 0.15330455339706034, + "grad_norm": 0.8235172629356384, + "learning_rate": 9.606636672318623e-06, + "loss": 0.756, + "step": 2983 + }, + { + "epoch": 0.153355946140405, + "grad_norm": 1.0444530248641968, + "learning_rate": 9.606313038288177e-06, + "loss": 0.7972, + "step": 2984 + }, + { + "epoch": 0.1534073388837496, + "grad_norm": 1.1400405168533325, + "learning_rate": 9.605989276635805e-06, + "loss": 0.8118, + "step": 2985 + }, + { + "epoch": 0.15345873162709425, + "grad_norm": 1.0790746212005615, + "learning_rate": 9.60566538737048e-06, + "loss": 0.7795, + "step": 2986 + }, + { + "epoch": 0.1535101243704389, + "grad_norm": 0.9387630224227905, + "learning_rate": 9.605341370501173e-06, + "loss": 0.7594, + "step": 2987 + }, + { + "epoch": 0.15356151711378355, + "grad_norm": 1.158873438835144, + "learning_rate": 9.605017226036861e-06, + "loss": 0.81, + "step": 2988 + }, + { + "epoch": 0.15361290985712817, + "grad_norm": 1.1313420534133911, + "learning_rate": 9.604692953986526e-06, + "loss": 0.8594, + "step": 2989 + }, + { + "epoch": 0.1536643026004728, + "grad_norm": 1.0720127820968628, + "learning_rate": 9.604368554359152e-06, + "loss": 0.791, + "step": 2990 + }, + { + "epoch": 0.15371569534381746, + "grad_norm": 1.1312565803527832, + "learning_rate": 9.604044027163725e-06, + "loss": 0.795, + "step": 2991 + }, + { + "epoch": 0.15376708808716208, + "grad_norm": 1.133973479270935, + "learning_rate": 9.60371937240924e-06, + "loss": 0.8124, + "step": 2992 + }, + { + "epoch": 0.15381848083050673, + "grad_norm": 0.7848932147026062, + "learning_rate": 9.603394590104689e-06, + "loss": 0.7201, + "step": 2993 + }, + { + "epoch": 0.15386987357385137, + "grad_norm": 0.9597364068031311, + "learning_rate": 9.60306968025907e-06, + "loss": 0.7226, + "step": 2994 + }, + { + "epoch": 0.15392126631719602, + "grad_norm": 1.1648770570755005, + "learning_rate": 9.602744642881387e-06, + "loss": 0.8061, + "step": 2995 + }, + { + "epoch": 0.15397265906054064, + "grad_norm": 0.7717452049255371, + "learning_rate": 9.602419477980643e-06, + "loss": 0.7146, + "step": 2996 + }, + { + "epoch": 0.1540240518038853, + "grad_norm": 1.1483856439590454, + "learning_rate": 9.60209418556585e-06, + "loss": 0.8003, + "step": 2997 + }, + { + "epoch": 0.15407544454722993, + "grad_norm": 0.9341374635696411, + "learning_rate": 9.601768765646018e-06, + "loss": 0.6776, + "step": 2998 + }, + { + "epoch": 0.15412683729057458, + "grad_norm": 0.8843017816543579, + "learning_rate": 9.601443218230163e-06, + "loss": 0.746, + "step": 2999 + }, + { + "epoch": 0.1541782300339192, + "grad_norm": 1.2591841220855713, + "learning_rate": 9.601117543327305e-06, + "loss": 0.7891, + "step": 3000 + }, + { + "epoch": 0.15422962277726385, + "grad_norm": 1.1563690900802612, + "learning_rate": 9.60079174094647e-06, + "loss": 0.814, + "step": 3001 + }, + { + "epoch": 0.1542810155206085, + "grad_norm": 1.1140031814575195, + "learning_rate": 9.600465811096682e-06, + "loss": 0.8519, + "step": 3002 + }, + { + "epoch": 0.15433240826395314, + "grad_norm": 1.2970277070999146, + "learning_rate": 9.60013975378697e-06, + "loss": 0.7649, + "step": 3003 + }, + { + "epoch": 0.15438380100729776, + "grad_norm": 1.7794508934020996, + "learning_rate": 9.59981356902637e-06, + "loss": 0.8601, + "step": 3004 + }, + { + "epoch": 0.1544351937506424, + "grad_norm": 0.9201565980911255, + "learning_rate": 9.599487256823918e-06, + "loss": 0.726, + "step": 3005 + }, + { + "epoch": 0.15448658649398705, + "grad_norm": 1.1056443452835083, + "learning_rate": 9.599160817188655e-06, + "loss": 0.7554, + "step": 3006 + }, + { + "epoch": 0.1545379792373317, + "grad_norm": 1.1170471906661987, + "learning_rate": 9.598834250129626e-06, + "loss": 0.8006, + "step": 3007 + }, + { + "epoch": 0.15458937198067632, + "grad_norm": 1.1487152576446533, + "learning_rate": 9.598507555655875e-06, + "loss": 0.8502, + "step": 3008 + }, + { + "epoch": 0.15464076472402097, + "grad_norm": 1.1027685403823853, + "learning_rate": 9.59818073377646e-06, + "loss": 0.861, + "step": 3009 + }, + { + "epoch": 0.15469215746736562, + "grad_norm": 1.0589098930358887, + "learning_rate": 9.59785378450043e-06, + "loss": 0.791, + "step": 3010 + }, + { + "epoch": 0.15474355021071023, + "grad_norm": 1.0861899852752686, + "learning_rate": 9.597526707836849e-06, + "loss": 0.822, + "step": 3011 + }, + { + "epoch": 0.15479494295405488, + "grad_norm": 1.5645182132720947, + "learning_rate": 9.597199503794774e-06, + "loss": 0.7781, + "step": 3012 + }, + { + "epoch": 0.15484633569739953, + "grad_norm": 1.1648106575012207, + "learning_rate": 9.596872172383273e-06, + "loss": 0.817, + "step": 3013 + }, + { + "epoch": 0.15489772844074418, + "grad_norm": 1.131691575050354, + "learning_rate": 9.596544713611413e-06, + "loss": 0.8319, + "step": 3014 + }, + { + "epoch": 0.1549491211840888, + "grad_norm": 1.2071539163589478, + "learning_rate": 9.596217127488268e-06, + "loss": 0.8823, + "step": 3015 + }, + { + "epoch": 0.15500051392743344, + "grad_norm": 0.9304954409599304, + "learning_rate": 9.595889414022914e-06, + "loss": 0.7584, + "step": 3016 + }, + { + "epoch": 0.1550519066707781, + "grad_norm": 1.0601961612701416, + "learning_rate": 9.595561573224433e-06, + "loss": 0.7422, + "step": 3017 + }, + { + "epoch": 0.15510329941412274, + "grad_norm": 1.0696712732315063, + "learning_rate": 9.5952336051019e-06, + "loss": 0.8429, + "step": 3018 + }, + { + "epoch": 0.15515469215746736, + "grad_norm": 0.7522971630096436, + "learning_rate": 9.594905509664412e-06, + "loss": 0.7537, + "step": 3019 + }, + { + "epoch": 0.155206084900812, + "grad_norm": 0.8931046724319458, + "learning_rate": 9.594577286921054e-06, + "loss": 0.7511, + "step": 3020 + }, + { + "epoch": 0.15525747764415665, + "grad_norm": 1.1393920183181763, + "learning_rate": 9.594248936880919e-06, + "loss": 0.7462, + "step": 3021 + }, + { + "epoch": 0.1553088703875013, + "grad_norm": 1.0803031921386719, + "learning_rate": 9.593920459553105e-06, + "loss": 0.8448, + "step": 3022 + }, + { + "epoch": 0.15536026313084592, + "grad_norm": 1.1737204790115356, + "learning_rate": 9.593591854946713e-06, + "loss": 0.7248, + "step": 3023 + }, + { + "epoch": 0.15541165587419056, + "grad_norm": 1.0927925109863281, + "learning_rate": 9.593263123070849e-06, + "loss": 0.7274, + "step": 3024 + }, + { + "epoch": 0.1554630486175352, + "grad_norm": 1.1205379962921143, + "learning_rate": 9.592934263934617e-06, + "loss": 0.7834, + "step": 3025 + }, + { + "epoch": 0.15551444136087986, + "grad_norm": 1.0936018228530884, + "learning_rate": 9.59260527754713e-06, + "loss": 0.8014, + "step": 3026 + }, + { + "epoch": 0.15556583410422448, + "grad_norm": 1.1625615358352661, + "learning_rate": 9.592276163917507e-06, + "loss": 0.7853, + "step": 3027 + }, + { + "epoch": 0.15561722684756912, + "grad_norm": 1.1285988092422485, + "learning_rate": 9.59194692305486e-06, + "loss": 0.8173, + "step": 3028 + }, + { + "epoch": 0.15566861959091377, + "grad_norm": 1.2074480056762695, + "learning_rate": 9.591617554968313e-06, + "loss": 0.8081, + "step": 3029 + }, + { + "epoch": 0.1557200123342584, + "grad_norm": 1.0644116401672363, + "learning_rate": 9.591288059666993e-06, + "loss": 0.7958, + "step": 3030 + }, + { + "epoch": 0.15577140507760304, + "grad_norm": 1.1561336517333984, + "learning_rate": 9.590958437160028e-06, + "loss": 0.8117, + "step": 3031 + }, + { + "epoch": 0.15582279782094768, + "grad_norm": 1.2603542804718018, + "learning_rate": 9.59062868745655e-06, + "loss": 0.7798, + "step": 3032 + }, + { + "epoch": 0.15587419056429233, + "grad_norm": 1.1251877546310425, + "learning_rate": 9.590298810565697e-06, + "loss": 0.8199, + "step": 3033 + }, + { + "epoch": 0.15592558330763695, + "grad_norm": 1.1156567335128784, + "learning_rate": 9.589968806496605e-06, + "loss": 0.7859, + "step": 3034 + }, + { + "epoch": 0.1559769760509816, + "grad_norm": 1.1767079830169678, + "learning_rate": 9.58963867525842e-06, + "loss": 0.8298, + "step": 3035 + }, + { + "epoch": 0.15602836879432624, + "grad_norm": 1.0802053213119507, + "learning_rate": 9.589308416860287e-06, + "loss": 0.7945, + "step": 3036 + }, + { + "epoch": 0.1560797615376709, + "grad_norm": 1.1257559061050415, + "learning_rate": 9.58897803131136e-06, + "loss": 0.8153, + "step": 3037 + }, + { + "epoch": 0.1561311542810155, + "grad_norm": 0.9175286889076233, + "learning_rate": 9.588647518620786e-06, + "loss": 0.771, + "step": 3038 + }, + { + "epoch": 0.15618254702436016, + "grad_norm": 1.1217080354690552, + "learning_rate": 9.588316878797726e-06, + "loss": 0.8253, + "step": 3039 + }, + { + "epoch": 0.1562339397677048, + "grad_norm": 0.7711915969848633, + "learning_rate": 9.58798611185134e-06, + "loss": 0.7206, + "step": 3040 + }, + { + "epoch": 0.15628533251104945, + "grad_norm": 1.1445866823196411, + "learning_rate": 9.587655217790792e-06, + "loss": 0.8226, + "step": 3041 + }, + { + "epoch": 0.15633672525439407, + "grad_norm": 1.1377453804016113, + "learning_rate": 9.587324196625252e-06, + "loss": 0.8379, + "step": 3042 + }, + { + "epoch": 0.15638811799773872, + "grad_norm": 0.8878036141395569, + "learning_rate": 9.586993048363888e-06, + "loss": 0.6898, + "step": 3043 + }, + { + "epoch": 0.15643951074108337, + "grad_norm": 1.3285459280014038, + "learning_rate": 9.586661773015875e-06, + "loss": 0.8733, + "step": 3044 + }, + { + "epoch": 0.156490903484428, + "grad_norm": 0.8551929593086243, + "learning_rate": 9.586330370590393e-06, + "loss": 0.7343, + "step": 3045 + }, + { + "epoch": 0.15654229622777263, + "grad_norm": 1.0831973552703857, + "learning_rate": 9.585998841096624e-06, + "loss": 0.7861, + "step": 3046 + }, + { + "epoch": 0.15659368897111728, + "grad_norm": 1.199660301208496, + "learning_rate": 9.585667184543753e-06, + "loss": 0.8218, + "step": 3047 + }, + { + "epoch": 0.15664508171446193, + "grad_norm": 1.1207102537155151, + "learning_rate": 9.585335400940967e-06, + "loss": 0.7858, + "step": 3048 + }, + { + "epoch": 0.15669647445780654, + "grad_norm": 0.8962370753288269, + "learning_rate": 9.58500349029746e-06, + "loss": 0.7223, + "step": 3049 + }, + { + "epoch": 0.1567478672011512, + "grad_norm": 1.1775535345077515, + "learning_rate": 9.584671452622427e-06, + "loss": 0.8017, + "step": 3050 + }, + { + "epoch": 0.15679925994449584, + "grad_norm": 1.1942967176437378, + "learning_rate": 9.584339287925068e-06, + "loss": 0.8163, + "step": 3051 + }, + { + "epoch": 0.15685065268784049, + "grad_norm": 1.151305913925171, + "learning_rate": 9.584006996214585e-06, + "loss": 0.7986, + "step": 3052 + }, + { + "epoch": 0.1569020454311851, + "grad_norm": 1.054308295249939, + "learning_rate": 9.583674577500185e-06, + "loss": 0.824, + "step": 3053 + }, + { + "epoch": 0.15695343817452975, + "grad_norm": 1.142619013786316, + "learning_rate": 9.583342031791081e-06, + "loss": 0.8351, + "step": 3054 + }, + { + "epoch": 0.1570048309178744, + "grad_norm": 1.0959057807922363, + "learning_rate": 9.583009359096483e-06, + "loss": 0.7646, + "step": 3055 + }, + { + "epoch": 0.15705622366121905, + "grad_norm": 1.116235375404358, + "learning_rate": 9.582676559425605e-06, + "loss": 0.8211, + "step": 3056 + }, + { + "epoch": 0.15710761640456367, + "grad_norm": 0.9597142338752747, + "learning_rate": 9.582343632787675e-06, + "loss": 0.7209, + "step": 3057 + }, + { + "epoch": 0.1571590091479083, + "grad_norm": 1.1602106094360352, + "learning_rate": 9.582010579191912e-06, + "loss": 0.8306, + "step": 3058 + }, + { + "epoch": 0.15721040189125296, + "grad_norm": 1.341246247291565, + "learning_rate": 9.581677398647544e-06, + "loss": 0.819, + "step": 3059 + }, + { + "epoch": 0.1572617946345976, + "grad_norm": 1.0703506469726562, + "learning_rate": 9.581344091163803e-06, + "loss": 0.8175, + "step": 3060 + }, + { + "epoch": 0.15731318737794223, + "grad_norm": 1.060909390449524, + "learning_rate": 9.581010656749924e-06, + "loss": 0.8182, + "step": 3061 + }, + { + "epoch": 0.15736458012128687, + "grad_norm": 1.1364223957061768, + "learning_rate": 9.580677095415144e-06, + "loss": 0.8128, + "step": 3062 + }, + { + "epoch": 0.15741597286463152, + "grad_norm": 0.7858501672744751, + "learning_rate": 9.580343407168704e-06, + "loss": 0.7177, + "step": 3063 + }, + { + "epoch": 0.15746736560797617, + "grad_norm": 0.7859862446784973, + "learning_rate": 9.580009592019853e-06, + "loss": 0.7055, + "step": 3064 + }, + { + "epoch": 0.1575187583513208, + "grad_norm": 1.477375864982605, + "learning_rate": 9.579675649977833e-06, + "loss": 0.8005, + "step": 3065 + }, + { + "epoch": 0.15757015109466543, + "grad_norm": 1.226243495941162, + "learning_rate": 9.579341581051902e-06, + "loss": 0.7751, + "step": 3066 + }, + { + "epoch": 0.15762154383801008, + "grad_norm": 7.041565418243408, + "learning_rate": 9.579007385251315e-06, + "loss": 0.8248, + "step": 3067 + }, + { + "epoch": 0.1576729365813547, + "grad_norm": 1.043668508529663, + "learning_rate": 9.578673062585328e-06, + "loss": 0.7887, + "step": 3068 + }, + { + "epoch": 0.15772432932469935, + "grad_norm": 1.154679298400879, + "learning_rate": 9.578338613063204e-06, + "loss": 0.8372, + "step": 3069 + }, + { + "epoch": 0.157775722068044, + "grad_norm": 2.3733458518981934, + "learning_rate": 9.578004036694214e-06, + "loss": 0.8061, + "step": 3070 + }, + { + "epoch": 0.15782711481138864, + "grad_norm": 1.2154031991958618, + "learning_rate": 9.577669333487622e-06, + "loss": 0.8013, + "step": 3071 + }, + { + "epoch": 0.15787850755473326, + "grad_norm": 1.1823316812515259, + "learning_rate": 9.577334503452706e-06, + "loss": 0.8473, + "step": 3072 + }, + { + "epoch": 0.1579299002980779, + "grad_norm": 1.139605164527893, + "learning_rate": 9.576999546598739e-06, + "loss": 0.8185, + "step": 3073 + }, + { + "epoch": 0.15798129304142255, + "grad_norm": 1.136753797531128, + "learning_rate": 9.576664462935003e-06, + "loss": 0.8018, + "step": 3074 + }, + { + "epoch": 0.1580326857847672, + "grad_norm": 1.198256015777588, + "learning_rate": 9.57632925247078e-06, + "loss": 0.7323, + "step": 3075 + }, + { + "epoch": 0.15808407852811182, + "grad_norm": 1.1671271324157715, + "learning_rate": 9.575993915215361e-06, + "loss": 0.8329, + "step": 3076 + }, + { + "epoch": 0.15813547127145647, + "grad_norm": 1.124439001083374, + "learning_rate": 9.575658451178034e-06, + "loss": 0.8341, + "step": 3077 + }, + { + "epoch": 0.15818686401480112, + "grad_norm": 1.091123104095459, + "learning_rate": 9.575322860368093e-06, + "loss": 0.815, + "step": 3078 + }, + { + "epoch": 0.15823825675814576, + "grad_norm": 1.131137728691101, + "learning_rate": 9.574987142794839e-06, + "loss": 0.8112, + "step": 3079 + }, + { + "epoch": 0.15828964950149038, + "grad_norm": 0.956962525844574, + "learning_rate": 9.574651298467569e-06, + "loss": 0.7368, + "step": 3080 + }, + { + "epoch": 0.15834104224483503, + "grad_norm": 1.2453482151031494, + "learning_rate": 9.57431532739559e-06, + "loss": 0.7677, + "step": 3081 + }, + { + "epoch": 0.15839243498817968, + "grad_norm": 1.1722524166107178, + "learning_rate": 9.57397922958821e-06, + "loss": 0.804, + "step": 3082 + }, + { + "epoch": 0.15844382773152432, + "grad_norm": 1.2492121458053589, + "learning_rate": 9.573643005054744e-06, + "loss": 0.8303, + "step": 3083 + }, + { + "epoch": 0.15849522047486894, + "grad_norm": 1.1757866144180298, + "learning_rate": 9.573306653804503e-06, + "loss": 0.8092, + "step": 3084 + }, + { + "epoch": 0.1585466132182136, + "grad_norm": 1.0953439474105835, + "learning_rate": 9.572970175846807e-06, + "loss": 0.8396, + "step": 3085 + }, + { + "epoch": 0.15859800596155824, + "grad_norm": 1.181564450263977, + "learning_rate": 9.572633571190978e-06, + "loss": 0.8354, + "step": 3086 + }, + { + "epoch": 0.15864939870490286, + "grad_norm": 1.1329485177993774, + "learning_rate": 9.572296839846346e-06, + "loss": 0.7932, + "step": 3087 + }, + { + "epoch": 0.1587007914482475, + "grad_norm": 1.109900712966919, + "learning_rate": 9.571959981822236e-06, + "loss": 0.8035, + "step": 3088 + }, + { + "epoch": 0.15875218419159215, + "grad_norm": 1.0511541366577148, + "learning_rate": 9.57162299712798e-06, + "loss": 0.7743, + "step": 3089 + }, + { + "epoch": 0.1588035769349368, + "grad_norm": 1.1241798400878906, + "learning_rate": 9.571285885772919e-06, + "loss": 0.7923, + "step": 3090 + }, + { + "epoch": 0.15885496967828142, + "grad_norm": 0.9507374167442322, + "learning_rate": 9.570948647766391e-06, + "loss": 0.7045, + "step": 3091 + }, + { + "epoch": 0.15890636242162606, + "grad_norm": 1.132149577140808, + "learning_rate": 9.570611283117738e-06, + "loss": 0.8289, + "step": 3092 + }, + { + "epoch": 0.1589577551649707, + "grad_norm": 1.1935265064239502, + "learning_rate": 9.57027379183631e-06, + "loss": 0.8208, + "step": 3093 + }, + { + "epoch": 0.15900914790831536, + "grad_norm": 1.1184380054473877, + "learning_rate": 9.569936173931453e-06, + "loss": 0.8161, + "step": 3094 + }, + { + "epoch": 0.15906054065165998, + "grad_norm": 0.7748022079467773, + "learning_rate": 9.569598429412526e-06, + "loss": 0.7227, + "step": 3095 + }, + { + "epoch": 0.15911193339500462, + "grad_norm": 0.9522273540496826, + "learning_rate": 9.569260558288883e-06, + "loss": 0.7548, + "step": 3096 + }, + { + "epoch": 0.15916332613834927, + "grad_norm": 1.267147421836853, + "learning_rate": 9.568922560569885e-06, + "loss": 0.8296, + "step": 3097 + }, + { + "epoch": 0.15921471888169392, + "grad_norm": 1.300022006034851, + "learning_rate": 9.568584436264899e-06, + "loss": 0.8262, + "step": 3098 + }, + { + "epoch": 0.15926611162503854, + "grad_norm": 1.0244982242584229, + "learning_rate": 9.568246185383291e-06, + "loss": 0.749, + "step": 3099 + }, + { + "epoch": 0.15931750436838318, + "grad_norm": 1.1157578229904175, + "learning_rate": 9.567907807934434e-06, + "loss": 0.7567, + "step": 3100 + }, + { + "epoch": 0.15936889711172783, + "grad_norm": 1.0519708395004272, + "learning_rate": 9.567569303927702e-06, + "loss": 0.7892, + "step": 3101 + }, + { + "epoch": 0.15942028985507245, + "grad_norm": 1.1085740327835083, + "learning_rate": 9.567230673372475e-06, + "loss": 0.832, + "step": 3102 + }, + { + "epoch": 0.1594716825984171, + "grad_norm": 1.0715935230255127, + "learning_rate": 9.566891916278131e-06, + "loss": 0.8272, + "step": 3103 + }, + { + "epoch": 0.15952307534176174, + "grad_norm": 0.8947829008102417, + "learning_rate": 9.566553032654061e-06, + "loss": 0.6814, + "step": 3104 + }, + { + "epoch": 0.1595744680851064, + "grad_norm": 0.8069021701812744, + "learning_rate": 9.566214022509651e-06, + "loss": 0.7176, + "step": 3105 + }, + { + "epoch": 0.159625860828451, + "grad_norm": 1.1611638069152832, + "learning_rate": 9.565874885854293e-06, + "loss": 0.8048, + "step": 3106 + }, + { + "epoch": 0.15967725357179566, + "grad_norm": 1.128555417060852, + "learning_rate": 9.565535622697387e-06, + "loss": 0.8009, + "step": 3107 + }, + { + "epoch": 0.1597286463151403, + "grad_norm": 1.1363729238510132, + "learning_rate": 9.565196233048325e-06, + "loss": 0.8326, + "step": 3108 + }, + { + "epoch": 0.15978003905848495, + "grad_norm": 0.7629484534263611, + "learning_rate": 9.56485671691652e-06, + "loss": 0.7318, + "step": 3109 + }, + { + "epoch": 0.15983143180182957, + "grad_norm": 1.218059778213501, + "learning_rate": 9.56451707431137e-06, + "loss": 0.8601, + "step": 3110 + }, + { + "epoch": 0.15988282454517422, + "grad_norm": 1.1709080934524536, + "learning_rate": 9.56417730524229e-06, + "loss": 0.7581, + "step": 3111 + }, + { + "epoch": 0.15993421728851887, + "grad_norm": 1.1050457954406738, + "learning_rate": 9.56383740971869e-06, + "loss": 0.8224, + "step": 3112 + }, + { + "epoch": 0.1599856100318635, + "grad_norm": 0.9180664420127869, + "learning_rate": 9.563497387749993e-06, + "loss": 0.7258, + "step": 3113 + }, + { + "epoch": 0.16003700277520813, + "grad_norm": 1.1267971992492676, + "learning_rate": 9.563157239345613e-06, + "loss": 0.8106, + "step": 3114 + }, + { + "epoch": 0.16008839551855278, + "grad_norm": 1.1436210870742798, + "learning_rate": 9.562816964514979e-06, + "loss": 0.8076, + "step": 3115 + }, + { + "epoch": 0.16013978826189743, + "grad_norm": 1.1593447923660278, + "learning_rate": 9.562476563267514e-06, + "loss": 0.8037, + "step": 3116 + }, + { + "epoch": 0.16019118100524207, + "grad_norm": 1.0998963117599487, + "learning_rate": 9.562136035612653e-06, + "loss": 0.7659, + "step": 3117 + }, + { + "epoch": 0.1602425737485867, + "grad_norm": 0.7985364198684692, + "learning_rate": 9.561795381559828e-06, + "loss": 0.7135, + "step": 3118 + }, + { + "epoch": 0.16029396649193134, + "grad_norm": 1.115679383277893, + "learning_rate": 9.56145460111848e-06, + "loss": 0.8586, + "step": 3119 + }, + { + "epoch": 0.16034535923527599, + "grad_norm": 1.0436532497406006, + "learning_rate": 9.56111369429805e-06, + "loss": 0.7556, + "step": 3120 + }, + { + "epoch": 0.1603967519786206, + "grad_norm": 1.1787986755371094, + "learning_rate": 9.56077266110798e-06, + "loss": 0.8416, + "step": 3121 + }, + { + "epoch": 0.16044814472196525, + "grad_norm": 1.0705763101577759, + "learning_rate": 9.560431501557721e-06, + "loss": 0.7912, + "step": 3122 + }, + { + "epoch": 0.1604995374653099, + "grad_norm": 0.7579458951950073, + "learning_rate": 9.560090215656726e-06, + "loss": 0.7193, + "step": 3123 + }, + { + "epoch": 0.16055093020865455, + "grad_norm": 1.0716207027435303, + "learning_rate": 9.559748803414448e-06, + "loss": 0.8354, + "step": 3124 + }, + { + "epoch": 0.16060232295199917, + "grad_norm": 1.0800081491470337, + "learning_rate": 9.559407264840348e-06, + "loss": 0.8406, + "step": 3125 + }, + { + "epoch": 0.1606537156953438, + "grad_norm": 1.1658076047897339, + "learning_rate": 9.55906559994389e-06, + "loss": 0.8153, + "step": 3126 + }, + { + "epoch": 0.16070510843868846, + "grad_norm": 1.1271469593048096, + "learning_rate": 9.558723808734536e-06, + "loss": 0.7817, + "step": 3127 + }, + { + "epoch": 0.1607565011820331, + "grad_norm": 1.1793339252471924, + "learning_rate": 9.55838189122176e-06, + "loss": 0.8295, + "step": 3128 + }, + { + "epoch": 0.16080789392537773, + "grad_norm": 1.1448228359222412, + "learning_rate": 9.558039847415033e-06, + "loss": 0.761, + "step": 3129 + }, + { + "epoch": 0.16085928666872237, + "grad_norm": 1.167729377746582, + "learning_rate": 9.55769767732383e-06, + "loss": 0.7989, + "step": 3130 + }, + { + "epoch": 0.16091067941206702, + "grad_norm": 1.1033068895339966, + "learning_rate": 9.557355380957633e-06, + "loss": 0.8118, + "step": 3131 + }, + { + "epoch": 0.16096207215541167, + "grad_norm": 1.0857632160186768, + "learning_rate": 9.557012958325926e-06, + "loss": 0.8288, + "step": 3132 + }, + { + "epoch": 0.1610134648987563, + "grad_norm": 1.127107858657837, + "learning_rate": 9.556670409438197e-06, + "loss": 0.8054, + "step": 3133 + }, + { + "epoch": 0.16106485764210093, + "grad_norm": 1.1581300497055054, + "learning_rate": 9.556327734303934e-06, + "loss": 0.8236, + "step": 3134 + }, + { + "epoch": 0.16111625038544558, + "grad_norm": 1.167819619178772, + "learning_rate": 9.555984932932631e-06, + "loss": 0.7537, + "step": 3135 + }, + { + "epoch": 0.16116764312879023, + "grad_norm": 1.205673098564148, + "learning_rate": 9.555642005333789e-06, + "loss": 0.8111, + "step": 3136 + }, + { + "epoch": 0.16121903587213485, + "grad_norm": 1.1420509815216064, + "learning_rate": 9.555298951516907e-06, + "loss": 0.8138, + "step": 3137 + }, + { + "epoch": 0.1612704286154795, + "grad_norm": 1.1440784931182861, + "learning_rate": 9.55495577149149e-06, + "loss": 0.7964, + "step": 3138 + }, + { + "epoch": 0.16132182135882414, + "grad_norm": 1.1752656698226929, + "learning_rate": 9.554612465267044e-06, + "loss": 0.8007, + "step": 3139 + }, + { + "epoch": 0.16137321410216876, + "grad_norm": 1.0747060775756836, + "learning_rate": 9.554269032853084e-06, + "loss": 0.8033, + "step": 3140 + }, + { + "epoch": 0.1614246068455134, + "grad_norm": 1.1762104034423828, + "learning_rate": 9.553925474259123e-06, + "loss": 0.839, + "step": 3141 + }, + { + "epoch": 0.16147599958885805, + "grad_norm": 1.0837814807891846, + "learning_rate": 9.55358178949468e-06, + "loss": 0.7826, + "step": 3142 + }, + { + "epoch": 0.1615273923322027, + "grad_norm": 1.1304336786270142, + "learning_rate": 9.553237978569276e-06, + "loss": 0.8474, + "step": 3143 + }, + { + "epoch": 0.16157878507554732, + "grad_norm": 1.1496949195861816, + "learning_rate": 9.552894041492439e-06, + "loss": 0.8433, + "step": 3144 + }, + { + "epoch": 0.16163017781889197, + "grad_norm": 1.1746634244918823, + "learning_rate": 9.552549978273697e-06, + "loss": 0.8133, + "step": 3145 + }, + { + "epoch": 0.16168157056223662, + "grad_norm": 1.1522823572158813, + "learning_rate": 9.552205788922582e-06, + "loss": 0.7971, + "step": 3146 + }, + { + "epoch": 0.16173296330558126, + "grad_norm": 1.119305968284607, + "learning_rate": 9.55186147344863e-06, + "loss": 0.773, + "step": 3147 + }, + { + "epoch": 0.16178435604892588, + "grad_norm": 0.8213911056518555, + "learning_rate": 9.55151703186138e-06, + "loss": 0.7447, + "step": 3148 + }, + { + "epoch": 0.16183574879227053, + "grad_norm": 1.1103883981704712, + "learning_rate": 9.551172464170377e-06, + "loss": 0.8008, + "step": 3149 + }, + { + "epoch": 0.16188714153561518, + "grad_norm": 1.1040072441101074, + "learning_rate": 9.550827770385167e-06, + "loss": 0.7636, + "step": 3150 + }, + { + "epoch": 0.16193853427895982, + "grad_norm": 1.128901481628418, + "learning_rate": 9.550482950515301e-06, + "loss": 0.794, + "step": 3151 + }, + { + "epoch": 0.16198992702230444, + "grad_norm": 1.1219249963760376, + "learning_rate": 9.550138004570328e-06, + "loss": 0.7992, + "step": 3152 + }, + { + "epoch": 0.1620413197656491, + "grad_norm": 1.0926544666290283, + "learning_rate": 9.549792932559811e-06, + "loss": 0.778, + "step": 3153 + }, + { + "epoch": 0.16209271250899374, + "grad_norm": 0.995165228843689, + "learning_rate": 9.549447734493306e-06, + "loss": 0.7005, + "step": 3154 + }, + { + "epoch": 0.16214410525233838, + "grad_norm": 1.1856826543807983, + "learning_rate": 9.54910241038038e-06, + "loss": 0.8078, + "step": 3155 + }, + { + "epoch": 0.162195497995683, + "grad_norm": 1.161812424659729, + "learning_rate": 9.548756960230598e-06, + "loss": 0.8354, + "step": 3156 + }, + { + "epoch": 0.16224689073902765, + "grad_norm": 1.1402584314346313, + "learning_rate": 9.548411384053534e-06, + "loss": 0.736, + "step": 3157 + }, + { + "epoch": 0.1622982834823723, + "grad_norm": 1.092926263809204, + "learning_rate": 9.548065681858758e-06, + "loss": 0.8042, + "step": 3158 + }, + { + "epoch": 0.16234967622571692, + "grad_norm": 1.1787275075912476, + "learning_rate": 9.547719853655853e-06, + "loss": 0.8125, + "step": 3159 + }, + { + "epoch": 0.16240106896906156, + "grad_norm": 1.1544336080551147, + "learning_rate": 9.547373899454397e-06, + "loss": 0.8873, + "step": 3160 + }, + { + "epoch": 0.1624524617124062, + "grad_norm": 1.1269683837890625, + "learning_rate": 9.547027819263976e-06, + "loss": 0.7755, + "step": 3161 + }, + { + "epoch": 0.16250385445575086, + "grad_norm": 1.5253983736038208, + "learning_rate": 9.54668161309418e-06, + "loss": 0.7777, + "step": 3162 + }, + { + "epoch": 0.16255524719909548, + "grad_norm": 1.1081044673919678, + "learning_rate": 9.546335280954599e-06, + "loss": 0.7656, + "step": 3163 + }, + { + "epoch": 0.16260663994244012, + "grad_norm": 0.8480460047721863, + "learning_rate": 9.545988822854829e-06, + "loss": 0.71, + "step": 3164 + }, + { + "epoch": 0.16265803268578477, + "grad_norm": 0.8821783661842346, + "learning_rate": 9.545642238804469e-06, + "loss": 0.7521, + "step": 3165 + }, + { + "epoch": 0.16270942542912942, + "grad_norm": 1.2296350002288818, + "learning_rate": 9.545295528813121e-06, + "loss": 0.7608, + "step": 3166 + }, + { + "epoch": 0.16276081817247404, + "grad_norm": 1.169236421585083, + "learning_rate": 9.544948692890392e-06, + "loss": 0.7668, + "step": 3167 + }, + { + "epoch": 0.16281221091581868, + "grad_norm": 1.0998739004135132, + "learning_rate": 9.544601731045888e-06, + "loss": 0.7418, + "step": 3168 + }, + { + "epoch": 0.16286360365916333, + "grad_norm": 1.2018539905548096, + "learning_rate": 9.544254643289226e-06, + "loss": 0.8165, + "step": 3169 + }, + { + "epoch": 0.16291499640250798, + "grad_norm": 1.1397939920425415, + "learning_rate": 9.543907429630021e-06, + "loss": 0.9075, + "step": 3170 + }, + { + "epoch": 0.1629663891458526, + "grad_norm": 1.1398556232452393, + "learning_rate": 9.543560090077895e-06, + "loss": 0.8091, + "step": 3171 + }, + { + "epoch": 0.16301778188919724, + "grad_norm": 0.8102909922599792, + "learning_rate": 9.543212624642466e-06, + "loss": 0.7159, + "step": 3172 + }, + { + "epoch": 0.1630691746325419, + "grad_norm": 1.1493078470230103, + "learning_rate": 9.542865033333364e-06, + "loss": 0.8216, + "step": 3173 + }, + { + "epoch": 0.16312056737588654, + "grad_norm": 1.2763092517852783, + "learning_rate": 9.542517316160222e-06, + "loss": 0.833, + "step": 3174 + }, + { + "epoch": 0.16317196011923116, + "grad_norm": 0.9804087281227112, + "learning_rate": 9.542169473132669e-06, + "loss": 0.6838, + "step": 3175 + }, + { + "epoch": 0.1632233528625758, + "grad_norm": 1.14390230178833, + "learning_rate": 9.541821504260345e-06, + "loss": 0.7921, + "step": 3176 + }, + { + "epoch": 0.16327474560592045, + "grad_norm": 0.901193380355835, + "learning_rate": 9.541473409552888e-06, + "loss": 0.7216, + "step": 3177 + }, + { + "epoch": 0.16332613834926507, + "grad_norm": 1.3020175695419312, + "learning_rate": 9.541125189019947e-06, + "loss": 0.8005, + "step": 3178 + }, + { + "epoch": 0.16337753109260972, + "grad_norm": 1.1251487731933594, + "learning_rate": 9.540776842671166e-06, + "loss": 0.807, + "step": 3179 + }, + { + "epoch": 0.16342892383595437, + "grad_norm": 1.1327316761016846, + "learning_rate": 9.540428370516197e-06, + "loss": 0.7693, + "step": 3180 + }, + { + "epoch": 0.163480316579299, + "grad_norm": 1.1738780736923218, + "learning_rate": 9.540079772564695e-06, + "loss": 0.8116, + "step": 3181 + }, + { + "epoch": 0.16353170932264363, + "grad_norm": 1.2064341306686401, + "learning_rate": 9.539731048826319e-06, + "loss": 0.7979, + "step": 3182 + }, + { + "epoch": 0.16358310206598828, + "grad_norm": 1.1245938539505005, + "learning_rate": 9.53938219931073e-06, + "loss": 0.8012, + "step": 3183 + }, + { + "epoch": 0.16363449480933293, + "grad_norm": 1.338637113571167, + "learning_rate": 9.539033224027594e-06, + "loss": 0.8266, + "step": 3184 + }, + { + "epoch": 0.16368588755267757, + "grad_norm": 1.0943418741226196, + "learning_rate": 9.538684122986577e-06, + "loss": 0.7632, + "step": 3185 + }, + { + "epoch": 0.1637372802960222, + "grad_norm": 1.1340044736862183, + "learning_rate": 9.538334896197355e-06, + "loss": 0.8112, + "step": 3186 + }, + { + "epoch": 0.16378867303936684, + "grad_norm": 1.062220811843872, + "learning_rate": 9.5379855436696e-06, + "loss": 0.7951, + "step": 3187 + }, + { + "epoch": 0.16384006578271149, + "grad_norm": 1.0626157522201538, + "learning_rate": 9.537636065412994e-06, + "loss": 0.766, + "step": 3188 + }, + { + "epoch": 0.16389145852605613, + "grad_norm": 0.9828280210494995, + "learning_rate": 9.537286461437216e-06, + "loss": 0.6822, + "step": 3189 + }, + { + "epoch": 0.16394285126940075, + "grad_norm": 1.122507929801941, + "learning_rate": 9.536936731751957e-06, + "loss": 0.8461, + "step": 3190 + }, + { + "epoch": 0.1639942440127454, + "grad_norm": 1.0993677377700806, + "learning_rate": 9.536586876366902e-06, + "loss": 0.8689, + "step": 3191 + }, + { + "epoch": 0.16404563675609005, + "grad_norm": 1.0578441619873047, + "learning_rate": 9.536236895291749e-06, + "loss": 0.821, + "step": 3192 + }, + { + "epoch": 0.1640970294994347, + "grad_norm": 1.0542157888412476, + "learning_rate": 9.535886788536188e-06, + "loss": 0.7938, + "step": 3193 + }, + { + "epoch": 0.1641484222427793, + "grad_norm": 1.1256091594696045, + "learning_rate": 9.535536556109924e-06, + "loss": 0.7914, + "step": 3194 + }, + { + "epoch": 0.16419981498612396, + "grad_norm": 0.7573138475418091, + "learning_rate": 9.53518619802266e-06, + "loss": 0.7009, + "step": 3195 + }, + { + "epoch": 0.1642512077294686, + "grad_norm": 1.1330567598342896, + "learning_rate": 9.5348357142841e-06, + "loss": 0.821, + "step": 3196 + }, + { + "epoch": 0.16430260047281323, + "grad_norm": 1.2477898597717285, + "learning_rate": 9.534485104903959e-06, + "loss": 0.8216, + "step": 3197 + }, + { + "epoch": 0.16435399321615787, + "grad_norm": 1.0125752687454224, + "learning_rate": 9.534134369891948e-06, + "loss": 0.7528, + "step": 3198 + }, + { + "epoch": 0.16440538595950252, + "grad_norm": 0.8818283081054688, + "learning_rate": 9.533783509257784e-06, + "loss": 0.7054, + "step": 3199 + }, + { + "epoch": 0.16445677870284717, + "grad_norm": 1.1226658821105957, + "learning_rate": 9.53343252301119e-06, + "loss": 0.7999, + "step": 3200 + }, + { + "epoch": 0.1645081714461918, + "grad_norm": 0.9891276955604553, + "learning_rate": 9.533081411161886e-06, + "loss": 0.7114, + "step": 3201 + }, + { + "epoch": 0.16455956418953643, + "grad_norm": 1.0801430940628052, + "learning_rate": 9.532730173719606e-06, + "loss": 0.7552, + "step": 3202 + }, + { + "epoch": 0.16461095693288108, + "grad_norm": 1.203872561454773, + "learning_rate": 9.532378810694079e-06, + "loss": 0.8366, + "step": 3203 + }, + { + "epoch": 0.16466234967622573, + "grad_norm": 1.1817381381988525, + "learning_rate": 9.532027322095037e-06, + "loss": 0.754, + "step": 3204 + }, + { + "epoch": 0.16471374241957035, + "grad_norm": 0.8346467018127441, + "learning_rate": 9.531675707932221e-06, + "loss": 0.6748, + "step": 3205 + }, + { + "epoch": 0.164765135162915, + "grad_norm": 1.122004508972168, + "learning_rate": 9.531323968215372e-06, + "loss": 0.8271, + "step": 3206 + }, + { + "epoch": 0.16481652790625964, + "grad_norm": 0.833706259727478, + "learning_rate": 9.530972102954237e-06, + "loss": 0.6826, + "step": 3207 + }, + { + "epoch": 0.1648679206496043, + "grad_norm": 1.1040239334106445, + "learning_rate": 9.530620112158561e-06, + "loss": 0.8063, + "step": 3208 + }, + { + "epoch": 0.1649193133929489, + "grad_norm": 0.7802413702011108, + "learning_rate": 9.5302679958381e-06, + "loss": 0.7054, + "step": 3209 + }, + { + "epoch": 0.16497070613629355, + "grad_norm": 1.139785647392273, + "learning_rate": 9.529915754002608e-06, + "loss": 0.8067, + "step": 3210 + }, + { + "epoch": 0.1650220988796382, + "grad_norm": 1.2267383337020874, + "learning_rate": 9.529563386661845e-06, + "loss": 0.8928, + "step": 3211 + }, + { + "epoch": 0.16507349162298285, + "grad_norm": 1.18010675907135, + "learning_rate": 9.52921089382557e-06, + "loss": 0.7962, + "step": 3212 + }, + { + "epoch": 0.16512488436632747, + "grad_norm": 1.0691472291946411, + "learning_rate": 9.528858275503556e-06, + "loss": 0.7564, + "step": 3213 + }, + { + "epoch": 0.16517627710967211, + "grad_norm": 1.112184762954712, + "learning_rate": 9.528505531705567e-06, + "loss": 0.8289, + "step": 3214 + }, + { + "epoch": 0.16522766985301676, + "grad_norm": 1.1967002153396606, + "learning_rate": 9.528152662441376e-06, + "loss": 0.8129, + "step": 3215 + }, + { + "epoch": 0.16527906259636138, + "grad_norm": 1.06976318359375, + "learning_rate": 9.527799667720764e-06, + "loss": 0.7514, + "step": 3216 + }, + { + "epoch": 0.16533045533970603, + "grad_norm": 1.1949013471603394, + "learning_rate": 9.527446547553507e-06, + "loss": 0.8407, + "step": 3217 + }, + { + "epoch": 0.16538184808305068, + "grad_norm": 1.1443004608154297, + "learning_rate": 9.52709330194939e-06, + "loss": 0.8709, + "step": 3218 + }, + { + "epoch": 0.16543324082639532, + "grad_norm": 1.0867016315460205, + "learning_rate": 9.526739930918201e-06, + "loss": 0.8091, + "step": 3219 + }, + { + "epoch": 0.16548463356973994, + "grad_norm": 1.1380915641784668, + "learning_rate": 9.526386434469727e-06, + "loss": 0.7791, + "step": 3220 + }, + { + "epoch": 0.1655360263130846, + "grad_norm": 1.0839924812316895, + "learning_rate": 9.526032812613766e-06, + "loss": 0.7819, + "step": 3221 + }, + { + "epoch": 0.16558741905642924, + "grad_norm": 1.1421802043914795, + "learning_rate": 9.525679065360113e-06, + "loss": 0.8201, + "step": 3222 + }, + { + "epoch": 0.16563881179977388, + "grad_norm": 1.0658698081970215, + "learning_rate": 9.52532519271857e-06, + "loss": 0.7183, + "step": 3223 + }, + { + "epoch": 0.1656902045431185, + "grad_norm": 1.111944556236267, + "learning_rate": 9.52497119469894e-06, + "loss": 0.8406, + "step": 3224 + }, + { + "epoch": 0.16574159728646315, + "grad_norm": 0.7980111837387085, + "learning_rate": 9.524617071311031e-06, + "loss": 0.7062, + "step": 3225 + }, + { + "epoch": 0.1657929900298078, + "grad_norm": 1.1120167970657349, + "learning_rate": 9.524262822564656e-06, + "loss": 0.811, + "step": 3226 + }, + { + "epoch": 0.16584438277315244, + "grad_norm": 1.0894891023635864, + "learning_rate": 9.52390844846963e-06, + "loss": 0.8326, + "step": 3227 + }, + { + "epoch": 0.16589577551649706, + "grad_norm": 1.1481879949569702, + "learning_rate": 9.523553949035768e-06, + "loss": 0.8294, + "step": 3228 + }, + { + "epoch": 0.1659471682598417, + "grad_norm": 1.0807256698608398, + "learning_rate": 9.523199324272894e-06, + "loss": 0.8193, + "step": 3229 + }, + { + "epoch": 0.16599856100318636, + "grad_norm": 1.214396595954895, + "learning_rate": 9.522844574190833e-06, + "loss": 0.7622, + "step": 3230 + }, + { + "epoch": 0.16604995374653098, + "grad_norm": 1.240247368812561, + "learning_rate": 9.522489698799412e-06, + "loss": 0.7571, + "step": 3231 + }, + { + "epoch": 0.16610134648987562, + "grad_norm": 1.0789235830307007, + "learning_rate": 9.522134698108468e-06, + "loss": 0.8095, + "step": 3232 + }, + { + "epoch": 0.16615273923322027, + "grad_norm": 1.0555205345153809, + "learning_rate": 9.521779572127832e-06, + "loss": 0.7576, + "step": 3233 + }, + { + "epoch": 0.16620413197656492, + "grad_norm": 1.1000267267227173, + "learning_rate": 9.521424320867343e-06, + "loss": 0.8142, + "step": 3234 + }, + { + "epoch": 0.16625552471990954, + "grad_norm": 0.7666199803352356, + "learning_rate": 9.521068944336847e-06, + "loss": 0.7112, + "step": 3235 + }, + { + "epoch": 0.16630691746325418, + "grad_norm": 1.1695549488067627, + "learning_rate": 9.520713442546186e-06, + "loss": 0.8387, + "step": 3236 + }, + { + "epoch": 0.16635831020659883, + "grad_norm": 0.7128877639770508, + "learning_rate": 9.520357815505212e-06, + "loss": 0.6888, + "step": 3237 + }, + { + "epoch": 0.16640970294994348, + "grad_norm": 1.0921233892440796, + "learning_rate": 9.520002063223777e-06, + "loss": 0.7871, + "step": 3238 + }, + { + "epoch": 0.1664610956932881, + "grad_norm": 1.150153636932373, + "learning_rate": 9.519646185711739e-06, + "loss": 0.8343, + "step": 3239 + }, + { + "epoch": 0.16651248843663274, + "grad_norm": 1.1637077331542969, + "learning_rate": 9.519290182978956e-06, + "loss": 0.8023, + "step": 3240 + }, + { + "epoch": 0.1665638811799774, + "grad_norm": 1.136567234992981, + "learning_rate": 9.51893405503529e-06, + "loss": 0.8462, + "step": 3241 + }, + { + "epoch": 0.16661527392332204, + "grad_norm": 0.8910467624664307, + "learning_rate": 9.518577801890612e-06, + "loss": 0.7085, + "step": 3242 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 1.3249033689498901, + "learning_rate": 9.51822142355479e-06, + "loss": 0.8509, + "step": 3243 + }, + { + "epoch": 0.1667180594100113, + "grad_norm": 1.1374561786651611, + "learning_rate": 9.517864920037698e-06, + "loss": 0.7423, + "step": 3244 + }, + { + "epoch": 0.16676945215335595, + "grad_norm": 1.2098907232284546, + "learning_rate": 9.517508291349214e-06, + "loss": 0.8424, + "step": 3245 + }, + { + "epoch": 0.1668208448967006, + "grad_norm": 1.332830548286438, + "learning_rate": 9.517151537499216e-06, + "loss": 0.7662, + "step": 3246 + }, + { + "epoch": 0.16687223764004522, + "grad_norm": 1.1399847269058228, + "learning_rate": 9.516794658497593e-06, + "loss": 0.8068, + "step": 3247 + }, + { + "epoch": 0.16692363038338986, + "grad_norm": 1.1291741132736206, + "learning_rate": 9.516437654354226e-06, + "loss": 0.8556, + "step": 3248 + }, + { + "epoch": 0.1669750231267345, + "grad_norm": 1.1566137075424194, + "learning_rate": 9.516080525079013e-06, + "loss": 0.8185, + "step": 3249 + }, + { + "epoch": 0.16702641587007913, + "grad_norm": 0.9099522233009338, + "learning_rate": 9.515723270681842e-06, + "loss": 0.7148, + "step": 3250 + }, + { + "epoch": 0.16707780861342378, + "grad_norm": 0.729370653629303, + "learning_rate": 9.515365891172618e-06, + "loss": 0.7262, + "step": 3251 + }, + { + "epoch": 0.16712920135676843, + "grad_norm": 1.177507996559143, + "learning_rate": 9.515008386561237e-06, + "loss": 0.7557, + "step": 3252 + }, + { + "epoch": 0.16718059410011307, + "grad_norm": 1.061783790588379, + "learning_rate": 9.514650756857607e-06, + "loss": 0.784, + "step": 3253 + }, + { + "epoch": 0.1672319868434577, + "grad_norm": 1.0806080102920532, + "learning_rate": 9.514293002071635e-06, + "loss": 0.7376, + "step": 3254 + }, + { + "epoch": 0.16728337958680234, + "grad_norm": 1.1734668016433716, + "learning_rate": 9.513935122213232e-06, + "loss": 0.76, + "step": 3255 + }, + { + "epoch": 0.16733477233014699, + "grad_norm": 1.2129284143447876, + "learning_rate": 9.513577117292317e-06, + "loss": 0.814, + "step": 3256 + }, + { + "epoch": 0.16738616507349163, + "grad_norm": 1.1298599243164062, + "learning_rate": 9.513218987318805e-06, + "loss": 0.7892, + "step": 3257 + }, + { + "epoch": 0.16743755781683625, + "grad_norm": 1.1670111417770386, + "learning_rate": 9.51286073230262e-06, + "loss": 0.8502, + "step": 3258 + }, + { + "epoch": 0.1674889505601809, + "grad_norm": 1.088180422782898, + "learning_rate": 9.512502352253687e-06, + "loss": 0.7749, + "step": 3259 + }, + { + "epoch": 0.16754034330352555, + "grad_norm": 1.066748857498169, + "learning_rate": 9.512143847181938e-06, + "loss": 0.7887, + "step": 3260 + }, + { + "epoch": 0.1675917360468702, + "grad_norm": 1.1440898180007935, + "learning_rate": 9.5117852170973e-06, + "loss": 0.7662, + "step": 3261 + }, + { + "epoch": 0.1676431287902148, + "grad_norm": 1.184906244277954, + "learning_rate": 9.511426462009716e-06, + "loss": 0.8306, + "step": 3262 + }, + { + "epoch": 0.16769452153355946, + "grad_norm": 1.097861886024475, + "learning_rate": 9.511067581929122e-06, + "loss": 0.7857, + "step": 3263 + }, + { + "epoch": 0.1677459142769041, + "grad_norm": 7.879432678222656, + "learning_rate": 9.51070857686546e-06, + "loss": 0.8673, + "step": 3264 + }, + { + "epoch": 0.16779730702024875, + "grad_norm": 1.1445090770721436, + "learning_rate": 9.510349446828677e-06, + "loss": 0.7599, + "step": 3265 + }, + { + "epoch": 0.16784869976359337, + "grad_norm": 1.1551319360733032, + "learning_rate": 9.509990191828727e-06, + "loss": 0.7779, + "step": 3266 + }, + { + "epoch": 0.16790009250693802, + "grad_norm": 1.249333381652832, + "learning_rate": 9.509630811875557e-06, + "loss": 0.7678, + "step": 3267 + }, + { + "epoch": 0.16795148525028267, + "grad_norm": 1.315457820892334, + "learning_rate": 9.509271306979129e-06, + "loss": 0.8145, + "step": 3268 + }, + { + "epoch": 0.1680028779936273, + "grad_norm": 1.1426721811294556, + "learning_rate": 9.508911677149402e-06, + "loss": 0.7596, + "step": 3269 + }, + { + "epoch": 0.16805427073697193, + "grad_norm": 1.002423644065857, + "learning_rate": 9.50855192239634e-06, + "loss": 0.6855, + "step": 3270 + }, + { + "epoch": 0.16810566348031658, + "grad_norm": 1.2738754749298096, + "learning_rate": 9.508192042729908e-06, + "loss": 0.8331, + "step": 3271 + }, + { + "epoch": 0.16815705622366123, + "grad_norm": 1.1588183641433716, + "learning_rate": 9.507832038160081e-06, + "loss": 0.817, + "step": 3272 + }, + { + "epoch": 0.16820844896700585, + "grad_norm": 1.1641695499420166, + "learning_rate": 9.50747190869683e-06, + "loss": 0.8409, + "step": 3273 + }, + { + "epoch": 0.1682598417103505, + "grad_norm": 1.1562764644622803, + "learning_rate": 9.507111654350134e-06, + "loss": 0.8083, + "step": 3274 + }, + { + "epoch": 0.16831123445369514, + "grad_norm": 1.1415313482284546, + "learning_rate": 9.506751275129974e-06, + "loss": 0.7253, + "step": 3275 + }, + { + "epoch": 0.1683626271970398, + "grad_norm": 1.1189708709716797, + "learning_rate": 9.506390771046332e-06, + "loss": 0.7325, + "step": 3276 + }, + { + "epoch": 0.1684140199403844, + "grad_norm": 1.1310651302337646, + "learning_rate": 9.506030142109202e-06, + "loss": 0.8092, + "step": 3277 + }, + { + "epoch": 0.16846541268372905, + "grad_norm": 2.6251814365386963, + "learning_rate": 9.505669388328568e-06, + "loss": 0.8332, + "step": 3278 + }, + { + "epoch": 0.1685168054270737, + "grad_norm": 1.1220414638519287, + "learning_rate": 9.505308509714433e-06, + "loss": 0.7718, + "step": 3279 + }, + { + "epoch": 0.16856819817041835, + "grad_norm": 0.7799605131149292, + "learning_rate": 9.50494750627679e-06, + "loss": 0.6964, + "step": 3280 + }, + { + "epoch": 0.16861959091376297, + "grad_norm": 0.8771004676818848, + "learning_rate": 9.504586378025641e-06, + "loss": 0.7629, + "step": 3281 + }, + { + "epoch": 0.16867098365710761, + "grad_norm": 1.1117522716522217, + "learning_rate": 9.504225124970997e-06, + "loss": 0.7906, + "step": 3282 + }, + { + "epoch": 0.16872237640045226, + "grad_norm": 1.0804903507232666, + "learning_rate": 9.50386374712286e-06, + "loss": 0.7739, + "step": 3283 + }, + { + "epoch": 0.1687737691437969, + "grad_norm": 1.1400279998779297, + "learning_rate": 9.503502244491243e-06, + "loss": 0.7996, + "step": 3284 + }, + { + "epoch": 0.16882516188714153, + "grad_norm": 1.1324580907821655, + "learning_rate": 9.503140617086165e-06, + "loss": 0.823, + "step": 3285 + }, + { + "epoch": 0.16887655463048618, + "grad_norm": 1.1264513731002808, + "learning_rate": 9.502778864917644e-06, + "loss": 0.7816, + "step": 3286 + }, + { + "epoch": 0.16892794737383082, + "grad_norm": 1.1614048480987549, + "learning_rate": 9.502416987995704e-06, + "loss": 0.8072, + "step": 3287 + }, + { + "epoch": 0.16897934011717544, + "grad_norm": 1.0859893560409546, + "learning_rate": 9.502054986330367e-06, + "loss": 0.7857, + "step": 3288 + }, + { + "epoch": 0.1690307328605201, + "grad_norm": 1.1314196586608887, + "learning_rate": 9.501692859931669e-06, + "loss": 0.7767, + "step": 3289 + }, + { + "epoch": 0.16908212560386474, + "grad_norm": 1.1476948261260986, + "learning_rate": 9.501330608809636e-06, + "loss": 0.8269, + "step": 3290 + }, + { + "epoch": 0.16913351834720938, + "grad_norm": 1.1194007396697998, + "learning_rate": 9.500968232974308e-06, + "loss": 0.7872, + "step": 3291 + }, + { + "epoch": 0.169184911090554, + "grad_norm": 0.8801155090332031, + "learning_rate": 9.500605732435726e-06, + "loss": 0.6508, + "step": 3292 + }, + { + "epoch": 0.16923630383389865, + "grad_norm": 1.1869466304779053, + "learning_rate": 9.50024310720393e-06, + "loss": 0.8529, + "step": 3293 + }, + { + "epoch": 0.1692876965772433, + "grad_norm": 1.105828881263733, + "learning_rate": 9.49988035728897e-06, + "loss": 0.7789, + "step": 3294 + }, + { + "epoch": 0.16933908932058794, + "grad_norm": 1.0911446809768677, + "learning_rate": 9.499517482700896e-06, + "loss": 0.784, + "step": 3295 + }, + { + "epoch": 0.16939048206393256, + "grad_norm": 1.089745283126831, + "learning_rate": 9.49915448344976e-06, + "loss": 0.7817, + "step": 3296 + }, + { + "epoch": 0.1694418748072772, + "grad_norm": 0.8322347402572632, + "learning_rate": 9.49879135954562e-06, + "loss": 0.7325, + "step": 3297 + }, + { + "epoch": 0.16949326755062186, + "grad_norm": 0.7731889486312866, + "learning_rate": 9.498428110998538e-06, + "loss": 0.7376, + "step": 3298 + }, + { + "epoch": 0.1695446602939665, + "grad_norm": 0.6958489418029785, + "learning_rate": 9.498064737818577e-06, + "loss": 0.6839, + "step": 3299 + }, + { + "epoch": 0.16959605303731112, + "grad_norm": 1.264474868774414, + "learning_rate": 9.497701240015805e-06, + "loss": 0.7524, + "step": 3300 + }, + { + "epoch": 0.16964744578065577, + "grad_norm": 0.7674562335014343, + "learning_rate": 9.497337617600291e-06, + "loss": 0.716, + "step": 3301 + }, + { + "epoch": 0.16969883852400042, + "grad_norm": 1.1512178182601929, + "learning_rate": 9.496973870582112e-06, + "loss": 0.7777, + "step": 3302 + }, + { + "epoch": 0.16975023126734506, + "grad_norm": 1.2055432796478271, + "learning_rate": 9.496609998971343e-06, + "loss": 0.8092, + "step": 3303 + }, + { + "epoch": 0.16980162401068968, + "grad_norm": 1.0939481258392334, + "learning_rate": 9.49624600277807e-06, + "loss": 0.793, + "step": 3304 + }, + { + "epoch": 0.16985301675403433, + "grad_norm": 1.134118914604187, + "learning_rate": 9.495881882012374e-06, + "loss": 0.8288, + "step": 3305 + }, + { + "epoch": 0.16990440949737898, + "grad_norm": 1.0895812511444092, + "learning_rate": 9.495517636684343e-06, + "loss": 0.8047, + "step": 3306 + }, + { + "epoch": 0.1699558022407236, + "grad_norm": 1.0916963815689087, + "learning_rate": 9.495153266804072e-06, + "loss": 0.7736, + "step": 3307 + }, + { + "epoch": 0.17000719498406824, + "grad_norm": 1.1814486980438232, + "learning_rate": 9.494788772381653e-06, + "loss": 0.8169, + "step": 3308 + }, + { + "epoch": 0.1700585877274129, + "grad_norm": 1.1182068586349487, + "learning_rate": 9.494424153427188e-06, + "loss": 0.8167, + "step": 3309 + }, + { + "epoch": 0.17010998047075754, + "grad_norm": 1.1320017576217651, + "learning_rate": 9.494059409950776e-06, + "loss": 0.8143, + "step": 3310 + }, + { + "epoch": 0.17016137321410216, + "grad_norm": 1.2176772356033325, + "learning_rate": 9.493694541962524e-06, + "loss": 0.8074, + "step": 3311 + }, + { + "epoch": 0.1702127659574468, + "grad_norm": 1.2402642965316772, + "learning_rate": 9.493329549472542e-06, + "loss": 0.8194, + "step": 3312 + }, + { + "epoch": 0.17026415870079145, + "grad_norm": 1.244359016418457, + "learning_rate": 9.492964432490939e-06, + "loss": 0.7585, + "step": 3313 + }, + { + "epoch": 0.1703155514441361, + "grad_norm": 1.1706732511520386, + "learning_rate": 9.492599191027832e-06, + "loss": 0.8453, + "step": 3314 + }, + { + "epoch": 0.17036694418748072, + "grad_norm": 1.1012581586837769, + "learning_rate": 9.492233825093343e-06, + "loss": 0.78, + "step": 3315 + }, + { + "epoch": 0.17041833693082536, + "grad_norm": 1.0937896966934204, + "learning_rate": 9.491868334697592e-06, + "loss": 0.7669, + "step": 3316 + }, + { + "epoch": 0.17046972967417, + "grad_norm": 1.195407748222351, + "learning_rate": 9.491502719850707e-06, + "loss": 0.8287, + "step": 3317 + }, + { + "epoch": 0.17052112241751466, + "grad_norm": 1.0681138038635254, + "learning_rate": 9.491136980562819e-06, + "loss": 0.7145, + "step": 3318 + }, + { + "epoch": 0.17057251516085928, + "grad_norm": 1.1032580137252808, + "learning_rate": 9.490771116844054e-06, + "loss": 0.7816, + "step": 3319 + }, + { + "epoch": 0.17062390790420393, + "grad_norm": 1.0793702602386475, + "learning_rate": 9.49040512870456e-06, + "loss": 0.788, + "step": 3320 + }, + { + "epoch": 0.17067530064754857, + "grad_norm": 0.9614205956459045, + "learning_rate": 9.490039016154466e-06, + "loss": 0.7102, + "step": 3321 + }, + { + "epoch": 0.17072669339089322, + "grad_norm": 1.2258070707321167, + "learning_rate": 9.489672779203923e-06, + "loss": 0.8231, + "step": 3322 + }, + { + "epoch": 0.17077808613423784, + "grad_norm": 0.868203341960907, + "learning_rate": 9.489306417863073e-06, + "loss": 0.7297, + "step": 3323 + }, + { + "epoch": 0.17082947887758249, + "grad_norm": 1.1281613111495972, + "learning_rate": 9.488939932142069e-06, + "loss": 0.7735, + "step": 3324 + }, + { + "epoch": 0.17088087162092713, + "grad_norm": 1.1692947149276733, + "learning_rate": 9.488573322051065e-06, + "loss": 0.8042, + "step": 3325 + }, + { + "epoch": 0.17093226436427175, + "grad_norm": 1.132978081703186, + "learning_rate": 9.488206587600216e-06, + "loss": 0.8566, + "step": 3326 + }, + { + "epoch": 0.1709836571076164, + "grad_norm": 1.1298434734344482, + "learning_rate": 9.487839728799685e-06, + "loss": 0.7854, + "step": 3327 + }, + { + "epoch": 0.17103504985096105, + "grad_norm": 1.1052907705307007, + "learning_rate": 9.487472745659636e-06, + "loss": 0.7851, + "step": 3328 + }, + { + "epoch": 0.1710864425943057, + "grad_norm": 1.2407386302947998, + "learning_rate": 9.487105638190235e-06, + "loss": 0.8049, + "step": 3329 + }, + { + "epoch": 0.1711378353376503, + "grad_norm": 1.0993572473526, + "learning_rate": 9.486738406401654e-06, + "loss": 0.7695, + "step": 3330 + }, + { + "epoch": 0.17118922808099496, + "grad_norm": 1.0431360006332397, + "learning_rate": 9.486371050304069e-06, + "loss": 0.8087, + "step": 3331 + }, + { + "epoch": 0.1712406208243396, + "grad_norm": 1.106719732284546, + "learning_rate": 9.486003569907653e-06, + "loss": 0.7928, + "step": 3332 + }, + { + "epoch": 0.17129201356768425, + "grad_norm": 1.226104497909546, + "learning_rate": 9.485635965222592e-06, + "loss": 0.7764, + "step": 3333 + }, + { + "epoch": 0.17134340631102887, + "grad_norm": 1.1749354600906372, + "learning_rate": 9.485268236259073e-06, + "loss": 0.8315, + "step": 3334 + }, + { + "epoch": 0.17139479905437352, + "grad_norm": 1.1435697078704834, + "learning_rate": 9.484900383027277e-06, + "loss": 0.8452, + "step": 3335 + }, + { + "epoch": 0.17144619179771817, + "grad_norm": 1.1379668712615967, + "learning_rate": 9.484532405537401e-06, + "loss": 0.8488, + "step": 3336 + }, + { + "epoch": 0.17149758454106281, + "grad_norm": 1.1501673460006714, + "learning_rate": 9.484164303799638e-06, + "loss": 0.7658, + "step": 3337 + }, + { + "epoch": 0.17154897728440743, + "grad_norm": 1.1365940570831299, + "learning_rate": 9.483796077824187e-06, + "loss": 0.7984, + "step": 3338 + }, + { + "epoch": 0.17160037002775208, + "grad_norm": 1.1052764654159546, + "learning_rate": 9.483427727621251e-06, + "loss": 0.7292, + "step": 3339 + }, + { + "epoch": 0.17165176277109673, + "grad_norm": 0.9644396305084229, + "learning_rate": 9.483059253201035e-06, + "loss": 0.7018, + "step": 3340 + }, + { + "epoch": 0.17170315551444137, + "grad_norm": 1.1723569631576538, + "learning_rate": 9.482690654573745e-06, + "loss": 0.7905, + "step": 3341 + }, + { + "epoch": 0.171754548257786, + "grad_norm": 1.127434253692627, + "learning_rate": 9.482321931749598e-06, + "loss": 0.8272, + "step": 3342 + }, + { + "epoch": 0.17180594100113064, + "grad_norm": 1.1210561990737915, + "learning_rate": 9.481953084738809e-06, + "loss": 0.8372, + "step": 3343 + }, + { + "epoch": 0.1718573337444753, + "grad_norm": 1.122659683227539, + "learning_rate": 9.481584113551594e-06, + "loss": 0.8206, + "step": 3344 + }, + { + "epoch": 0.1719087264878199, + "grad_norm": 0.8356234431266785, + "learning_rate": 9.481215018198177e-06, + "loss": 0.6954, + "step": 3345 + }, + { + "epoch": 0.17196011923116455, + "grad_norm": 0.9994503259658813, + "learning_rate": 9.480845798688786e-06, + "loss": 0.7696, + "step": 3346 + }, + { + "epoch": 0.1720115119745092, + "grad_norm": 1.1517689228057861, + "learning_rate": 9.48047645503365e-06, + "loss": 0.8747, + "step": 3347 + }, + { + "epoch": 0.17206290471785385, + "grad_norm": 1.082833170890808, + "learning_rate": 9.480106987243001e-06, + "loss": 0.7575, + "step": 3348 + }, + { + "epoch": 0.17211429746119847, + "grad_norm": 1.090004801750183, + "learning_rate": 9.479737395327074e-06, + "loss": 0.8026, + "step": 3349 + }, + { + "epoch": 0.17216569020454311, + "grad_norm": 0.8001365661621094, + "learning_rate": 9.479367679296111e-06, + "loss": 0.7162, + "step": 3350 + }, + { + "epoch": 0.17221708294788776, + "grad_norm": 1.2041667699813843, + "learning_rate": 9.478997839160356e-06, + "loss": 0.8279, + "step": 3351 + }, + { + "epoch": 0.1722684756912324, + "grad_norm": 1.1921515464782715, + "learning_rate": 9.478627874930053e-06, + "loss": 0.8779, + "step": 3352 + }, + { + "epoch": 0.17231986843457703, + "grad_norm": 1.1196902990341187, + "learning_rate": 9.478257786615457e-06, + "loss": 0.7982, + "step": 3353 + }, + { + "epoch": 0.17237126117792168, + "grad_norm": 1.0839660167694092, + "learning_rate": 9.477887574226815e-06, + "loss": 0.7863, + "step": 3354 + }, + { + "epoch": 0.17242265392126632, + "grad_norm": 1.1060118675231934, + "learning_rate": 9.47751723777439e-06, + "loss": 0.8044, + "step": 3355 + }, + { + "epoch": 0.17247404666461097, + "grad_norm": 0.7706414461135864, + "learning_rate": 9.477146777268437e-06, + "loss": 0.7154, + "step": 3356 + }, + { + "epoch": 0.1725254394079556, + "grad_norm": 1.1496667861938477, + "learning_rate": 9.476776192719226e-06, + "loss": 0.8046, + "step": 3357 + }, + { + "epoch": 0.17257683215130024, + "grad_norm": 1.1055561304092407, + "learning_rate": 9.476405484137019e-06, + "loss": 0.8151, + "step": 3358 + }, + { + "epoch": 0.17262822489464488, + "grad_norm": 0.777931809425354, + "learning_rate": 9.476034651532092e-06, + "loss": 0.7334, + "step": 3359 + }, + { + "epoch": 0.1726796176379895, + "grad_norm": 1.083731770515442, + "learning_rate": 9.475663694914712e-06, + "loss": 0.7969, + "step": 3360 + }, + { + "epoch": 0.17273101038133415, + "grad_norm": 1.0626894235610962, + "learning_rate": 9.475292614295163e-06, + "loss": 0.7869, + "step": 3361 + }, + { + "epoch": 0.1727824031246788, + "grad_norm": 1.1252862215042114, + "learning_rate": 9.474921409683726e-06, + "loss": 0.7781, + "step": 3362 + }, + { + "epoch": 0.17283379586802344, + "grad_norm": 1.1365172863006592, + "learning_rate": 9.474550081090681e-06, + "loss": 0.811, + "step": 3363 + }, + { + "epoch": 0.17288518861136806, + "grad_norm": 1.1246477365493774, + "learning_rate": 9.47417862852632e-06, + "loss": 0.7581, + "step": 3364 + }, + { + "epoch": 0.1729365813547127, + "grad_norm": 1.1258962154388428, + "learning_rate": 9.473807052000933e-06, + "loss": 0.8032, + "step": 3365 + }, + { + "epoch": 0.17298797409805736, + "grad_norm": 1.127597689628601, + "learning_rate": 9.473435351524815e-06, + "loss": 0.8295, + "step": 3366 + }, + { + "epoch": 0.173039366841402, + "grad_norm": 1.3430150747299194, + "learning_rate": 9.473063527108264e-06, + "loss": 0.7765, + "step": 3367 + }, + { + "epoch": 0.17309075958474662, + "grad_norm": 1.072946548461914, + "learning_rate": 9.472691578761582e-06, + "loss": 0.7691, + "step": 3368 + }, + { + "epoch": 0.17314215232809127, + "grad_norm": 1.13961660861969, + "learning_rate": 9.472319506495073e-06, + "loss": 0.806, + "step": 3369 + }, + { + "epoch": 0.17319354507143592, + "grad_norm": 1.1598742008209229, + "learning_rate": 9.471947310319047e-06, + "loss": 0.8787, + "step": 3370 + }, + { + "epoch": 0.17324493781478056, + "grad_norm": 1.017896294593811, + "learning_rate": 9.471574990243818e-06, + "loss": 0.7732, + "step": 3371 + }, + { + "epoch": 0.17329633055812518, + "grad_norm": 1.1058242321014404, + "learning_rate": 9.471202546279695e-06, + "loss": 0.7502, + "step": 3372 + }, + { + "epoch": 0.17334772330146983, + "grad_norm": 1.0317755937576294, + "learning_rate": 9.470829978437004e-06, + "loss": 0.7443, + "step": 3373 + }, + { + "epoch": 0.17339911604481448, + "grad_norm": 1.1426212787628174, + "learning_rate": 9.470457286726063e-06, + "loss": 0.827, + "step": 3374 + }, + { + "epoch": 0.17345050878815912, + "grad_norm": 0.8564515709877014, + "learning_rate": 9.470084471157199e-06, + "loss": 0.6842, + "step": 3375 + }, + { + "epoch": 0.17350190153150374, + "grad_norm": 1.166164755821228, + "learning_rate": 9.469711531740744e-06, + "loss": 0.7888, + "step": 3376 + }, + { + "epoch": 0.1735532942748484, + "grad_norm": 0.7758494019508362, + "learning_rate": 9.469338468487023e-06, + "loss": 0.6676, + "step": 3377 + }, + { + "epoch": 0.17360468701819304, + "grad_norm": 0.8199867606163025, + "learning_rate": 9.468965281406381e-06, + "loss": 0.6914, + "step": 3378 + }, + { + "epoch": 0.17365607976153766, + "grad_norm": 1.135945200920105, + "learning_rate": 9.468591970509153e-06, + "loss": 0.841, + "step": 3379 + }, + { + "epoch": 0.1737074725048823, + "grad_norm": 1.3191595077514648, + "learning_rate": 9.46821853580568e-06, + "loss": 0.844, + "step": 3380 + }, + { + "epoch": 0.17375886524822695, + "grad_norm": 1.1246620416641235, + "learning_rate": 9.467844977306313e-06, + "loss": 0.7744, + "step": 3381 + }, + { + "epoch": 0.1738102579915716, + "grad_norm": 1.1327685117721558, + "learning_rate": 9.467471295021397e-06, + "loss": 0.751, + "step": 3382 + }, + { + "epoch": 0.17386165073491622, + "grad_norm": 1.1279501914978027, + "learning_rate": 9.46709748896129e-06, + "loss": 0.8304, + "step": 3383 + }, + { + "epoch": 0.17391304347826086, + "grad_norm": 1.1337774991989136, + "learning_rate": 9.466723559136343e-06, + "loss": 0.8378, + "step": 3384 + }, + { + "epoch": 0.1739644362216055, + "grad_norm": 1.100205421447754, + "learning_rate": 9.466349505556922e-06, + "loss": 0.7181, + "step": 3385 + }, + { + "epoch": 0.17401582896495016, + "grad_norm": 1.154875636100769, + "learning_rate": 9.465975328233387e-06, + "loss": 0.8112, + "step": 3386 + }, + { + "epoch": 0.17406722170829478, + "grad_norm": 0.9268988370895386, + "learning_rate": 9.465601027176108e-06, + "loss": 0.7446, + "step": 3387 + }, + { + "epoch": 0.17411861445163943, + "grad_norm": 1.108013391494751, + "learning_rate": 9.46522660239545e-06, + "loss": 0.7554, + "step": 3388 + }, + { + "epoch": 0.17417000719498407, + "grad_norm": 1.0779836177825928, + "learning_rate": 9.464852053901789e-06, + "loss": 0.7719, + "step": 3389 + }, + { + "epoch": 0.17422139993832872, + "grad_norm": 1.0938935279846191, + "learning_rate": 9.464477381705505e-06, + "loss": 0.7854, + "step": 3390 + }, + { + "epoch": 0.17427279268167334, + "grad_norm": 1.1851407289505005, + "learning_rate": 9.464102585816977e-06, + "loss": 0.7709, + "step": 3391 + }, + { + "epoch": 0.17432418542501799, + "grad_norm": 1.1759357452392578, + "learning_rate": 9.463727666246586e-06, + "loss": 0.8114, + "step": 3392 + }, + { + "epoch": 0.17437557816836263, + "grad_norm": 1.09968900680542, + "learning_rate": 9.463352623004725e-06, + "loss": 0.7889, + "step": 3393 + }, + { + "epoch": 0.17442697091170728, + "grad_norm": 1.1203958988189697, + "learning_rate": 9.462977456101781e-06, + "loss": 0.7214, + "step": 3394 + }, + { + "epoch": 0.1744783636550519, + "grad_norm": 0.8718360662460327, + "learning_rate": 9.462602165548148e-06, + "loss": 0.7201, + "step": 3395 + }, + { + "epoch": 0.17452975639839655, + "grad_norm": 1.0940511226654053, + "learning_rate": 9.462226751354227e-06, + "loss": 0.8226, + "step": 3396 + }, + { + "epoch": 0.1745811491417412, + "grad_norm": 1.1910855770111084, + "learning_rate": 9.461851213530415e-06, + "loss": 0.8348, + "step": 3397 + }, + { + "epoch": 0.1746325418850858, + "grad_norm": 1.0758998394012451, + "learning_rate": 9.461475552087121e-06, + "loss": 0.7697, + "step": 3398 + }, + { + "epoch": 0.17468393462843046, + "grad_norm": 1.1878212690353394, + "learning_rate": 9.46109976703475e-06, + "loss": 0.7933, + "step": 3399 + }, + { + "epoch": 0.1747353273717751, + "grad_norm": 1.2305246591567993, + "learning_rate": 9.460723858383714e-06, + "loss": 0.7961, + "step": 3400 + }, + { + "epoch": 0.17478672011511975, + "grad_norm": 1.0558608770370483, + "learning_rate": 9.460347826144429e-06, + "loss": 0.7906, + "step": 3401 + }, + { + "epoch": 0.17483811285846437, + "grad_norm": 1.1486040353775024, + "learning_rate": 9.45997167032731e-06, + "loss": 0.7635, + "step": 3402 + }, + { + "epoch": 0.17488950560180902, + "grad_norm": 1.0449331998825073, + "learning_rate": 9.459595390942785e-06, + "loss": 0.7943, + "step": 3403 + }, + { + "epoch": 0.17494089834515367, + "grad_norm": 1.1241176128387451, + "learning_rate": 9.459218988001273e-06, + "loss": 0.8407, + "step": 3404 + }, + { + "epoch": 0.17499229108849831, + "grad_norm": 1.0613652467727661, + "learning_rate": 9.458842461513206e-06, + "loss": 0.7654, + "step": 3405 + }, + { + "epoch": 0.17504368383184293, + "grad_norm": 1.140722393989563, + "learning_rate": 9.458465811489014e-06, + "loss": 0.8602, + "step": 3406 + }, + { + "epoch": 0.17509507657518758, + "grad_norm": 1.141890048980713, + "learning_rate": 9.458089037939134e-06, + "loss": 0.8262, + "step": 3407 + }, + { + "epoch": 0.17514646931853223, + "grad_norm": 1.3865612745285034, + "learning_rate": 9.457712140874006e-06, + "loss": 0.8142, + "step": 3408 + }, + { + "epoch": 0.17519786206187687, + "grad_norm": 1.126225233078003, + "learning_rate": 9.457335120304068e-06, + "loss": 0.8215, + "step": 3409 + }, + { + "epoch": 0.1752492548052215, + "grad_norm": 0.87749844789505, + "learning_rate": 9.456957976239767e-06, + "loss": 0.702, + "step": 3410 + }, + { + "epoch": 0.17530064754856614, + "grad_norm": 1.1537779569625854, + "learning_rate": 9.456580708691556e-06, + "loss": 0.8224, + "step": 3411 + }, + { + "epoch": 0.1753520402919108, + "grad_norm": 0.8072025179862976, + "learning_rate": 9.456203317669884e-06, + "loss": 0.711, + "step": 3412 + }, + { + "epoch": 0.17540343303525543, + "grad_norm": 1.1483186483383179, + "learning_rate": 9.455825803185206e-06, + "loss": 0.8369, + "step": 3413 + }, + { + "epoch": 0.17545482577860005, + "grad_norm": 1.0160456895828247, + "learning_rate": 9.455448165247985e-06, + "loss": 0.7476, + "step": 3414 + }, + { + "epoch": 0.1755062185219447, + "grad_norm": 1.089198112487793, + "learning_rate": 9.455070403868682e-06, + "loss": 0.7481, + "step": 3415 + }, + { + "epoch": 0.17555761126528935, + "grad_norm": 1.1497104167938232, + "learning_rate": 9.454692519057763e-06, + "loss": 0.7798, + "step": 3416 + }, + { + "epoch": 0.17560900400863397, + "grad_norm": 0.7521306872367859, + "learning_rate": 9.454314510825698e-06, + "loss": 0.7211, + "step": 3417 + }, + { + "epoch": 0.17566039675197861, + "grad_norm": 1.1692216396331787, + "learning_rate": 9.453936379182957e-06, + "loss": 0.8858, + "step": 3418 + }, + { + "epoch": 0.17571178949532326, + "grad_norm": 1.2222484350204468, + "learning_rate": 9.453558124140023e-06, + "loss": 0.7688, + "step": 3419 + }, + { + "epoch": 0.1757631822386679, + "grad_norm": 1.0425777435302734, + "learning_rate": 9.45317974570737e-06, + "loss": 0.8038, + "step": 3420 + }, + { + "epoch": 0.17581457498201253, + "grad_norm": 1.1201952695846558, + "learning_rate": 9.452801243895485e-06, + "loss": 0.7852, + "step": 3421 + }, + { + "epoch": 0.17586596772535718, + "grad_norm": 1.2040051221847534, + "learning_rate": 9.452422618714852e-06, + "loss": 0.7955, + "step": 3422 + }, + { + "epoch": 0.17591736046870182, + "grad_norm": 1.370451807975769, + "learning_rate": 9.452043870175962e-06, + "loss": 0.7553, + "step": 3423 + }, + { + "epoch": 0.17596875321204647, + "grad_norm": 1.1475248336791992, + "learning_rate": 9.451664998289307e-06, + "loss": 0.7789, + "step": 3424 + }, + { + "epoch": 0.1760201459553911, + "grad_norm": 1.0839588642120361, + "learning_rate": 9.451286003065389e-06, + "loss": 0.8171, + "step": 3425 + }, + { + "epoch": 0.17607153869873574, + "grad_norm": 1.2139391899108887, + "learning_rate": 9.450906884514705e-06, + "loss": 0.8433, + "step": 3426 + }, + { + "epoch": 0.17612293144208038, + "grad_norm": 1.146639347076416, + "learning_rate": 9.450527642647757e-06, + "loss": 0.789, + "step": 3427 + }, + { + "epoch": 0.17617432418542503, + "grad_norm": 1.11262845993042, + "learning_rate": 9.450148277475057e-06, + "loss": 0.7421, + "step": 3428 + }, + { + "epoch": 0.17622571692876965, + "grad_norm": 0.7766870856285095, + "learning_rate": 9.44976878900711e-06, + "loss": 0.7161, + "step": 3429 + }, + { + "epoch": 0.1762771096721143, + "grad_norm": 1.1464704275131226, + "learning_rate": 9.449389177254436e-06, + "loss": 0.7977, + "step": 3430 + }, + { + "epoch": 0.17632850241545894, + "grad_norm": 1.0623836517333984, + "learning_rate": 9.449009442227548e-06, + "loss": 0.8414, + "step": 3431 + }, + { + "epoch": 0.1763798951588036, + "grad_norm": 1.2111117839813232, + "learning_rate": 9.448629583936967e-06, + "loss": 0.8343, + "step": 3432 + }, + { + "epoch": 0.1764312879021482, + "grad_norm": 0.7276771068572998, + "learning_rate": 9.44824960239322e-06, + "loss": 0.6972, + "step": 3433 + }, + { + "epoch": 0.17648268064549286, + "grad_norm": 1.1208516359329224, + "learning_rate": 9.447869497606833e-06, + "loss": 0.8134, + "step": 3434 + }, + { + "epoch": 0.1765340733888375, + "grad_norm": 1.140098214149475, + "learning_rate": 9.44748926958834e-06, + "loss": 0.8114, + "step": 3435 + }, + { + "epoch": 0.17658546613218212, + "grad_norm": 1.0979682207107544, + "learning_rate": 9.44710891834827e-06, + "loss": 0.7932, + "step": 3436 + }, + { + "epoch": 0.17663685887552677, + "grad_norm": 1.090012788772583, + "learning_rate": 9.446728443897164e-06, + "loss": 0.8027, + "step": 3437 + }, + { + "epoch": 0.17668825161887142, + "grad_norm": 1.0815987586975098, + "learning_rate": 9.446347846245566e-06, + "loss": 0.7867, + "step": 3438 + }, + { + "epoch": 0.17673964436221606, + "grad_norm": 1.1917003393173218, + "learning_rate": 9.445967125404014e-06, + "loss": 0.8368, + "step": 3439 + }, + { + "epoch": 0.17679103710556068, + "grad_norm": 1.1139849424362183, + "learning_rate": 9.445586281383064e-06, + "loss": 0.7661, + "step": 3440 + }, + { + "epoch": 0.17684242984890533, + "grad_norm": 0.7517603039741516, + "learning_rate": 9.445205314193263e-06, + "loss": 0.6879, + "step": 3441 + }, + { + "epoch": 0.17689382259224998, + "grad_norm": 1.1422585248947144, + "learning_rate": 9.444824223845165e-06, + "loss": 0.7711, + "step": 3442 + }, + { + "epoch": 0.17694521533559462, + "grad_norm": 1.1635183095932007, + "learning_rate": 9.444443010349333e-06, + "loss": 0.8472, + "step": 3443 + }, + { + "epoch": 0.17699660807893924, + "grad_norm": 1.060729742050171, + "learning_rate": 9.444061673716327e-06, + "loss": 0.8276, + "step": 3444 + }, + { + "epoch": 0.1770480008222839, + "grad_norm": 1.1513441801071167, + "learning_rate": 9.44368021395671e-06, + "loss": 0.7628, + "step": 3445 + }, + { + "epoch": 0.17709939356562854, + "grad_norm": 1.1990025043487549, + "learning_rate": 9.44329863108105e-06, + "loss": 0.8096, + "step": 3446 + }, + { + "epoch": 0.17715078630897318, + "grad_norm": 1.1629341840744019, + "learning_rate": 9.442916925099925e-06, + "loss": 0.8382, + "step": 3447 + }, + { + "epoch": 0.1772021790523178, + "grad_norm": 0.7434694766998291, + "learning_rate": 9.442535096023906e-06, + "loss": 0.73, + "step": 3448 + }, + { + "epoch": 0.17725357179566245, + "grad_norm": 1.0744454860687256, + "learning_rate": 9.442153143863571e-06, + "loss": 0.8029, + "step": 3449 + }, + { + "epoch": 0.1773049645390071, + "grad_norm": 1.1132200956344604, + "learning_rate": 9.441771068629507e-06, + "loss": 0.7834, + "step": 3450 + }, + { + "epoch": 0.17735635728235175, + "grad_norm": 1.1523597240447998, + "learning_rate": 9.441388870332294e-06, + "loss": 0.8391, + "step": 3451 + }, + { + "epoch": 0.17740775002569636, + "grad_norm": 1.2505656480789185, + "learning_rate": 9.441006548982526e-06, + "loss": 0.777, + "step": 3452 + }, + { + "epoch": 0.177459142769041, + "grad_norm": 1.1059337854385376, + "learning_rate": 9.440624104590793e-06, + "loss": 0.8205, + "step": 3453 + }, + { + "epoch": 0.17751053551238566, + "grad_norm": 3.040278196334839, + "learning_rate": 9.440241537167689e-06, + "loss": 0.8545, + "step": 3454 + }, + { + "epoch": 0.17756192825573028, + "grad_norm": 1.165428638458252, + "learning_rate": 9.43985884672382e-06, + "loss": 0.8363, + "step": 3455 + }, + { + "epoch": 0.17761332099907493, + "grad_norm": 0.7192800641059875, + "learning_rate": 9.439476033269781e-06, + "loss": 0.7291, + "step": 3456 + }, + { + "epoch": 0.17766471374241957, + "grad_norm": 1.2035566568374634, + "learning_rate": 9.439093096816183e-06, + "loss": 0.8853, + "step": 3457 + }, + { + "epoch": 0.17771610648576422, + "grad_norm": 1.1474984884262085, + "learning_rate": 9.438710037373635e-06, + "loss": 0.7916, + "step": 3458 + }, + { + "epoch": 0.17776749922910884, + "grad_norm": 1.1859370470046997, + "learning_rate": 9.438326854952748e-06, + "loss": 0.8186, + "step": 3459 + }, + { + "epoch": 0.17781889197245349, + "grad_norm": 1.067668080329895, + "learning_rate": 9.437943549564142e-06, + "loss": 0.7901, + "step": 3460 + }, + { + "epoch": 0.17787028471579813, + "grad_norm": 1.1691845655441284, + "learning_rate": 9.437560121218433e-06, + "loss": 0.7809, + "step": 3461 + }, + { + "epoch": 0.17792167745914278, + "grad_norm": 1.1305781602859497, + "learning_rate": 9.437176569926245e-06, + "loss": 0.7603, + "step": 3462 + }, + { + "epoch": 0.1779730702024874, + "grad_norm": 1.0872985124588013, + "learning_rate": 9.436792895698206e-06, + "loss": 0.7415, + "step": 3463 + }, + { + "epoch": 0.17802446294583205, + "grad_norm": 1.0424630641937256, + "learning_rate": 9.436409098544946e-06, + "loss": 0.8028, + "step": 3464 + }, + { + "epoch": 0.1780758556891767, + "grad_norm": 1.1164084672927856, + "learning_rate": 9.436025178477097e-06, + "loss": 0.8058, + "step": 3465 + }, + { + "epoch": 0.17812724843252134, + "grad_norm": 1.1132771968841553, + "learning_rate": 9.435641135505297e-06, + "loss": 0.81, + "step": 3466 + }, + { + "epoch": 0.17817864117586596, + "grad_norm": 1.0557054281234741, + "learning_rate": 9.435256969640188e-06, + "loss": 0.7627, + "step": 3467 + }, + { + "epoch": 0.1782300339192106, + "grad_norm": 1.2178592681884766, + "learning_rate": 9.434872680892409e-06, + "loss": 0.8437, + "step": 3468 + }, + { + "epoch": 0.17828142666255525, + "grad_norm": 1.0634652376174927, + "learning_rate": 9.43448826927261e-06, + "loss": 0.7834, + "step": 3469 + }, + { + "epoch": 0.1783328194058999, + "grad_norm": 1.1347999572753906, + "learning_rate": 9.434103734791442e-06, + "loss": 0.7642, + "step": 3470 + }, + { + "epoch": 0.17838421214924452, + "grad_norm": 0.773730993270874, + "learning_rate": 9.433719077459558e-06, + "loss": 0.6943, + "step": 3471 + }, + { + "epoch": 0.17843560489258917, + "grad_norm": 1.1731210947036743, + "learning_rate": 9.433334297287615e-06, + "loss": 0.8022, + "step": 3472 + }, + { + "epoch": 0.17848699763593381, + "grad_norm": 0.9087056517601013, + "learning_rate": 9.432949394286273e-06, + "loss": 0.7188, + "step": 3473 + }, + { + "epoch": 0.17853839037927843, + "grad_norm": 0.7269626259803772, + "learning_rate": 9.432564368466196e-06, + "loss": 0.7253, + "step": 3474 + }, + { + "epoch": 0.17858978312262308, + "grad_norm": 1.0776389837265015, + "learning_rate": 9.432179219838055e-06, + "loss": 0.8147, + "step": 3475 + }, + { + "epoch": 0.17864117586596773, + "grad_norm": 1.095043659210205, + "learning_rate": 9.431793948412519e-06, + "loss": 0.7807, + "step": 3476 + }, + { + "epoch": 0.17869256860931237, + "grad_norm": 1.1292461156845093, + "learning_rate": 9.43140855420026e-06, + "loss": 0.8136, + "step": 3477 + }, + { + "epoch": 0.178743961352657, + "grad_norm": 1.0339425802230835, + "learning_rate": 9.431023037211958e-06, + "loss": 0.807, + "step": 3478 + }, + { + "epoch": 0.17879535409600164, + "grad_norm": 1.131508708000183, + "learning_rate": 9.430637397458291e-06, + "loss": 0.8032, + "step": 3479 + }, + { + "epoch": 0.1788467468393463, + "grad_norm": 0.7799059748649597, + "learning_rate": 9.430251634949949e-06, + "loss": 0.7202, + "step": 3480 + }, + { + "epoch": 0.17889813958269093, + "grad_norm": 1.1265454292297363, + "learning_rate": 9.429865749697615e-06, + "loss": 0.7919, + "step": 3481 + }, + { + "epoch": 0.17894953232603555, + "grad_norm": 1.0092310905456543, + "learning_rate": 9.429479741711982e-06, + "loss": 0.7619, + "step": 3482 + }, + { + "epoch": 0.1790009250693802, + "grad_norm": 1.1282694339752197, + "learning_rate": 9.429093611003745e-06, + "loss": 0.7745, + "step": 3483 + }, + { + "epoch": 0.17905231781272485, + "grad_norm": 1.0650157928466797, + "learning_rate": 9.428707357583603e-06, + "loss": 0.7518, + "step": 3484 + }, + { + "epoch": 0.1791037105560695, + "grad_norm": 1.1747040748596191, + "learning_rate": 9.428320981462255e-06, + "loss": 0.7938, + "step": 3485 + }, + { + "epoch": 0.17915510329941411, + "grad_norm": 1.0909932851791382, + "learning_rate": 9.427934482650406e-06, + "loss": 0.7886, + "step": 3486 + }, + { + "epoch": 0.17920649604275876, + "grad_norm": 0.8447384238243103, + "learning_rate": 9.42754786115877e-06, + "loss": 0.7523, + "step": 3487 + }, + { + "epoch": 0.1792578887861034, + "grad_norm": 0.9629266262054443, + "learning_rate": 9.42716111699805e-06, + "loss": 0.7438, + "step": 3488 + }, + { + "epoch": 0.17930928152944806, + "grad_norm": 1.0727958679199219, + "learning_rate": 9.426774250178967e-06, + "loss": 0.7618, + "step": 3489 + }, + { + "epoch": 0.17936067427279268, + "grad_norm": 0.7674674391746521, + "learning_rate": 9.426387260712238e-06, + "loss": 0.6983, + "step": 3490 + }, + { + "epoch": 0.17941206701613732, + "grad_norm": 1.1505568027496338, + "learning_rate": 9.426000148608582e-06, + "loss": 0.8316, + "step": 3491 + }, + { + "epoch": 0.17946345975948197, + "grad_norm": 1.088375210762024, + "learning_rate": 9.42561291387873e-06, + "loss": 0.7697, + "step": 3492 + }, + { + "epoch": 0.1795148525028266, + "grad_norm": 1.1327625513076782, + "learning_rate": 9.425225556533405e-06, + "loss": 0.7699, + "step": 3493 + }, + { + "epoch": 0.17956624524617124, + "grad_norm": 1.1232110261917114, + "learning_rate": 9.424838076583344e-06, + "loss": 0.7602, + "step": 3494 + }, + { + "epoch": 0.17961763798951588, + "grad_norm": 1.0624712705612183, + "learning_rate": 9.42445047403928e-06, + "loss": 0.779, + "step": 3495 + }, + { + "epoch": 0.17966903073286053, + "grad_norm": 1.1274298429489136, + "learning_rate": 9.42406274891195e-06, + "loss": 0.8037, + "step": 3496 + }, + { + "epoch": 0.17972042347620515, + "grad_norm": 1.1554615497589111, + "learning_rate": 9.4236749012121e-06, + "loss": 0.8454, + "step": 3497 + }, + { + "epoch": 0.1797718162195498, + "grad_norm": 1.05156409740448, + "learning_rate": 9.423286930950473e-06, + "loss": 0.8262, + "step": 3498 + }, + { + "epoch": 0.17982320896289444, + "grad_norm": 1.159987211227417, + "learning_rate": 9.42289883813782e-06, + "loss": 0.8042, + "step": 3499 + }, + { + "epoch": 0.1798746017062391, + "grad_norm": 1.0476018190383911, + "learning_rate": 9.42251062278489e-06, + "loss": 0.7554, + "step": 3500 + }, + { + "epoch": 0.1799259944495837, + "grad_norm": 1.1192255020141602, + "learning_rate": 9.422122284902445e-06, + "loss": 0.7678, + "step": 3501 + }, + { + "epoch": 0.17997738719292836, + "grad_norm": 1.1830130815505981, + "learning_rate": 9.421733824501237e-06, + "loss": 0.8074, + "step": 3502 + }, + { + "epoch": 0.180028779936273, + "grad_norm": 1.068735957145691, + "learning_rate": 9.421345241592035e-06, + "loss": 0.8225, + "step": 3503 + }, + { + "epoch": 0.18008017267961765, + "grad_norm": 1.1787382364273071, + "learning_rate": 9.420956536185601e-06, + "loss": 0.8618, + "step": 3504 + }, + { + "epoch": 0.18013156542296227, + "grad_norm": 0.8685758709907532, + "learning_rate": 9.420567708292705e-06, + "loss": 0.6954, + "step": 3505 + }, + { + "epoch": 0.18018295816630692, + "grad_norm": 1.1619783639907837, + "learning_rate": 9.42017875792412e-06, + "loss": 0.7866, + "step": 3506 + }, + { + "epoch": 0.18023435090965156, + "grad_norm": 1.1650017499923706, + "learning_rate": 9.419789685090623e-06, + "loss": 0.8078, + "step": 3507 + }, + { + "epoch": 0.18028574365299618, + "grad_norm": 1.1158781051635742, + "learning_rate": 9.419400489802995e-06, + "loss": 0.7912, + "step": 3508 + }, + { + "epoch": 0.18033713639634083, + "grad_norm": 1.2970175743103027, + "learning_rate": 9.419011172072015e-06, + "loss": 0.7491, + "step": 3509 + }, + { + "epoch": 0.18038852913968548, + "grad_norm": 0.9577391147613525, + "learning_rate": 9.418621731908473e-06, + "loss": 0.7011, + "step": 3510 + }, + { + "epoch": 0.18043992188303012, + "grad_norm": 1.0487060546875, + "learning_rate": 9.418232169323157e-06, + "loss": 0.7431, + "step": 3511 + }, + { + "epoch": 0.18049131462637474, + "grad_norm": 1.08879816532135, + "learning_rate": 9.417842484326861e-06, + "loss": 0.8081, + "step": 3512 + }, + { + "epoch": 0.1805427073697194, + "grad_norm": 1.1513770818710327, + "learning_rate": 9.41745267693038e-06, + "loss": 0.777, + "step": 3513 + }, + { + "epoch": 0.18059410011306404, + "grad_norm": 0.8504171371459961, + "learning_rate": 9.417062747144514e-06, + "loss": 0.7494, + "step": 3514 + }, + { + "epoch": 0.18064549285640868, + "grad_norm": 0.9390472769737244, + "learning_rate": 9.41667269498007e-06, + "loss": 0.7141, + "step": 3515 + }, + { + "epoch": 0.1806968855997533, + "grad_norm": 1.1276520490646362, + "learning_rate": 9.416282520447852e-06, + "loss": 0.8015, + "step": 3516 + }, + { + "epoch": 0.18074827834309795, + "grad_norm": 1.1711316108703613, + "learning_rate": 9.415892223558668e-06, + "loss": 0.8564, + "step": 3517 + }, + { + "epoch": 0.1807996710864426, + "grad_norm": 1.1999324560165405, + "learning_rate": 9.415501804323336e-06, + "loss": 0.7266, + "step": 3518 + }, + { + "epoch": 0.18085106382978725, + "grad_norm": 1.1760706901550293, + "learning_rate": 9.41511126275267e-06, + "loss": 0.8169, + "step": 3519 + }, + { + "epoch": 0.18090245657313186, + "grad_norm": 1.1594830751419067, + "learning_rate": 9.414720598857492e-06, + "loss": 0.7801, + "step": 3520 + }, + { + "epoch": 0.1809538493164765, + "grad_norm": 0.8029430508613586, + "learning_rate": 9.414329812648624e-06, + "loss": 0.6878, + "step": 3521 + }, + { + "epoch": 0.18100524205982116, + "grad_norm": 1.1957621574401855, + "learning_rate": 9.413938904136892e-06, + "loss": 0.7954, + "step": 3522 + }, + { + "epoch": 0.1810566348031658, + "grad_norm": 1.1458935737609863, + "learning_rate": 9.41354787333313e-06, + "loss": 0.8104, + "step": 3523 + }, + { + "epoch": 0.18110802754651043, + "grad_norm": 1.078250765800476, + "learning_rate": 9.41315672024817e-06, + "loss": 0.8068, + "step": 3524 + }, + { + "epoch": 0.18115942028985507, + "grad_norm": 1.159317970275879, + "learning_rate": 9.41276544489285e-06, + "loss": 0.7939, + "step": 3525 + }, + { + "epoch": 0.18121081303319972, + "grad_norm": 1.1015045642852783, + "learning_rate": 9.412374047278008e-06, + "loss": 0.7674, + "step": 3526 + }, + { + "epoch": 0.18126220577654434, + "grad_norm": 0.7945823073387146, + "learning_rate": 9.41198252741449e-06, + "loss": 0.7177, + "step": 3527 + }, + { + "epoch": 0.18131359851988899, + "grad_norm": 1.0673738718032837, + "learning_rate": 9.411590885313144e-06, + "loss": 0.7913, + "step": 3528 + }, + { + "epoch": 0.18136499126323363, + "grad_norm": 1.1799509525299072, + "learning_rate": 9.411199120984822e-06, + "loss": 0.8195, + "step": 3529 + }, + { + "epoch": 0.18141638400657828, + "grad_norm": 1.1215784549713135, + "learning_rate": 9.410807234440374e-06, + "loss": 0.7387, + "step": 3530 + }, + { + "epoch": 0.1814677767499229, + "grad_norm": 1.1578919887542725, + "learning_rate": 9.41041522569066e-06, + "loss": 0.7251, + "step": 3531 + }, + { + "epoch": 0.18151916949326755, + "grad_norm": 1.0562947988510132, + "learning_rate": 9.410023094746542e-06, + "loss": 0.8127, + "step": 3532 + }, + { + "epoch": 0.1815705622366122, + "grad_norm": 1.0952017307281494, + "learning_rate": 9.409630841618881e-06, + "loss": 0.7925, + "step": 3533 + }, + { + "epoch": 0.18162195497995684, + "grad_norm": 1.051434874534607, + "learning_rate": 9.409238466318548e-06, + "loss": 0.7862, + "step": 3534 + }, + { + "epoch": 0.18167334772330146, + "grad_norm": 1.0959357023239136, + "learning_rate": 9.408845968856414e-06, + "loss": 0.8215, + "step": 3535 + }, + { + "epoch": 0.1817247404666461, + "grad_norm": 1.105502963066101, + "learning_rate": 9.408453349243352e-06, + "loss": 0.7818, + "step": 3536 + }, + { + "epoch": 0.18177613320999075, + "grad_norm": 1.1461687088012695, + "learning_rate": 9.40806060749024e-06, + "loss": 0.7866, + "step": 3537 + }, + { + "epoch": 0.1818275259533354, + "grad_norm": 1.1649479866027832, + "learning_rate": 9.40766774360796e-06, + "loss": 0.8435, + "step": 3538 + }, + { + "epoch": 0.18187891869668002, + "grad_norm": 1.2510865926742554, + "learning_rate": 9.407274757607396e-06, + "loss": 0.7768, + "step": 3539 + }, + { + "epoch": 0.18193031144002467, + "grad_norm": 1.0944989919662476, + "learning_rate": 9.406881649499436e-06, + "loss": 0.7948, + "step": 3540 + }, + { + "epoch": 0.1819817041833693, + "grad_norm": 1.1343542337417603, + "learning_rate": 9.40648841929497e-06, + "loss": 0.8797, + "step": 3541 + }, + { + "epoch": 0.18203309692671396, + "grad_norm": 0.9086080193519592, + "learning_rate": 9.406095067004896e-06, + "loss": 0.7037, + "step": 3542 + }, + { + "epoch": 0.18208448967005858, + "grad_norm": 0.7800127267837524, + "learning_rate": 9.405701592640112e-06, + "loss": 0.6671, + "step": 3543 + }, + { + "epoch": 0.18213588241340323, + "grad_norm": 0.8836771249771118, + "learning_rate": 9.405307996211516e-06, + "loss": 0.695, + "step": 3544 + }, + { + "epoch": 0.18218727515674787, + "grad_norm": 1.1251201629638672, + "learning_rate": 9.404914277730017e-06, + "loss": 0.7604, + "step": 3545 + }, + { + "epoch": 0.1822386679000925, + "grad_norm": 1.1130990982055664, + "learning_rate": 9.40452043720652e-06, + "loss": 0.7965, + "step": 3546 + }, + { + "epoch": 0.18229006064343714, + "grad_norm": 1.138291358947754, + "learning_rate": 9.404126474651937e-06, + "loss": 0.8411, + "step": 3547 + }, + { + "epoch": 0.1823414533867818, + "grad_norm": 1.1458921432495117, + "learning_rate": 9.403732390077185e-06, + "loss": 0.7395, + "step": 3548 + }, + { + "epoch": 0.18239284613012643, + "grad_norm": 1.2307687997817993, + "learning_rate": 9.403338183493182e-06, + "loss": 0.8341, + "step": 3549 + }, + { + "epoch": 0.18244423887347105, + "grad_norm": 1.120360255241394, + "learning_rate": 9.40294385491085e-06, + "loss": 0.7678, + "step": 3550 + }, + { + "epoch": 0.1824956316168157, + "grad_norm": 1.165789246559143, + "learning_rate": 9.402549404341112e-06, + "loss": 0.8282, + "step": 3551 + }, + { + "epoch": 0.18254702436016035, + "grad_norm": 1.0280251502990723, + "learning_rate": 9.402154831794901e-06, + "loss": 0.7691, + "step": 3552 + }, + { + "epoch": 0.182598417103505, + "grad_norm": 1.1076387166976929, + "learning_rate": 9.401760137283144e-06, + "loss": 0.7589, + "step": 3553 + }, + { + "epoch": 0.18264980984684961, + "grad_norm": 0.9806888103485107, + "learning_rate": 9.401365320816778e-06, + "loss": 0.7162, + "step": 3554 + }, + { + "epoch": 0.18270120259019426, + "grad_norm": 1.3336249589920044, + "learning_rate": 9.400970382406744e-06, + "loss": 0.7692, + "step": 3555 + }, + { + "epoch": 0.1827525953335389, + "grad_norm": 1.1388095617294312, + "learning_rate": 9.400575322063983e-06, + "loss": 0.7885, + "step": 3556 + }, + { + "epoch": 0.18280398807688356, + "grad_norm": 1.084636926651001, + "learning_rate": 9.400180139799438e-06, + "loss": 0.8226, + "step": 3557 + }, + { + "epoch": 0.18285538082022817, + "grad_norm": 1.1302292346954346, + "learning_rate": 9.399784835624061e-06, + "loss": 0.8005, + "step": 3558 + }, + { + "epoch": 0.18290677356357282, + "grad_norm": 1.147099256515503, + "learning_rate": 9.399389409548802e-06, + "loss": 0.9052, + "step": 3559 + }, + { + "epoch": 0.18295816630691747, + "grad_norm": 0.7813428044319153, + "learning_rate": 9.398993861584618e-06, + "loss": 0.6863, + "step": 3560 + }, + { + "epoch": 0.18300955905026212, + "grad_norm": 1.2046120166778564, + "learning_rate": 9.398598191742468e-06, + "loss": 0.7811, + "step": 3561 + }, + { + "epoch": 0.18306095179360674, + "grad_norm": 1.1721488237380981, + "learning_rate": 9.398202400033313e-06, + "loss": 0.8694, + "step": 3562 + }, + { + "epoch": 0.18311234453695138, + "grad_norm": 1.408753752708435, + "learning_rate": 9.397806486468121e-06, + "loss": 0.7794, + "step": 3563 + }, + { + "epoch": 0.18316373728029603, + "grad_norm": 1.167740821838379, + "learning_rate": 9.39741045105786e-06, + "loss": 0.809, + "step": 3564 + }, + { + "epoch": 0.18321513002364065, + "grad_norm": 1.0459738969802856, + "learning_rate": 9.397014293813502e-06, + "loss": 0.8327, + "step": 3565 + }, + { + "epoch": 0.1832665227669853, + "grad_norm": 0.8933892846107483, + "learning_rate": 9.396618014746024e-06, + "loss": 0.7085, + "step": 3566 + }, + { + "epoch": 0.18331791551032994, + "grad_norm": 1.1517727375030518, + "learning_rate": 9.396221613866406e-06, + "loss": 0.8066, + "step": 3567 + }, + { + "epoch": 0.1833693082536746, + "grad_norm": 1.1041796207427979, + "learning_rate": 9.395825091185627e-06, + "loss": 0.759, + "step": 3568 + }, + { + "epoch": 0.1834207009970192, + "grad_norm": 1.3052785396575928, + "learning_rate": 9.395428446714675e-06, + "loss": 0.7988, + "step": 3569 + }, + { + "epoch": 0.18347209374036386, + "grad_norm": 1.0522657632827759, + "learning_rate": 9.395031680464539e-06, + "loss": 0.7666, + "step": 3570 + }, + { + "epoch": 0.1835234864837085, + "grad_norm": 0.8035390377044678, + "learning_rate": 9.394634792446213e-06, + "loss": 0.7012, + "step": 3571 + }, + { + "epoch": 0.18357487922705315, + "grad_norm": 0.788076639175415, + "learning_rate": 9.394237782670695e-06, + "loss": 0.6688, + "step": 3572 + }, + { + "epoch": 0.18362627197039777, + "grad_norm": 0.7577769756317139, + "learning_rate": 9.39384065114898e-06, + "loss": 0.6624, + "step": 3573 + }, + { + "epoch": 0.18367766471374242, + "grad_norm": 0.995138943195343, + "learning_rate": 9.393443397892072e-06, + "loss": 0.7197, + "step": 3574 + }, + { + "epoch": 0.18372905745708706, + "grad_norm": 1.1804691553115845, + "learning_rate": 9.393046022910978e-06, + "loss": 0.8282, + "step": 3575 + }, + { + "epoch": 0.1837804502004317, + "grad_norm": 1.2751673460006714, + "learning_rate": 9.39264852621671e-06, + "loss": 0.7813, + "step": 3576 + }, + { + "epoch": 0.18383184294377633, + "grad_norm": 3.0481412410736084, + "learning_rate": 9.392250907820277e-06, + "loss": 0.7616, + "step": 3577 + }, + { + "epoch": 0.18388323568712098, + "grad_norm": 1.0465646982192993, + "learning_rate": 9.391853167732697e-06, + "loss": 0.7533, + "step": 3578 + }, + { + "epoch": 0.18393462843046562, + "grad_norm": 1.1468281745910645, + "learning_rate": 9.391455305964992e-06, + "loss": 0.7882, + "step": 3579 + }, + { + "epoch": 0.18398602117381027, + "grad_norm": 1.1356210708618164, + "learning_rate": 9.39105732252818e-06, + "loss": 0.8443, + "step": 3580 + }, + { + "epoch": 0.1840374139171549, + "grad_norm": 1.2170732021331787, + "learning_rate": 9.39065921743329e-06, + "loss": 0.8289, + "step": 3581 + }, + { + "epoch": 0.18408880666049954, + "grad_norm": 1.11255943775177, + "learning_rate": 9.390260990691356e-06, + "loss": 0.8042, + "step": 3582 + }, + { + "epoch": 0.18414019940384418, + "grad_norm": 1.1234798431396484, + "learning_rate": 9.389862642313406e-06, + "loss": 0.768, + "step": 3583 + }, + { + "epoch": 0.1841915921471888, + "grad_norm": 1.1570765972137451, + "learning_rate": 9.389464172310476e-06, + "loss": 0.8409, + "step": 3584 + }, + { + "epoch": 0.18424298489053345, + "grad_norm": 1.113181710243225, + "learning_rate": 9.38906558069361e-06, + "loss": 0.7855, + "step": 3585 + }, + { + "epoch": 0.1842943776338781, + "grad_norm": 1.1660375595092773, + "learning_rate": 9.38866686747385e-06, + "loss": 0.7317, + "step": 3586 + }, + { + "epoch": 0.18434577037722275, + "grad_norm": 0.9136704206466675, + "learning_rate": 9.38826803266224e-06, + "loss": 0.68, + "step": 3587 + }, + { + "epoch": 0.18439716312056736, + "grad_norm": 0.8244784474372864, + "learning_rate": 9.387869076269834e-06, + "loss": 0.6963, + "step": 3588 + }, + { + "epoch": 0.184448555863912, + "grad_norm": 1.3582353591918945, + "learning_rate": 9.387469998307681e-06, + "loss": 0.824, + "step": 3589 + }, + { + "epoch": 0.18449994860725666, + "grad_norm": 1.1117995977401733, + "learning_rate": 9.387070798786843e-06, + "loss": 0.8112, + "step": 3590 + }, + { + "epoch": 0.1845513413506013, + "grad_norm": 1.2154436111450195, + "learning_rate": 9.386671477718376e-06, + "loss": 0.7846, + "step": 3591 + }, + { + "epoch": 0.18460273409394592, + "grad_norm": 1.125707745552063, + "learning_rate": 9.386272035113346e-06, + "loss": 0.8186, + "step": 3592 + }, + { + "epoch": 0.18465412683729057, + "grad_norm": 1.095779538154602, + "learning_rate": 9.38587247098282e-06, + "loss": 0.7937, + "step": 3593 + }, + { + "epoch": 0.18470551958063522, + "grad_norm": 1.139014482498169, + "learning_rate": 9.385472785337866e-06, + "loss": 0.6794, + "step": 3594 + }, + { + "epoch": 0.18475691232397987, + "grad_norm": 1.0869598388671875, + "learning_rate": 9.385072978189558e-06, + "loss": 0.8252, + "step": 3595 + }, + { + "epoch": 0.18480830506732449, + "grad_norm": 1.1353763341903687, + "learning_rate": 9.384673049548974e-06, + "loss": 0.7882, + "step": 3596 + }, + { + "epoch": 0.18485969781066913, + "grad_norm": 1.1481024026870728, + "learning_rate": 9.384272999427196e-06, + "loss": 0.7628, + "step": 3597 + }, + { + "epoch": 0.18491109055401378, + "grad_norm": 1.1034921407699585, + "learning_rate": 9.383872827835305e-06, + "loss": 0.7872, + "step": 3598 + }, + { + "epoch": 0.18496248329735843, + "grad_norm": 1.0840333700180054, + "learning_rate": 9.383472534784388e-06, + "loss": 0.7987, + "step": 3599 + }, + { + "epoch": 0.18501387604070305, + "grad_norm": 1.1127790212631226, + "learning_rate": 9.38307212028554e-06, + "loss": 0.7985, + "step": 3600 + }, + { + "epoch": 0.1850652687840477, + "grad_norm": 1.1410014629364014, + "learning_rate": 9.382671584349848e-06, + "loss": 0.7292, + "step": 3601 + }, + { + "epoch": 0.18511666152739234, + "grad_norm": 1.5273505449295044, + "learning_rate": 9.382270926988413e-06, + "loss": 0.7942, + "step": 3602 + }, + { + "epoch": 0.18516805427073696, + "grad_norm": 1.2378844022750854, + "learning_rate": 9.381870148212335e-06, + "loss": 0.7689, + "step": 3603 + }, + { + "epoch": 0.1852194470140816, + "grad_norm": 1.1396708488464355, + "learning_rate": 9.38146924803272e-06, + "loss": 0.8114, + "step": 3604 + }, + { + "epoch": 0.18527083975742625, + "grad_norm": 1.0924010276794434, + "learning_rate": 9.381068226460672e-06, + "loss": 0.727, + "step": 3605 + }, + { + "epoch": 0.1853222325007709, + "grad_norm": 0.9004477262496948, + "learning_rate": 9.380667083507304e-06, + "loss": 0.6756, + "step": 3606 + }, + { + "epoch": 0.18537362524411552, + "grad_norm": 1.1782633066177368, + "learning_rate": 9.380265819183729e-06, + "loss": 0.7889, + "step": 3607 + }, + { + "epoch": 0.18542501798746017, + "grad_norm": 0.773456871509552, + "learning_rate": 9.379864433501064e-06, + "loss": 0.7265, + "step": 3608 + }, + { + "epoch": 0.1854764107308048, + "grad_norm": 1.1103293895721436, + "learning_rate": 9.37946292647043e-06, + "loss": 0.8059, + "step": 3609 + }, + { + "epoch": 0.18552780347414946, + "grad_norm": 1.1295528411865234, + "learning_rate": 9.379061298102952e-06, + "loss": 0.776, + "step": 3610 + }, + { + "epoch": 0.18557919621749408, + "grad_norm": 0.8431046009063721, + "learning_rate": 9.378659548409755e-06, + "loss": 0.7427, + "step": 3611 + }, + { + "epoch": 0.18563058896083873, + "grad_norm": 1.2043536901474, + "learning_rate": 9.378257677401972e-06, + "loss": 0.9075, + "step": 3612 + }, + { + "epoch": 0.18568198170418337, + "grad_norm": 1.0653971433639526, + "learning_rate": 9.377855685090738e-06, + "loss": 0.7766, + "step": 3613 + }, + { + "epoch": 0.18573337444752802, + "grad_norm": 1.089861512184143, + "learning_rate": 9.37745357148719e-06, + "loss": 0.7795, + "step": 3614 + }, + { + "epoch": 0.18578476719087264, + "grad_norm": 1.1111794710159302, + "learning_rate": 9.377051336602467e-06, + "loss": 0.7771, + "step": 3615 + }, + { + "epoch": 0.1858361599342173, + "grad_norm": 1.1197954416275024, + "learning_rate": 9.376648980447713e-06, + "loss": 0.7751, + "step": 3616 + }, + { + "epoch": 0.18588755267756193, + "grad_norm": 0.8791465759277344, + "learning_rate": 9.37624650303408e-06, + "loss": 0.7822, + "step": 3617 + }, + { + "epoch": 0.18593894542090658, + "grad_norm": 1.0727379322052002, + "learning_rate": 9.375843904372714e-06, + "loss": 0.7831, + "step": 3618 + }, + { + "epoch": 0.1859903381642512, + "grad_norm": 1.0167869329452515, + "learning_rate": 9.375441184474773e-06, + "loss": 0.7406, + "step": 3619 + }, + { + "epoch": 0.18604173090759585, + "grad_norm": 1.213617205619812, + "learning_rate": 9.375038343351412e-06, + "loss": 0.809, + "step": 3620 + }, + { + "epoch": 0.1860931236509405, + "grad_norm": 1.1311434507369995, + "learning_rate": 9.374635381013793e-06, + "loss": 0.7872, + "step": 3621 + }, + { + "epoch": 0.18614451639428511, + "grad_norm": 1.034319281578064, + "learning_rate": 9.374232297473082e-06, + "loss": 0.7447, + "step": 3622 + }, + { + "epoch": 0.18619590913762976, + "grad_norm": 1.1859114170074463, + "learning_rate": 9.373829092740444e-06, + "loss": 0.8156, + "step": 3623 + }, + { + "epoch": 0.1862473018809744, + "grad_norm": 1.1164816617965698, + "learning_rate": 9.373425766827053e-06, + "loss": 0.7714, + "step": 3624 + }, + { + "epoch": 0.18629869462431906, + "grad_norm": 1.1615970134735107, + "learning_rate": 9.37302231974408e-06, + "loss": 0.86, + "step": 3625 + }, + { + "epoch": 0.18635008736766367, + "grad_norm": 1.1209359169006348, + "learning_rate": 9.372618751502706e-06, + "loss": 0.7202, + "step": 3626 + }, + { + "epoch": 0.18640148011100832, + "grad_norm": 1.1102511882781982, + "learning_rate": 9.372215062114111e-06, + "loss": 0.8177, + "step": 3627 + }, + { + "epoch": 0.18645287285435297, + "grad_norm": 1.2142083644866943, + "learning_rate": 9.37181125158948e-06, + "loss": 0.8385, + "step": 3628 + }, + { + "epoch": 0.18650426559769762, + "grad_norm": 0.8942787647247314, + "learning_rate": 9.371407319940003e-06, + "loss": 0.7243, + "step": 3629 + }, + { + "epoch": 0.18655565834104224, + "grad_norm": 1.1200281381607056, + "learning_rate": 9.371003267176865e-06, + "loss": 0.8222, + "step": 3630 + }, + { + "epoch": 0.18660705108438688, + "grad_norm": 1.1421840190887451, + "learning_rate": 9.370599093311266e-06, + "loss": 0.8077, + "step": 3631 + }, + { + "epoch": 0.18665844382773153, + "grad_norm": 1.088209629058838, + "learning_rate": 9.370194798354403e-06, + "loss": 0.8145, + "step": 3632 + }, + { + "epoch": 0.18670983657107618, + "grad_norm": 1.0613124370574951, + "learning_rate": 9.369790382317476e-06, + "loss": 0.7607, + "step": 3633 + }, + { + "epoch": 0.1867612293144208, + "grad_norm": 1.1450103521347046, + "learning_rate": 9.369385845211692e-06, + "loss": 0.769, + "step": 3634 + }, + { + "epoch": 0.18681262205776544, + "grad_norm": 0.7586758136749268, + "learning_rate": 9.368981187048256e-06, + "loss": 0.7054, + "step": 3635 + }, + { + "epoch": 0.1868640148011101, + "grad_norm": 1.105120301246643, + "learning_rate": 9.368576407838381e-06, + "loss": 0.7947, + "step": 3636 + }, + { + "epoch": 0.1869154075444547, + "grad_norm": 1.1226897239685059, + "learning_rate": 9.368171507593283e-06, + "loss": 0.8463, + "step": 3637 + }, + { + "epoch": 0.18696680028779936, + "grad_norm": 0.7046691179275513, + "learning_rate": 9.367766486324179e-06, + "loss": 0.6549, + "step": 3638 + }, + { + "epoch": 0.187018193031144, + "grad_norm": 1.075786828994751, + "learning_rate": 9.367361344042289e-06, + "loss": 0.8164, + "step": 3639 + }, + { + "epoch": 0.18706958577448865, + "grad_norm": 1.1229912042617798, + "learning_rate": 9.36695608075884e-06, + "loss": 0.782, + "step": 3640 + }, + { + "epoch": 0.18712097851783327, + "grad_norm": 1.0883831977844238, + "learning_rate": 9.36655069648506e-06, + "loss": 0.7906, + "step": 3641 + }, + { + "epoch": 0.18717237126117792, + "grad_norm": 1.0604411363601685, + "learning_rate": 9.366145191232179e-06, + "loss": 0.7722, + "step": 3642 + }, + { + "epoch": 0.18722376400452256, + "grad_norm": 1.1309033632278442, + "learning_rate": 9.365739565011433e-06, + "loss": 0.8232, + "step": 3643 + }, + { + "epoch": 0.1872751567478672, + "grad_norm": 1.1275619268417358, + "learning_rate": 9.36533381783406e-06, + "loss": 0.8299, + "step": 3644 + }, + { + "epoch": 0.18732654949121183, + "grad_norm": 0.8733184933662415, + "learning_rate": 9.3649279497113e-06, + "loss": 0.6955, + "step": 3645 + }, + { + "epoch": 0.18737794223455648, + "grad_norm": 0.7558917999267578, + "learning_rate": 9.364521960654403e-06, + "loss": 0.7217, + "step": 3646 + }, + { + "epoch": 0.18742933497790112, + "grad_norm": 1.1200082302093506, + "learning_rate": 9.364115850674611e-06, + "loss": 0.8107, + "step": 3647 + }, + { + "epoch": 0.18748072772124577, + "grad_norm": 1.1277496814727783, + "learning_rate": 9.363709619783179e-06, + "loss": 0.7788, + "step": 3648 + }, + { + "epoch": 0.1875321204645904, + "grad_norm": 0.880764901638031, + "learning_rate": 9.363303267991362e-06, + "loss": 0.7323, + "step": 3649 + }, + { + "epoch": 0.18758351320793504, + "grad_norm": 1.126083493232727, + "learning_rate": 9.362896795310417e-06, + "loss": 0.8227, + "step": 3650 + }, + { + "epoch": 0.18763490595127968, + "grad_norm": 1.1959525346755981, + "learning_rate": 9.362490201751606e-06, + "loss": 0.8221, + "step": 3651 + }, + { + "epoch": 0.18768629869462433, + "grad_norm": 1.0710294246673584, + "learning_rate": 9.362083487326196e-06, + "loss": 0.775, + "step": 3652 + }, + { + "epoch": 0.18773769143796895, + "grad_norm": 1.0628138780593872, + "learning_rate": 9.361676652045453e-06, + "loss": 0.7714, + "step": 3653 + }, + { + "epoch": 0.1877890841813136, + "grad_norm": 1.0532938241958618, + "learning_rate": 9.36126969592065e-06, + "loss": 0.7778, + "step": 3654 + }, + { + "epoch": 0.18784047692465825, + "grad_norm": 1.0857855081558228, + "learning_rate": 9.36086261896306e-06, + "loss": 0.8193, + "step": 3655 + }, + { + "epoch": 0.18789186966800286, + "grad_norm": 0.904384434223175, + "learning_rate": 9.360455421183965e-06, + "loss": 0.7433, + "step": 3656 + }, + { + "epoch": 0.1879432624113475, + "grad_norm": 1.0233732461929321, + "learning_rate": 9.360048102594645e-06, + "loss": 0.8354, + "step": 3657 + }, + { + "epoch": 0.18799465515469216, + "grad_norm": 1.072852373123169, + "learning_rate": 9.359640663206385e-06, + "loss": 0.7963, + "step": 3658 + }, + { + "epoch": 0.1880460478980368, + "grad_norm": 1.0768646001815796, + "learning_rate": 9.35923310303047e-06, + "loss": 0.7461, + "step": 3659 + }, + { + "epoch": 0.18809744064138142, + "grad_norm": 0.8939974308013916, + "learning_rate": 9.358825422078197e-06, + "loss": 0.7216, + "step": 3660 + }, + { + "epoch": 0.18814883338472607, + "grad_norm": 1.1154544353485107, + "learning_rate": 9.358417620360862e-06, + "loss": 0.8072, + "step": 3661 + }, + { + "epoch": 0.18820022612807072, + "grad_norm": 1.0925756692886353, + "learning_rate": 9.358009697889758e-06, + "loss": 0.827, + "step": 3662 + }, + { + "epoch": 0.18825161887141537, + "grad_norm": 1.1930550336837769, + "learning_rate": 9.357601654676191e-06, + "loss": 0.8483, + "step": 3663 + }, + { + "epoch": 0.18830301161475999, + "grad_norm": 1.1426748037338257, + "learning_rate": 9.357193490731464e-06, + "loss": 0.7981, + "step": 3664 + }, + { + "epoch": 0.18835440435810463, + "grad_norm": 5.05674934387207, + "learning_rate": 9.356785206066887e-06, + "loss": 0.7186, + "step": 3665 + }, + { + "epoch": 0.18840579710144928, + "grad_norm": 1.112631916999817, + "learning_rate": 9.356376800693772e-06, + "loss": 0.7854, + "step": 3666 + }, + { + "epoch": 0.18845718984479393, + "grad_norm": 1.1439162492752075, + "learning_rate": 9.355968274623432e-06, + "loss": 0.8133, + "step": 3667 + }, + { + "epoch": 0.18850858258813855, + "grad_norm": 1.0858850479125977, + "learning_rate": 9.355559627867187e-06, + "loss": 0.8333, + "step": 3668 + }, + { + "epoch": 0.1885599753314832, + "grad_norm": 1.0909470319747925, + "learning_rate": 9.355150860436362e-06, + "loss": 0.7845, + "step": 3669 + }, + { + "epoch": 0.18861136807482784, + "grad_norm": 1.1100512742996216, + "learning_rate": 9.354741972342276e-06, + "loss": 0.8011, + "step": 3670 + }, + { + "epoch": 0.1886627608181725, + "grad_norm": 1.103771448135376, + "learning_rate": 9.354332963596262e-06, + "loss": 0.7495, + "step": 3671 + }, + { + "epoch": 0.1887141535615171, + "grad_norm": 1.2236180305480957, + "learning_rate": 9.353923834209651e-06, + "loss": 0.7595, + "step": 3672 + }, + { + "epoch": 0.18876554630486175, + "grad_norm": 1.1341357231140137, + "learning_rate": 9.35351458419378e-06, + "loss": 0.7438, + "step": 3673 + }, + { + "epoch": 0.1888169390482064, + "grad_norm": 0.872042179107666, + "learning_rate": 9.353105213559983e-06, + "loss": 0.6686, + "step": 3674 + }, + { + "epoch": 0.18886833179155102, + "grad_norm": 1.5196270942687988, + "learning_rate": 9.352695722319606e-06, + "loss": 0.7337, + "step": 3675 + }, + { + "epoch": 0.18891972453489567, + "grad_norm": 1.1158409118652344, + "learning_rate": 9.352286110483993e-06, + "loss": 0.799, + "step": 3676 + }, + { + "epoch": 0.1889711172782403, + "grad_norm": 1.0876052379608154, + "learning_rate": 9.351876378064493e-06, + "loss": 0.7819, + "step": 3677 + }, + { + "epoch": 0.18902251002158496, + "grad_norm": 0.6968023777008057, + "learning_rate": 9.351466525072457e-06, + "loss": 0.7145, + "step": 3678 + }, + { + "epoch": 0.18907390276492958, + "grad_norm": 1.1194862127304077, + "learning_rate": 9.35105655151924e-06, + "loss": 0.8109, + "step": 3679 + }, + { + "epoch": 0.18912529550827423, + "grad_norm": 1.2120953798294067, + "learning_rate": 9.350646457416203e-06, + "loss": 0.774, + "step": 3680 + }, + { + "epoch": 0.18917668825161887, + "grad_norm": 1.2555986642837524, + "learning_rate": 9.350236242774705e-06, + "loss": 0.7784, + "step": 3681 + }, + { + "epoch": 0.18922808099496352, + "grad_norm": 1.0862518548965454, + "learning_rate": 9.349825907606116e-06, + "loss": 0.7815, + "step": 3682 + }, + { + "epoch": 0.18927947373830814, + "grad_norm": 1.1779824495315552, + "learning_rate": 9.3494154519218e-06, + "loss": 0.7668, + "step": 3683 + }, + { + "epoch": 0.1893308664816528, + "grad_norm": 1.0437113046646118, + "learning_rate": 9.349004875733131e-06, + "loss": 0.7929, + "step": 3684 + }, + { + "epoch": 0.18938225922499743, + "grad_norm": 1.253122091293335, + "learning_rate": 9.348594179051485e-06, + "loss": 0.7733, + "step": 3685 + }, + { + "epoch": 0.18943365196834208, + "grad_norm": 1.129672884941101, + "learning_rate": 9.34818336188824e-06, + "loss": 0.8384, + "step": 3686 + }, + { + "epoch": 0.1894850447116867, + "grad_norm": 1.2041178941726685, + "learning_rate": 9.347772424254777e-06, + "loss": 0.8055, + "step": 3687 + }, + { + "epoch": 0.18953643745503135, + "grad_norm": 1.0828802585601807, + "learning_rate": 9.347361366162483e-06, + "loss": 0.8032, + "step": 3688 + }, + { + "epoch": 0.189587830198376, + "grad_norm": 1.1776783466339111, + "learning_rate": 9.346950187622745e-06, + "loss": 0.8204, + "step": 3689 + }, + { + "epoch": 0.18963922294172064, + "grad_norm": 0.8706642985343933, + "learning_rate": 9.346538888646956e-06, + "loss": 0.7198, + "step": 3690 + }, + { + "epoch": 0.18969061568506526, + "grad_norm": 1.130881905555725, + "learning_rate": 9.346127469246513e-06, + "loss": 0.8392, + "step": 3691 + }, + { + "epoch": 0.1897420084284099, + "grad_norm": 1.147912859916687, + "learning_rate": 9.345715929432812e-06, + "loss": 0.8317, + "step": 3692 + }, + { + "epoch": 0.18979340117175456, + "grad_norm": 1.0743414163589478, + "learning_rate": 9.345304269217258e-06, + "loss": 0.789, + "step": 3693 + }, + { + "epoch": 0.18984479391509917, + "grad_norm": 1.1149293184280396, + "learning_rate": 9.344892488611253e-06, + "loss": 0.7819, + "step": 3694 + }, + { + "epoch": 0.18989618665844382, + "grad_norm": 1.116666316986084, + "learning_rate": 9.344480587626207e-06, + "loss": 0.7696, + "step": 3695 + }, + { + "epoch": 0.18994757940178847, + "grad_norm": 1.1542799472808838, + "learning_rate": 9.344068566273535e-06, + "loss": 0.7725, + "step": 3696 + }, + { + "epoch": 0.18999897214513312, + "grad_norm": 1.2475882768630981, + "learning_rate": 9.34365642456465e-06, + "loss": 0.748, + "step": 3697 + }, + { + "epoch": 0.19005036488847774, + "grad_norm": 1.0761419534683228, + "learning_rate": 9.343244162510966e-06, + "loss": 0.8178, + "step": 3698 + }, + { + "epoch": 0.19010175763182238, + "grad_norm": 1.0996686220169067, + "learning_rate": 9.342831780123914e-06, + "loss": 0.7922, + "step": 3699 + }, + { + "epoch": 0.19015315037516703, + "grad_norm": 1.0922256708145142, + "learning_rate": 9.342419277414914e-06, + "loss": 0.8074, + "step": 3700 + }, + { + "epoch": 0.19020454311851168, + "grad_norm": 0.8283430337905884, + "learning_rate": 9.342006654395396e-06, + "loss": 0.716, + "step": 3701 + }, + { + "epoch": 0.1902559358618563, + "grad_norm": 0.8511554002761841, + "learning_rate": 9.341593911076791e-06, + "loss": 0.7315, + "step": 3702 + }, + { + "epoch": 0.19030732860520094, + "grad_norm": 1.11526620388031, + "learning_rate": 9.341181047470538e-06, + "loss": 0.8215, + "step": 3703 + }, + { + "epoch": 0.1903587213485456, + "grad_norm": 1.1742713451385498, + "learning_rate": 9.34076806358807e-06, + "loss": 0.8056, + "step": 3704 + }, + { + "epoch": 0.19041011409189024, + "grad_norm": 1.173837423324585, + "learning_rate": 9.340354959440835e-06, + "loss": 0.8499, + "step": 3705 + }, + { + "epoch": 0.19046150683523486, + "grad_norm": 1.06858229637146, + "learning_rate": 9.339941735040274e-06, + "loss": 0.7365, + "step": 3706 + }, + { + "epoch": 0.1905128995785795, + "grad_norm": 1.158582329750061, + "learning_rate": 9.339528390397839e-06, + "loss": 0.876, + "step": 3707 + }, + { + "epoch": 0.19056429232192415, + "grad_norm": 1.0987800359725952, + "learning_rate": 9.33911492552498e-06, + "loss": 0.7901, + "step": 3708 + }, + { + "epoch": 0.1906156850652688, + "grad_norm": 1.1143357753753662, + "learning_rate": 9.338701340433152e-06, + "loss": 0.8236, + "step": 3709 + }, + { + "epoch": 0.19066707780861342, + "grad_norm": 1.2264057397842407, + "learning_rate": 9.338287635133814e-06, + "loss": 0.8332, + "step": 3710 + }, + { + "epoch": 0.19071847055195806, + "grad_norm": 3.8975017070770264, + "learning_rate": 9.337873809638428e-06, + "loss": 0.7913, + "step": 3711 + }, + { + "epoch": 0.1907698632953027, + "grad_norm": 1.0601764917373657, + "learning_rate": 9.337459863958462e-06, + "loss": 0.8054, + "step": 3712 + }, + { + "epoch": 0.19082125603864733, + "grad_norm": 0.8995238542556763, + "learning_rate": 9.337045798105384e-06, + "loss": 0.7266, + "step": 3713 + }, + { + "epoch": 0.19087264878199198, + "grad_norm": 1.1024494171142578, + "learning_rate": 9.336631612090663e-06, + "loss": 0.7341, + "step": 3714 + }, + { + "epoch": 0.19092404152533662, + "grad_norm": 0.8019346594810486, + "learning_rate": 9.336217305925777e-06, + "loss": 0.675, + "step": 3715 + }, + { + "epoch": 0.19097543426868127, + "grad_norm": 1.1403672695159912, + "learning_rate": 9.335802879622203e-06, + "loss": 0.8044, + "step": 3716 + }, + { + "epoch": 0.1910268270120259, + "grad_norm": 1.1501086950302124, + "learning_rate": 9.335388333191424e-06, + "loss": 0.8401, + "step": 3717 + }, + { + "epoch": 0.19107821975537054, + "grad_norm": 1.1162502765655518, + "learning_rate": 9.334973666644927e-06, + "loss": 0.7691, + "step": 3718 + }, + { + "epoch": 0.19112961249871518, + "grad_norm": 1.174485445022583, + "learning_rate": 9.334558879994198e-06, + "loss": 0.8362, + "step": 3719 + }, + { + "epoch": 0.19118100524205983, + "grad_norm": 0.8484004139900208, + "learning_rate": 9.33414397325073e-06, + "loss": 0.7093, + "step": 3720 + }, + { + "epoch": 0.19123239798540445, + "grad_norm": 1.0226993560791016, + "learning_rate": 9.333728946426018e-06, + "loss": 0.7975, + "step": 3721 + }, + { + "epoch": 0.1912837907287491, + "grad_norm": 1.1397813558578491, + "learning_rate": 9.333313799531563e-06, + "loss": 0.8299, + "step": 3722 + }, + { + "epoch": 0.19133518347209374, + "grad_norm": 0.7835520505905151, + "learning_rate": 9.332898532578862e-06, + "loss": 0.7185, + "step": 3723 + }, + { + "epoch": 0.1913865762154384, + "grad_norm": 1.1082810163497925, + "learning_rate": 9.332483145579427e-06, + "loss": 0.7298, + "step": 3724 + }, + { + "epoch": 0.191437968958783, + "grad_norm": 1.471495509147644, + "learning_rate": 9.33206763854476e-06, + "loss": 0.8584, + "step": 3725 + }, + { + "epoch": 0.19148936170212766, + "grad_norm": 1.175403356552124, + "learning_rate": 9.331652011486378e-06, + "loss": 0.8538, + "step": 3726 + }, + { + "epoch": 0.1915407544454723, + "grad_norm": 0.8146130442619324, + "learning_rate": 9.331236264415795e-06, + "loss": 0.7222, + "step": 3727 + }, + { + "epoch": 0.19159214718881695, + "grad_norm": 1.1014879941940308, + "learning_rate": 9.330820397344528e-06, + "loss": 0.8092, + "step": 3728 + }, + { + "epoch": 0.19164353993216157, + "grad_norm": 1.1478755474090576, + "learning_rate": 9.3304044102841e-06, + "loss": 0.7852, + "step": 3729 + }, + { + "epoch": 0.19169493267550622, + "grad_norm": 0.7476874589920044, + "learning_rate": 9.329988303246036e-06, + "loss": 0.7202, + "step": 3730 + }, + { + "epoch": 0.19174632541885087, + "grad_norm": 1.1256016492843628, + "learning_rate": 9.329572076241866e-06, + "loss": 0.7997, + "step": 3731 + }, + { + "epoch": 0.19179771816219549, + "grad_norm": 0.703258216381073, + "learning_rate": 9.329155729283118e-06, + "loss": 0.6871, + "step": 3732 + }, + { + "epoch": 0.19184911090554013, + "grad_norm": 0.8316715359687805, + "learning_rate": 9.328739262381335e-06, + "loss": 0.7266, + "step": 3733 + }, + { + "epoch": 0.19190050364888478, + "grad_norm": 1.133061170578003, + "learning_rate": 9.328322675548048e-06, + "loss": 0.8332, + "step": 3734 + }, + { + "epoch": 0.19195189639222943, + "grad_norm": 0.7546510696411133, + "learning_rate": 9.3279059687948e-06, + "loss": 0.679, + "step": 3735 + }, + { + "epoch": 0.19200328913557405, + "grad_norm": 1.5760674476623535, + "learning_rate": 9.32748914213314e-06, + "loss": 0.7254, + "step": 3736 + }, + { + "epoch": 0.1920546818789187, + "grad_norm": 0.7752396464347839, + "learning_rate": 9.327072195574613e-06, + "loss": 0.6932, + "step": 3737 + }, + { + "epoch": 0.19210607462226334, + "grad_norm": 0.7861957550048828, + "learning_rate": 9.326655129130774e-06, + "loss": 0.7448, + "step": 3738 + }, + { + "epoch": 0.192157467365608, + "grad_norm": 1.0868723392486572, + "learning_rate": 9.326237942813175e-06, + "loss": 0.8358, + "step": 3739 + }, + { + "epoch": 0.1922088601089526, + "grad_norm": 1.277090311050415, + "learning_rate": 9.325820636633376e-06, + "loss": 0.8165, + "step": 3740 + }, + { + "epoch": 0.19226025285229725, + "grad_norm": 1.1212096214294434, + "learning_rate": 9.325403210602938e-06, + "loss": 0.8236, + "step": 3741 + }, + { + "epoch": 0.1923116455956419, + "grad_norm": 1.1101020574569702, + "learning_rate": 9.32498566473343e-06, + "loss": 0.7992, + "step": 3742 + }, + { + "epoch": 0.19236303833898655, + "grad_norm": 1.0129382610321045, + "learning_rate": 9.324567999036415e-06, + "loss": 0.7883, + "step": 3743 + }, + { + "epoch": 0.19241443108233117, + "grad_norm": 1.1038954257965088, + "learning_rate": 9.324150213523468e-06, + "loss": 0.7249, + "step": 3744 + }, + { + "epoch": 0.1924658238256758, + "grad_norm": 1.139496922492981, + "learning_rate": 9.323732308206165e-06, + "loss": 0.842, + "step": 3745 + }, + { + "epoch": 0.19251721656902046, + "grad_norm": 1.0169386863708496, + "learning_rate": 9.32331428309608e-06, + "loss": 0.7364, + "step": 3746 + }, + { + "epoch": 0.1925686093123651, + "grad_norm": 1.2437777519226074, + "learning_rate": 9.322896138204798e-06, + "loss": 0.8004, + "step": 3747 + }, + { + "epoch": 0.19262000205570973, + "grad_norm": 1.134329915046692, + "learning_rate": 9.322477873543903e-06, + "loss": 0.7754, + "step": 3748 + }, + { + "epoch": 0.19267139479905437, + "grad_norm": 1.0473402738571167, + "learning_rate": 9.322059489124984e-06, + "loss": 0.8044, + "step": 3749 + }, + { + "epoch": 0.19272278754239902, + "grad_norm": 1.1545026302337646, + "learning_rate": 9.321640984959635e-06, + "loss": 0.7697, + "step": 3750 + }, + { + "epoch": 0.19277418028574364, + "grad_norm": 1.0880850553512573, + "learning_rate": 9.321222361059446e-06, + "loss": 0.786, + "step": 3751 + }, + { + "epoch": 0.1928255730290883, + "grad_norm": 1.2690527439117432, + "learning_rate": 9.32080361743602e-06, + "loss": 0.837, + "step": 3752 + }, + { + "epoch": 0.19287696577243293, + "grad_norm": 0.9260185956954956, + "learning_rate": 9.320384754100955e-06, + "loss": 0.6938, + "step": 3753 + }, + { + "epoch": 0.19292835851577758, + "grad_norm": 1.1241703033447266, + "learning_rate": 9.319965771065857e-06, + "loss": 0.7841, + "step": 3754 + }, + { + "epoch": 0.1929797512591222, + "grad_norm": 0.7416635155677795, + "learning_rate": 9.319546668342337e-06, + "loss": 0.6806, + "step": 3755 + }, + { + "epoch": 0.19303114400246685, + "grad_norm": 1.105777621269226, + "learning_rate": 9.319127445942004e-06, + "loss": 0.8363, + "step": 3756 + }, + { + "epoch": 0.1930825367458115, + "grad_norm": 1.1783965826034546, + "learning_rate": 9.318708103876473e-06, + "loss": 0.7916, + "step": 3757 + }, + { + "epoch": 0.19313392948915614, + "grad_norm": 1.2658344507217407, + "learning_rate": 9.318288642157362e-06, + "loss": 0.8243, + "step": 3758 + }, + { + "epoch": 0.19318532223250076, + "grad_norm": 1.0369431972503662, + "learning_rate": 9.317869060796296e-06, + "loss": 0.7732, + "step": 3759 + }, + { + "epoch": 0.1932367149758454, + "grad_norm": 1.2240486145019531, + "learning_rate": 9.317449359804894e-06, + "loss": 0.8045, + "step": 3760 + }, + { + "epoch": 0.19328810771919006, + "grad_norm": 1.1623338460922241, + "learning_rate": 9.31702953919479e-06, + "loss": 0.794, + "step": 3761 + }, + { + "epoch": 0.1933395004625347, + "grad_norm": 0.9955929517745972, + "learning_rate": 9.31660959897761e-06, + "loss": 0.733, + "step": 3762 + }, + { + "epoch": 0.19339089320587932, + "grad_norm": 1.1080474853515625, + "learning_rate": 9.316189539164993e-06, + "loss": 0.8142, + "step": 3763 + }, + { + "epoch": 0.19344228594922397, + "grad_norm": 1.0698845386505127, + "learning_rate": 9.315769359768576e-06, + "loss": 0.805, + "step": 3764 + }, + { + "epoch": 0.19349367869256862, + "grad_norm": 1.1078908443450928, + "learning_rate": 9.315349060799999e-06, + "loss": 0.797, + "step": 3765 + }, + { + "epoch": 0.19354507143591326, + "grad_norm": 0.9161761403083801, + "learning_rate": 9.314928642270909e-06, + "loss": 0.6327, + "step": 3766 + }, + { + "epoch": 0.19359646417925788, + "grad_norm": 1.1263906955718994, + "learning_rate": 9.314508104192953e-06, + "loss": 0.8345, + "step": 3767 + }, + { + "epoch": 0.19364785692260253, + "grad_norm": 1.1812494993209839, + "learning_rate": 9.314087446577781e-06, + "loss": 0.8077, + "step": 3768 + }, + { + "epoch": 0.19369924966594718, + "grad_norm": 0.8404086232185364, + "learning_rate": 9.31366666943705e-06, + "loss": 0.7044, + "step": 3769 + }, + { + "epoch": 0.1937506424092918, + "grad_norm": 0.7887621521949768, + "learning_rate": 9.31324577278242e-06, + "loss": 0.7329, + "step": 3770 + }, + { + "epoch": 0.19380203515263644, + "grad_norm": 1.035783052444458, + "learning_rate": 9.312824756625545e-06, + "loss": 0.7801, + "step": 3771 + }, + { + "epoch": 0.1938534278959811, + "grad_norm": 1.0649718046188354, + "learning_rate": 9.312403620978096e-06, + "loss": 0.7574, + "step": 3772 + }, + { + "epoch": 0.19390482063932574, + "grad_norm": 1.160948395729065, + "learning_rate": 9.311982365851738e-06, + "loss": 0.8266, + "step": 3773 + }, + { + "epoch": 0.19395621338267036, + "grad_norm": 0.7871479392051697, + "learning_rate": 9.311560991258145e-06, + "loss": 0.742, + "step": 3774 + }, + { + "epoch": 0.194007606126015, + "grad_norm": 1.09604811668396, + "learning_rate": 9.311139497208987e-06, + "loss": 0.8446, + "step": 3775 + }, + { + "epoch": 0.19405899886935965, + "grad_norm": 1.123671054840088, + "learning_rate": 9.310717883715946e-06, + "loss": 0.8413, + "step": 3776 + }, + { + "epoch": 0.1941103916127043, + "grad_norm": 0.842566728591919, + "learning_rate": 9.310296150790701e-06, + "loss": 0.6526, + "step": 3777 + }, + { + "epoch": 0.19416178435604892, + "grad_norm": 1.2030017375946045, + "learning_rate": 9.309874298444938e-06, + "loss": 0.7857, + "step": 3778 + }, + { + "epoch": 0.19421317709939356, + "grad_norm": 1.1333600282669067, + "learning_rate": 9.309452326690345e-06, + "loss": 0.7948, + "step": 3779 + }, + { + "epoch": 0.1942645698427382, + "grad_norm": 1.1983345746994019, + "learning_rate": 9.30903023553861e-06, + "loss": 0.8443, + "step": 3780 + }, + { + "epoch": 0.19431596258608286, + "grad_norm": 1.1283385753631592, + "learning_rate": 9.30860802500143e-06, + "loss": 0.7728, + "step": 3781 + }, + { + "epoch": 0.19436735532942748, + "grad_norm": 1.0331902503967285, + "learning_rate": 9.308185695090504e-06, + "loss": 0.769, + "step": 3782 + }, + { + "epoch": 0.19441874807277212, + "grad_norm": 1.0716427564620972, + "learning_rate": 9.30776324581753e-06, + "loss": 0.7599, + "step": 3783 + }, + { + "epoch": 0.19447014081611677, + "grad_norm": 1.1300439834594727, + "learning_rate": 9.307340677194213e-06, + "loss": 0.7798, + "step": 3784 + }, + { + "epoch": 0.1945215335594614, + "grad_norm": 1.1070586442947388, + "learning_rate": 9.30691798923226e-06, + "loss": 0.8404, + "step": 3785 + }, + { + "epoch": 0.19457292630280604, + "grad_norm": 1.079648733139038, + "learning_rate": 9.306495181943385e-06, + "loss": 0.7789, + "step": 3786 + }, + { + "epoch": 0.19462431904615068, + "grad_norm": 1.6093806028366089, + "learning_rate": 9.3060722553393e-06, + "loss": 0.7776, + "step": 3787 + }, + { + "epoch": 0.19467571178949533, + "grad_norm": 1.092764139175415, + "learning_rate": 9.305649209431724e-06, + "loss": 0.7642, + "step": 3788 + }, + { + "epoch": 0.19472710453283995, + "grad_norm": 1.1992213726043701, + "learning_rate": 9.305226044232375e-06, + "loss": 0.8456, + "step": 3789 + }, + { + "epoch": 0.1947784972761846, + "grad_norm": 1.08976411819458, + "learning_rate": 9.30480275975298e-06, + "loss": 0.7466, + "step": 3790 + }, + { + "epoch": 0.19482989001952924, + "grad_norm": 1.0628770589828491, + "learning_rate": 9.304379356005264e-06, + "loss": 0.8171, + "step": 3791 + }, + { + "epoch": 0.1948812827628739, + "grad_norm": 1.1960781812667847, + "learning_rate": 9.30395583300096e-06, + "loss": 0.8564, + "step": 3792 + }, + { + "epoch": 0.1949326755062185, + "grad_norm": 1.0505499839782715, + "learning_rate": 9.303532190751802e-06, + "loss": 0.784, + "step": 3793 + }, + { + "epoch": 0.19498406824956316, + "grad_norm": 1.1564574241638184, + "learning_rate": 9.303108429269526e-06, + "loss": 0.781, + "step": 3794 + }, + { + "epoch": 0.1950354609929078, + "grad_norm": 0.9236723184585571, + "learning_rate": 9.302684548565873e-06, + "loss": 0.7245, + "step": 3795 + }, + { + "epoch": 0.19508685373625245, + "grad_norm": 0.8070688843727112, + "learning_rate": 9.30226054865259e-06, + "loss": 0.6946, + "step": 3796 + }, + { + "epoch": 0.19513824647959707, + "grad_norm": 1.1324723958969116, + "learning_rate": 9.301836429541417e-06, + "loss": 0.774, + "step": 3797 + }, + { + "epoch": 0.19518963922294172, + "grad_norm": 1.1159038543701172, + "learning_rate": 9.301412191244112e-06, + "loss": 0.7959, + "step": 3798 + }, + { + "epoch": 0.19524103196628637, + "grad_norm": 1.1220502853393555, + "learning_rate": 9.300987833772425e-06, + "loss": 0.8143, + "step": 3799 + }, + { + "epoch": 0.195292424709631, + "grad_norm": 0.801337480545044, + "learning_rate": 9.300563357138114e-06, + "loss": 0.7252, + "step": 3800 + }, + { + "epoch": 0.19534381745297563, + "grad_norm": 1.302274465560913, + "learning_rate": 9.300138761352942e-06, + "loss": 0.7697, + "step": 3801 + }, + { + "epoch": 0.19539521019632028, + "grad_norm": 1.1282092332839966, + "learning_rate": 9.299714046428668e-06, + "loss": 0.8612, + "step": 3802 + }, + { + "epoch": 0.19544660293966493, + "grad_norm": 0.8231169581413269, + "learning_rate": 9.299289212377063e-06, + "loss": 0.6918, + "step": 3803 + }, + { + "epoch": 0.19549799568300955, + "grad_norm": 0.7592513561248779, + "learning_rate": 9.298864259209895e-06, + "loss": 0.719, + "step": 3804 + }, + { + "epoch": 0.1955493884263542, + "grad_norm": 0.8732669353485107, + "learning_rate": 9.29843918693894e-06, + "loss": 0.6627, + "step": 3805 + }, + { + "epoch": 0.19560078116969884, + "grad_norm": 1.1332786083221436, + "learning_rate": 9.298013995575974e-06, + "loss": 0.7734, + "step": 3806 + }, + { + "epoch": 0.1956521739130435, + "grad_norm": 1.1731846332550049, + "learning_rate": 9.297588685132775e-06, + "loss": 0.7988, + "step": 3807 + }, + { + "epoch": 0.1957035666563881, + "grad_norm": 1.1542022228240967, + "learning_rate": 9.29716325562113e-06, + "loss": 0.8031, + "step": 3808 + }, + { + "epoch": 0.19575495939973275, + "grad_norm": 0.8504632711410522, + "learning_rate": 9.296737707052824e-06, + "loss": 0.7005, + "step": 3809 + }, + { + "epoch": 0.1958063521430774, + "grad_norm": 0.8363313674926758, + "learning_rate": 9.296312039439649e-06, + "loss": 0.7286, + "step": 3810 + }, + { + "epoch": 0.19585774488642205, + "grad_norm": 0.8486849069595337, + "learning_rate": 9.295886252793395e-06, + "loss": 0.6998, + "step": 3811 + }, + { + "epoch": 0.19590913762976667, + "grad_norm": 1.174891471862793, + "learning_rate": 9.295460347125864e-06, + "loss": 0.7426, + "step": 3812 + }, + { + "epoch": 0.1959605303731113, + "grad_norm": 1.2724941968917847, + "learning_rate": 9.295034322448851e-06, + "loss": 0.7262, + "step": 3813 + }, + { + "epoch": 0.19601192311645596, + "grad_norm": 0.8891855478286743, + "learning_rate": 9.294608178774162e-06, + "loss": 0.7025, + "step": 3814 + }, + { + "epoch": 0.1960633158598006, + "grad_norm": 1.1561217308044434, + "learning_rate": 9.294181916113603e-06, + "loss": 0.7814, + "step": 3815 + }, + { + "epoch": 0.19611470860314523, + "grad_norm": 1.3036850690841675, + "learning_rate": 9.293755534478985e-06, + "loss": 0.7772, + "step": 3816 + }, + { + "epoch": 0.19616610134648987, + "grad_norm": 1.215003252029419, + "learning_rate": 9.29332903388212e-06, + "loss": 0.8149, + "step": 3817 + }, + { + "epoch": 0.19621749408983452, + "grad_norm": 1.151149868965149, + "learning_rate": 9.292902414334824e-06, + "loss": 0.7753, + "step": 3818 + }, + { + "epoch": 0.19626888683317917, + "grad_norm": 1.1120131015777588, + "learning_rate": 9.292475675848918e-06, + "loss": 0.7712, + "step": 3819 + }, + { + "epoch": 0.1963202795765238, + "grad_norm": 1.0402251482009888, + "learning_rate": 9.292048818436225e-06, + "loss": 0.6883, + "step": 3820 + }, + { + "epoch": 0.19637167231986843, + "grad_norm": 3.532841920852661, + "learning_rate": 9.291621842108572e-06, + "loss": 0.8344, + "step": 3821 + }, + { + "epoch": 0.19642306506321308, + "grad_norm": 1.1286754608154297, + "learning_rate": 9.291194746877788e-06, + "loss": 0.7671, + "step": 3822 + }, + { + "epoch": 0.1964744578065577, + "grad_norm": 1.1132481098175049, + "learning_rate": 9.290767532755706e-06, + "loss": 0.7829, + "step": 3823 + }, + { + "epoch": 0.19652585054990235, + "grad_norm": 1.101176381111145, + "learning_rate": 9.290340199754165e-06, + "loss": 0.7998, + "step": 3824 + }, + { + "epoch": 0.196577243293247, + "grad_norm": 1.1110650300979614, + "learning_rate": 9.289912747885e-06, + "loss": 0.776, + "step": 3825 + }, + { + "epoch": 0.19662863603659164, + "grad_norm": 1.0882169008255005, + "learning_rate": 9.289485177160055e-06, + "loss": 0.7483, + "step": 3826 + }, + { + "epoch": 0.19668002877993626, + "grad_norm": 0.8537433743476868, + "learning_rate": 9.28905748759118e-06, + "loss": 0.7084, + "step": 3827 + }, + { + "epoch": 0.1967314215232809, + "grad_norm": 0.8792944550514221, + "learning_rate": 9.288629679190222e-06, + "loss": 0.6742, + "step": 3828 + }, + { + "epoch": 0.19678281426662556, + "grad_norm": 0.7601463198661804, + "learning_rate": 9.288201751969031e-06, + "loss": 0.7048, + "step": 3829 + }, + { + "epoch": 0.1968342070099702, + "grad_norm": 1.265582799911499, + "learning_rate": 9.28777370593947e-06, + "loss": 0.8184, + "step": 3830 + }, + { + "epoch": 0.19688559975331482, + "grad_norm": 1.0593284368515015, + "learning_rate": 9.287345541113391e-06, + "loss": 0.8006, + "step": 3831 + }, + { + "epoch": 0.19693699249665947, + "grad_norm": 1.373486876487732, + "learning_rate": 9.286917257502658e-06, + "loss": 0.7975, + "step": 3832 + }, + { + "epoch": 0.19698838524000412, + "grad_norm": 1.134986162185669, + "learning_rate": 9.286488855119143e-06, + "loss": 0.7929, + "step": 3833 + }, + { + "epoch": 0.19703977798334876, + "grad_norm": 1.20670485496521, + "learning_rate": 9.28606033397471e-06, + "loss": 0.7903, + "step": 3834 + }, + { + "epoch": 0.19709117072669338, + "grad_norm": 1.1070822477340698, + "learning_rate": 9.285631694081233e-06, + "loss": 0.809, + "step": 3835 + }, + { + "epoch": 0.19714256347003803, + "grad_norm": 1.030637264251709, + "learning_rate": 9.285202935450586e-06, + "loss": 0.7232, + "step": 3836 + }, + { + "epoch": 0.19719395621338268, + "grad_norm": 1.0732930898666382, + "learning_rate": 9.28477405809465e-06, + "loss": 0.7694, + "step": 3837 + }, + { + "epoch": 0.19724534895672732, + "grad_norm": 1.1373001337051392, + "learning_rate": 9.284345062025306e-06, + "loss": 0.8151, + "step": 3838 + }, + { + "epoch": 0.19729674170007194, + "grad_norm": 1.0920013189315796, + "learning_rate": 9.283915947254443e-06, + "loss": 0.7599, + "step": 3839 + }, + { + "epoch": 0.1973481344434166, + "grad_norm": 1.09823477268219, + "learning_rate": 9.283486713793948e-06, + "loss": 0.7515, + "step": 3840 + }, + { + "epoch": 0.19739952718676124, + "grad_norm": 1.1406538486480713, + "learning_rate": 9.283057361655711e-06, + "loss": 0.8202, + "step": 3841 + }, + { + "epoch": 0.19745091993010586, + "grad_norm": 1.1148929595947266, + "learning_rate": 9.28262789085163e-06, + "loss": 0.774, + "step": 3842 + }, + { + "epoch": 0.1975023126734505, + "grad_norm": 1.0480315685272217, + "learning_rate": 9.282198301393606e-06, + "loss": 0.8107, + "step": 3843 + }, + { + "epoch": 0.19755370541679515, + "grad_norm": 1.1431549787521362, + "learning_rate": 9.281768593293536e-06, + "loss": 0.812, + "step": 3844 + }, + { + "epoch": 0.1976050981601398, + "grad_norm": 1.216841220855713, + "learning_rate": 9.281338766563328e-06, + "loss": 0.7588, + "step": 3845 + }, + { + "epoch": 0.19765649090348442, + "grad_norm": 1.1709885597229004, + "learning_rate": 9.280908821214893e-06, + "loss": 0.8153, + "step": 3846 + }, + { + "epoch": 0.19770788364682906, + "grad_norm": 1.1572009325027466, + "learning_rate": 9.280478757260138e-06, + "loss": 0.8147, + "step": 3847 + }, + { + "epoch": 0.1977592763901737, + "grad_norm": 1.1200300455093384, + "learning_rate": 9.280048574710983e-06, + "loss": 0.8242, + "step": 3848 + }, + { + "epoch": 0.19781066913351836, + "grad_norm": 1.059646487236023, + "learning_rate": 9.279618273579346e-06, + "loss": 0.7937, + "step": 3849 + }, + { + "epoch": 0.19786206187686298, + "grad_norm": 1.1451395750045776, + "learning_rate": 9.279187853877145e-06, + "loss": 0.8064, + "step": 3850 + }, + { + "epoch": 0.19791345462020762, + "grad_norm": 0.9026865363121033, + "learning_rate": 9.27875731561631e-06, + "loss": 0.7081, + "step": 3851 + }, + { + "epoch": 0.19796484736355227, + "grad_norm": 1.1173293590545654, + "learning_rate": 9.278326658808765e-06, + "loss": 0.7849, + "step": 3852 + }, + { + "epoch": 0.19801624010689692, + "grad_norm": 1.1208003759384155, + "learning_rate": 9.277895883466444e-06, + "loss": 0.8032, + "step": 3853 + }, + { + "epoch": 0.19806763285024154, + "grad_norm": 1.1404167413711548, + "learning_rate": 9.277464989601283e-06, + "loss": 0.7905, + "step": 3854 + }, + { + "epoch": 0.19811902559358618, + "grad_norm": 1.1774544715881348, + "learning_rate": 9.27703397722522e-06, + "loss": 0.7506, + "step": 3855 + }, + { + "epoch": 0.19817041833693083, + "grad_norm": 1.0444045066833496, + "learning_rate": 9.276602846350194e-06, + "loss": 0.8306, + "step": 3856 + }, + { + "epoch": 0.19822181108027548, + "grad_norm": 1.3302297592163086, + "learning_rate": 9.276171596988152e-06, + "loss": 0.7824, + "step": 3857 + }, + { + "epoch": 0.1982732038236201, + "grad_norm": 1.0858607292175293, + "learning_rate": 9.275740229151043e-06, + "loss": 0.7531, + "step": 3858 + }, + { + "epoch": 0.19832459656696474, + "grad_norm": 1.112226963043213, + "learning_rate": 9.275308742850815e-06, + "loss": 0.7737, + "step": 3859 + }, + { + "epoch": 0.1983759893103094, + "grad_norm": 1.1191438436508179, + "learning_rate": 9.274877138099427e-06, + "loss": 0.8537, + "step": 3860 + }, + { + "epoch": 0.198427382053654, + "grad_norm": 0.9639666676521301, + "learning_rate": 9.274445414908834e-06, + "loss": 0.6851, + "step": 3861 + }, + { + "epoch": 0.19847877479699866, + "grad_norm": 1.0692031383514404, + "learning_rate": 9.274013573290998e-06, + "loss": 0.7793, + "step": 3862 + }, + { + "epoch": 0.1985301675403433, + "grad_norm": 1.1660304069519043, + "learning_rate": 9.273581613257883e-06, + "loss": 0.8227, + "step": 3863 + }, + { + "epoch": 0.19858156028368795, + "grad_norm": 1.0875794887542725, + "learning_rate": 9.27314953482146e-06, + "loss": 0.8002, + "step": 3864 + }, + { + "epoch": 0.19863295302703257, + "grad_norm": 1.1471729278564453, + "learning_rate": 9.272717337993695e-06, + "loss": 0.8135, + "step": 3865 + }, + { + "epoch": 0.19868434577037722, + "grad_norm": 1.1451597213745117, + "learning_rate": 9.272285022786567e-06, + "loss": 0.798, + "step": 3866 + }, + { + "epoch": 0.19873573851372187, + "grad_norm": 1.1351687908172607, + "learning_rate": 9.27185258921205e-06, + "loss": 0.8556, + "step": 3867 + }, + { + "epoch": 0.1987871312570665, + "grad_norm": 1.0907889604568481, + "learning_rate": 9.271420037282127e-06, + "loss": 0.7618, + "step": 3868 + }, + { + "epoch": 0.19883852400041113, + "grad_norm": 1.0901378393173218, + "learning_rate": 9.270987367008784e-06, + "loss": 0.8302, + "step": 3869 + }, + { + "epoch": 0.19888991674375578, + "grad_norm": 1.127983570098877, + "learning_rate": 9.270554578404003e-06, + "loss": 0.8115, + "step": 3870 + }, + { + "epoch": 0.19894130948710043, + "grad_norm": 1.0593156814575195, + "learning_rate": 9.27012167147978e-06, + "loss": 0.8203, + "step": 3871 + }, + { + "epoch": 0.19899270223044507, + "grad_norm": 1.1964728832244873, + "learning_rate": 9.269688646248108e-06, + "loss": 0.7258, + "step": 3872 + }, + { + "epoch": 0.1990440949737897, + "grad_norm": 0.8178982138633728, + "learning_rate": 9.269255502720983e-06, + "loss": 0.7661, + "step": 3873 + }, + { + "epoch": 0.19909548771713434, + "grad_norm": 1.09017813205719, + "learning_rate": 9.268822240910404e-06, + "loss": 0.8371, + "step": 3874 + }, + { + "epoch": 0.199146880460479, + "grad_norm": 1.0892798900604248, + "learning_rate": 9.26838886082838e-06, + "loss": 0.7843, + "step": 3875 + }, + { + "epoch": 0.19919827320382363, + "grad_norm": 1.1496427059173584, + "learning_rate": 9.267955362486915e-06, + "loss": 0.8626, + "step": 3876 + }, + { + "epoch": 0.19924966594716825, + "grad_norm": 1.0521094799041748, + "learning_rate": 9.267521745898018e-06, + "loss": 0.745, + "step": 3877 + }, + { + "epoch": 0.1993010586905129, + "grad_norm": 1.0460869073867798, + "learning_rate": 9.267088011073706e-06, + "loss": 0.719, + "step": 3878 + }, + { + "epoch": 0.19935245143385755, + "grad_norm": 1.1293199062347412, + "learning_rate": 9.266654158025993e-06, + "loss": 0.8377, + "step": 3879 + }, + { + "epoch": 0.19940384417720217, + "grad_norm": 1.0580253601074219, + "learning_rate": 9.266220186766902e-06, + "loss": 0.7712, + "step": 3880 + }, + { + "epoch": 0.1994552369205468, + "grad_norm": 1.1796948909759521, + "learning_rate": 9.265786097308455e-06, + "loss": 0.7998, + "step": 3881 + }, + { + "epoch": 0.19950662966389146, + "grad_norm": 0.7304195165634155, + "learning_rate": 9.26535188966268e-06, + "loss": 0.7088, + "step": 3882 + }, + { + "epoch": 0.1995580224072361, + "grad_norm": 1.1592259407043457, + "learning_rate": 9.264917563841605e-06, + "loss": 0.8006, + "step": 3883 + }, + { + "epoch": 0.19960941515058073, + "grad_norm": 0.8051683902740479, + "learning_rate": 9.264483119857264e-06, + "loss": 0.6894, + "step": 3884 + }, + { + "epoch": 0.19966080789392537, + "grad_norm": 0.8977078795433044, + "learning_rate": 9.264048557721695e-06, + "loss": 0.7532, + "step": 3885 + }, + { + "epoch": 0.19971220063727002, + "grad_norm": 1.12592351436615, + "learning_rate": 9.263613877446937e-06, + "loss": 0.8084, + "step": 3886 + }, + { + "epoch": 0.19976359338061467, + "grad_norm": 1.1313230991363525, + "learning_rate": 9.263179079045032e-06, + "loss": 0.7487, + "step": 3887 + }, + { + "epoch": 0.1998149861239593, + "grad_norm": 1.094099998474121, + "learning_rate": 9.26274416252803e-06, + "loss": 0.7922, + "step": 3888 + }, + { + "epoch": 0.19986637886730393, + "grad_norm": 0.8224782943725586, + "learning_rate": 9.262309127907978e-06, + "loss": 0.6581, + "step": 3889 + }, + { + "epoch": 0.19991777161064858, + "grad_norm": 1.1467647552490234, + "learning_rate": 9.26187397519693e-06, + "loss": 0.7635, + "step": 3890 + }, + { + "epoch": 0.19996916435399323, + "grad_norm": 1.1063134670257568, + "learning_rate": 9.26143870440694e-06, + "loss": 0.7974, + "step": 3891 + }, + { + "epoch": 0.20002055709733785, + "grad_norm": 1.2155370712280273, + "learning_rate": 9.261003315550073e-06, + "loss": 0.7945, + "step": 3892 + }, + { + "epoch": 0.2000719498406825, + "grad_norm": 1.0735578536987305, + "learning_rate": 9.260567808638384e-06, + "loss": 0.7508, + "step": 3893 + }, + { + "epoch": 0.20012334258402714, + "grad_norm": 1.122339129447937, + "learning_rate": 9.260132183683945e-06, + "loss": 0.8278, + "step": 3894 + }, + { + "epoch": 0.2001747353273718, + "grad_norm": 1.0974421501159668, + "learning_rate": 9.259696440698824e-06, + "loss": 0.7616, + "step": 3895 + }, + { + "epoch": 0.2002261280707164, + "grad_norm": 1.0067311525344849, + "learning_rate": 9.259260579695094e-06, + "loss": 0.7508, + "step": 3896 + }, + { + "epoch": 0.20027752081406106, + "grad_norm": 1.1260629892349243, + "learning_rate": 9.25882460068483e-06, + "loss": 0.7416, + "step": 3897 + }, + { + "epoch": 0.2003289135574057, + "grad_norm": 1.3214530944824219, + "learning_rate": 9.258388503680112e-06, + "loss": 0.8449, + "step": 3898 + }, + { + "epoch": 0.20038030630075032, + "grad_norm": 1.1357483863830566, + "learning_rate": 9.25795228869302e-06, + "loss": 0.7538, + "step": 3899 + }, + { + "epoch": 0.20043169904409497, + "grad_norm": 1.0284645557403564, + "learning_rate": 9.257515955735643e-06, + "loss": 0.749, + "step": 3900 + }, + { + "epoch": 0.20048309178743962, + "grad_norm": 0.8301587104797363, + "learning_rate": 9.257079504820069e-06, + "loss": 0.7187, + "step": 3901 + }, + { + "epoch": 0.20053448453078426, + "grad_norm": 1.0816036462783813, + "learning_rate": 9.256642935958388e-06, + "loss": 0.7648, + "step": 3902 + }, + { + "epoch": 0.20058587727412888, + "grad_norm": 1.159629464149475, + "learning_rate": 9.256206249162698e-06, + "loss": 0.7748, + "step": 3903 + }, + { + "epoch": 0.20063727001747353, + "grad_norm": 1.116652011871338, + "learning_rate": 9.255769444445098e-06, + "loss": 0.7793, + "step": 3904 + }, + { + "epoch": 0.20068866276081818, + "grad_norm": 1.0868444442749023, + "learning_rate": 9.255332521817688e-06, + "loss": 0.8123, + "step": 3905 + }, + { + "epoch": 0.20074005550416282, + "grad_norm": 1.0706380605697632, + "learning_rate": 9.254895481292575e-06, + "loss": 0.8061, + "step": 3906 + }, + { + "epoch": 0.20079144824750744, + "grad_norm": 1.05228590965271, + "learning_rate": 9.25445832288187e-06, + "loss": 0.7678, + "step": 3907 + }, + { + "epoch": 0.2008428409908521, + "grad_norm": 1.0935534238815308, + "learning_rate": 9.254021046597678e-06, + "loss": 0.7984, + "step": 3908 + }, + { + "epoch": 0.20089423373419674, + "grad_norm": 0.7832483053207397, + "learning_rate": 9.253583652452118e-06, + "loss": 0.7512, + "step": 3909 + }, + { + "epoch": 0.20094562647754138, + "grad_norm": 0.8347471356391907, + "learning_rate": 9.253146140457313e-06, + "loss": 0.7254, + "step": 3910 + }, + { + "epoch": 0.200997019220886, + "grad_norm": 1.2558609247207642, + "learning_rate": 9.252708510625376e-06, + "loss": 0.7968, + "step": 3911 + }, + { + "epoch": 0.20104841196423065, + "grad_norm": 1.1686102151870728, + "learning_rate": 9.252270762968436e-06, + "loss": 0.8048, + "step": 3912 + }, + { + "epoch": 0.2010998047075753, + "grad_norm": 1.2918058633804321, + "learning_rate": 9.251832897498622e-06, + "loss": 0.8168, + "step": 3913 + }, + { + "epoch": 0.20115119745091992, + "grad_norm": 1.0661139488220215, + "learning_rate": 9.251394914228067e-06, + "loss": 0.7818, + "step": 3914 + }, + { + "epoch": 0.20120259019426456, + "grad_norm": 1.0685780048370361, + "learning_rate": 9.250956813168902e-06, + "loss": 0.783, + "step": 3915 + }, + { + "epoch": 0.2012539829376092, + "grad_norm": 0.7564540505409241, + "learning_rate": 9.250518594333264e-06, + "loss": 0.734, + "step": 3916 + }, + { + "epoch": 0.20130537568095386, + "grad_norm": 1.1519596576690674, + "learning_rate": 9.2500802577333e-06, + "loss": 0.7922, + "step": 3917 + }, + { + "epoch": 0.20135676842429848, + "grad_norm": 1.1389437913894653, + "learning_rate": 9.24964180338115e-06, + "loss": 0.8106, + "step": 3918 + }, + { + "epoch": 0.20140816116764312, + "grad_norm": 1.0936566591262817, + "learning_rate": 9.249203231288961e-06, + "loss": 0.7585, + "step": 3919 + }, + { + "epoch": 0.20145955391098777, + "grad_norm": 1.0326403379440308, + "learning_rate": 9.248764541468888e-06, + "loss": 0.7386, + "step": 3920 + }, + { + "epoch": 0.20151094665433242, + "grad_norm": 0.890773355960846, + "learning_rate": 9.248325733933084e-06, + "loss": 0.6995, + "step": 3921 + }, + { + "epoch": 0.20156233939767704, + "grad_norm": 1.1256107091903687, + "learning_rate": 9.247886808693702e-06, + "loss": 0.7557, + "step": 3922 + }, + { + "epoch": 0.20161373214102168, + "grad_norm": 1.1653518676757812, + "learning_rate": 9.24744776576291e-06, + "loss": 0.7848, + "step": 3923 + }, + { + "epoch": 0.20166512488436633, + "grad_norm": 1.076413869857788, + "learning_rate": 9.247008605152867e-06, + "loss": 0.8185, + "step": 3924 + }, + { + "epoch": 0.20171651762771098, + "grad_norm": 1.1787540912628174, + "learning_rate": 9.246569326875743e-06, + "loss": 0.844, + "step": 3925 + }, + { + "epoch": 0.2017679103710556, + "grad_norm": 1.1158901453018188, + "learning_rate": 9.246129930943706e-06, + "loss": 0.7596, + "step": 3926 + }, + { + "epoch": 0.20181930311440024, + "grad_norm": 1.0653053522109985, + "learning_rate": 9.245690417368933e-06, + "loss": 0.802, + "step": 3927 + }, + { + "epoch": 0.2018706958577449, + "grad_norm": 1.0104701519012451, + "learning_rate": 9.245250786163599e-06, + "loss": 0.7136, + "step": 3928 + }, + { + "epoch": 0.20192208860108954, + "grad_norm": 1.085080862045288, + "learning_rate": 9.244811037339884e-06, + "loss": 0.8028, + "step": 3929 + }, + { + "epoch": 0.20197348134443416, + "grad_norm": 1.0771104097366333, + "learning_rate": 9.244371170909973e-06, + "loss": 0.7695, + "step": 3930 + }, + { + "epoch": 0.2020248740877788, + "grad_norm": 1.0662623643875122, + "learning_rate": 9.243931186886052e-06, + "loss": 0.721, + "step": 3931 + }, + { + "epoch": 0.20207626683112345, + "grad_norm": 1.168370246887207, + "learning_rate": 9.243491085280311e-06, + "loss": 0.8244, + "step": 3932 + }, + { + "epoch": 0.20212765957446807, + "grad_norm": 0.7746744751930237, + "learning_rate": 9.243050866104946e-06, + "loss": 0.7088, + "step": 3933 + }, + { + "epoch": 0.20217905231781272, + "grad_norm": 1.1639010906219482, + "learning_rate": 9.24261052937215e-06, + "loss": 0.8214, + "step": 3934 + }, + { + "epoch": 0.20223044506115737, + "grad_norm": 1.0887398719787598, + "learning_rate": 9.242170075094125e-06, + "loss": 0.8387, + "step": 3935 + }, + { + "epoch": 0.202281837804502, + "grad_norm": 1.2270092964172363, + "learning_rate": 9.241729503283072e-06, + "loss": 0.8324, + "step": 3936 + }, + { + "epoch": 0.20233323054784663, + "grad_norm": 1.1386643648147583, + "learning_rate": 9.241288813951201e-06, + "loss": 0.8353, + "step": 3937 + }, + { + "epoch": 0.20238462329119128, + "grad_norm": 1.062848448753357, + "learning_rate": 9.240848007110719e-06, + "loss": 0.7861, + "step": 3938 + }, + { + "epoch": 0.20243601603453593, + "grad_norm": 0.7572140097618103, + "learning_rate": 9.24040708277384e-06, + "loss": 0.6962, + "step": 3939 + }, + { + "epoch": 0.20248740877788057, + "grad_norm": 1.1148643493652344, + "learning_rate": 9.23996604095278e-06, + "loss": 0.8414, + "step": 3940 + }, + { + "epoch": 0.2025388015212252, + "grad_norm": 1.0984480381011963, + "learning_rate": 9.239524881659758e-06, + "loss": 0.7667, + "step": 3941 + }, + { + "epoch": 0.20259019426456984, + "grad_norm": 1.2138004302978516, + "learning_rate": 9.239083604906997e-06, + "loss": 0.8037, + "step": 3942 + }, + { + "epoch": 0.2026415870079145, + "grad_norm": 1.1093533039093018, + "learning_rate": 9.238642210706723e-06, + "loss": 0.7777, + "step": 3943 + }, + { + "epoch": 0.20269297975125913, + "grad_norm": 1.08134126663208, + "learning_rate": 9.238200699071164e-06, + "loss": 0.7663, + "step": 3944 + }, + { + "epoch": 0.20274437249460375, + "grad_norm": 1.1083340644836426, + "learning_rate": 9.237759070012555e-06, + "loss": 0.827, + "step": 3945 + }, + { + "epoch": 0.2027957652379484, + "grad_norm": 1.0571959018707275, + "learning_rate": 9.23731732354313e-06, + "loss": 0.7139, + "step": 3946 + }, + { + "epoch": 0.20284715798129305, + "grad_norm": 1.0644627809524536, + "learning_rate": 9.236875459675129e-06, + "loss": 0.7247, + "step": 3947 + }, + { + "epoch": 0.2028985507246377, + "grad_norm": 0.9224267601966858, + "learning_rate": 9.236433478420792e-06, + "loss": 0.6676, + "step": 3948 + }, + { + "epoch": 0.2029499434679823, + "grad_norm": 1.1146587133407593, + "learning_rate": 9.235991379792368e-06, + "loss": 0.8352, + "step": 3949 + }, + { + "epoch": 0.20300133621132696, + "grad_norm": 1.0715752840042114, + "learning_rate": 9.235549163802102e-06, + "loss": 0.7266, + "step": 3950 + }, + { + "epoch": 0.2030527289546716, + "grad_norm": 0.8882959485054016, + "learning_rate": 9.235106830462249e-06, + "loss": 0.7155, + "step": 3951 + }, + { + "epoch": 0.20310412169801623, + "grad_norm": 1.1432756185531616, + "learning_rate": 9.234664379785064e-06, + "loss": 0.7905, + "step": 3952 + }, + { + "epoch": 0.20315551444136087, + "grad_norm": 1.0763963460922241, + "learning_rate": 9.234221811782802e-06, + "loss": 0.7916, + "step": 3953 + }, + { + "epoch": 0.20320690718470552, + "grad_norm": 0.881188690662384, + "learning_rate": 9.233779126467729e-06, + "loss": 0.7326, + "step": 3954 + }, + { + "epoch": 0.20325829992805017, + "grad_norm": 0.7955000996589661, + "learning_rate": 9.233336323852107e-06, + "loss": 0.742, + "step": 3955 + }, + { + "epoch": 0.2033096926713948, + "grad_norm": 1.071510910987854, + "learning_rate": 9.232893403948205e-06, + "loss": 0.7736, + "step": 3956 + }, + { + "epoch": 0.20336108541473943, + "grad_norm": 0.7522737979888916, + "learning_rate": 9.232450366768295e-06, + "loss": 0.6856, + "step": 3957 + }, + { + "epoch": 0.20341247815808408, + "grad_norm": 1.387927770614624, + "learning_rate": 9.232007212324654e-06, + "loss": 0.8402, + "step": 3958 + }, + { + "epoch": 0.20346387090142873, + "grad_norm": 0.8359056711196899, + "learning_rate": 9.231563940629555e-06, + "loss": 0.7266, + "step": 3959 + }, + { + "epoch": 0.20351526364477335, + "grad_norm": 0.7284241318702698, + "learning_rate": 9.231120551695283e-06, + "loss": 0.684, + "step": 3960 + }, + { + "epoch": 0.203566656388118, + "grad_norm": 1.181214451789856, + "learning_rate": 9.230677045534121e-06, + "loss": 0.8123, + "step": 3961 + }, + { + "epoch": 0.20361804913146264, + "grad_norm": 1.0820766687393188, + "learning_rate": 9.230233422158357e-06, + "loss": 0.7692, + "step": 3962 + }, + { + "epoch": 0.2036694418748073, + "grad_norm": 0.8252794742584229, + "learning_rate": 9.229789681580283e-06, + "loss": 0.7306, + "step": 3963 + }, + { + "epoch": 0.2037208346181519, + "grad_norm": 1.0831453800201416, + "learning_rate": 9.229345823812191e-06, + "loss": 0.7277, + "step": 3964 + }, + { + "epoch": 0.20377222736149656, + "grad_norm": 1.0945754051208496, + "learning_rate": 9.228901848866379e-06, + "loss": 0.8239, + "step": 3965 + }, + { + "epoch": 0.2038236201048412, + "grad_norm": 0.6960675716400146, + "learning_rate": 9.22845775675515e-06, + "loss": 0.6866, + "step": 3966 + }, + { + "epoch": 0.20387501284818585, + "grad_norm": 1.365492820739746, + "learning_rate": 9.228013547490805e-06, + "loss": 0.7776, + "step": 3967 + }, + { + "epoch": 0.20392640559153047, + "grad_norm": 0.7762057781219482, + "learning_rate": 9.227569221085655e-06, + "loss": 0.6648, + "step": 3968 + }, + { + "epoch": 0.20397779833487512, + "grad_norm": 1.1423217058181763, + "learning_rate": 9.227124777552006e-06, + "loss": 0.8792, + "step": 3969 + }, + { + "epoch": 0.20402919107821976, + "grad_norm": 1.0567117929458618, + "learning_rate": 9.226680216902176e-06, + "loss": 0.7905, + "step": 3970 + }, + { + "epoch": 0.20408058382156438, + "grad_norm": 1.1712135076522827, + "learning_rate": 9.226235539148476e-06, + "loss": 0.7913, + "step": 3971 + }, + { + "epoch": 0.20413197656490903, + "grad_norm": 1.0792492628097534, + "learning_rate": 9.225790744303231e-06, + "loss": 0.8193, + "step": 3972 + }, + { + "epoch": 0.20418336930825368, + "grad_norm": 1.0488131046295166, + "learning_rate": 9.225345832378765e-06, + "loss": 0.7826, + "step": 3973 + }, + { + "epoch": 0.20423476205159832, + "grad_norm": 1.082502007484436, + "learning_rate": 9.224900803387402e-06, + "loss": 0.8158, + "step": 3974 + }, + { + "epoch": 0.20428615479494294, + "grad_norm": 1.048316240310669, + "learning_rate": 9.224455657341474e-06, + "loss": 0.7843, + "step": 3975 + }, + { + "epoch": 0.2043375475382876, + "grad_norm": 0.8091312646865845, + "learning_rate": 9.22401039425331e-06, + "loss": 0.7491, + "step": 3976 + }, + { + "epoch": 0.20438894028163224, + "grad_norm": 1.1756280660629272, + "learning_rate": 9.223565014135252e-06, + "loss": 0.8003, + "step": 3977 + }, + { + "epoch": 0.20444033302497688, + "grad_norm": 1.1426849365234375, + "learning_rate": 9.223119516999636e-06, + "loss": 0.8063, + "step": 3978 + }, + { + "epoch": 0.2044917257683215, + "grad_norm": 1.0891655683517456, + "learning_rate": 9.222673902858807e-06, + "loss": 0.7901, + "step": 3979 + }, + { + "epoch": 0.20454311851166615, + "grad_norm": 0.9885302782058716, + "learning_rate": 9.22222817172511e-06, + "loss": 0.7391, + "step": 3980 + }, + { + "epoch": 0.2045945112550108, + "grad_norm": 1.2324937582015991, + "learning_rate": 9.221782323610894e-06, + "loss": 0.7814, + "step": 3981 + }, + { + "epoch": 0.20464590399835544, + "grad_norm": 1.2639241218566895, + "learning_rate": 9.221336358528512e-06, + "loss": 0.7528, + "step": 3982 + }, + { + "epoch": 0.20469729674170006, + "grad_norm": 1.1169867515563965, + "learning_rate": 9.22089027649032e-06, + "loss": 0.8051, + "step": 3983 + }, + { + "epoch": 0.2047486894850447, + "grad_norm": 1.1546446084976196, + "learning_rate": 9.220444077508678e-06, + "loss": 0.7803, + "step": 3984 + }, + { + "epoch": 0.20480008222838936, + "grad_norm": 1.2693123817443848, + "learning_rate": 9.219997761595944e-06, + "loss": 0.7983, + "step": 3985 + }, + { + "epoch": 0.204851474971734, + "grad_norm": 1.1667239665985107, + "learning_rate": 9.21955132876449e-06, + "loss": 0.8, + "step": 3986 + }, + { + "epoch": 0.20490286771507862, + "grad_norm": 1.0777825117111206, + "learning_rate": 9.219104779026682e-06, + "loss": 0.7892, + "step": 3987 + }, + { + "epoch": 0.20495426045842327, + "grad_norm": 1.0768225193023682, + "learning_rate": 9.21865811239489e-06, + "loss": 0.7799, + "step": 3988 + }, + { + "epoch": 0.20500565320176792, + "grad_norm": 1.1802806854248047, + "learning_rate": 9.218211328881492e-06, + "loss": 0.8588, + "step": 3989 + }, + { + "epoch": 0.20505704594511254, + "grad_norm": 1.0987366437911987, + "learning_rate": 9.217764428498865e-06, + "loss": 0.7738, + "step": 3990 + }, + { + "epoch": 0.20510843868845718, + "grad_norm": 0.7711794972419739, + "learning_rate": 9.217317411259392e-06, + "loss": 0.7196, + "step": 3991 + }, + { + "epoch": 0.20515983143180183, + "grad_norm": 1.0974634885787964, + "learning_rate": 9.216870277175458e-06, + "loss": 0.7549, + "step": 3992 + }, + { + "epoch": 0.20521122417514648, + "grad_norm": 1.1164509057998657, + "learning_rate": 9.21642302625945e-06, + "loss": 0.794, + "step": 3993 + }, + { + "epoch": 0.2052626169184911, + "grad_norm": 1.1832889318466187, + "learning_rate": 9.215975658523759e-06, + "loss": 0.8077, + "step": 3994 + }, + { + "epoch": 0.20531400966183574, + "grad_norm": 1.1268428564071655, + "learning_rate": 9.215528173980781e-06, + "loss": 0.7876, + "step": 3995 + }, + { + "epoch": 0.2053654024051804, + "grad_norm": 1.2040382623672485, + "learning_rate": 9.215080572642915e-06, + "loss": 0.7963, + "step": 3996 + }, + { + "epoch": 0.20541679514852504, + "grad_norm": 1.177782654762268, + "learning_rate": 9.21463285452256e-06, + "loss": 0.864, + "step": 3997 + }, + { + "epoch": 0.20546818789186966, + "grad_norm": 0.7775552868843079, + "learning_rate": 9.214185019632123e-06, + "loss": 0.6736, + "step": 3998 + }, + { + "epoch": 0.2055195806352143, + "grad_norm": 1.0257055759429932, + "learning_rate": 9.213737067984007e-06, + "loss": 0.8217, + "step": 3999 + }, + { + "epoch": 0.20557097337855895, + "grad_norm": 1.1647230386734009, + "learning_rate": 9.21328899959063e-06, + "loss": 0.829, + "step": 4000 + }, + { + "epoch": 0.2056223661219036, + "grad_norm": 0.7926165461540222, + "learning_rate": 9.212840814464399e-06, + "loss": 0.7381, + "step": 4001 + }, + { + "epoch": 0.20567375886524822, + "grad_norm": 0.770859956741333, + "learning_rate": 9.212392512617734e-06, + "loss": 0.7243, + "step": 4002 + }, + { + "epoch": 0.20572515160859287, + "grad_norm": 0.7493070363998413, + "learning_rate": 9.211944094063059e-06, + "loss": 0.7077, + "step": 4003 + }, + { + "epoch": 0.2057765443519375, + "grad_norm": 1.1485666036605835, + "learning_rate": 9.211495558812793e-06, + "loss": 0.7505, + "step": 4004 + }, + { + "epoch": 0.20582793709528216, + "grad_norm": 0.775187611579895, + "learning_rate": 9.211046906879363e-06, + "loss": 0.7456, + "step": 4005 + }, + { + "epoch": 0.20587932983862678, + "grad_norm": 1.141463041305542, + "learning_rate": 9.210598138275204e-06, + "loss": 0.7971, + "step": 4006 + }, + { + "epoch": 0.20593072258197143, + "grad_norm": 1.1968775987625122, + "learning_rate": 9.210149253012745e-06, + "loss": 0.8239, + "step": 4007 + }, + { + "epoch": 0.20598211532531607, + "grad_norm": 1.034183382987976, + "learning_rate": 9.209700251104426e-06, + "loss": 0.7546, + "step": 4008 + }, + { + "epoch": 0.2060335080686607, + "grad_norm": 1.1379923820495605, + "learning_rate": 9.209251132562685e-06, + "loss": 0.7989, + "step": 4009 + }, + { + "epoch": 0.20608490081200534, + "grad_norm": 1.0693378448486328, + "learning_rate": 9.208801897399966e-06, + "loss": 0.8053, + "step": 4010 + }, + { + "epoch": 0.20613629355535, + "grad_norm": 1.0320727825164795, + "learning_rate": 9.208352545628714e-06, + "loss": 0.7635, + "step": 4011 + }, + { + "epoch": 0.20618768629869463, + "grad_norm": 1.0974820852279663, + "learning_rate": 9.20790307726138e-06, + "loss": 0.7951, + "step": 4012 + }, + { + "epoch": 0.20623907904203925, + "grad_norm": 1.20841646194458, + "learning_rate": 9.207453492310417e-06, + "loss": 0.799, + "step": 4013 + }, + { + "epoch": 0.2062904717853839, + "grad_norm": 1.1445960998535156, + "learning_rate": 9.20700379078828e-06, + "loss": 0.8037, + "step": 4014 + }, + { + "epoch": 0.20634186452872855, + "grad_norm": 1.0224921703338623, + "learning_rate": 9.20655397270743e-06, + "loss": 0.812, + "step": 4015 + }, + { + "epoch": 0.2063932572720732, + "grad_norm": 0.8716250061988831, + "learning_rate": 9.20610403808033e-06, + "loss": 0.7298, + "step": 4016 + }, + { + "epoch": 0.2064446500154178, + "grad_norm": 1.1250152587890625, + "learning_rate": 9.205653986919443e-06, + "loss": 0.8304, + "step": 4017 + }, + { + "epoch": 0.20649604275876246, + "grad_norm": 1.1214884519577026, + "learning_rate": 9.20520381923724e-06, + "loss": 0.79, + "step": 4018 + }, + { + "epoch": 0.2065474355021071, + "grad_norm": 0.6935552358627319, + "learning_rate": 9.204753535046193e-06, + "loss": 0.7154, + "step": 4019 + }, + { + "epoch": 0.20659882824545175, + "grad_norm": 1.122473955154419, + "learning_rate": 9.20430313435878e-06, + "loss": 0.7568, + "step": 4020 + }, + { + "epoch": 0.20665022098879637, + "grad_norm": 1.156865119934082, + "learning_rate": 9.203852617187474e-06, + "loss": 0.7597, + "step": 4021 + }, + { + "epoch": 0.20670161373214102, + "grad_norm": 1.1034526824951172, + "learning_rate": 9.203401983544762e-06, + "loss": 0.7569, + "step": 4022 + }, + { + "epoch": 0.20675300647548567, + "grad_norm": 1.0662312507629395, + "learning_rate": 9.202951233443126e-06, + "loss": 0.8416, + "step": 4023 + }, + { + "epoch": 0.20680439921883031, + "grad_norm": 1.1231504678726196, + "learning_rate": 9.202500366895057e-06, + "loss": 0.7714, + "step": 4024 + }, + { + "epoch": 0.20685579196217493, + "grad_norm": 1.2251371145248413, + "learning_rate": 9.202049383913045e-06, + "loss": 0.7818, + "step": 4025 + }, + { + "epoch": 0.20690718470551958, + "grad_norm": 1.0744401216506958, + "learning_rate": 9.201598284509586e-06, + "loss": 0.7699, + "step": 4026 + }, + { + "epoch": 0.20695857744886423, + "grad_norm": 1.1441630125045776, + "learning_rate": 9.201147068697178e-06, + "loss": 0.7751, + "step": 4027 + }, + { + "epoch": 0.20700997019220885, + "grad_norm": 1.113736629486084, + "learning_rate": 9.200695736488322e-06, + "loss": 0.7919, + "step": 4028 + }, + { + "epoch": 0.2070613629355535, + "grad_norm": 1.2082083225250244, + "learning_rate": 9.20024428789552e-06, + "loss": 0.8091, + "step": 4029 + }, + { + "epoch": 0.20711275567889814, + "grad_norm": 1.1719342470169067, + "learning_rate": 9.199792722931285e-06, + "loss": 0.8161, + "step": 4030 + }, + { + "epoch": 0.2071641484222428, + "grad_norm": 1.0741727352142334, + "learning_rate": 9.199341041608123e-06, + "loss": 0.8123, + "step": 4031 + }, + { + "epoch": 0.2072155411655874, + "grad_norm": 1.1328620910644531, + "learning_rate": 9.19888924393855e-06, + "loss": 0.8236, + "step": 4032 + }, + { + "epoch": 0.20726693390893206, + "grad_norm": 1.1496222019195557, + "learning_rate": 9.198437329935086e-06, + "loss": 0.791, + "step": 4033 + }, + { + "epoch": 0.2073183266522767, + "grad_norm": 1.107384204864502, + "learning_rate": 9.197985299610247e-06, + "loss": 0.782, + "step": 4034 + }, + { + "epoch": 0.20736971939562135, + "grad_norm": 0.808521568775177, + "learning_rate": 9.197533152976563e-06, + "loss": 0.7207, + "step": 4035 + }, + { + "epoch": 0.20742111213896597, + "grad_norm": 1.0780826807022095, + "learning_rate": 9.197080890046555e-06, + "loss": 0.758, + "step": 4036 + }, + { + "epoch": 0.20747250488231062, + "grad_norm": 1.120969533920288, + "learning_rate": 9.196628510832756e-06, + "loss": 0.7838, + "step": 4037 + }, + { + "epoch": 0.20752389762565526, + "grad_norm": 0.7947130799293518, + "learning_rate": 9.1961760153477e-06, + "loss": 0.6869, + "step": 4038 + }, + { + "epoch": 0.2075752903689999, + "grad_norm": 1.1954935789108276, + "learning_rate": 9.195723403603922e-06, + "loss": 0.7912, + "step": 4039 + }, + { + "epoch": 0.20762668311234453, + "grad_norm": 0.7600930333137512, + "learning_rate": 9.195270675613965e-06, + "loss": 0.6849, + "step": 4040 + }, + { + "epoch": 0.20767807585568918, + "grad_norm": 1.0679527521133423, + "learning_rate": 9.19481783139037e-06, + "loss": 0.7422, + "step": 4041 + }, + { + "epoch": 0.20772946859903382, + "grad_norm": 0.8906779885292053, + "learning_rate": 9.194364870945683e-06, + "loss": 0.6891, + "step": 4042 + }, + { + "epoch": 0.20778086134237844, + "grad_norm": 1.1099522113800049, + "learning_rate": 9.193911794292455e-06, + "loss": 0.8177, + "step": 4043 + }, + { + "epoch": 0.2078322540857231, + "grad_norm": 0.8021684288978577, + "learning_rate": 9.193458601443238e-06, + "loss": 0.6635, + "step": 4044 + }, + { + "epoch": 0.20788364682906774, + "grad_norm": 1.0883532762527466, + "learning_rate": 9.19300529241059e-06, + "loss": 0.7316, + "step": 4045 + }, + { + "epoch": 0.20793503957241238, + "grad_norm": 1.0455607175827026, + "learning_rate": 9.192551867207066e-06, + "loss": 0.7946, + "step": 4046 + }, + { + "epoch": 0.207986432315757, + "grad_norm": 0.7443743348121643, + "learning_rate": 9.192098325845234e-06, + "loss": 0.7043, + "step": 4047 + }, + { + "epoch": 0.20803782505910165, + "grad_norm": 1.102075219154358, + "learning_rate": 9.191644668337656e-06, + "loss": 0.7619, + "step": 4048 + }, + { + "epoch": 0.2080892178024463, + "grad_norm": 1.138498306274414, + "learning_rate": 9.191190894696904e-06, + "loss": 0.8153, + "step": 4049 + }, + { + "epoch": 0.20814061054579094, + "grad_norm": 1.1311196088790894, + "learning_rate": 9.190737004935545e-06, + "loss": 0.8111, + "step": 4050 + }, + { + "epoch": 0.20819200328913556, + "grad_norm": 1.0865874290466309, + "learning_rate": 9.19028299906616e-06, + "loss": 0.7774, + "step": 4051 + }, + { + "epoch": 0.2082433960324802, + "grad_norm": 0.9141707420349121, + "learning_rate": 9.189828877101326e-06, + "loss": 0.7166, + "step": 4052 + }, + { + "epoch": 0.20829478877582486, + "grad_norm": 1.1001232862472534, + "learning_rate": 9.189374639053624e-06, + "loss": 0.7647, + "step": 4053 + }, + { + "epoch": 0.2083461815191695, + "grad_norm": 1.10018789768219, + "learning_rate": 9.188920284935638e-06, + "loss": 0.7696, + "step": 4054 + }, + { + "epoch": 0.20839757426251412, + "grad_norm": 1.09285569190979, + "learning_rate": 9.188465814759956e-06, + "loss": 0.8418, + "step": 4055 + }, + { + "epoch": 0.20844896700585877, + "grad_norm": 0.7205379009246826, + "learning_rate": 9.188011228539175e-06, + "loss": 0.688, + "step": 4056 + }, + { + "epoch": 0.20850035974920342, + "grad_norm": 1.1213802099227905, + "learning_rate": 9.187556526285883e-06, + "loss": 0.7996, + "step": 4057 + }, + { + "epoch": 0.20855175249254806, + "grad_norm": 1.144457221031189, + "learning_rate": 9.187101708012682e-06, + "loss": 0.8256, + "step": 4058 + }, + { + "epoch": 0.20860314523589268, + "grad_norm": 1.125562310218811, + "learning_rate": 9.186646773732171e-06, + "loss": 0.8484, + "step": 4059 + }, + { + "epoch": 0.20865453797923733, + "grad_norm": 0.7997061014175415, + "learning_rate": 9.186191723456956e-06, + "loss": 0.6995, + "step": 4060 + }, + { + "epoch": 0.20870593072258198, + "grad_norm": 1.1060621738433838, + "learning_rate": 9.185736557199644e-06, + "loss": 0.8127, + "step": 4061 + }, + { + "epoch": 0.2087573234659266, + "grad_norm": 1.1874028444290161, + "learning_rate": 9.185281274972844e-06, + "loss": 0.8244, + "step": 4062 + }, + { + "epoch": 0.20880871620927124, + "grad_norm": 1.0982171297073364, + "learning_rate": 9.184825876789171e-06, + "loss": 0.7792, + "step": 4063 + }, + { + "epoch": 0.2088601089526159, + "grad_norm": 1.0794787406921387, + "learning_rate": 9.184370362661245e-06, + "loss": 0.7406, + "step": 4064 + }, + { + "epoch": 0.20891150169596054, + "grad_norm": 1.1303212642669678, + "learning_rate": 9.183914732601685e-06, + "loss": 0.7916, + "step": 4065 + }, + { + "epoch": 0.20896289443930516, + "grad_norm": 1.093451976776123, + "learning_rate": 9.18345898662311e-06, + "loss": 0.768, + "step": 4066 + }, + { + "epoch": 0.2090142871826498, + "grad_norm": 1.076591968536377, + "learning_rate": 9.183003124738153e-06, + "loss": 0.867, + "step": 4067 + }, + { + "epoch": 0.20906567992599445, + "grad_norm": 0.8431175351142883, + "learning_rate": 9.182547146959441e-06, + "loss": 0.7073, + "step": 4068 + }, + { + "epoch": 0.2091170726693391, + "grad_norm": 1.1227257251739502, + "learning_rate": 9.18209105329961e-06, + "loss": 0.7652, + "step": 4069 + }, + { + "epoch": 0.20916846541268372, + "grad_norm": 1.097144603729248, + "learning_rate": 9.181634843771291e-06, + "loss": 0.7395, + "step": 4070 + }, + { + "epoch": 0.20921985815602837, + "grad_norm": 1.123072624206543, + "learning_rate": 9.181178518387128e-06, + "loss": 0.7657, + "step": 4071 + }, + { + "epoch": 0.209271250899373, + "grad_norm": 1.036139726638794, + "learning_rate": 9.180722077159765e-06, + "loss": 0.7664, + "step": 4072 + }, + { + "epoch": 0.20932264364271766, + "grad_norm": 1.1210362911224365, + "learning_rate": 9.180265520101843e-06, + "loss": 0.8445, + "step": 4073 + }, + { + "epoch": 0.20937403638606228, + "grad_norm": 1.1159181594848633, + "learning_rate": 9.179808847226017e-06, + "loss": 0.8035, + "step": 4074 + }, + { + "epoch": 0.20942542912940693, + "grad_norm": 1.122412085533142, + "learning_rate": 9.179352058544935e-06, + "loss": 0.8214, + "step": 4075 + }, + { + "epoch": 0.20947682187275157, + "grad_norm": 0.8825933933258057, + "learning_rate": 9.178895154071255e-06, + "loss": 0.7118, + "step": 4076 + }, + { + "epoch": 0.20952821461609622, + "grad_norm": 1.1096134185791016, + "learning_rate": 9.178438133817637e-06, + "loss": 0.8037, + "step": 4077 + }, + { + "epoch": 0.20957960735944084, + "grad_norm": 1.1554313898086548, + "learning_rate": 9.17798099779674e-06, + "loss": 0.777, + "step": 4078 + }, + { + "epoch": 0.2096310001027855, + "grad_norm": 0.8162883520126343, + "learning_rate": 9.177523746021232e-06, + "loss": 0.7389, + "step": 4079 + }, + { + "epoch": 0.20968239284613013, + "grad_norm": 1.0952943563461304, + "learning_rate": 9.17706637850378e-06, + "loss": 0.8214, + "step": 4080 + }, + { + "epoch": 0.20973378558947475, + "grad_norm": 1.326655387878418, + "learning_rate": 9.176608895257057e-06, + "loss": 0.8016, + "step": 4081 + }, + { + "epoch": 0.2097851783328194, + "grad_norm": 0.8916594386100769, + "learning_rate": 9.176151296293739e-06, + "loss": 0.706, + "step": 4082 + }, + { + "epoch": 0.20983657107616405, + "grad_norm": 1.212406039237976, + "learning_rate": 9.1756935816265e-06, + "loss": 0.7847, + "step": 4083 + }, + { + "epoch": 0.2098879638195087, + "grad_norm": 2.6856772899627686, + "learning_rate": 9.175235751268023e-06, + "loss": 0.7286, + "step": 4084 + }, + { + "epoch": 0.2099393565628533, + "grad_norm": 1.0466660261154175, + "learning_rate": 9.174777805230997e-06, + "loss": 0.7383, + "step": 4085 + }, + { + "epoch": 0.20999074930619796, + "grad_norm": 1.1743662357330322, + "learning_rate": 9.174319743528104e-06, + "loss": 0.818, + "step": 4086 + }, + { + "epoch": 0.2100421420495426, + "grad_norm": 1.1300913095474243, + "learning_rate": 9.173861566172038e-06, + "loss": 0.7937, + "step": 4087 + }, + { + "epoch": 0.21009353479288725, + "grad_norm": 0.8407766222953796, + "learning_rate": 9.173403273175492e-06, + "loss": 0.6763, + "step": 4088 + }, + { + "epoch": 0.21014492753623187, + "grad_norm": 1.072435736656189, + "learning_rate": 9.172944864551163e-06, + "loss": 0.7882, + "step": 4089 + }, + { + "epoch": 0.21019632027957652, + "grad_norm": 1.0915374755859375, + "learning_rate": 9.172486340311755e-06, + "loss": 0.7844, + "step": 4090 + }, + { + "epoch": 0.21024771302292117, + "grad_norm": 1.3030527830123901, + "learning_rate": 9.172027700469967e-06, + "loss": 0.8454, + "step": 4091 + }, + { + "epoch": 0.21029910576626581, + "grad_norm": 1.1855648756027222, + "learning_rate": 9.17156894503851e-06, + "loss": 0.821, + "step": 4092 + }, + { + "epoch": 0.21035049850961043, + "grad_norm": 0.7978924512863159, + "learning_rate": 9.171110074030092e-06, + "loss": 0.6707, + "step": 4093 + }, + { + "epoch": 0.21040189125295508, + "grad_norm": 1.1909937858581543, + "learning_rate": 9.170651087457427e-06, + "loss": 0.7577, + "step": 4094 + }, + { + "epoch": 0.21045328399629973, + "grad_norm": 1.1295768022537231, + "learning_rate": 9.17019198533323e-06, + "loss": 0.7734, + "step": 4095 + }, + { + "epoch": 0.21050467673964438, + "grad_norm": 1.3759740591049194, + "learning_rate": 9.169732767670225e-06, + "loss": 0.8333, + "step": 4096 + }, + { + "epoch": 0.210556069482989, + "grad_norm": 1.1282240152359009, + "learning_rate": 9.169273434481132e-06, + "loss": 0.7976, + "step": 4097 + }, + { + "epoch": 0.21060746222633364, + "grad_norm": 1.5105525255203247, + "learning_rate": 9.168813985778677e-06, + "loss": 0.7399, + "step": 4098 + }, + { + "epoch": 0.2106588549696783, + "grad_norm": 0.7949177622795105, + "learning_rate": 9.16835442157559e-06, + "loss": 0.7409, + "step": 4099 + }, + { + "epoch": 0.2107102477130229, + "grad_norm": 1.0517692565917969, + "learning_rate": 9.167894741884604e-06, + "loss": 0.7258, + "step": 4100 + }, + { + "epoch": 0.21076164045636755, + "grad_norm": 1.0692452192306519, + "learning_rate": 9.167434946718455e-06, + "loss": 0.8521, + "step": 4101 + }, + { + "epoch": 0.2108130331997122, + "grad_norm": 0.7752538919448853, + "learning_rate": 9.16697503608988e-06, + "loss": 0.69, + "step": 4102 + }, + { + "epoch": 0.21086442594305685, + "grad_norm": 1.1508145332336426, + "learning_rate": 9.166515010011625e-06, + "loss": 0.8239, + "step": 4103 + }, + { + "epoch": 0.21091581868640147, + "grad_norm": 1.1212024688720703, + "learning_rate": 9.166054868496433e-06, + "loss": 0.7824, + "step": 4104 + }, + { + "epoch": 0.21096721142974612, + "grad_norm": 0.7913763523101807, + "learning_rate": 9.165594611557052e-06, + "loss": 0.6895, + "step": 4105 + }, + { + "epoch": 0.21101860417309076, + "grad_norm": 1.0686748027801514, + "learning_rate": 9.165134239206236e-06, + "loss": 0.8244, + "step": 4106 + }, + { + "epoch": 0.2110699969164354, + "grad_norm": 1.0987708568572998, + "learning_rate": 9.164673751456738e-06, + "loss": 0.8037, + "step": 4107 + }, + { + "epoch": 0.21112138965978003, + "grad_norm": 1.0956056118011475, + "learning_rate": 9.164213148321315e-06, + "loss": 0.8406, + "step": 4108 + }, + { + "epoch": 0.21117278240312468, + "grad_norm": 1.1297279596328735, + "learning_rate": 9.163752429812733e-06, + "loss": 0.8626, + "step": 4109 + }, + { + "epoch": 0.21122417514646932, + "grad_norm": 0.9671109914779663, + "learning_rate": 9.163291595943753e-06, + "loss": 0.6765, + "step": 4110 + }, + { + "epoch": 0.21127556788981397, + "grad_norm": 1.112369418144226, + "learning_rate": 9.162830646727143e-06, + "loss": 0.8295, + "step": 4111 + }, + { + "epoch": 0.2113269606331586, + "grad_norm": 1.121092677116394, + "learning_rate": 9.162369582175676e-06, + "loss": 0.7927, + "step": 4112 + }, + { + "epoch": 0.21137835337650324, + "grad_norm": 1.1575745344161987, + "learning_rate": 9.161908402302125e-06, + "loss": 0.7862, + "step": 4113 + }, + { + "epoch": 0.21142974611984788, + "grad_norm": 1.0813905000686646, + "learning_rate": 9.161447107119266e-06, + "loss": 0.7926, + "step": 4114 + }, + { + "epoch": 0.21148113886319253, + "grad_norm": 1.1230854988098145, + "learning_rate": 9.160985696639882e-06, + "loss": 0.785, + "step": 4115 + }, + { + "epoch": 0.21153253160653715, + "grad_norm": 1.1330019235610962, + "learning_rate": 9.160524170876756e-06, + "loss": 0.8373, + "step": 4116 + }, + { + "epoch": 0.2115839243498818, + "grad_norm": 0.7802382707595825, + "learning_rate": 9.160062529842675e-06, + "loss": 0.6708, + "step": 4117 + }, + { + "epoch": 0.21163531709322644, + "grad_norm": 1.1753462553024292, + "learning_rate": 9.159600773550428e-06, + "loss": 0.7757, + "step": 4118 + }, + { + "epoch": 0.21168670983657106, + "grad_norm": 1.136126160621643, + "learning_rate": 9.15913890201281e-06, + "loss": 0.8242, + "step": 4119 + }, + { + "epoch": 0.2117381025799157, + "grad_norm": 1.087391972541809, + "learning_rate": 9.158676915242615e-06, + "loss": 0.7408, + "step": 4120 + }, + { + "epoch": 0.21178949532326036, + "grad_norm": 1.2799772024154663, + "learning_rate": 9.158214813252646e-06, + "loss": 0.8115, + "step": 4121 + }, + { + "epoch": 0.211840888066605, + "grad_norm": 1.114206314086914, + "learning_rate": 9.157752596055704e-06, + "loss": 0.823, + "step": 4122 + }, + { + "epoch": 0.21189228080994962, + "grad_norm": 1.092413306236267, + "learning_rate": 9.157290263664596e-06, + "loss": 0.8126, + "step": 4123 + }, + { + "epoch": 0.21194367355329427, + "grad_norm": 0.7647567391395569, + "learning_rate": 9.15682781609213e-06, + "loss": 0.6799, + "step": 4124 + }, + { + "epoch": 0.21199506629663892, + "grad_norm": 0.7781330943107605, + "learning_rate": 9.15636525335112e-06, + "loss": 0.718, + "step": 4125 + }, + { + "epoch": 0.21204645903998356, + "grad_norm": 1.1484543085098267, + "learning_rate": 9.155902575454381e-06, + "loss": 0.7885, + "step": 4126 + }, + { + "epoch": 0.21209785178332818, + "grad_norm": 1.244184136390686, + "learning_rate": 9.155439782414732e-06, + "loss": 0.8081, + "step": 4127 + }, + { + "epoch": 0.21214924452667283, + "grad_norm": 1.12799870967865, + "learning_rate": 9.154976874244996e-06, + "loss": 0.7673, + "step": 4128 + }, + { + "epoch": 0.21220063727001748, + "grad_norm": 1.1538182497024536, + "learning_rate": 9.154513850957995e-06, + "loss": 0.7868, + "step": 4129 + }, + { + "epoch": 0.21225203001336213, + "grad_norm": 1.1459710597991943, + "learning_rate": 9.154050712566563e-06, + "loss": 0.7947, + "step": 4130 + }, + { + "epoch": 0.21230342275670674, + "grad_norm": 1.1236449480056763, + "learning_rate": 9.153587459083525e-06, + "loss": 0.8077, + "step": 4131 + }, + { + "epoch": 0.2123548155000514, + "grad_norm": 1.090453863143921, + "learning_rate": 9.153124090521723e-06, + "loss": 0.7571, + "step": 4132 + }, + { + "epoch": 0.21240620824339604, + "grad_norm": 1.104998230934143, + "learning_rate": 9.152660606893991e-06, + "loss": 0.809, + "step": 4133 + }, + { + "epoch": 0.21245760098674069, + "grad_norm": 1.0853921175003052, + "learning_rate": 9.152197008213169e-06, + "loss": 0.7708, + "step": 4134 + }, + { + "epoch": 0.2125089937300853, + "grad_norm": 1.1155692338943481, + "learning_rate": 9.151733294492101e-06, + "loss": 0.8059, + "step": 4135 + }, + { + "epoch": 0.21256038647342995, + "grad_norm": 1.091387152671814, + "learning_rate": 9.151269465743641e-06, + "loss": 0.7658, + "step": 4136 + }, + { + "epoch": 0.2126117792167746, + "grad_norm": 1.074527621269226, + "learning_rate": 9.150805521980634e-06, + "loss": 0.7429, + "step": 4137 + }, + { + "epoch": 0.21266317196011922, + "grad_norm": 1.1839638948440552, + "learning_rate": 9.150341463215935e-06, + "loss": 0.8066, + "step": 4138 + }, + { + "epoch": 0.21271456470346387, + "grad_norm": 1.147925853729248, + "learning_rate": 9.1498772894624e-06, + "loss": 0.7642, + "step": 4139 + }, + { + "epoch": 0.2127659574468085, + "grad_norm": 1.1146399974822998, + "learning_rate": 9.149413000732892e-06, + "loss": 0.8282, + "step": 4140 + }, + { + "epoch": 0.21281735019015316, + "grad_norm": 1.0158265829086304, + "learning_rate": 9.148948597040274e-06, + "loss": 0.7226, + "step": 4141 + }, + { + "epoch": 0.21286874293349778, + "grad_norm": 0.8555070161819458, + "learning_rate": 9.148484078397412e-06, + "loss": 0.6998, + "step": 4142 + }, + { + "epoch": 0.21292013567684243, + "grad_norm": 1.2350852489471436, + "learning_rate": 9.148019444817175e-06, + "loss": 0.7903, + "step": 4143 + }, + { + "epoch": 0.21297152842018707, + "grad_norm": 1.1090493202209473, + "learning_rate": 9.147554696312438e-06, + "loss": 0.8118, + "step": 4144 + }, + { + "epoch": 0.21302292116353172, + "grad_norm": 1.0957188606262207, + "learning_rate": 9.147089832896075e-06, + "loss": 0.7279, + "step": 4145 + }, + { + "epoch": 0.21307431390687634, + "grad_norm": 1.1010417938232422, + "learning_rate": 9.146624854580968e-06, + "loss": 0.8248, + "step": 4146 + }, + { + "epoch": 0.213125706650221, + "grad_norm": 1.0710384845733643, + "learning_rate": 9.146159761379998e-06, + "loss": 0.7796, + "step": 4147 + }, + { + "epoch": 0.21317709939356563, + "grad_norm": 1.104913353919983, + "learning_rate": 9.145694553306051e-06, + "loss": 0.7783, + "step": 4148 + }, + { + "epoch": 0.21322849213691028, + "grad_norm": 1.1737264394760132, + "learning_rate": 9.145229230372017e-06, + "loss": 0.8813, + "step": 4149 + }, + { + "epoch": 0.2132798848802549, + "grad_norm": 1.098204493522644, + "learning_rate": 9.144763792590787e-06, + "loss": 0.8129, + "step": 4150 + }, + { + "epoch": 0.21333127762359955, + "grad_norm": 1.3907911777496338, + "learning_rate": 9.144298239975255e-06, + "loss": 0.8014, + "step": 4151 + }, + { + "epoch": 0.2133826703669442, + "grad_norm": 1.1620992422103882, + "learning_rate": 9.143832572538324e-06, + "loss": 0.7728, + "step": 4152 + }, + { + "epoch": 0.21343406311028884, + "grad_norm": 0.8103654384613037, + "learning_rate": 9.143366790292892e-06, + "loss": 0.7174, + "step": 4153 + }, + { + "epoch": 0.21348545585363346, + "grad_norm": 1.1073077917099, + "learning_rate": 9.142900893251864e-06, + "loss": 0.7936, + "step": 4154 + }, + { + "epoch": 0.2135368485969781, + "grad_norm": 1.1065016984939575, + "learning_rate": 9.14243488142815e-06, + "loss": 0.8063, + "step": 4155 + }, + { + "epoch": 0.21358824134032275, + "grad_norm": 1.1206870079040527, + "learning_rate": 9.14196875483466e-06, + "loss": 0.8022, + "step": 4156 + }, + { + "epoch": 0.21363963408366737, + "grad_norm": 1.1078795194625854, + "learning_rate": 9.141502513484308e-06, + "loss": 0.7639, + "step": 4157 + }, + { + "epoch": 0.21369102682701202, + "grad_norm": 0.7858216762542725, + "learning_rate": 9.141036157390014e-06, + "loss": 0.757, + "step": 4158 + }, + { + "epoch": 0.21374241957035667, + "grad_norm": 1.0773473978042603, + "learning_rate": 9.140569686564695e-06, + "loss": 0.7673, + "step": 4159 + }, + { + "epoch": 0.21379381231370131, + "grad_norm": 1.1611648797988892, + "learning_rate": 9.14010310102128e-06, + "loss": 0.7841, + "step": 4160 + }, + { + "epoch": 0.21384520505704593, + "grad_norm": 1.0764635801315308, + "learning_rate": 9.139636400772692e-06, + "loss": 0.7509, + "step": 4161 + }, + { + "epoch": 0.21389659780039058, + "grad_norm": 0.8107448816299438, + "learning_rate": 9.139169585831862e-06, + "loss": 0.6771, + "step": 4162 + }, + { + "epoch": 0.21394799054373523, + "grad_norm": 0.7882181406021118, + "learning_rate": 9.138702656211723e-06, + "loss": 0.6587, + "step": 4163 + }, + { + "epoch": 0.21399938328707988, + "grad_norm": 1.0768563747406006, + "learning_rate": 9.138235611925215e-06, + "loss": 0.8408, + "step": 4164 + }, + { + "epoch": 0.2140507760304245, + "grad_norm": 1.3791990280151367, + "learning_rate": 9.137768452985273e-06, + "loss": 0.8195, + "step": 4165 + }, + { + "epoch": 0.21410216877376914, + "grad_norm": 1.171093463897705, + "learning_rate": 9.137301179404845e-06, + "loss": 0.8214, + "step": 4166 + }, + { + "epoch": 0.2141535615171138, + "grad_norm": 1.111439824104309, + "learning_rate": 9.136833791196875e-06, + "loss": 0.7838, + "step": 4167 + }, + { + "epoch": 0.21420495426045844, + "grad_norm": 1.1361054182052612, + "learning_rate": 9.136366288374311e-06, + "loss": 0.8215, + "step": 4168 + }, + { + "epoch": 0.21425634700380305, + "grad_norm": 1.1506834030151367, + "learning_rate": 9.135898670950108e-06, + "loss": 0.8051, + "step": 4169 + }, + { + "epoch": 0.2143077397471477, + "grad_norm": 1.0800647735595703, + "learning_rate": 9.135430938937219e-06, + "loss": 0.7629, + "step": 4170 + }, + { + "epoch": 0.21435913249049235, + "grad_norm": 1.371882677078247, + "learning_rate": 9.134963092348608e-06, + "loss": 0.8322, + "step": 4171 + }, + { + "epoch": 0.214410525233837, + "grad_norm": 1.0859326124191284, + "learning_rate": 9.134495131197231e-06, + "loss": 0.7789, + "step": 4172 + }, + { + "epoch": 0.21446191797718162, + "grad_norm": 1.1921128034591675, + "learning_rate": 9.134027055496057e-06, + "loss": 0.7917, + "step": 4173 + }, + { + "epoch": 0.21451331072052626, + "grad_norm": 1.2561819553375244, + "learning_rate": 9.133558865258052e-06, + "loss": 0.8345, + "step": 4174 + }, + { + "epoch": 0.2145647034638709, + "grad_norm": 1.0786267518997192, + "learning_rate": 9.13309056049619e-06, + "loss": 0.7668, + "step": 4175 + }, + { + "epoch": 0.21461609620721553, + "grad_norm": 1.0573195219039917, + "learning_rate": 9.132622141223447e-06, + "loss": 0.7805, + "step": 4176 + }, + { + "epoch": 0.21466748895056018, + "grad_norm": 1.0086778402328491, + "learning_rate": 9.132153607452795e-06, + "loss": 0.7415, + "step": 4177 + }, + { + "epoch": 0.21471888169390482, + "grad_norm": 1.3335920572280884, + "learning_rate": 9.131684959197222e-06, + "loss": 0.7906, + "step": 4178 + }, + { + "epoch": 0.21477027443724947, + "grad_norm": 1.1119905710220337, + "learning_rate": 9.131216196469706e-06, + "loss": 0.7694, + "step": 4179 + }, + { + "epoch": 0.2148216671805941, + "grad_norm": 1.1683192253112793, + "learning_rate": 9.13074731928324e-06, + "loss": 0.7492, + "step": 4180 + }, + { + "epoch": 0.21487305992393874, + "grad_norm": 1.136493444442749, + "learning_rate": 9.130278327650812e-06, + "loss": 0.7294, + "step": 4181 + }, + { + "epoch": 0.21492445266728338, + "grad_norm": 1.1425938606262207, + "learning_rate": 9.129809221585416e-06, + "loss": 0.7924, + "step": 4182 + }, + { + "epoch": 0.21497584541062803, + "grad_norm": 1.1476768255233765, + "learning_rate": 9.12934000110005e-06, + "loss": 0.7814, + "step": 4183 + }, + { + "epoch": 0.21502723815397265, + "grad_norm": 1.1310791969299316, + "learning_rate": 9.128870666207711e-06, + "loss": 0.8608, + "step": 4184 + }, + { + "epoch": 0.2150786308973173, + "grad_norm": 1.1162221431732178, + "learning_rate": 9.128401216921407e-06, + "loss": 0.7906, + "step": 4185 + }, + { + "epoch": 0.21513002364066194, + "grad_norm": 1.05939781665802, + "learning_rate": 9.127931653254143e-06, + "loss": 0.794, + "step": 4186 + }, + { + "epoch": 0.2151814163840066, + "grad_norm": 1.1121755838394165, + "learning_rate": 9.127461975218926e-06, + "loss": 0.8339, + "step": 4187 + }, + { + "epoch": 0.2152328091273512, + "grad_norm": 1.129523515701294, + "learning_rate": 9.12699218282877e-06, + "loss": 0.7701, + "step": 4188 + }, + { + "epoch": 0.21528420187069586, + "grad_norm": 1.0820891857147217, + "learning_rate": 9.126522276096694e-06, + "loss": 0.7914, + "step": 4189 + }, + { + "epoch": 0.2153355946140405, + "grad_norm": 1.0078492164611816, + "learning_rate": 9.126052255035714e-06, + "loss": 0.7366, + "step": 4190 + }, + { + "epoch": 0.21538698735738512, + "grad_norm": 0.871012806892395, + "learning_rate": 9.12558211965885e-06, + "loss": 0.7274, + "step": 4191 + }, + { + "epoch": 0.21543838010072977, + "grad_norm": 0.8761668801307678, + "learning_rate": 9.125111869979136e-06, + "loss": 0.7172, + "step": 4192 + }, + { + "epoch": 0.21548977284407442, + "grad_norm": 1.0423604249954224, + "learning_rate": 9.124641506009593e-06, + "loss": 0.786, + "step": 4193 + }, + { + "epoch": 0.21554116558741906, + "grad_norm": 1.108942985534668, + "learning_rate": 9.124171027763255e-06, + "loss": 0.7609, + "step": 4194 + }, + { + "epoch": 0.21559255833076368, + "grad_norm": 1.1549575328826904, + "learning_rate": 9.123700435253157e-06, + "loss": 0.8103, + "step": 4195 + }, + { + "epoch": 0.21564395107410833, + "grad_norm": 1.0554914474487305, + "learning_rate": 9.123229728492338e-06, + "loss": 0.8096, + "step": 4196 + }, + { + "epoch": 0.21569534381745298, + "grad_norm": 1.1119860410690308, + "learning_rate": 9.122758907493839e-06, + "loss": 0.7533, + "step": 4197 + }, + { + "epoch": 0.21574673656079763, + "grad_norm": 1.1737781763076782, + "learning_rate": 9.122287972270704e-06, + "loss": 0.7745, + "step": 4198 + }, + { + "epoch": 0.21579812930414224, + "grad_norm": 1.1175786256790161, + "learning_rate": 9.121816922835982e-06, + "loss": 0.7173, + "step": 4199 + }, + { + "epoch": 0.2158495220474869, + "grad_norm": 1.1554908752441406, + "learning_rate": 9.121345759202722e-06, + "loss": 0.8963, + "step": 4200 + }, + { + "epoch": 0.21590091479083154, + "grad_norm": 1.1126316785812378, + "learning_rate": 9.120874481383979e-06, + "loss": 0.7997, + "step": 4201 + }, + { + "epoch": 0.21595230753417619, + "grad_norm": 1.1244614124298096, + "learning_rate": 9.120403089392808e-06, + "loss": 0.8033, + "step": 4202 + }, + { + "epoch": 0.2160037002775208, + "grad_norm": 1.0426387786865234, + "learning_rate": 9.119931583242275e-06, + "loss": 0.749, + "step": 4203 + }, + { + "epoch": 0.21605509302086545, + "grad_norm": 1.1404590606689453, + "learning_rate": 9.119459962945436e-06, + "loss": 0.7906, + "step": 4204 + }, + { + "epoch": 0.2161064857642101, + "grad_norm": 1.107214331626892, + "learning_rate": 9.118988228515365e-06, + "loss": 0.7866, + "step": 4205 + }, + { + "epoch": 0.21615787850755475, + "grad_norm": 0.9314901828765869, + "learning_rate": 9.118516379965126e-06, + "loss": 0.6897, + "step": 4206 + }, + { + "epoch": 0.21620927125089937, + "grad_norm": 1.1537880897521973, + "learning_rate": 9.118044417307793e-06, + "loss": 0.7745, + "step": 4207 + }, + { + "epoch": 0.216260663994244, + "grad_norm": 1.1069575548171997, + "learning_rate": 9.117572340556446e-06, + "loss": 0.762, + "step": 4208 + }, + { + "epoch": 0.21631205673758866, + "grad_norm": 1.1378830671310425, + "learning_rate": 9.11710014972416e-06, + "loss": 0.8789, + "step": 4209 + }, + { + "epoch": 0.21636344948093328, + "grad_norm": 1.1269419193267822, + "learning_rate": 9.11662784482402e-06, + "loss": 0.7967, + "step": 4210 + }, + { + "epoch": 0.21641484222427793, + "grad_norm": 0.777703046798706, + "learning_rate": 9.11615542586911e-06, + "loss": 0.704, + "step": 4211 + }, + { + "epoch": 0.21646623496762257, + "grad_norm": 1.108641266822815, + "learning_rate": 9.115682892872521e-06, + "loss": 0.7728, + "step": 4212 + }, + { + "epoch": 0.21651762771096722, + "grad_norm": 1.1214264631271362, + "learning_rate": 9.115210245847343e-06, + "loss": 0.7236, + "step": 4213 + }, + { + "epoch": 0.21656902045431184, + "grad_norm": 1.136697769165039, + "learning_rate": 9.11473748480667e-06, + "loss": 0.8146, + "step": 4214 + }, + { + "epoch": 0.2166204131976565, + "grad_norm": 1.1365002393722534, + "learning_rate": 9.114264609763603e-06, + "loss": 0.809, + "step": 4215 + }, + { + "epoch": 0.21667180594100113, + "grad_norm": 1.1066017150878906, + "learning_rate": 9.113791620731242e-06, + "loss": 0.7794, + "step": 4216 + }, + { + "epoch": 0.21672319868434578, + "grad_norm": 1.1277374029159546, + "learning_rate": 9.113318517722693e-06, + "loss": 0.7895, + "step": 4217 + }, + { + "epoch": 0.2167745914276904, + "grad_norm": 1.0917003154754639, + "learning_rate": 9.11284530075106e-06, + "loss": 0.7415, + "step": 4218 + }, + { + "epoch": 0.21682598417103505, + "grad_norm": 1.0714293718338013, + "learning_rate": 9.112371969829458e-06, + "loss": 0.8347, + "step": 4219 + }, + { + "epoch": 0.2168773769143797, + "grad_norm": 1.143161654472351, + "learning_rate": 9.111898524971e-06, + "loss": 0.7608, + "step": 4220 + }, + { + "epoch": 0.21692876965772434, + "grad_norm": 1.1832191944122314, + "learning_rate": 9.111424966188802e-06, + "loss": 0.8163, + "step": 4221 + }, + { + "epoch": 0.21698016240106896, + "grad_norm": 0.7797741293907166, + "learning_rate": 9.110951293495983e-06, + "loss": 0.7226, + "step": 4222 + }, + { + "epoch": 0.2170315551444136, + "grad_norm": 0.8132724165916443, + "learning_rate": 9.110477506905672e-06, + "loss": 0.7269, + "step": 4223 + }, + { + "epoch": 0.21708294788775825, + "grad_norm": 1.154966950416565, + "learning_rate": 9.110003606430991e-06, + "loss": 0.7881, + "step": 4224 + }, + { + "epoch": 0.2171343406311029, + "grad_norm": 1.0446710586547852, + "learning_rate": 9.10952959208507e-06, + "loss": 0.762, + "step": 4225 + }, + { + "epoch": 0.21718573337444752, + "grad_norm": 0.7129215598106384, + "learning_rate": 9.109055463881045e-06, + "loss": 0.7073, + "step": 4226 + }, + { + "epoch": 0.21723712611779217, + "grad_norm": 1.0493582487106323, + "learning_rate": 9.10858122183205e-06, + "loss": 0.809, + "step": 4227 + }, + { + "epoch": 0.21728851886113681, + "grad_norm": 1.0873157978057861, + "learning_rate": 9.108106865951223e-06, + "loss": 0.7873, + "step": 4228 + }, + { + "epoch": 0.21733991160448143, + "grad_norm": 0.8385129570960999, + "learning_rate": 9.10763239625171e-06, + "loss": 0.7598, + "step": 4229 + }, + { + "epoch": 0.21739130434782608, + "grad_norm": 0.9693784713745117, + "learning_rate": 9.107157812746652e-06, + "loss": 0.6354, + "step": 4230 + }, + { + "epoch": 0.21744269709117073, + "grad_norm": 1.0374523401260376, + "learning_rate": 9.1066831154492e-06, + "loss": 0.7641, + "step": 4231 + }, + { + "epoch": 0.21749408983451538, + "grad_norm": 1.118335485458374, + "learning_rate": 9.106208304372509e-06, + "loss": 0.6814, + "step": 4232 + }, + { + "epoch": 0.21754548257786, + "grad_norm": 1.0439949035644531, + "learning_rate": 9.10573337952973e-06, + "loss": 0.74, + "step": 4233 + }, + { + "epoch": 0.21759687532120464, + "grad_norm": 1.1221650838851929, + "learning_rate": 9.10525834093402e-06, + "loss": 0.8212, + "step": 4234 + }, + { + "epoch": 0.2176482680645493, + "grad_norm": 0.786357045173645, + "learning_rate": 9.104783188598545e-06, + "loss": 0.6903, + "step": 4235 + }, + { + "epoch": 0.21769966080789394, + "grad_norm": 1.0469022989273071, + "learning_rate": 9.104307922536466e-06, + "loss": 0.7557, + "step": 4236 + }, + { + "epoch": 0.21775105355123855, + "grad_norm": 0.753881573677063, + "learning_rate": 9.103832542760954e-06, + "loss": 0.7059, + "step": 4237 + }, + { + "epoch": 0.2178024462945832, + "grad_norm": 1.14176607131958, + "learning_rate": 9.103357049285177e-06, + "loss": 0.7253, + "step": 4238 + }, + { + "epoch": 0.21785383903792785, + "grad_norm": 0.8362029194831848, + "learning_rate": 9.102881442122308e-06, + "loss": 0.7107, + "step": 4239 + }, + { + "epoch": 0.2179052317812725, + "grad_norm": 0.7867474555969238, + "learning_rate": 9.102405721285527e-06, + "loss": 0.6787, + "step": 4240 + }, + { + "epoch": 0.21795662452461712, + "grad_norm": 0.7196457386016846, + "learning_rate": 9.101929886788014e-06, + "loss": 0.653, + "step": 4241 + }, + { + "epoch": 0.21800801726796176, + "grad_norm": 1.1716468334197998, + "learning_rate": 9.101453938642951e-06, + "loss": 0.772, + "step": 4242 + }, + { + "epoch": 0.2180594100113064, + "grad_norm": 1.053269386291504, + "learning_rate": 9.100977876863527e-06, + "loss": 0.7364, + "step": 4243 + }, + { + "epoch": 0.21811080275465106, + "grad_norm": 1.0741207599639893, + "learning_rate": 9.100501701462925e-06, + "loss": 0.7623, + "step": 4244 + }, + { + "epoch": 0.21816219549799568, + "grad_norm": 1.0592821836471558, + "learning_rate": 9.100025412454346e-06, + "loss": 0.7707, + "step": 4245 + }, + { + "epoch": 0.21821358824134032, + "grad_norm": 1.1328836679458618, + "learning_rate": 9.099549009850984e-06, + "loss": 0.7818, + "step": 4246 + }, + { + "epoch": 0.21826498098468497, + "grad_norm": 1.1115812063217163, + "learning_rate": 9.099072493666032e-06, + "loss": 0.7955, + "step": 4247 + }, + { + "epoch": 0.2183163737280296, + "grad_norm": 1.1512393951416016, + "learning_rate": 9.0985958639127e-06, + "loss": 0.8269, + "step": 4248 + }, + { + "epoch": 0.21836776647137424, + "grad_norm": 1.1867098808288574, + "learning_rate": 9.09811912060419e-06, + "loss": 0.8134, + "step": 4249 + }, + { + "epoch": 0.21841915921471888, + "grad_norm": 1.0500290393829346, + "learning_rate": 9.097642263753712e-06, + "loss": 0.7623, + "step": 4250 + }, + { + "epoch": 0.21847055195806353, + "grad_norm": 1.0363597869873047, + "learning_rate": 9.097165293374477e-06, + "loss": 0.7873, + "step": 4251 + }, + { + "epoch": 0.21852194470140815, + "grad_norm": 0.9595499038696289, + "learning_rate": 9.096688209479699e-06, + "loss": 0.6616, + "step": 4252 + }, + { + "epoch": 0.2185733374447528, + "grad_norm": 1.1250566244125366, + "learning_rate": 9.096211012082596e-06, + "loss": 0.7687, + "step": 4253 + }, + { + "epoch": 0.21862473018809744, + "grad_norm": 1.0896481275558472, + "learning_rate": 9.095733701196392e-06, + "loss": 0.7973, + "step": 4254 + }, + { + "epoch": 0.2186761229314421, + "grad_norm": 1.0918326377868652, + "learning_rate": 9.095256276834308e-06, + "loss": 0.7783, + "step": 4255 + }, + { + "epoch": 0.2187275156747867, + "grad_norm": 1.1954625844955444, + "learning_rate": 9.094778739009571e-06, + "loss": 0.7601, + "step": 4256 + }, + { + "epoch": 0.21877890841813136, + "grad_norm": 0.8080825209617615, + "learning_rate": 9.094301087735415e-06, + "loss": 0.7092, + "step": 4257 + }, + { + "epoch": 0.218830301161476, + "grad_norm": 1.0741758346557617, + "learning_rate": 9.093823323025073e-06, + "loss": 0.7616, + "step": 4258 + }, + { + "epoch": 0.21888169390482065, + "grad_norm": 1.0875614881515503, + "learning_rate": 9.093345444891778e-06, + "loss": 0.8056, + "step": 4259 + }, + { + "epoch": 0.21893308664816527, + "grad_norm": 1.09056556224823, + "learning_rate": 9.092867453348776e-06, + "loss": 0.7879, + "step": 4260 + }, + { + "epoch": 0.21898447939150992, + "grad_norm": 1.122534155845642, + "learning_rate": 9.092389348409306e-06, + "loss": 0.7981, + "step": 4261 + }, + { + "epoch": 0.21903587213485456, + "grad_norm": 1.0954110622406006, + "learning_rate": 9.091911130086616e-06, + "loss": 0.758, + "step": 4262 + }, + { + "epoch": 0.2190872648781992, + "grad_norm": 1.1522572040557861, + "learning_rate": 9.091432798393954e-06, + "loss": 0.8237, + "step": 4263 + }, + { + "epoch": 0.21913865762154383, + "grad_norm": 0.9982960820198059, + "learning_rate": 9.090954353344574e-06, + "loss": 0.7266, + "step": 4264 + }, + { + "epoch": 0.21919005036488848, + "grad_norm": 1.0826950073242188, + "learning_rate": 9.09047579495173e-06, + "loss": 0.7736, + "step": 4265 + }, + { + "epoch": 0.21924144310823312, + "grad_norm": 1.2022103071212769, + "learning_rate": 9.089997123228684e-06, + "loss": 0.8082, + "step": 4266 + }, + { + "epoch": 0.21929283585157774, + "grad_norm": 1.1171106100082397, + "learning_rate": 9.089518338188695e-06, + "loss": 0.8277, + "step": 4267 + }, + { + "epoch": 0.2193442285949224, + "grad_norm": 1.0945594310760498, + "learning_rate": 9.08903943984503e-06, + "loss": 0.7808, + "step": 4268 + }, + { + "epoch": 0.21939562133826704, + "grad_norm": 1.1168155670166016, + "learning_rate": 9.088560428210956e-06, + "loss": 0.8519, + "step": 4269 + }, + { + "epoch": 0.21944701408161169, + "grad_norm": 1.0966004133224487, + "learning_rate": 9.088081303299745e-06, + "loss": 0.7984, + "step": 4270 + }, + { + "epoch": 0.2194984068249563, + "grad_norm": 0.8071814179420471, + "learning_rate": 9.087602065124672e-06, + "loss": 0.7084, + "step": 4271 + }, + { + "epoch": 0.21954979956830095, + "grad_norm": 1.1144964694976807, + "learning_rate": 9.087122713699014e-06, + "loss": 0.7709, + "step": 4272 + }, + { + "epoch": 0.2196011923116456, + "grad_norm": 1.0576223134994507, + "learning_rate": 9.086643249036053e-06, + "loss": 0.8007, + "step": 4273 + }, + { + "epoch": 0.21965258505499025, + "grad_norm": 1.1493570804595947, + "learning_rate": 9.086163671149071e-06, + "loss": 0.7256, + "step": 4274 + }, + { + "epoch": 0.21970397779833487, + "grad_norm": 0.7365999221801758, + "learning_rate": 9.085683980051356e-06, + "loss": 0.7046, + "step": 4275 + }, + { + "epoch": 0.2197553705416795, + "grad_norm": 0.8011993169784546, + "learning_rate": 9.085204175756199e-06, + "loss": 0.7055, + "step": 4276 + }, + { + "epoch": 0.21980676328502416, + "grad_norm": 1.090439796447754, + "learning_rate": 9.084724258276894e-06, + "loss": 0.7941, + "step": 4277 + }, + { + "epoch": 0.2198581560283688, + "grad_norm": 1.2138172388076782, + "learning_rate": 9.084244227626736e-06, + "loss": 0.8068, + "step": 4278 + }, + { + "epoch": 0.21990954877171343, + "grad_norm": 1.1071089506149292, + "learning_rate": 9.083764083819024e-06, + "loss": 0.8307, + "step": 4279 + }, + { + "epoch": 0.21996094151505807, + "grad_norm": 1.1578168869018555, + "learning_rate": 9.083283826867063e-06, + "loss": 0.824, + "step": 4280 + }, + { + "epoch": 0.22001233425840272, + "grad_norm": 1.1298171281814575, + "learning_rate": 9.082803456784157e-06, + "loss": 0.8725, + "step": 4281 + }, + { + "epoch": 0.22006372700174737, + "grad_norm": 1.1123601198196411, + "learning_rate": 9.082322973583616e-06, + "loss": 0.7709, + "step": 4282 + }, + { + "epoch": 0.22011511974509199, + "grad_norm": 1.1035863161087036, + "learning_rate": 9.081842377278754e-06, + "loss": 0.7655, + "step": 4283 + }, + { + "epoch": 0.22016651248843663, + "grad_norm": 1.102044939994812, + "learning_rate": 9.081361667882883e-06, + "loss": 0.7485, + "step": 4284 + }, + { + "epoch": 0.22021790523178128, + "grad_norm": 0.9537819623947144, + "learning_rate": 9.080880845409324e-06, + "loss": 0.717, + "step": 4285 + }, + { + "epoch": 0.2202692979751259, + "grad_norm": 1.2634668350219727, + "learning_rate": 9.080399909871395e-06, + "loss": 0.7778, + "step": 4286 + }, + { + "epoch": 0.22032069071847055, + "grad_norm": 1.194694995880127, + "learning_rate": 9.079918861282427e-06, + "loss": 0.781, + "step": 4287 + }, + { + "epoch": 0.2203720834618152, + "grad_norm": 1.0634434223175049, + "learning_rate": 9.079437699655743e-06, + "loss": 0.7688, + "step": 4288 + }, + { + "epoch": 0.22042347620515984, + "grad_norm": 0.8530346751213074, + "learning_rate": 9.078956425004672e-06, + "loss": 0.6735, + "step": 4289 + }, + { + "epoch": 0.22047486894850446, + "grad_norm": 1.0793602466583252, + "learning_rate": 9.078475037342555e-06, + "loss": 0.7694, + "step": 4290 + }, + { + "epoch": 0.2205262616918491, + "grad_norm": 1.0319033861160278, + "learning_rate": 9.077993536682723e-06, + "loss": 0.7633, + "step": 4291 + }, + { + "epoch": 0.22057765443519375, + "grad_norm": 0.7619673609733582, + "learning_rate": 9.07751192303852e-06, + "loss": 0.6922, + "step": 4292 + }, + { + "epoch": 0.2206290471785384, + "grad_norm": 1.058688759803772, + "learning_rate": 9.07703019642329e-06, + "loss": 0.7637, + "step": 4293 + }, + { + "epoch": 0.22068043992188302, + "grad_norm": 1.0879544019699097, + "learning_rate": 9.076548356850376e-06, + "loss": 0.7912, + "step": 4294 + }, + { + "epoch": 0.22073183266522767, + "grad_norm": 1.1209956407546997, + "learning_rate": 9.076066404333132e-06, + "loss": 0.7812, + "step": 4295 + }, + { + "epoch": 0.22078322540857231, + "grad_norm": 1.112785816192627, + "learning_rate": 9.075584338884907e-06, + "loss": 0.814, + "step": 4296 + }, + { + "epoch": 0.22083461815191696, + "grad_norm": 1.204616665840149, + "learning_rate": 9.07510216051906e-06, + "loss": 0.8328, + "step": 4297 + }, + { + "epoch": 0.22088601089526158, + "grad_norm": 1.1497029066085815, + "learning_rate": 9.07461986924895e-06, + "loss": 0.7882, + "step": 4298 + }, + { + "epoch": 0.22093740363860623, + "grad_norm": 1.06961190700531, + "learning_rate": 9.074137465087935e-06, + "loss": 0.8282, + "step": 4299 + }, + { + "epoch": 0.22098879638195087, + "grad_norm": 1.0907753705978394, + "learning_rate": 9.073654948049387e-06, + "loss": 0.7823, + "step": 4300 + }, + { + "epoch": 0.22104018912529552, + "grad_norm": 0.8082263469696045, + "learning_rate": 9.073172318146671e-06, + "loss": 0.7314, + "step": 4301 + }, + { + "epoch": 0.22109158186864014, + "grad_norm": 1.1509058475494385, + "learning_rate": 9.07268957539316e-06, + "loss": 0.8147, + "step": 4302 + }, + { + "epoch": 0.2211429746119848, + "grad_norm": 1.0601377487182617, + "learning_rate": 9.072206719802225e-06, + "loss": 0.7164, + "step": 4303 + }, + { + "epoch": 0.22119436735532944, + "grad_norm": 1.0618751049041748, + "learning_rate": 9.071723751387247e-06, + "loss": 0.7634, + "step": 4304 + }, + { + "epoch": 0.22124576009867405, + "grad_norm": 1.1580489873886108, + "learning_rate": 9.071240670161609e-06, + "loss": 0.7804, + "step": 4305 + }, + { + "epoch": 0.2212971528420187, + "grad_norm": 1.1457750797271729, + "learning_rate": 9.070757476138692e-06, + "loss": 0.808, + "step": 4306 + }, + { + "epoch": 0.22134854558536335, + "grad_norm": 0.748276948928833, + "learning_rate": 9.070274169331884e-06, + "loss": 0.7648, + "step": 4307 + }, + { + "epoch": 0.221399938328708, + "grad_norm": 1.173012137413025, + "learning_rate": 9.069790749754576e-06, + "loss": 0.8053, + "step": 4308 + }, + { + "epoch": 0.22145133107205262, + "grad_norm": 1.1354033946990967, + "learning_rate": 9.069307217420163e-06, + "loss": 0.79, + "step": 4309 + }, + { + "epoch": 0.22150272381539726, + "grad_norm": 1.0960289239883423, + "learning_rate": 9.068823572342038e-06, + "loss": 0.7786, + "step": 4310 + }, + { + "epoch": 0.2215541165587419, + "grad_norm": 1.0697039365768433, + "learning_rate": 9.068339814533602e-06, + "loss": 0.8123, + "step": 4311 + }, + { + "epoch": 0.22160550930208656, + "grad_norm": 1.0992567539215088, + "learning_rate": 9.06785594400826e-06, + "loss": 0.7684, + "step": 4312 + }, + { + "epoch": 0.22165690204543118, + "grad_norm": 1.0429848432540894, + "learning_rate": 9.067371960779419e-06, + "loss": 0.7642, + "step": 4313 + }, + { + "epoch": 0.22170829478877582, + "grad_norm": 1.074609398841858, + "learning_rate": 9.066887864860483e-06, + "loss": 0.7397, + "step": 4314 + }, + { + "epoch": 0.22175968753212047, + "grad_norm": 1.0766611099243164, + "learning_rate": 9.066403656264868e-06, + "loss": 0.7811, + "step": 4315 + }, + { + "epoch": 0.22181108027546512, + "grad_norm": 1.1003326177597046, + "learning_rate": 9.065919335005989e-06, + "loss": 0.8023, + "step": 4316 + }, + { + "epoch": 0.22186247301880974, + "grad_norm": 1.0612140893936157, + "learning_rate": 9.065434901097266e-06, + "loss": 0.7807, + "step": 4317 + }, + { + "epoch": 0.22191386576215438, + "grad_norm": 1.0972939729690552, + "learning_rate": 9.064950354552115e-06, + "loss": 0.7792, + "step": 4318 + }, + { + "epoch": 0.22196525850549903, + "grad_norm": 1.1753911972045898, + "learning_rate": 9.064465695383968e-06, + "loss": 0.8264, + "step": 4319 + }, + { + "epoch": 0.22201665124884365, + "grad_norm": 1.1278507709503174, + "learning_rate": 9.063980923606249e-06, + "loss": 0.7913, + "step": 4320 + }, + { + "epoch": 0.2220680439921883, + "grad_norm": 1.1230570077896118, + "learning_rate": 9.063496039232389e-06, + "loss": 0.8273, + "step": 4321 + }, + { + "epoch": 0.22211943673553294, + "grad_norm": 1.1253111362457275, + "learning_rate": 9.063011042275824e-06, + "loss": 0.7372, + "step": 4322 + }, + { + "epoch": 0.2221708294788776, + "grad_norm": 0.8204745054244995, + "learning_rate": 9.06252593274999e-06, + "loss": 0.6768, + "step": 4323 + }, + { + "epoch": 0.2222222222222222, + "grad_norm": 1.1202855110168457, + "learning_rate": 9.062040710668325e-06, + "loss": 0.7912, + "step": 4324 + }, + { + "epoch": 0.22227361496556686, + "grad_norm": 1.126147985458374, + "learning_rate": 9.061555376044278e-06, + "loss": 0.7511, + "step": 4325 + }, + { + "epoch": 0.2223250077089115, + "grad_norm": 0.9461855292320251, + "learning_rate": 9.061069928891291e-06, + "loss": 0.7137, + "step": 4326 + }, + { + "epoch": 0.22237640045225615, + "grad_norm": 1.1177200078964233, + "learning_rate": 9.060584369222816e-06, + "loss": 0.7502, + "step": 4327 + }, + { + "epoch": 0.22242779319560077, + "grad_norm": 1.214698076248169, + "learning_rate": 9.060098697052304e-06, + "loss": 0.7886, + "step": 4328 + }, + { + "epoch": 0.22247918593894542, + "grad_norm": 1.0965791940689087, + "learning_rate": 9.059612912393213e-06, + "loss": 0.7515, + "step": 4329 + }, + { + "epoch": 0.22253057868229006, + "grad_norm": 1.1039177179336548, + "learning_rate": 9.059127015259002e-06, + "loss": 0.8275, + "step": 4330 + }, + { + "epoch": 0.2225819714256347, + "grad_norm": 0.741967499256134, + "learning_rate": 9.05864100566313e-06, + "loss": 0.6929, + "step": 4331 + }, + { + "epoch": 0.22263336416897933, + "grad_norm": 1.0916329622268677, + "learning_rate": 9.058154883619068e-06, + "loss": 0.8796, + "step": 4332 + }, + { + "epoch": 0.22268475691232398, + "grad_norm": 1.04004967212677, + "learning_rate": 9.05766864914028e-06, + "loss": 0.7688, + "step": 4333 + }, + { + "epoch": 0.22273614965566862, + "grad_norm": 1.1143995523452759, + "learning_rate": 9.057182302240237e-06, + "loss": 0.7307, + "step": 4334 + }, + { + "epoch": 0.22278754239901327, + "grad_norm": 1.0955370664596558, + "learning_rate": 9.056695842932417e-06, + "loss": 0.7393, + "step": 4335 + }, + { + "epoch": 0.2228389351423579, + "grad_norm": 0.7515177726745605, + "learning_rate": 9.056209271230296e-06, + "loss": 0.7327, + "step": 4336 + }, + { + "epoch": 0.22289032788570254, + "grad_norm": 1.0424476861953735, + "learning_rate": 9.055722587147356e-06, + "loss": 0.8171, + "step": 4337 + }, + { + "epoch": 0.22294172062904719, + "grad_norm": 1.1556648015975952, + "learning_rate": 9.055235790697077e-06, + "loss": 0.8175, + "step": 4338 + }, + { + "epoch": 0.2229931133723918, + "grad_norm": 1.1507762670516968, + "learning_rate": 9.054748881892952e-06, + "loss": 0.7655, + "step": 4339 + }, + { + "epoch": 0.22304450611573645, + "grad_norm": 1.1124593019485474, + "learning_rate": 9.054261860748468e-06, + "loss": 0.7441, + "step": 4340 + }, + { + "epoch": 0.2230958988590811, + "grad_norm": 1.1042824983596802, + "learning_rate": 9.053774727277119e-06, + "loss": 0.7626, + "step": 4341 + }, + { + "epoch": 0.22314729160242575, + "grad_norm": 1.2667195796966553, + "learning_rate": 9.0532874814924e-06, + "loss": 0.821, + "step": 4342 + }, + { + "epoch": 0.22319868434577037, + "grad_norm": 1.2013894319534302, + "learning_rate": 9.052800123407812e-06, + "loss": 0.8227, + "step": 4343 + }, + { + "epoch": 0.223250077089115, + "grad_norm": 1.1458237171173096, + "learning_rate": 9.052312653036858e-06, + "loss": 0.7942, + "step": 4344 + }, + { + "epoch": 0.22330146983245966, + "grad_norm": 1.0957010984420776, + "learning_rate": 9.051825070393043e-06, + "loss": 0.8045, + "step": 4345 + }, + { + "epoch": 0.2233528625758043, + "grad_norm": 1.116281270980835, + "learning_rate": 9.051337375489877e-06, + "loss": 0.7745, + "step": 4346 + }, + { + "epoch": 0.22340425531914893, + "grad_norm": 0.82821124792099, + "learning_rate": 9.050849568340872e-06, + "loss": 0.6804, + "step": 4347 + }, + { + "epoch": 0.22345564806249357, + "grad_norm": 1.11138916015625, + "learning_rate": 9.05036164895954e-06, + "loss": 0.7884, + "step": 4348 + }, + { + "epoch": 0.22350704080583822, + "grad_norm": 1.0637050867080688, + "learning_rate": 9.049873617359404e-06, + "loss": 0.7817, + "step": 4349 + }, + { + "epoch": 0.22355843354918287, + "grad_norm": 1.1221212148666382, + "learning_rate": 9.04938547355398e-06, + "loss": 0.84, + "step": 4350 + }, + { + "epoch": 0.22360982629252749, + "grad_norm": 0.7269451022148132, + "learning_rate": 9.048897217556798e-06, + "loss": 0.73, + "step": 4351 + }, + { + "epoch": 0.22366121903587213, + "grad_norm": 1.0613042116165161, + "learning_rate": 9.04840884938138e-06, + "loss": 0.8104, + "step": 4352 + }, + { + "epoch": 0.22371261177921678, + "grad_norm": 1.072580099105835, + "learning_rate": 9.047920369041264e-06, + "loss": 0.8032, + "step": 4353 + }, + { + "epoch": 0.22376400452256143, + "grad_norm": 1.1029006242752075, + "learning_rate": 9.047431776549976e-06, + "loss": 0.7955, + "step": 4354 + }, + { + "epoch": 0.22381539726590605, + "grad_norm": 4.790678024291992, + "learning_rate": 9.046943071921057e-06, + "loss": 0.854, + "step": 4355 + }, + { + "epoch": 0.2238667900092507, + "grad_norm": 0.8257871270179749, + "learning_rate": 9.046454255168045e-06, + "loss": 0.6975, + "step": 4356 + }, + { + "epoch": 0.22391818275259534, + "grad_norm": 1.0733985900878906, + "learning_rate": 9.045965326304486e-06, + "loss": 0.806, + "step": 4357 + }, + { + "epoch": 0.22396957549593996, + "grad_norm": 1.1073120832443237, + "learning_rate": 9.045476285343924e-06, + "loss": 0.7573, + "step": 4358 + }, + { + "epoch": 0.2240209682392846, + "grad_norm": 1.1115367412567139, + "learning_rate": 9.044987132299909e-06, + "loss": 0.7826, + "step": 4359 + }, + { + "epoch": 0.22407236098262925, + "grad_norm": 1.1194162368774414, + "learning_rate": 9.044497867185994e-06, + "loss": 0.7762, + "step": 4360 + }, + { + "epoch": 0.2241237537259739, + "grad_norm": 1.3852365016937256, + "learning_rate": 9.044008490015731e-06, + "loss": 0.8142, + "step": 4361 + }, + { + "epoch": 0.22417514646931852, + "grad_norm": 0.9488205909729004, + "learning_rate": 9.043519000802683e-06, + "loss": 0.7508, + "step": 4362 + }, + { + "epoch": 0.22422653921266317, + "grad_norm": 1.0546590089797974, + "learning_rate": 9.043029399560412e-06, + "loss": 0.7599, + "step": 4363 + }, + { + "epoch": 0.22427793195600781, + "grad_norm": 1.0679047107696533, + "learning_rate": 9.04253968630248e-06, + "loss": 0.7923, + "step": 4364 + }, + { + "epoch": 0.22432932469935246, + "grad_norm": 1.0072203874588013, + "learning_rate": 9.042049861042455e-06, + "loss": 0.797, + "step": 4365 + }, + { + "epoch": 0.22438071744269708, + "grad_norm": 1.1805046796798706, + "learning_rate": 9.041559923793908e-06, + "loss": 0.828, + "step": 4366 + }, + { + "epoch": 0.22443211018604173, + "grad_norm": 0.8413265347480774, + "learning_rate": 9.041069874570416e-06, + "loss": 0.673, + "step": 4367 + }, + { + "epoch": 0.22448350292938637, + "grad_norm": 1.0767192840576172, + "learning_rate": 9.040579713385553e-06, + "loss": 0.8117, + "step": 4368 + }, + { + "epoch": 0.22453489567273102, + "grad_norm": 1.0955297946929932, + "learning_rate": 9.040089440252901e-06, + "loss": 0.7536, + "step": 4369 + }, + { + "epoch": 0.22458628841607564, + "grad_norm": 1.06802237033844, + "learning_rate": 9.039599055186044e-06, + "loss": 0.8012, + "step": 4370 + }, + { + "epoch": 0.2246376811594203, + "grad_norm": 1.1725267171859741, + "learning_rate": 9.039108558198566e-06, + "loss": 0.8195, + "step": 4371 + }, + { + "epoch": 0.22468907390276494, + "grad_norm": 1.1977851390838623, + "learning_rate": 9.038617949304058e-06, + "loss": 0.8276, + "step": 4372 + }, + { + "epoch": 0.22474046664610958, + "grad_norm": 1.0465898513793945, + "learning_rate": 9.038127228516114e-06, + "loss": 0.7673, + "step": 4373 + }, + { + "epoch": 0.2247918593894542, + "grad_norm": 1.0858691930770874, + "learning_rate": 9.03763639584833e-06, + "loss": 0.7972, + "step": 4374 + }, + { + "epoch": 0.22484325213279885, + "grad_norm": 1.0539582967758179, + "learning_rate": 9.037145451314303e-06, + "loss": 0.7816, + "step": 4375 + }, + { + "epoch": 0.2248946448761435, + "grad_norm": 1.0489228963851929, + "learning_rate": 9.036654394927635e-06, + "loss": 0.7472, + "step": 4376 + }, + { + "epoch": 0.22494603761948812, + "grad_norm": 1.2524515390396118, + "learning_rate": 9.036163226701933e-06, + "loss": 0.8101, + "step": 4377 + }, + { + "epoch": 0.22499743036283276, + "grad_norm": 1.1080033779144287, + "learning_rate": 9.035671946650803e-06, + "loss": 0.8068, + "step": 4378 + }, + { + "epoch": 0.2250488231061774, + "grad_norm": 0.7447730898857117, + "learning_rate": 9.035180554787859e-06, + "loss": 0.7079, + "step": 4379 + }, + { + "epoch": 0.22510021584952206, + "grad_norm": 0.7839935421943665, + "learning_rate": 9.034689051126712e-06, + "loss": 0.7158, + "step": 4380 + }, + { + "epoch": 0.22515160859286668, + "grad_norm": 1.1289641857147217, + "learning_rate": 9.034197435680984e-06, + "loss": 0.7953, + "step": 4381 + }, + { + "epoch": 0.22520300133621132, + "grad_norm": 1.193946361541748, + "learning_rate": 9.033705708464291e-06, + "loss": 0.8185, + "step": 4382 + }, + { + "epoch": 0.22525439407955597, + "grad_norm": 1.0938438177108765, + "learning_rate": 9.033213869490261e-06, + "loss": 0.7964, + "step": 4383 + }, + { + "epoch": 0.22530578682290062, + "grad_norm": 1.1532719135284424, + "learning_rate": 9.032721918772518e-06, + "loss": 0.8084, + "step": 4384 + }, + { + "epoch": 0.22535717956624524, + "grad_norm": 1.2949411869049072, + "learning_rate": 9.032229856324693e-06, + "loss": 0.74, + "step": 4385 + }, + { + "epoch": 0.22540857230958988, + "grad_norm": 1.1437602043151855, + "learning_rate": 9.031737682160418e-06, + "loss": 0.7805, + "step": 4386 + }, + { + "epoch": 0.22545996505293453, + "grad_norm": 1.0841890573501587, + "learning_rate": 9.031245396293332e-06, + "loss": 0.7745, + "step": 4387 + }, + { + "epoch": 0.22551135779627918, + "grad_norm": 0.8282328844070435, + "learning_rate": 9.030752998737069e-06, + "loss": 0.7187, + "step": 4388 + }, + { + "epoch": 0.2255627505396238, + "grad_norm": 1.1629488468170166, + "learning_rate": 9.030260489505276e-06, + "loss": 0.7829, + "step": 4389 + }, + { + "epoch": 0.22561414328296844, + "grad_norm": 1.0477409362792969, + "learning_rate": 9.029767868611597e-06, + "loss": 0.7615, + "step": 4390 + }, + { + "epoch": 0.2256655360263131, + "grad_norm": 1.159829020500183, + "learning_rate": 9.029275136069678e-06, + "loss": 0.7608, + "step": 4391 + }, + { + "epoch": 0.22571692876965774, + "grad_norm": 1.1267942190170288, + "learning_rate": 9.028782291893174e-06, + "loss": 0.8103, + "step": 4392 + }, + { + "epoch": 0.22576832151300236, + "grad_norm": 1.0907851457595825, + "learning_rate": 9.02828933609574e-06, + "loss": 0.7784, + "step": 4393 + }, + { + "epoch": 0.225819714256347, + "grad_norm": 1.163543939590454, + "learning_rate": 9.02779626869103e-06, + "loss": 0.798, + "step": 4394 + }, + { + "epoch": 0.22587110699969165, + "grad_norm": 1.0861319303512573, + "learning_rate": 9.027303089692709e-06, + "loss": 0.8274, + "step": 4395 + }, + { + "epoch": 0.22592249974303627, + "grad_norm": 1.1938151121139526, + "learning_rate": 9.026809799114438e-06, + "loss": 0.7648, + "step": 4396 + }, + { + "epoch": 0.22597389248638092, + "grad_norm": 1.1090257167816162, + "learning_rate": 9.026316396969886e-06, + "loss": 0.7908, + "step": 4397 + }, + { + "epoch": 0.22602528522972556, + "grad_norm": 1.1303318738937378, + "learning_rate": 9.025822883272721e-06, + "loss": 0.8011, + "step": 4398 + }, + { + "epoch": 0.2260766779730702, + "grad_norm": 1.145537257194519, + "learning_rate": 9.02532925803662e-06, + "loss": 0.7724, + "step": 4399 + }, + { + "epoch": 0.22612807071641483, + "grad_norm": 1.1233175992965698, + "learning_rate": 9.024835521275254e-06, + "loss": 0.8114, + "step": 4400 + }, + { + "epoch": 0.22617946345975948, + "grad_norm": 1.113986611366272, + "learning_rate": 9.024341673002306e-06, + "loss": 0.821, + "step": 4401 + }, + { + "epoch": 0.22623085620310412, + "grad_norm": 1.1142867803573608, + "learning_rate": 9.023847713231457e-06, + "loss": 0.8513, + "step": 4402 + }, + { + "epoch": 0.22628224894644877, + "grad_norm": 1.0345150232315063, + "learning_rate": 9.023353641976395e-06, + "loss": 0.7772, + "step": 4403 + }, + { + "epoch": 0.2263336416897934, + "grad_norm": 1.0581997632980347, + "learning_rate": 9.022859459250806e-06, + "loss": 0.7977, + "step": 4404 + }, + { + "epoch": 0.22638503443313804, + "grad_norm": 1.0816503763198853, + "learning_rate": 9.022365165068383e-06, + "loss": 0.7146, + "step": 4405 + }, + { + "epoch": 0.22643642717648269, + "grad_norm": 1.1069536209106445, + "learning_rate": 9.02187075944282e-06, + "loss": 0.7615, + "step": 4406 + }, + { + "epoch": 0.22648781991982733, + "grad_norm": 1.1262763738632202, + "learning_rate": 9.021376242387816e-06, + "loss": 0.8341, + "step": 4407 + }, + { + "epoch": 0.22653921266317195, + "grad_norm": 1.0377700328826904, + "learning_rate": 9.020881613917071e-06, + "loss": 0.7573, + "step": 4408 + }, + { + "epoch": 0.2265906054065166, + "grad_norm": 1.2070157527923584, + "learning_rate": 9.020386874044292e-06, + "loss": 0.7885, + "step": 4409 + }, + { + "epoch": 0.22664199814986125, + "grad_norm": 1.1245477199554443, + "learning_rate": 9.019892022783182e-06, + "loss": 0.7898, + "step": 4410 + }, + { + "epoch": 0.2266933908932059, + "grad_norm": 1.2579352855682373, + "learning_rate": 9.019397060147453e-06, + "loss": 0.7415, + "step": 4411 + }, + { + "epoch": 0.2267447836365505, + "grad_norm": 1.1080354452133179, + "learning_rate": 9.018901986150818e-06, + "loss": 0.7613, + "step": 4412 + }, + { + "epoch": 0.22679617637989516, + "grad_norm": 1.0786489248275757, + "learning_rate": 9.018406800806996e-06, + "loss": 0.7787, + "step": 4413 + }, + { + "epoch": 0.2268475691232398, + "grad_norm": 1.088999629020691, + "learning_rate": 9.017911504129704e-06, + "loss": 0.7566, + "step": 4414 + }, + { + "epoch": 0.22689896186658443, + "grad_norm": 1.1700572967529297, + "learning_rate": 9.017416096132666e-06, + "loss": 0.8157, + "step": 4415 + }, + { + "epoch": 0.22695035460992907, + "grad_norm": 1.074612021446228, + "learning_rate": 9.016920576829605e-06, + "loss": 0.7906, + "step": 4416 + }, + { + "epoch": 0.22700174735327372, + "grad_norm": 1.0596516132354736, + "learning_rate": 9.01642494623425e-06, + "loss": 0.8096, + "step": 4417 + }, + { + "epoch": 0.22705314009661837, + "grad_norm": 1.140804648399353, + "learning_rate": 9.015929204360338e-06, + "loss": 0.7971, + "step": 4418 + }, + { + "epoch": 0.22710453283996299, + "grad_norm": 0.7986681461334229, + "learning_rate": 9.015433351221599e-06, + "loss": 0.7308, + "step": 4419 + }, + { + "epoch": 0.22715592558330763, + "grad_norm": 1.0625759363174438, + "learning_rate": 9.014937386831772e-06, + "loss": 0.7755, + "step": 4420 + }, + { + "epoch": 0.22720731832665228, + "grad_norm": 1.0955561399459839, + "learning_rate": 9.014441311204601e-06, + "loss": 0.8003, + "step": 4421 + }, + { + "epoch": 0.22725871106999693, + "grad_norm": 0.7873517870903015, + "learning_rate": 9.013945124353825e-06, + "loss": 0.6508, + "step": 4422 + }, + { + "epoch": 0.22731010381334155, + "grad_norm": 1.1384801864624023, + "learning_rate": 9.013448826293197e-06, + "loss": 0.7577, + "step": 4423 + }, + { + "epoch": 0.2273614965566862, + "grad_norm": 1.138342022895813, + "learning_rate": 9.012952417036462e-06, + "loss": 0.811, + "step": 4424 + }, + { + "epoch": 0.22741288930003084, + "grad_norm": 1.2481677532196045, + "learning_rate": 9.012455896597376e-06, + "loss": 0.8544, + "step": 4425 + }, + { + "epoch": 0.2274642820433755, + "grad_norm": 1.1288131475448608, + "learning_rate": 9.011959264989698e-06, + "loss": 0.7707, + "step": 4426 + }, + { + "epoch": 0.2275156747867201, + "grad_norm": 1.0423487424850464, + "learning_rate": 9.011462522227184e-06, + "loss": 0.7593, + "step": 4427 + }, + { + "epoch": 0.22756706753006475, + "grad_norm": 1.0508102178573608, + "learning_rate": 9.010965668323596e-06, + "loss": 0.7695, + "step": 4428 + }, + { + "epoch": 0.2276184602734094, + "grad_norm": 1.0805801153182983, + "learning_rate": 9.010468703292703e-06, + "loss": 0.7554, + "step": 4429 + }, + { + "epoch": 0.22766985301675405, + "grad_norm": 0.8440739512443542, + "learning_rate": 9.009971627148272e-06, + "loss": 0.6645, + "step": 4430 + }, + { + "epoch": 0.22772124576009867, + "grad_norm": 1.0897002220153809, + "learning_rate": 9.009474439904077e-06, + "loss": 0.8681, + "step": 4431 + }, + { + "epoch": 0.22777263850344331, + "grad_norm": 1.0401653051376343, + "learning_rate": 9.008977141573892e-06, + "loss": 0.7388, + "step": 4432 + }, + { + "epoch": 0.22782403124678796, + "grad_norm": 1.361769676208496, + "learning_rate": 9.008479732171491e-06, + "loss": 0.8219, + "step": 4433 + }, + { + "epoch": 0.22787542399013258, + "grad_norm": 1.1984179019927979, + "learning_rate": 9.007982211710662e-06, + "loss": 0.8121, + "step": 4434 + }, + { + "epoch": 0.22792681673347723, + "grad_norm": 0.7784889936447144, + "learning_rate": 9.007484580205183e-06, + "loss": 0.7179, + "step": 4435 + }, + { + "epoch": 0.22797820947682187, + "grad_norm": 1.1230177879333496, + "learning_rate": 9.006986837668848e-06, + "loss": 0.7252, + "step": 4436 + }, + { + "epoch": 0.22802960222016652, + "grad_norm": 0.8413630723953247, + "learning_rate": 9.00648898411544e-06, + "loss": 0.7143, + "step": 4437 + }, + { + "epoch": 0.22808099496351114, + "grad_norm": 1.2831002473831177, + "learning_rate": 9.005991019558757e-06, + "loss": 0.7109, + "step": 4438 + }, + { + "epoch": 0.2281323877068558, + "grad_norm": 1.1145620346069336, + "learning_rate": 9.005492944012596e-06, + "loss": 0.7894, + "step": 4439 + }, + { + "epoch": 0.22818378045020044, + "grad_norm": 1.246866226196289, + "learning_rate": 9.004994757490753e-06, + "loss": 0.7372, + "step": 4440 + }, + { + "epoch": 0.22823517319354508, + "grad_norm": 1.0651546716690063, + "learning_rate": 9.004496460007033e-06, + "loss": 0.7823, + "step": 4441 + }, + { + "epoch": 0.2282865659368897, + "grad_norm": 1.1191308498382568, + "learning_rate": 9.003998051575245e-06, + "loss": 0.8379, + "step": 4442 + }, + { + "epoch": 0.22833795868023435, + "grad_norm": 1.1205394268035889, + "learning_rate": 9.003499532209192e-06, + "loss": 0.8078, + "step": 4443 + }, + { + "epoch": 0.228389351423579, + "grad_norm": 1.1528823375701904, + "learning_rate": 9.003000901922686e-06, + "loss": 0.7854, + "step": 4444 + }, + { + "epoch": 0.22844074416692364, + "grad_norm": 0.8207101225852966, + "learning_rate": 9.00250216072955e-06, + "loss": 0.7128, + "step": 4445 + }, + { + "epoch": 0.22849213691026826, + "grad_norm": 1.1122715473175049, + "learning_rate": 9.002003308643593e-06, + "loss": 0.794, + "step": 4446 + }, + { + "epoch": 0.2285435296536129, + "grad_norm": 1.116278052330017, + "learning_rate": 9.00150434567864e-06, + "loss": 0.7944, + "step": 4447 + }, + { + "epoch": 0.22859492239695756, + "grad_norm": 0.7807151675224304, + "learning_rate": 9.001005271848514e-06, + "loss": 0.6728, + "step": 4448 + }, + { + "epoch": 0.2286463151403022, + "grad_norm": 1.1525764465332031, + "learning_rate": 9.000506087167045e-06, + "loss": 0.8539, + "step": 4449 + }, + { + "epoch": 0.22869770788364682, + "grad_norm": 1.152514100074768, + "learning_rate": 9.00000679164806e-06, + "loss": 0.7782, + "step": 4450 + }, + { + "epoch": 0.22874910062699147, + "grad_norm": 1.0513501167297363, + "learning_rate": 8.999507385305394e-06, + "loss": 0.8015, + "step": 4451 + }, + { + "epoch": 0.22880049337033612, + "grad_norm": 1.1203625202178955, + "learning_rate": 8.999007868152884e-06, + "loss": 0.8346, + "step": 4452 + }, + { + "epoch": 0.22885188611368074, + "grad_norm": 1.0725682973861694, + "learning_rate": 8.998508240204368e-06, + "loss": 0.8004, + "step": 4453 + }, + { + "epoch": 0.22890327885702538, + "grad_norm": 1.120689034461975, + "learning_rate": 8.998008501473689e-06, + "loss": 0.7415, + "step": 4454 + }, + { + "epoch": 0.22895467160037003, + "grad_norm": 0.9727587699890137, + "learning_rate": 8.997508651974695e-06, + "loss": 0.6759, + "step": 4455 + }, + { + "epoch": 0.22900606434371468, + "grad_norm": 1.18680739402771, + "learning_rate": 8.997008691721232e-06, + "loss": 0.7564, + "step": 4456 + }, + { + "epoch": 0.2290574570870593, + "grad_norm": 1.1619582176208496, + "learning_rate": 8.996508620727153e-06, + "loss": 0.7865, + "step": 4457 + }, + { + "epoch": 0.22910884983040394, + "grad_norm": 1.1755377054214478, + "learning_rate": 8.996008439006314e-06, + "loss": 0.7574, + "step": 4458 + }, + { + "epoch": 0.2291602425737486, + "grad_norm": 1.1475725173950195, + "learning_rate": 8.99550814657257e-06, + "loss": 0.8064, + "step": 4459 + }, + { + "epoch": 0.22921163531709324, + "grad_norm": 1.1178922653198242, + "learning_rate": 8.995007743439785e-06, + "loss": 0.758, + "step": 4460 + }, + { + "epoch": 0.22926302806043786, + "grad_norm": 1.1709433794021606, + "learning_rate": 8.99450722962182e-06, + "loss": 0.8076, + "step": 4461 + }, + { + "epoch": 0.2293144208037825, + "grad_norm": 0.8454722166061401, + "learning_rate": 8.994006605132545e-06, + "loss": 0.6948, + "step": 4462 + }, + { + "epoch": 0.22936581354712715, + "grad_norm": 1.1098442077636719, + "learning_rate": 8.99350586998583e-06, + "loss": 0.7606, + "step": 4463 + }, + { + "epoch": 0.2294172062904718, + "grad_norm": 1.0443400144577026, + "learning_rate": 8.993005024195547e-06, + "loss": 0.8005, + "step": 4464 + }, + { + "epoch": 0.22946859903381642, + "grad_norm": 1.1996192932128906, + "learning_rate": 8.992504067775573e-06, + "loss": 0.8142, + "step": 4465 + }, + { + "epoch": 0.22951999177716106, + "grad_norm": 0.8399350643157959, + "learning_rate": 8.992003000739788e-06, + "loss": 0.7124, + "step": 4466 + }, + { + "epoch": 0.2295713845205057, + "grad_norm": 1.1041971445083618, + "learning_rate": 8.991501823102074e-06, + "loss": 0.768, + "step": 4467 + }, + { + "epoch": 0.22962277726385033, + "grad_norm": 1.094053030014038, + "learning_rate": 8.991000534876314e-06, + "loss": 0.8055, + "step": 4468 + }, + { + "epoch": 0.22967417000719498, + "grad_norm": 1.2026352882385254, + "learning_rate": 8.990499136076402e-06, + "loss": 0.8133, + "step": 4469 + }, + { + "epoch": 0.22972556275053962, + "grad_norm": 0.7800304293632507, + "learning_rate": 8.989997626716226e-06, + "loss": 0.6957, + "step": 4470 + }, + { + "epoch": 0.22977695549388427, + "grad_norm": 1.1374176740646362, + "learning_rate": 8.98949600680968e-06, + "loss": 0.858, + "step": 4471 + }, + { + "epoch": 0.2298283482372289, + "grad_norm": 0.7385038137435913, + "learning_rate": 8.988994276370666e-06, + "loss": 0.6652, + "step": 4472 + }, + { + "epoch": 0.22987974098057354, + "grad_norm": 1.0653998851776123, + "learning_rate": 8.988492435413079e-06, + "loss": 0.7535, + "step": 4473 + }, + { + "epoch": 0.22993113372391819, + "grad_norm": 1.1988612413406372, + "learning_rate": 8.987990483950828e-06, + "loss": 0.7988, + "step": 4474 + }, + { + "epoch": 0.22998252646726283, + "grad_norm": 1.2839815616607666, + "learning_rate": 8.987488421997817e-06, + "loss": 0.8316, + "step": 4475 + }, + { + "epoch": 0.23003391921060745, + "grad_norm": 1.1141865253448486, + "learning_rate": 8.98698624956796e-06, + "loss": 0.8094, + "step": 4476 + }, + { + "epoch": 0.2300853119539521, + "grad_norm": 1.1077133417129517, + "learning_rate": 8.986483966675165e-06, + "loss": 0.7718, + "step": 4477 + }, + { + "epoch": 0.23013670469729675, + "grad_norm": 0.812037467956543, + "learning_rate": 8.98598157333335e-06, + "loss": 0.704, + "step": 4478 + }, + { + "epoch": 0.2301880974406414, + "grad_norm": 1.0690003633499146, + "learning_rate": 8.985479069556435e-06, + "loss": 0.8058, + "step": 4479 + }, + { + "epoch": 0.230239490183986, + "grad_norm": 1.1047130823135376, + "learning_rate": 8.984976455358343e-06, + "loss": 0.8027, + "step": 4480 + }, + { + "epoch": 0.23029088292733066, + "grad_norm": 1.0936604738235474, + "learning_rate": 8.984473730752999e-06, + "loss": 0.7636, + "step": 4481 + }, + { + "epoch": 0.2303422756706753, + "grad_norm": 1.1142051219940186, + "learning_rate": 8.98397089575433e-06, + "loss": 0.7486, + "step": 4482 + }, + { + "epoch": 0.23039366841401995, + "grad_norm": 1.1740727424621582, + "learning_rate": 8.983467950376269e-06, + "loss": 0.779, + "step": 4483 + }, + { + "epoch": 0.23044506115736457, + "grad_norm": 1.217950463294983, + "learning_rate": 8.982964894632748e-06, + "loss": 0.8035, + "step": 4484 + }, + { + "epoch": 0.23049645390070922, + "grad_norm": 1.1258676052093506, + "learning_rate": 8.98246172853771e-06, + "loss": 0.7557, + "step": 4485 + }, + { + "epoch": 0.23054784664405387, + "grad_norm": 1.2208675146102905, + "learning_rate": 8.981958452105089e-06, + "loss": 0.8168, + "step": 4486 + }, + { + "epoch": 0.23059923938739849, + "grad_norm": 1.1240817308425903, + "learning_rate": 8.981455065348833e-06, + "loss": 0.8311, + "step": 4487 + }, + { + "epoch": 0.23065063213074313, + "grad_norm": 1.0656596422195435, + "learning_rate": 8.980951568282887e-06, + "loss": 0.8146, + "step": 4488 + }, + { + "epoch": 0.23070202487408778, + "grad_norm": 1.082695722579956, + "learning_rate": 8.980447960921202e-06, + "loss": 0.7956, + "step": 4489 + }, + { + "epoch": 0.23075341761743243, + "grad_norm": 1.1599644422531128, + "learning_rate": 8.979944243277731e-06, + "loss": 0.7998, + "step": 4490 + }, + { + "epoch": 0.23080481036077705, + "grad_norm": 0.8711803555488586, + "learning_rate": 8.979440415366428e-06, + "loss": 0.7813, + "step": 4491 + }, + { + "epoch": 0.2308562031041217, + "grad_norm": 1.0243113040924072, + "learning_rate": 8.978936477201254e-06, + "loss": 0.7542, + "step": 4492 + }, + { + "epoch": 0.23090759584746634, + "grad_norm": 1.1325196027755737, + "learning_rate": 8.978432428796172e-06, + "loss": 0.835, + "step": 4493 + }, + { + "epoch": 0.230958988590811, + "grad_norm": 1.199767827987671, + "learning_rate": 8.977928270165142e-06, + "loss": 0.8232, + "step": 4494 + }, + { + "epoch": 0.2310103813341556, + "grad_norm": 1.0398298501968384, + "learning_rate": 8.977424001322138e-06, + "loss": 0.8073, + "step": 4495 + }, + { + "epoch": 0.23106177407750025, + "grad_norm": 0.7911036610603333, + "learning_rate": 8.97691962228113e-06, + "loss": 0.669, + "step": 4496 + }, + { + "epoch": 0.2311131668208449, + "grad_norm": 1.0952215194702148, + "learning_rate": 8.976415133056086e-06, + "loss": 0.7722, + "step": 4497 + }, + { + "epoch": 0.23116455956418955, + "grad_norm": 1.1566587686538696, + "learning_rate": 8.975910533660991e-06, + "loss": 0.7534, + "step": 4498 + }, + { + "epoch": 0.23121595230753417, + "grad_norm": 1.1169732809066772, + "learning_rate": 8.975405824109824e-06, + "loss": 0.7574, + "step": 4499 + }, + { + "epoch": 0.23126734505087881, + "grad_norm": 1.0392191410064697, + "learning_rate": 8.974901004416566e-06, + "loss": 0.7814, + "step": 4500 + }, + { + "epoch": 0.23131873779422346, + "grad_norm": 1.1220823526382446, + "learning_rate": 8.974396074595206e-06, + "loss": 0.8103, + "step": 4501 + }, + { + "epoch": 0.2313701305375681, + "grad_norm": 1.0670078992843628, + "learning_rate": 8.97389103465973e-06, + "loss": 0.7642, + "step": 4502 + }, + { + "epoch": 0.23142152328091273, + "grad_norm": 1.1362812519073486, + "learning_rate": 8.973385884624134e-06, + "loss": 0.7947, + "step": 4503 + }, + { + "epoch": 0.23147291602425737, + "grad_norm": 1.1523749828338623, + "learning_rate": 8.972880624502412e-06, + "loss": 0.7975, + "step": 4504 + }, + { + "epoch": 0.23152430876760202, + "grad_norm": 1.1217130422592163, + "learning_rate": 8.972375254308563e-06, + "loss": 0.749, + "step": 4505 + }, + { + "epoch": 0.23157570151094664, + "grad_norm": 1.173633337020874, + "learning_rate": 8.971869774056588e-06, + "loss": 0.8364, + "step": 4506 + }, + { + "epoch": 0.2316270942542913, + "grad_norm": 0.8056478500366211, + "learning_rate": 8.971364183760493e-06, + "loss": 0.6664, + "step": 4507 + }, + { + "epoch": 0.23167848699763594, + "grad_norm": 0.7606384754180908, + "learning_rate": 8.970858483434288e-06, + "loss": 0.6788, + "step": 4508 + }, + { + "epoch": 0.23172987974098058, + "grad_norm": 0.7088168859481812, + "learning_rate": 8.970352673091977e-06, + "loss": 0.716, + "step": 4509 + }, + { + "epoch": 0.2317812724843252, + "grad_norm": 0.8459866642951965, + "learning_rate": 8.96984675274758e-06, + "loss": 0.7091, + "step": 4510 + }, + { + "epoch": 0.23183266522766985, + "grad_norm": 1.1425189971923828, + "learning_rate": 8.969340722415112e-06, + "loss": 0.7561, + "step": 4511 + }, + { + "epoch": 0.2318840579710145, + "grad_norm": 0.7219640612602234, + "learning_rate": 8.968834582108595e-06, + "loss": 0.7114, + "step": 4512 + }, + { + "epoch": 0.23193545071435914, + "grad_norm": 1.202033519744873, + "learning_rate": 8.968328331842047e-06, + "loss": 0.8163, + "step": 4513 + }, + { + "epoch": 0.23198684345770376, + "grad_norm": 1.132049798965454, + "learning_rate": 8.967821971629498e-06, + "loss": 0.7245, + "step": 4514 + }, + { + "epoch": 0.2320382362010484, + "grad_norm": 1.0010114908218384, + "learning_rate": 8.967315501484976e-06, + "loss": 0.7391, + "step": 4515 + }, + { + "epoch": 0.23208962894439306, + "grad_norm": 1.1907685995101929, + "learning_rate": 8.966808921422517e-06, + "loss": 0.7508, + "step": 4516 + }, + { + "epoch": 0.2321410216877377, + "grad_norm": 1.1338914632797241, + "learning_rate": 8.966302231456149e-06, + "loss": 0.8153, + "step": 4517 + }, + { + "epoch": 0.23219241443108232, + "grad_norm": 0.885730504989624, + "learning_rate": 8.965795431599915e-06, + "loss": 0.6724, + "step": 4518 + }, + { + "epoch": 0.23224380717442697, + "grad_norm": 1.1674913167953491, + "learning_rate": 8.965288521867857e-06, + "loss": 0.8093, + "step": 4519 + }, + { + "epoch": 0.23229519991777162, + "grad_norm": 1.1272701025009155, + "learning_rate": 8.964781502274016e-06, + "loss": 0.7623, + "step": 4520 + }, + { + "epoch": 0.23234659266111626, + "grad_norm": 1.206316590309143, + "learning_rate": 8.964274372832442e-06, + "loss": 0.7989, + "step": 4521 + }, + { + "epoch": 0.23239798540446088, + "grad_norm": 1.1185319423675537, + "learning_rate": 8.963767133557184e-06, + "loss": 0.7863, + "step": 4522 + }, + { + "epoch": 0.23244937814780553, + "grad_norm": 1.223645567893982, + "learning_rate": 8.963259784462297e-06, + "loss": 0.8357, + "step": 4523 + }, + { + "epoch": 0.23250077089115018, + "grad_norm": 1.0890369415283203, + "learning_rate": 8.962752325561838e-06, + "loss": 0.7882, + "step": 4524 + }, + { + "epoch": 0.2325521636344948, + "grad_norm": 1.0306440591812134, + "learning_rate": 8.962244756869864e-06, + "loss": 0.775, + "step": 4525 + }, + { + "epoch": 0.23260355637783944, + "grad_norm": 1.1407660245895386, + "learning_rate": 8.961737078400438e-06, + "loss": 0.771, + "step": 4526 + }, + { + "epoch": 0.2326549491211841, + "grad_norm": 1.143796682357788, + "learning_rate": 8.961229290167629e-06, + "loss": 0.8029, + "step": 4527 + }, + { + "epoch": 0.23270634186452874, + "grad_norm": 1.1385587453842163, + "learning_rate": 8.960721392185502e-06, + "loss": 0.7906, + "step": 4528 + }, + { + "epoch": 0.23275773460787336, + "grad_norm": 1.0587302446365356, + "learning_rate": 8.960213384468131e-06, + "loss": 0.8018, + "step": 4529 + }, + { + "epoch": 0.232809127351218, + "grad_norm": 1.0513598918914795, + "learning_rate": 8.959705267029588e-06, + "loss": 0.7932, + "step": 4530 + }, + { + "epoch": 0.23286052009456265, + "grad_norm": 1.1498783826828003, + "learning_rate": 8.959197039883953e-06, + "loss": 0.8374, + "step": 4531 + }, + { + "epoch": 0.2329119128379073, + "grad_norm": 1.338956356048584, + "learning_rate": 8.958688703045308e-06, + "loss": 0.8174, + "step": 4532 + }, + { + "epoch": 0.23296330558125192, + "grad_norm": 1.1401498317718506, + "learning_rate": 8.958180256527737e-06, + "loss": 0.7903, + "step": 4533 + }, + { + "epoch": 0.23301469832459656, + "grad_norm": 1.056542158126831, + "learning_rate": 8.957671700345323e-06, + "loss": 0.7848, + "step": 4534 + }, + { + "epoch": 0.2330660910679412, + "grad_norm": 1.1784013509750366, + "learning_rate": 8.95716303451216e-06, + "loss": 0.7592, + "step": 4535 + }, + { + "epoch": 0.23311748381128586, + "grad_norm": 1.1189968585968018, + "learning_rate": 8.95665425904234e-06, + "loss": 0.8134, + "step": 4536 + }, + { + "epoch": 0.23316887655463048, + "grad_norm": 1.0967286825180054, + "learning_rate": 8.956145373949956e-06, + "loss": 0.7691, + "step": 4537 + }, + { + "epoch": 0.23322026929797512, + "grad_norm": 1.098055362701416, + "learning_rate": 8.95563637924911e-06, + "loss": 0.8455, + "step": 4538 + }, + { + "epoch": 0.23327166204131977, + "grad_norm": 1.1101347208023071, + "learning_rate": 8.955127274953907e-06, + "loss": 0.79, + "step": 4539 + }, + { + "epoch": 0.23332305478466442, + "grad_norm": 1.1038672924041748, + "learning_rate": 8.954618061078446e-06, + "loss": 0.8121, + "step": 4540 + }, + { + "epoch": 0.23337444752800904, + "grad_norm": 1.1377947330474854, + "learning_rate": 8.95410873763684e-06, + "loss": 0.7491, + "step": 4541 + }, + { + "epoch": 0.23342584027135369, + "grad_norm": 1.0690524578094482, + "learning_rate": 8.9535993046432e-06, + "loss": 0.8045, + "step": 4542 + }, + { + "epoch": 0.23347723301469833, + "grad_norm": 1.1139917373657227, + "learning_rate": 8.953089762111635e-06, + "loss": 0.7898, + "step": 4543 + }, + { + "epoch": 0.23352862575804295, + "grad_norm": 0.8483744859695435, + "learning_rate": 8.952580110056268e-06, + "loss": 0.7127, + "step": 4544 + }, + { + "epoch": 0.2335800185013876, + "grad_norm": 1.1292800903320312, + "learning_rate": 8.952070348491218e-06, + "loss": 0.7718, + "step": 4545 + }, + { + "epoch": 0.23363141124473225, + "grad_norm": 1.0786324739456177, + "learning_rate": 8.951560477430608e-06, + "loss": 0.7644, + "step": 4546 + }, + { + "epoch": 0.2336828039880769, + "grad_norm": 1.1720752716064453, + "learning_rate": 8.951050496888563e-06, + "loss": 0.7792, + "step": 4547 + }, + { + "epoch": 0.2337341967314215, + "grad_norm": 1.1188567876815796, + "learning_rate": 8.950540406879213e-06, + "loss": 0.7516, + "step": 4548 + }, + { + "epoch": 0.23378558947476616, + "grad_norm": 1.1418123245239258, + "learning_rate": 8.950030207416693e-06, + "loss": 0.8257, + "step": 4549 + }, + { + "epoch": 0.2338369822181108, + "grad_norm": 1.0946413278579712, + "learning_rate": 8.949519898515137e-06, + "loss": 0.8446, + "step": 4550 + }, + { + "epoch": 0.23388837496145545, + "grad_norm": 0.9861295223236084, + "learning_rate": 8.949009480188683e-06, + "loss": 0.703, + "step": 4551 + }, + { + "epoch": 0.23393976770480007, + "grad_norm": 1.0266975164413452, + "learning_rate": 8.948498952451471e-06, + "loss": 0.7289, + "step": 4552 + }, + { + "epoch": 0.23399116044814472, + "grad_norm": 0.8530096411705017, + "learning_rate": 8.94798831531765e-06, + "loss": 0.6927, + "step": 4553 + }, + { + "epoch": 0.23404255319148937, + "grad_norm": 1.1061002016067505, + "learning_rate": 8.947477568801365e-06, + "loss": 0.8045, + "step": 4554 + }, + { + "epoch": 0.234093945934834, + "grad_norm": 1.099226951599121, + "learning_rate": 8.946966712916767e-06, + "loss": 0.7296, + "step": 4555 + }, + { + "epoch": 0.23414533867817863, + "grad_norm": 0.7294313311576843, + "learning_rate": 8.946455747678007e-06, + "loss": 0.6967, + "step": 4556 + }, + { + "epoch": 0.23419673142152328, + "grad_norm": 0.7947079539299011, + "learning_rate": 8.945944673099247e-06, + "loss": 0.6819, + "step": 4557 + }, + { + "epoch": 0.23424812416486793, + "grad_norm": 1.1666070222854614, + "learning_rate": 8.945433489194644e-06, + "loss": 0.8125, + "step": 4558 + }, + { + "epoch": 0.23429951690821257, + "grad_norm": 1.1196647882461548, + "learning_rate": 8.944922195978358e-06, + "loss": 0.7334, + "step": 4559 + }, + { + "epoch": 0.2343509096515572, + "grad_norm": 1.1116751432418823, + "learning_rate": 8.94441079346456e-06, + "loss": 0.8016, + "step": 4560 + }, + { + "epoch": 0.23440230239490184, + "grad_norm": 0.8251853585243225, + "learning_rate": 8.943899281667417e-06, + "loss": 0.691, + "step": 4561 + }, + { + "epoch": 0.2344536951382465, + "grad_norm": 1.170778751373291, + "learning_rate": 8.9433876606011e-06, + "loss": 0.7783, + "step": 4562 + }, + { + "epoch": 0.2345050878815911, + "grad_norm": 1.0678445100784302, + "learning_rate": 8.942875930279783e-06, + "loss": 0.8069, + "step": 4563 + }, + { + "epoch": 0.23455648062493575, + "grad_norm": 0.7048591375350952, + "learning_rate": 8.942364090717646e-06, + "loss": 0.6724, + "step": 4564 + }, + { + "epoch": 0.2346078733682804, + "grad_norm": 1.0690581798553467, + "learning_rate": 8.941852141928871e-06, + "loss": 0.8012, + "step": 4565 + }, + { + "epoch": 0.23465926611162505, + "grad_norm": 1.0831481218338013, + "learning_rate": 8.941340083927636e-06, + "loss": 0.7832, + "step": 4566 + }, + { + "epoch": 0.23471065885496967, + "grad_norm": 1.034258484840393, + "learning_rate": 8.940827916728136e-06, + "loss": 0.7502, + "step": 4567 + }, + { + "epoch": 0.23476205159831431, + "grad_norm": 1.0713729858398438, + "learning_rate": 8.940315640344558e-06, + "loss": 0.7836, + "step": 4568 + }, + { + "epoch": 0.23481344434165896, + "grad_norm": 0.7750464677810669, + "learning_rate": 8.93980325479109e-06, + "loss": 0.7455, + "step": 4569 + }, + { + "epoch": 0.2348648370850036, + "grad_norm": 1.0907200574874878, + "learning_rate": 8.939290760081936e-06, + "loss": 0.7765, + "step": 4570 + }, + { + "epoch": 0.23491622982834823, + "grad_norm": 0.7505485415458679, + "learning_rate": 8.93877815623129e-06, + "loss": 0.7139, + "step": 4571 + }, + { + "epoch": 0.23496762257169287, + "grad_norm": 1.0978574752807617, + "learning_rate": 8.938265443253356e-06, + "loss": 0.789, + "step": 4572 + }, + { + "epoch": 0.23501901531503752, + "grad_norm": 1.0731624364852905, + "learning_rate": 8.937752621162338e-06, + "loss": 0.7851, + "step": 4573 + }, + { + "epoch": 0.23507040805838217, + "grad_norm": 1.0606452226638794, + "learning_rate": 8.937239689972447e-06, + "loss": 0.8246, + "step": 4574 + }, + { + "epoch": 0.2351218008017268, + "grad_norm": 1.1022939682006836, + "learning_rate": 8.93672664969789e-06, + "loss": 0.7821, + "step": 4575 + }, + { + "epoch": 0.23517319354507144, + "grad_norm": 1.070448875427246, + "learning_rate": 8.936213500352885e-06, + "loss": 0.7312, + "step": 4576 + }, + { + "epoch": 0.23522458628841608, + "grad_norm": 1.2047014236450195, + "learning_rate": 8.935700241951648e-06, + "loss": 0.7973, + "step": 4577 + }, + { + "epoch": 0.23527597903176073, + "grad_norm": 1.0923635959625244, + "learning_rate": 8.935186874508398e-06, + "loss": 0.7559, + "step": 4578 + }, + { + "epoch": 0.23532737177510535, + "grad_norm": 1.3156719207763672, + "learning_rate": 8.934673398037359e-06, + "loss": 0.695, + "step": 4579 + }, + { + "epoch": 0.23537876451845, + "grad_norm": 1.1310994625091553, + "learning_rate": 8.934159812552758e-06, + "loss": 0.7762, + "step": 4580 + }, + { + "epoch": 0.23543015726179464, + "grad_norm": 1.1260188817977905, + "learning_rate": 8.933646118068823e-06, + "loss": 0.758, + "step": 4581 + }, + { + "epoch": 0.23548155000513926, + "grad_norm": 1.0956807136535645, + "learning_rate": 8.93313231459979e-06, + "loss": 0.747, + "step": 4582 + }, + { + "epoch": 0.2355329427484839, + "grad_norm": 1.0462346076965332, + "learning_rate": 8.932618402159889e-06, + "loss": 0.8148, + "step": 4583 + }, + { + "epoch": 0.23558433549182856, + "grad_norm": 1.121314525604248, + "learning_rate": 8.932104380763363e-06, + "loss": 0.7542, + "step": 4584 + }, + { + "epoch": 0.2356357282351732, + "grad_norm": 0.8486557602882385, + "learning_rate": 8.93159025042445e-06, + "loss": 0.7125, + "step": 4585 + }, + { + "epoch": 0.23568712097851782, + "grad_norm": 1.0845333337783813, + "learning_rate": 8.931076011157395e-06, + "loss": 0.7961, + "step": 4586 + }, + { + "epoch": 0.23573851372186247, + "grad_norm": 1.1341445446014404, + "learning_rate": 8.930561662976447e-06, + "loss": 0.8298, + "step": 4587 + }, + { + "epoch": 0.23578990646520712, + "grad_norm": 1.028184413909912, + "learning_rate": 8.930047205895857e-06, + "loss": 0.7072, + "step": 4588 + }, + { + "epoch": 0.23584129920855176, + "grad_norm": 0.8409712314605713, + "learning_rate": 8.929532639929877e-06, + "loss": 0.7093, + "step": 4589 + }, + { + "epoch": 0.23589269195189638, + "grad_norm": 1.1844570636749268, + "learning_rate": 8.929017965092764e-06, + "loss": 0.7725, + "step": 4590 + }, + { + "epoch": 0.23594408469524103, + "grad_norm": 1.053727149963379, + "learning_rate": 8.928503181398776e-06, + "loss": 0.7503, + "step": 4591 + }, + { + "epoch": 0.23599547743858568, + "grad_norm": 1.0847516059875488, + "learning_rate": 8.927988288862178e-06, + "loss": 0.8394, + "step": 4592 + }, + { + "epoch": 0.23604687018193032, + "grad_norm": 1.1723809242248535, + "learning_rate": 8.927473287497234e-06, + "loss": 0.7766, + "step": 4593 + }, + { + "epoch": 0.23609826292527494, + "grad_norm": 1.328163743019104, + "learning_rate": 8.926958177318212e-06, + "loss": 0.7778, + "step": 4594 + }, + { + "epoch": 0.2361496556686196, + "grad_norm": 1.1360726356506348, + "learning_rate": 8.926442958339385e-06, + "loss": 0.8347, + "step": 4595 + }, + { + "epoch": 0.23620104841196424, + "grad_norm": 1.1715898513793945, + "learning_rate": 8.925927630575029e-06, + "loss": 0.8164, + "step": 4596 + }, + { + "epoch": 0.23625244115530886, + "grad_norm": 1.2126623392105103, + "learning_rate": 8.925412194039418e-06, + "loss": 0.8685, + "step": 4597 + }, + { + "epoch": 0.2363038338986535, + "grad_norm": 0.8496134281158447, + "learning_rate": 8.924896648746833e-06, + "loss": 0.6945, + "step": 4598 + }, + { + "epoch": 0.23635522664199815, + "grad_norm": 0.813201904296875, + "learning_rate": 8.924380994711562e-06, + "loss": 0.696, + "step": 4599 + }, + { + "epoch": 0.2364066193853428, + "grad_norm": 1.106196641921997, + "learning_rate": 8.923865231947886e-06, + "loss": 0.8128, + "step": 4600 + }, + { + "epoch": 0.23645801212868742, + "grad_norm": 1.0879048109054565, + "learning_rate": 8.923349360470097e-06, + "loss": 0.8075, + "step": 4601 + }, + { + "epoch": 0.23650940487203206, + "grad_norm": 1.0690749883651733, + "learning_rate": 8.92283338029249e-06, + "loss": 0.7977, + "step": 4602 + }, + { + "epoch": 0.2365607976153767, + "grad_norm": 1.0636183023452759, + "learning_rate": 8.922317291429359e-06, + "loss": 0.7493, + "step": 4603 + }, + { + "epoch": 0.23661219035872136, + "grad_norm": 1.1910406351089478, + "learning_rate": 8.921801093894999e-06, + "loss": 0.7833, + "step": 4604 + }, + { + "epoch": 0.23666358310206598, + "grad_norm": 1.0821884870529175, + "learning_rate": 8.921284787703719e-06, + "loss": 0.7591, + "step": 4605 + }, + { + "epoch": 0.23671497584541062, + "grad_norm": 1.050493597984314, + "learning_rate": 8.920768372869818e-06, + "loss": 0.7265, + "step": 4606 + }, + { + "epoch": 0.23676636858875527, + "grad_norm": 1.0911407470703125, + "learning_rate": 8.920251849407603e-06, + "loss": 0.7685, + "step": 4607 + }, + { + "epoch": 0.23681776133209992, + "grad_norm": 1.0285933017730713, + "learning_rate": 8.919735217331391e-06, + "loss": 0.7892, + "step": 4608 + }, + { + "epoch": 0.23686915407544454, + "grad_norm": 1.2337404489517212, + "learning_rate": 8.91921847665549e-06, + "loss": 0.7928, + "step": 4609 + }, + { + "epoch": 0.23692054681878918, + "grad_norm": 1.114739179611206, + "learning_rate": 8.91870162739422e-06, + "loss": 0.8135, + "step": 4610 + }, + { + "epoch": 0.23697193956213383, + "grad_norm": 1.0755085945129395, + "learning_rate": 8.918184669561898e-06, + "loss": 0.7435, + "step": 4611 + }, + { + "epoch": 0.23702333230547848, + "grad_norm": 0.9738936424255371, + "learning_rate": 8.917667603172849e-06, + "loss": 0.7545, + "step": 4612 + }, + { + "epoch": 0.2370747250488231, + "grad_norm": 1.1980764865875244, + "learning_rate": 8.917150428241396e-06, + "loss": 0.8395, + "step": 4613 + }, + { + "epoch": 0.23712611779216775, + "grad_norm": 1.0453612804412842, + "learning_rate": 8.916633144781872e-06, + "loss": 0.7529, + "step": 4614 + }, + { + "epoch": 0.2371775105355124, + "grad_norm": 1.217781662940979, + "learning_rate": 8.916115752808606e-06, + "loss": 0.7697, + "step": 4615 + }, + { + "epoch": 0.237228903278857, + "grad_norm": 1.076009750366211, + "learning_rate": 8.915598252335932e-06, + "loss": 0.7917, + "step": 4616 + }, + { + "epoch": 0.23728029602220166, + "grad_norm": 0.8455723524093628, + "learning_rate": 8.91508064337819e-06, + "loss": 0.6762, + "step": 4617 + }, + { + "epoch": 0.2373316887655463, + "grad_norm": 1.0471481084823608, + "learning_rate": 8.914562925949722e-06, + "loss": 0.7684, + "step": 4618 + }, + { + "epoch": 0.23738308150889095, + "grad_norm": 1.0268428325653076, + "learning_rate": 8.914045100064867e-06, + "loss": 0.7982, + "step": 4619 + }, + { + "epoch": 0.23743447425223557, + "grad_norm": 1.1538622379302979, + "learning_rate": 8.913527165737977e-06, + "loss": 0.8277, + "step": 4620 + }, + { + "epoch": 0.23748586699558022, + "grad_norm": 0.8173934817314148, + "learning_rate": 8.913009122983398e-06, + "loss": 0.6387, + "step": 4621 + }, + { + "epoch": 0.23753725973892487, + "grad_norm": 1.1196017265319824, + "learning_rate": 8.912490971815484e-06, + "loss": 0.8006, + "step": 4622 + }, + { + "epoch": 0.2375886524822695, + "grad_norm": 0.7456162571907043, + "learning_rate": 8.911972712248591e-06, + "loss": 0.7491, + "step": 4623 + }, + { + "epoch": 0.23764004522561413, + "grad_norm": 1.1029239892959595, + "learning_rate": 8.911454344297079e-06, + "loss": 0.7641, + "step": 4624 + }, + { + "epoch": 0.23769143796895878, + "grad_norm": 1.128382921218872, + "learning_rate": 8.910935867975309e-06, + "loss": 0.7897, + "step": 4625 + }, + { + "epoch": 0.23774283071230343, + "grad_norm": 1.164041519165039, + "learning_rate": 8.910417283297644e-06, + "loss": 0.8348, + "step": 4626 + }, + { + "epoch": 0.23779422345564807, + "grad_norm": 1.1257448196411133, + "learning_rate": 8.909898590278454e-06, + "loss": 0.8082, + "step": 4627 + }, + { + "epoch": 0.2378456161989927, + "grad_norm": 1.1597113609313965, + "learning_rate": 8.909379788932109e-06, + "loss": 0.7304, + "step": 4628 + }, + { + "epoch": 0.23789700894233734, + "grad_norm": 1.0364222526550293, + "learning_rate": 8.908860879272984e-06, + "loss": 0.7434, + "step": 4629 + }, + { + "epoch": 0.237948401685682, + "grad_norm": 1.1011673212051392, + "learning_rate": 8.908341861315455e-06, + "loss": 0.7693, + "step": 4630 + }, + { + "epoch": 0.23799979442902663, + "grad_norm": 1.011147379875183, + "learning_rate": 8.9078227350739e-06, + "loss": 0.6551, + "step": 4631 + }, + { + "epoch": 0.23805118717237125, + "grad_norm": 1.146329641342163, + "learning_rate": 8.907303500562706e-06, + "loss": 0.8192, + "step": 4632 + }, + { + "epoch": 0.2381025799157159, + "grad_norm": 1.1481460332870483, + "learning_rate": 8.906784157796255e-06, + "loss": 0.8728, + "step": 4633 + }, + { + "epoch": 0.23815397265906055, + "grad_norm": 1.1370223760604858, + "learning_rate": 8.906264706788938e-06, + "loss": 0.8327, + "step": 4634 + }, + { + "epoch": 0.23820536540240517, + "grad_norm": 1.2424817085266113, + "learning_rate": 8.905745147555145e-06, + "loss": 0.8169, + "step": 4635 + }, + { + "epoch": 0.23825675814574981, + "grad_norm": 1.0799906253814697, + "learning_rate": 8.905225480109273e-06, + "loss": 0.7915, + "step": 4636 + }, + { + "epoch": 0.23830815088909446, + "grad_norm": 0.9839283227920532, + "learning_rate": 8.90470570446572e-06, + "loss": 0.6992, + "step": 4637 + }, + { + "epoch": 0.2383595436324391, + "grad_norm": 1.1425650119781494, + "learning_rate": 8.904185820638883e-06, + "loss": 0.805, + "step": 4638 + }, + { + "epoch": 0.23841093637578373, + "grad_norm": 1.0785012245178223, + "learning_rate": 8.903665828643171e-06, + "loss": 0.7964, + "step": 4639 + }, + { + "epoch": 0.23846232911912837, + "grad_norm": 1.0946489572525024, + "learning_rate": 8.903145728492986e-06, + "loss": 0.7881, + "step": 4640 + }, + { + "epoch": 0.23851372186247302, + "grad_norm": 1.0840612649917603, + "learning_rate": 8.902625520202742e-06, + "loss": 0.6918, + "step": 4641 + }, + { + "epoch": 0.23856511460581767, + "grad_norm": 1.1396162509918213, + "learning_rate": 8.90210520378685e-06, + "loss": 0.7199, + "step": 4642 + }, + { + "epoch": 0.2386165073491623, + "grad_norm": 1.1419869661331177, + "learning_rate": 8.901584779259724e-06, + "loss": 0.7596, + "step": 4643 + }, + { + "epoch": 0.23866790009250693, + "grad_norm": 1.1918631792068481, + "learning_rate": 8.901064246635786e-06, + "loss": 0.7865, + "step": 4644 + }, + { + "epoch": 0.23871929283585158, + "grad_norm": 1.1053193807601929, + "learning_rate": 8.900543605929457e-06, + "loss": 0.8104, + "step": 4645 + }, + { + "epoch": 0.23877068557919623, + "grad_norm": 1.0856996774673462, + "learning_rate": 8.900022857155162e-06, + "loss": 0.7859, + "step": 4646 + }, + { + "epoch": 0.23882207832254085, + "grad_norm": 1.0968729257583618, + "learning_rate": 8.899502000327326e-06, + "loss": 0.8558, + "step": 4647 + }, + { + "epoch": 0.2388734710658855, + "grad_norm": 1.1180557012557983, + "learning_rate": 8.898981035460384e-06, + "loss": 0.8288, + "step": 4648 + }, + { + "epoch": 0.23892486380923014, + "grad_norm": 1.1450949907302856, + "learning_rate": 8.898459962568766e-06, + "loss": 0.816, + "step": 4649 + }, + { + "epoch": 0.2389762565525748, + "grad_norm": 1.1149415969848633, + "learning_rate": 8.89793878166691e-06, + "loss": 0.8358, + "step": 4650 + }, + { + "epoch": 0.2390276492959194, + "grad_norm": 1.0573062896728516, + "learning_rate": 8.897417492769258e-06, + "loss": 0.8233, + "step": 4651 + }, + { + "epoch": 0.23907904203926406, + "grad_norm": 1.0443198680877686, + "learning_rate": 8.896896095890249e-06, + "loss": 0.7348, + "step": 4652 + }, + { + "epoch": 0.2391304347826087, + "grad_norm": 0.9232312440872192, + "learning_rate": 8.896374591044333e-06, + "loss": 0.7323, + "step": 4653 + }, + { + "epoch": 0.23918182752595332, + "grad_norm": 1.1453986167907715, + "learning_rate": 8.895852978245953e-06, + "loss": 0.7608, + "step": 4654 + }, + { + "epoch": 0.23923322026929797, + "grad_norm": 1.0893425941467285, + "learning_rate": 8.895331257509567e-06, + "loss": 0.7375, + "step": 4655 + }, + { + "epoch": 0.23928461301264262, + "grad_norm": 0.7401126027107239, + "learning_rate": 8.894809428849626e-06, + "loss": 0.6899, + "step": 4656 + }, + { + "epoch": 0.23933600575598726, + "grad_norm": 0.7751044034957886, + "learning_rate": 8.89428749228059e-06, + "loss": 0.6604, + "step": 4657 + }, + { + "epoch": 0.23938739849933188, + "grad_norm": 1.0986485481262207, + "learning_rate": 8.893765447816916e-06, + "loss": 0.7595, + "step": 4658 + }, + { + "epoch": 0.23943879124267653, + "grad_norm": 0.9008376598358154, + "learning_rate": 8.893243295473074e-06, + "loss": 0.7473, + "step": 4659 + }, + { + "epoch": 0.23949018398602118, + "grad_norm": 1.1359559297561646, + "learning_rate": 8.892721035263523e-06, + "loss": 0.7984, + "step": 4660 + }, + { + "epoch": 0.23954157672936582, + "grad_norm": 1.1289680004119873, + "learning_rate": 8.892198667202737e-06, + "loss": 0.7515, + "step": 4661 + }, + { + "epoch": 0.23959296947271044, + "grad_norm": 1.2171883583068848, + "learning_rate": 8.891676191305189e-06, + "loss": 0.832, + "step": 4662 + }, + { + "epoch": 0.2396443622160551, + "grad_norm": 0.7863712906837463, + "learning_rate": 8.891153607585353e-06, + "loss": 0.6769, + "step": 4663 + }, + { + "epoch": 0.23969575495939974, + "grad_norm": 0.7765997648239136, + "learning_rate": 8.89063091605771e-06, + "loss": 0.7329, + "step": 4664 + }, + { + "epoch": 0.23974714770274438, + "grad_norm": 1.1068962812423706, + "learning_rate": 8.890108116736737e-06, + "loss": 0.8031, + "step": 4665 + }, + { + "epoch": 0.239798540446089, + "grad_norm": 0.8460896015167236, + "learning_rate": 8.889585209636923e-06, + "loss": 0.6946, + "step": 4666 + }, + { + "epoch": 0.23984993318943365, + "grad_norm": 1.1296321153640747, + "learning_rate": 8.889062194772755e-06, + "loss": 0.8177, + "step": 4667 + }, + { + "epoch": 0.2399013259327783, + "grad_norm": 0.9098497033119202, + "learning_rate": 8.888539072158725e-06, + "loss": 0.7137, + "step": 4668 + }, + { + "epoch": 0.23995271867612294, + "grad_norm": 0.872864305973053, + "learning_rate": 8.88801584180932e-06, + "loss": 0.72, + "step": 4669 + }, + { + "epoch": 0.24000411141946756, + "grad_norm": 1.0843358039855957, + "learning_rate": 8.887492503739043e-06, + "loss": 0.799, + "step": 4670 + }, + { + "epoch": 0.2400555041628122, + "grad_norm": 1.0153932571411133, + "learning_rate": 8.886969057962392e-06, + "loss": 0.7932, + "step": 4671 + }, + { + "epoch": 0.24010689690615686, + "grad_norm": 1.439363718032837, + "learning_rate": 8.886445504493867e-06, + "loss": 0.7693, + "step": 4672 + }, + { + "epoch": 0.24015828964950148, + "grad_norm": 0.7604982852935791, + "learning_rate": 8.885921843347976e-06, + "loss": 0.6512, + "step": 4673 + }, + { + "epoch": 0.24020968239284612, + "grad_norm": 1.1090489625930786, + "learning_rate": 8.885398074539229e-06, + "loss": 0.7521, + "step": 4674 + }, + { + "epoch": 0.24026107513619077, + "grad_norm": 1.0481171607971191, + "learning_rate": 8.884874198082133e-06, + "loss": 0.7501, + "step": 4675 + }, + { + "epoch": 0.24031246787953542, + "grad_norm": 1.0286756753921509, + "learning_rate": 8.884350213991206e-06, + "loss": 0.7358, + "step": 4676 + }, + { + "epoch": 0.24036386062288004, + "grad_norm": 1.0527909994125366, + "learning_rate": 8.883826122280963e-06, + "loss": 0.7942, + "step": 4677 + }, + { + "epoch": 0.24041525336622468, + "grad_norm": 1.1009913682937622, + "learning_rate": 8.883301922965929e-06, + "loss": 0.8168, + "step": 4678 + }, + { + "epoch": 0.24046664610956933, + "grad_norm": 1.1306523084640503, + "learning_rate": 8.882777616060621e-06, + "loss": 0.8462, + "step": 4679 + }, + { + "epoch": 0.24051803885291398, + "grad_norm": 1.6010468006134033, + "learning_rate": 8.88225320157957e-06, + "loss": 0.7841, + "step": 4680 + }, + { + "epoch": 0.2405694315962586, + "grad_norm": 0.7657957673072815, + "learning_rate": 8.881728679537303e-06, + "loss": 0.7034, + "step": 4681 + }, + { + "epoch": 0.24062082433960325, + "grad_norm": 1.0416008234024048, + "learning_rate": 8.881204049948355e-06, + "loss": 0.7756, + "step": 4682 + }, + { + "epoch": 0.2406722170829479, + "grad_norm": 1.0818085670471191, + "learning_rate": 8.880679312827259e-06, + "loss": 0.7797, + "step": 4683 + }, + { + "epoch": 0.24072360982629254, + "grad_norm": 1.025872826576233, + "learning_rate": 8.880154468188552e-06, + "loss": 0.7524, + "step": 4684 + }, + { + "epoch": 0.24077500256963716, + "grad_norm": 1.0807186365127563, + "learning_rate": 8.879629516046778e-06, + "loss": 0.816, + "step": 4685 + }, + { + "epoch": 0.2408263953129818, + "grad_norm": 1.1045392751693726, + "learning_rate": 8.87910445641648e-06, + "loss": 0.7695, + "step": 4686 + }, + { + "epoch": 0.24087778805632645, + "grad_norm": 1.1010226011276245, + "learning_rate": 8.878579289312208e-06, + "loss": 0.7887, + "step": 4687 + }, + { + "epoch": 0.2409291807996711, + "grad_norm": 1.0953706502914429, + "learning_rate": 8.878054014748507e-06, + "loss": 0.7692, + "step": 4688 + }, + { + "epoch": 0.24098057354301572, + "grad_norm": 1.2257829904556274, + "learning_rate": 8.877528632739936e-06, + "loss": 0.7809, + "step": 4689 + }, + { + "epoch": 0.24103196628636037, + "grad_norm": 0.9221262335777283, + "learning_rate": 8.877003143301046e-06, + "loss": 0.7037, + "step": 4690 + }, + { + "epoch": 0.241083359029705, + "grad_norm": 0.824110746383667, + "learning_rate": 8.876477546446398e-06, + "loss": 0.7257, + "step": 4691 + }, + { + "epoch": 0.24113475177304963, + "grad_norm": 1.0780715942382812, + "learning_rate": 8.875951842190555e-06, + "loss": 0.7974, + "step": 4692 + }, + { + "epoch": 0.24118614451639428, + "grad_norm": 1.1381309032440186, + "learning_rate": 8.875426030548082e-06, + "loss": 0.8029, + "step": 4693 + }, + { + "epoch": 0.24123753725973893, + "grad_norm": 1.121325135231018, + "learning_rate": 8.874900111533548e-06, + "loss": 0.7681, + "step": 4694 + }, + { + "epoch": 0.24128893000308357, + "grad_norm": 1.0619456768035889, + "learning_rate": 8.87437408516152e-06, + "loss": 0.727, + "step": 4695 + }, + { + "epoch": 0.2413403227464282, + "grad_norm": 1.1301515102386475, + "learning_rate": 8.873847951446577e-06, + "loss": 0.806, + "step": 4696 + }, + { + "epoch": 0.24139171548977284, + "grad_norm": 1.1236293315887451, + "learning_rate": 8.873321710403291e-06, + "loss": 0.8116, + "step": 4697 + }, + { + "epoch": 0.2414431082331175, + "grad_norm": 1.0581533908843994, + "learning_rate": 8.872795362046246e-06, + "loss": 0.7769, + "step": 4698 + }, + { + "epoch": 0.24149450097646213, + "grad_norm": 1.2560044527053833, + "learning_rate": 8.872268906390025e-06, + "loss": 0.8299, + "step": 4699 + }, + { + "epoch": 0.24154589371980675, + "grad_norm": 0.7762824892997742, + "learning_rate": 8.87174234344921e-06, + "loss": 0.6805, + "step": 4700 + }, + { + "epoch": 0.2415972864631514, + "grad_norm": 1.0945128202438354, + "learning_rate": 8.871215673238395e-06, + "loss": 0.7721, + "step": 4701 + }, + { + "epoch": 0.24164867920649605, + "grad_norm": 1.2013400793075562, + "learning_rate": 8.870688895772168e-06, + "loss": 0.754, + "step": 4702 + }, + { + "epoch": 0.2417000719498407, + "grad_norm": 0.7914587259292603, + "learning_rate": 8.870162011065125e-06, + "loss": 0.7172, + "step": 4703 + }, + { + "epoch": 0.24175146469318531, + "grad_norm": 1.1099352836608887, + "learning_rate": 8.869635019131863e-06, + "loss": 0.7252, + "step": 4704 + }, + { + "epoch": 0.24180285743652996, + "grad_norm": 1.04390549659729, + "learning_rate": 8.869107919986986e-06, + "loss": 0.7271, + "step": 4705 + }, + { + "epoch": 0.2418542501798746, + "grad_norm": 1.2175709009170532, + "learning_rate": 8.868580713645094e-06, + "loss": 0.7416, + "step": 4706 + }, + { + "epoch": 0.24190564292321926, + "grad_norm": 1.137791395187378, + "learning_rate": 8.868053400120796e-06, + "loss": 0.7982, + "step": 4707 + }, + { + "epoch": 0.24195703566656387, + "grad_norm": 1.1600502729415894, + "learning_rate": 8.8675259794287e-06, + "loss": 0.8103, + "step": 4708 + }, + { + "epoch": 0.24200842840990852, + "grad_norm": 1.1336969137191772, + "learning_rate": 8.866998451583418e-06, + "loss": 0.7705, + "step": 4709 + }, + { + "epoch": 0.24205982115325317, + "grad_norm": 1.0881843566894531, + "learning_rate": 8.866470816599569e-06, + "loss": 0.7607, + "step": 4710 + }, + { + "epoch": 0.2421112138965978, + "grad_norm": 1.1276473999023438, + "learning_rate": 8.865943074491769e-06, + "loss": 0.8072, + "step": 4711 + }, + { + "epoch": 0.24216260663994243, + "grad_norm": 1.1565089225769043, + "learning_rate": 8.86541522527464e-06, + "loss": 0.773, + "step": 4712 + }, + { + "epoch": 0.24221399938328708, + "grad_norm": 1.0687453746795654, + "learning_rate": 8.864887268962807e-06, + "loss": 0.7429, + "step": 4713 + }, + { + "epoch": 0.24226539212663173, + "grad_norm": 0.9930742383003235, + "learning_rate": 8.864359205570899e-06, + "loss": 0.7583, + "step": 4714 + }, + { + "epoch": 0.24231678486997635, + "grad_norm": 0.9466663599014282, + "learning_rate": 8.863831035113542e-06, + "loss": 0.7272, + "step": 4715 + }, + { + "epoch": 0.242368177613321, + "grad_norm": 1.1421409845352173, + "learning_rate": 8.863302757605373e-06, + "loss": 0.7373, + "step": 4716 + }, + { + "epoch": 0.24241957035666564, + "grad_norm": 1.1920064687728882, + "learning_rate": 8.862774373061026e-06, + "loss": 0.7543, + "step": 4717 + }, + { + "epoch": 0.2424709631000103, + "grad_norm": 1.0750113725662231, + "learning_rate": 8.862245881495144e-06, + "loss": 0.8017, + "step": 4718 + }, + { + "epoch": 0.2425223558433549, + "grad_norm": 0.865716814994812, + "learning_rate": 8.861717282922369e-06, + "loss": 0.7103, + "step": 4719 + }, + { + "epoch": 0.24257374858669956, + "grad_norm": 1.1474857330322266, + "learning_rate": 8.86118857735734e-06, + "loss": 0.7498, + "step": 4720 + }, + { + "epoch": 0.2426251413300442, + "grad_norm": 1.1631758213043213, + "learning_rate": 8.860659764814713e-06, + "loss": 0.7864, + "step": 4721 + }, + { + "epoch": 0.24267653407338885, + "grad_norm": 1.0661959648132324, + "learning_rate": 8.860130845309134e-06, + "loss": 0.7952, + "step": 4722 + }, + { + "epoch": 0.24272792681673347, + "grad_norm": 1.179243803024292, + "learning_rate": 8.85960181885526e-06, + "loss": 0.7598, + "step": 4723 + }, + { + "epoch": 0.24277931956007812, + "grad_norm": 1.128421664237976, + "learning_rate": 8.859072685467746e-06, + "loss": 0.7671, + "step": 4724 + }, + { + "epoch": 0.24283071230342276, + "grad_norm": 1.0811216831207275, + "learning_rate": 8.858543445161256e-06, + "loss": 0.7783, + "step": 4725 + }, + { + "epoch": 0.2428821050467674, + "grad_norm": 1.0029337406158447, + "learning_rate": 8.858014097950448e-06, + "loss": 0.7492, + "step": 4726 + }, + { + "epoch": 0.24293349779011203, + "grad_norm": 1.0179736614227295, + "learning_rate": 8.857484643849991e-06, + "loss": 0.7444, + "step": 4727 + }, + { + "epoch": 0.24298489053345668, + "grad_norm": 0.7943317890167236, + "learning_rate": 8.856955082874554e-06, + "loss": 0.7022, + "step": 4728 + }, + { + "epoch": 0.24303628327680132, + "grad_norm": 1.0310739278793335, + "learning_rate": 8.856425415038808e-06, + "loss": 0.7725, + "step": 4729 + }, + { + "epoch": 0.24308767602014594, + "grad_norm": 0.7097966074943542, + "learning_rate": 8.855895640357429e-06, + "loss": 0.6781, + "step": 4730 + }, + { + "epoch": 0.2431390687634906, + "grad_norm": 0.7207580208778381, + "learning_rate": 8.855365758845092e-06, + "loss": 0.7311, + "step": 4731 + }, + { + "epoch": 0.24319046150683524, + "grad_norm": 1.0366616249084473, + "learning_rate": 8.854835770516483e-06, + "loss": 0.7837, + "step": 4732 + }, + { + "epoch": 0.24324185425017988, + "grad_norm": 0.7645068764686584, + "learning_rate": 8.85430567538628e-06, + "loss": 0.7056, + "step": 4733 + }, + { + "epoch": 0.2432932469935245, + "grad_norm": 1.1128915548324585, + "learning_rate": 8.853775473469174e-06, + "loss": 0.8017, + "step": 4734 + }, + { + "epoch": 0.24334463973686915, + "grad_norm": 1.1075836420059204, + "learning_rate": 8.853245164779853e-06, + "loss": 0.8206, + "step": 4735 + }, + { + "epoch": 0.2433960324802138, + "grad_norm": 1.0938459634780884, + "learning_rate": 8.852714749333008e-06, + "loss": 0.7321, + "step": 4736 + }, + { + "epoch": 0.24344742522355844, + "grad_norm": 1.0838030576705933, + "learning_rate": 8.85218422714334e-06, + "loss": 0.8294, + "step": 4737 + }, + { + "epoch": 0.24349881796690306, + "grad_norm": 1.0463584661483765, + "learning_rate": 8.851653598225542e-06, + "loss": 0.7946, + "step": 4738 + }, + { + "epoch": 0.2435502107102477, + "grad_norm": 1.056262493133545, + "learning_rate": 8.851122862594319e-06, + "loss": 0.7492, + "step": 4739 + }, + { + "epoch": 0.24360160345359236, + "grad_norm": 1.1224285364151, + "learning_rate": 8.850592020264373e-06, + "loss": 0.8422, + "step": 4740 + }, + { + "epoch": 0.243652996196937, + "grad_norm": 1.0783976316452026, + "learning_rate": 8.850061071250413e-06, + "loss": 0.7924, + "step": 4741 + }, + { + "epoch": 0.24370438894028162, + "grad_norm": 1.1357896327972412, + "learning_rate": 8.849530015567149e-06, + "loss": 0.7784, + "step": 4742 + }, + { + "epoch": 0.24375578168362627, + "grad_norm": 1.089284062385559, + "learning_rate": 8.848998853229294e-06, + "loss": 0.7429, + "step": 4743 + }, + { + "epoch": 0.24380717442697092, + "grad_norm": 1.0761910676956177, + "learning_rate": 8.848467584251563e-06, + "loss": 0.7396, + "step": 4744 + }, + { + "epoch": 0.24385856717031554, + "grad_norm": 0.8542054295539856, + "learning_rate": 8.84793620864868e-06, + "loss": 0.6922, + "step": 4745 + }, + { + "epoch": 0.24390995991366018, + "grad_norm": 1.1290571689605713, + "learning_rate": 8.847404726435363e-06, + "loss": 0.6948, + "step": 4746 + }, + { + "epoch": 0.24396135265700483, + "grad_norm": 1.0668021440505981, + "learning_rate": 8.846873137626338e-06, + "loss": 0.7665, + "step": 4747 + }, + { + "epoch": 0.24401274540034948, + "grad_norm": 1.0984876155853271, + "learning_rate": 8.846341442236333e-06, + "loss": 0.8261, + "step": 4748 + }, + { + "epoch": 0.2440641381436941, + "grad_norm": 1.0564554929733276, + "learning_rate": 8.84580964028008e-06, + "loss": 0.7989, + "step": 4749 + }, + { + "epoch": 0.24411553088703875, + "grad_norm": 1.2301905155181885, + "learning_rate": 8.845277731772312e-06, + "loss": 0.7858, + "step": 4750 + }, + { + "epoch": 0.2441669236303834, + "grad_norm": 0.7716259956359863, + "learning_rate": 8.844745716727767e-06, + "loss": 0.6789, + "step": 4751 + }, + { + "epoch": 0.24421831637372804, + "grad_norm": 1.4047126770019531, + "learning_rate": 8.844213595161186e-06, + "loss": 0.7874, + "step": 4752 + }, + { + "epoch": 0.24426970911707266, + "grad_norm": 1.187303900718689, + "learning_rate": 8.84368136708731e-06, + "loss": 0.7958, + "step": 4753 + }, + { + "epoch": 0.2443211018604173, + "grad_norm": 1.1583155393600464, + "learning_rate": 8.843149032520884e-06, + "loss": 0.8104, + "step": 4754 + }, + { + "epoch": 0.24437249460376195, + "grad_norm": 0.7943881750106812, + "learning_rate": 8.842616591476659e-06, + "loss": 0.6914, + "step": 4755 + }, + { + "epoch": 0.2444238873471066, + "grad_norm": 1.0763680934906006, + "learning_rate": 8.842084043969383e-06, + "loss": 0.7844, + "step": 4756 + }, + { + "epoch": 0.24447528009045122, + "grad_norm": 1.150385856628418, + "learning_rate": 8.841551390013816e-06, + "loss": 0.78, + "step": 4757 + }, + { + "epoch": 0.24452667283379587, + "grad_norm": 1.1021569967269897, + "learning_rate": 8.841018629624714e-06, + "loss": 0.7522, + "step": 4758 + }, + { + "epoch": 0.2445780655771405, + "grad_norm": 1.145723581314087, + "learning_rate": 8.840485762816835e-06, + "loss": 0.8086, + "step": 4759 + }, + { + "epoch": 0.24462945832048516, + "grad_norm": 0.7842158079147339, + "learning_rate": 8.839952789604946e-06, + "loss": 0.7585, + "step": 4760 + }, + { + "epoch": 0.24468085106382978, + "grad_norm": 1.1114294528961182, + "learning_rate": 8.83941971000381e-06, + "loss": 0.7974, + "step": 4761 + }, + { + "epoch": 0.24473224380717443, + "grad_norm": 0.7318762540817261, + "learning_rate": 8.838886524028198e-06, + "loss": 0.7292, + "step": 4762 + }, + { + "epoch": 0.24478363655051907, + "grad_norm": 0.7385783791542053, + "learning_rate": 8.838353231692884e-06, + "loss": 0.6978, + "step": 4763 + }, + { + "epoch": 0.2448350292938637, + "grad_norm": 1.1245821714401245, + "learning_rate": 8.837819833012642e-06, + "loss": 0.7756, + "step": 4764 + }, + { + "epoch": 0.24488642203720834, + "grad_norm": 1.1304298639297485, + "learning_rate": 8.83728632800225e-06, + "loss": 0.7272, + "step": 4765 + }, + { + "epoch": 0.244937814780553, + "grad_norm": 0.9192416071891785, + "learning_rate": 8.83675271667649e-06, + "loss": 0.7087, + "step": 4766 + }, + { + "epoch": 0.24498920752389763, + "grad_norm": 0.8372642397880554, + "learning_rate": 8.836218999050143e-06, + "loss": 0.7281, + "step": 4767 + }, + { + "epoch": 0.24504060026724225, + "grad_norm": 1.4684903621673584, + "learning_rate": 8.835685175138e-06, + "loss": 0.8055, + "step": 4768 + }, + { + "epoch": 0.2450919930105869, + "grad_norm": 1.075905442237854, + "learning_rate": 8.83515124495485e-06, + "loss": 0.7624, + "step": 4769 + }, + { + "epoch": 0.24514338575393155, + "grad_norm": 0.7970883846282959, + "learning_rate": 8.834617208515486e-06, + "loss": 0.7161, + "step": 4770 + }, + { + "epoch": 0.2451947784972762, + "grad_norm": 1.1217435598373413, + "learning_rate": 8.834083065834702e-06, + "loss": 0.8332, + "step": 4771 + }, + { + "epoch": 0.2452461712406208, + "grad_norm": 0.905123233795166, + "learning_rate": 8.8335488169273e-06, + "loss": 0.6922, + "step": 4772 + }, + { + "epoch": 0.24529756398396546, + "grad_norm": 1.1003978252410889, + "learning_rate": 8.83301446180808e-06, + "loss": 0.7984, + "step": 4773 + }, + { + "epoch": 0.2453489567273101, + "grad_norm": 1.2487705945968628, + "learning_rate": 8.832480000491847e-06, + "loss": 0.7346, + "step": 4774 + }, + { + "epoch": 0.24540034947065476, + "grad_norm": 1.1054446697235107, + "learning_rate": 8.831945432993411e-06, + "loss": 0.8237, + "step": 4775 + }, + { + "epoch": 0.24545174221399937, + "grad_norm": 1.1106668710708618, + "learning_rate": 8.831410759327579e-06, + "loss": 0.7999, + "step": 4776 + }, + { + "epoch": 0.24550313495734402, + "grad_norm": 0.7692917585372925, + "learning_rate": 8.830875979509165e-06, + "loss": 0.6842, + "step": 4777 + }, + { + "epoch": 0.24555452770068867, + "grad_norm": 1.0115216970443726, + "learning_rate": 8.830341093552988e-06, + "loss": 0.7095, + "step": 4778 + }, + { + "epoch": 0.24560592044403332, + "grad_norm": 0.8051899075508118, + "learning_rate": 8.829806101473866e-06, + "loss": 0.7366, + "step": 4779 + }, + { + "epoch": 0.24565731318737793, + "grad_norm": 0.8092594742774963, + "learning_rate": 8.829271003286621e-06, + "loss": 0.7001, + "step": 4780 + }, + { + "epoch": 0.24570870593072258, + "grad_norm": 1.1987711191177368, + "learning_rate": 8.82873579900608e-06, + "loss": 0.8187, + "step": 4781 + }, + { + "epoch": 0.24576009867406723, + "grad_norm": 1.0493359565734863, + "learning_rate": 8.82820048864707e-06, + "loss": 0.7593, + "step": 4782 + }, + { + "epoch": 0.24581149141741185, + "grad_norm": 0.8839566707611084, + "learning_rate": 8.827665072224422e-06, + "loss": 0.7042, + "step": 4783 + }, + { + "epoch": 0.2458628841607565, + "grad_norm": 1.1349273920059204, + "learning_rate": 8.82712954975297e-06, + "loss": 0.8006, + "step": 4784 + }, + { + "epoch": 0.24591427690410114, + "grad_norm": 1.0763131380081177, + "learning_rate": 8.826593921247554e-06, + "loss": 0.817, + "step": 4785 + }, + { + "epoch": 0.2459656696474458, + "grad_norm": 1.0929253101348877, + "learning_rate": 8.82605818672301e-06, + "loss": 0.8018, + "step": 4786 + }, + { + "epoch": 0.2460170623907904, + "grad_norm": 0.9399827122688293, + "learning_rate": 8.825522346194184e-06, + "loss": 0.7326, + "step": 4787 + }, + { + "epoch": 0.24606845513413506, + "grad_norm": 1.0634305477142334, + "learning_rate": 8.82498639967592e-06, + "loss": 0.7901, + "step": 4788 + }, + { + "epoch": 0.2461198478774797, + "grad_norm": 1.214478850364685, + "learning_rate": 8.824450347183067e-06, + "loss": 0.7627, + "step": 4789 + }, + { + "epoch": 0.24617124062082435, + "grad_norm": 0.9317747950553894, + "learning_rate": 8.82391418873048e-06, + "loss": 0.7237, + "step": 4790 + }, + { + "epoch": 0.24622263336416897, + "grad_norm": 1.0761404037475586, + "learning_rate": 8.82337792433301e-06, + "loss": 0.7809, + "step": 4791 + }, + { + "epoch": 0.24627402610751362, + "grad_norm": 1.033531665802002, + "learning_rate": 8.822841554005514e-06, + "loss": 0.7695, + "step": 4792 + }, + { + "epoch": 0.24632541885085826, + "grad_norm": 1.172295093536377, + "learning_rate": 8.822305077762856e-06, + "loss": 0.8267, + "step": 4793 + }, + { + "epoch": 0.2463768115942029, + "grad_norm": 1.1045151948928833, + "learning_rate": 8.821768495619897e-06, + "loss": 0.775, + "step": 4794 + }, + { + "epoch": 0.24642820433754753, + "grad_norm": 0.7222758531570435, + "learning_rate": 8.821231807591508e-06, + "loss": 0.7355, + "step": 4795 + }, + { + "epoch": 0.24647959708089218, + "grad_norm": 1.0813877582550049, + "learning_rate": 8.82069501369255e-06, + "loss": 0.7448, + "step": 4796 + }, + { + "epoch": 0.24653098982423682, + "grad_norm": 1.094832181930542, + "learning_rate": 8.820158113937904e-06, + "loss": 0.7615, + "step": 4797 + }, + { + "epoch": 0.24658238256758147, + "grad_norm": 1.0937809944152832, + "learning_rate": 8.81962110834244e-06, + "loss": 0.7703, + "step": 4798 + }, + { + "epoch": 0.2466337753109261, + "grad_norm": 1.0613828897476196, + "learning_rate": 8.819083996921038e-06, + "loss": 0.7822, + "step": 4799 + }, + { + "epoch": 0.24668516805427074, + "grad_norm": 1.0581883192062378, + "learning_rate": 8.81854677968858e-06, + "loss": 0.7135, + "step": 4800 + }, + { + "epoch": 0.24673656079761538, + "grad_norm": 1.125534176826477, + "learning_rate": 8.818009456659947e-06, + "loss": 0.7827, + "step": 4801 + }, + { + "epoch": 0.24678795354096, + "grad_norm": 1.1007810831069946, + "learning_rate": 8.817472027850026e-06, + "loss": 0.7705, + "step": 4802 + }, + { + "epoch": 0.24683934628430465, + "grad_norm": 0.7310556769371033, + "learning_rate": 8.816934493273713e-06, + "loss": 0.7016, + "step": 4803 + }, + { + "epoch": 0.2468907390276493, + "grad_norm": 1.0512195825576782, + "learning_rate": 8.816396852945896e-06, + "loss": 0.7838, + "step": 4804 + }, + { + "epoch": 0.24694213177099394, + "grad_norm": 1.0588195323944092, + "learning_rate": 8.815859106881471e-06, + "loss": 0.7194, + "step": 4805 + }, + { + "epoch": 0.24699352451433856, + "grad_norm": 1.0114433765411377, + "learning_rate": 8.815321255095337e-06, + "loss": 0.7229, + "step": 4806 + }, + { + "epoch": 0.2470449172576832, + "grad_norm": 0.7256984710693359, + "learning_rate": 8.814783297602394e-06, + "loss": 0.7246, + "step": 4807 + }, + { + "epoch": 0.24709631000102786, + "grad_norm": 1.1471654176712036, + "learning_rate": 8.814245234417551e-06, + "loss": 0.8076, + "step": 4808 + }, + { + "epoch": 0.2471477027443725, + "grad_norm": 0.7653288245201111, + "learning_rate": 8.813707065555713e-06, + "loss": 0.7048, + "step": 4809 + }, + { + "epoch": 0.24719909548771712, + "grad_norm": 1.0900944471359253, + "learning_rate": 8.81316879103179e-06, + "loss": 0.7806, + "step": 4810 + }, + { + "epoch": 0.24725048823106177, + "grad_norm": 0.85138338804245, + "learning_rate": 8.812630410860697e-06, + "loss": 0.7298, + "step": 4811 + }, + { + "epoch": 0.24730188097440642, + "grad_norm": 0.7847518920898438, + "learning_rate": 8.812091925057347e-06, + "loss": 0.6982, + "step": 4812 + }, + { + "epoch": 0.24735327371775107, + "grad_norm": 1.2386887073516846, + "learning_rate": 8.811553333636663e-06, + "loss": 0.8292, + "step": 4813 + }, + { + "epoch": 0.24740466646109568, + "grad_norm": 1.2640650272369385, + "learning_rate": 8.811014636613564e-06, + "loss": 0.768, + "step": 4814 + }, + { + "epoch": 0.24745605920444033, + "grad_norm": 1.6772304773330688, + "learning_rate": 8.810475834002976e-06, + "loss": 0.7881, + "step": 4815 + }, + { + "epoch": 0.24750745194778498, + "grad_norm": 1.0697636604309082, + "learning_rate": 8.80993692581983e-06, + "loss": 0.7781, + "step": 4816 + }, + { + "epoch": 0.24755884469112963, + "grad_norm": 1.0476969480514526, + "learning_rate": 8.809397912079054e-06, + "loss": 0.7716, + "step": 4817 + }, + { + "epoch": 0.24761023743447425, + "grad_norm": 0.9056881666183472, + "learning_rate": 8.808858792795581e-06, + "loss": 0.7223, + "step": 4818 + }, + { + "epoch": 0.2476616301778189, + "grad_norm": 0.8230631351470947, + "learning_rate": 8.808319567984348e-06, + "loss": 0.6915, + "step": 4819 + }, + { + "epoch": 0.24771302292116354, + "grad_norm": 1.1733490228652954, + "learning_rate": 8.8077802376603e-06, + "loss": 0.7341, + "step": 4820 + }, + { + "epoch": 0.24776441566450816, + "grad_norm": 1.1157865524291992, + "learning_rate": 8.807240801838373e-06, + "loss": 0.7713, + "step": 4821 + }, + { + "epoch": 0.2478158084078528, + "grad_norm": 1.090049386024475, + "learning_rate": 8.806701260533514e-06, + "loss": 0.7891, + "step": 4822 + }, + { + "epoch": 0.24786720115119745, + "grad_norm": 1.123653531074524, + "learning_rate": 8.806161613760674e-06, + "loss": 0.753, + "step": 4823 + }, + { + "epoch": 0.2479185938945421, + "grad_norm": 1.1349486112594604, + "learning_rate": 8.805621861534802e-06, + "loss": 0.7383, + "step": 4824 + }, + { + "epoch": 0.24796998663788672, + "grad_norm": 1.1102007627487183, + "learning_rate": 8.805082003870851e-06, + "loss": 0.7909, + "step": 4825 + }, + { + "epoch": 0.24802137938123137, + "grad_norm": 1.1651266813278198, + "learning_rate": 8.804542040783783e-06, + "loss": 0.8268, + "step": 4826 + }, + { + "epoch": 0.248072772124576, + "grad_norm": 1.107848048210144, + "learning_rate": 8.804001972288554e-06, + "loss": 0.7421, + "step": 4827 + }, + { + "epoch": 0.24812416486792066, + "grad_norm": 1.1189789772033691, + "learning_rate": 8.80346179840013e-06, + "loss": 0.7914, + "step": 4828 + }, + { + "epoch": 0.24817555761126528, + "grad_norm": 1.0694493055343628, + "learning_rate": 8.802921519133475e-06, + "loss": 0.8046, + "step": 4829 + }, + { + "epoch": 0.24822695035460993, + "grad_norm": 1.2083418369293213, + "learning_rate": 8.802381134503559e-06, + "loss": 0.769, + "step": 4830 + }, + { + "epoch": 0.24827834309795457, + "grad_norm": 1.0982270240783691, + "learning_rate": 8.801840644525352e-06, + "loss": 0.7426, + "step": 4831 + }, + { + "epoch": 0.24832973584129922, + "grad_norm": 0.8952498435974121, + "learning_rate": 8.80130004921383e-06, + "loss": 0.7044, + "step": 4832 + }, + { + "epoch": 0.24838112858464384, + "grad_norm": 1.2216222286224365, + "learning_rate": 8.800759348583972e-06, + "loss": 0.7719, + "step": 4833 + }, + { + "epoch": 0.2484325213279885, + "grad_norm": 1.1027307510375977, + "learning_rate": 8.800218542650757e-06, + "loss": 0.8374, + "step": 4834 + }, + { + "epoch": 0.24848391407133313, + "grad_norm": 1.131588101387024, + "learning_rate": 8.79967763142917e-06, + "loss": 0.8336, + "step": 4835 + }, + { + "epoch": 0.24853530681467778, + "grad_norm": 1.0985100269317627, + "learning_rate": 8.799136614934192e-06, + "loss": 0.7432, + "step": 4836 + }, + { + "epoch": 0.2485866995580224, + "grad_norm": 1.1121591329574585, + "learning_rate": 8.798595493180819e-06, + "loss": 0.8282, + "step": 4837 + }, + { + "epoch": 0.24863809230136705, + "grad_norm": 1.09903085231781, + "learning_rate": 8.798054266184041e-06, + "loss": 0.7997, + "step": 4838 + }, + { + "epoch": 0.2486894850447117, + "grad_norm": 1.0357295274734497, + "learning_rate": 8.797512933958853e-06, + "loss": 0.7785, + "step": 4839 + }, + { + "epoch": 0.2487408777880563, + "grad_norm": 1.0244630575180054, + "learning_rate": 8.796971496520252e-06, + "loss": 0.7668, + "step": 4840 + }, + { + "epoch": 0.24879227053140096, + "grad_norm": 1.065058946609497, + "learning_rate": 8.79642995388324e-06, + "loss": 0.7434, + "step": 4841 + }, + { + "epoch": 0.2488436632747456, + "grad_norm": 1.043064832687378, + "learning_rate": 8.795888306062823e-06, + "loss": 0.7633, + "step": 4842 + }, + { + "epoch": 0.24889505601809025, + "grad_norm": 1.1005511283874512, + "learning_rate": 8.795346553074005e-06, + "loss": 0.8141, + "step": 4843 + }, + { + "epoch": 0.24894644876143487, + "grad_norm": 1.0140762329101562, + "learning_rate": 8.794804694931795e-06, + "loss": 0.7549, + "step": 4844 + }, + { + "epoch": 0.24899784150477952, + "grad_norm": 1.1355570554733276, + "learning_rate": 8.79426273165121e-06, + "loss": 0.8015, + "step": 4845 + }, + { + "epoch": 0.24904923424812417, + "grad_norm": 1.103007435798645, + "learning_rate": 8.793720663247259e-06, + "loss": 0.8372, + "step": 4846 + }, + { + "epoch": 0.24910062699146882, + "grad_norm": 1.1142619848251343, + "learning_rate": 8.793178489734966e-06, + "loss": 0.8508, + "step": 4847 + }, + { + "epoch": 0.24915201973481343, + "grad_norm": 0.8448128700256348, + "learning_rate": 8.79263621112935e-06, + "loss": 0.6659, + "step": 4848 + }, + { + "epoch": 0.24920341247815808, + "grad_norm": 0.9091752767562866, + "learning_rate": 8.792093827445437e-06, + "loss": 0.7079, + "step": 4849 + }, + { + "epoch": 0.24925480522150273, + "grad_norm": 1.2166436910629272, + "learning_rate": 8.791551338698252e-06, + "loss": 0.8276, + "step": 4850 + }, + { + "epoch": 0.24930619796484738, + "grad_norm": 1.0822728872299194, + "learning_rate": 8.791008744902827e-06, + "loss": 0.7539, + "step": 4851 + }, + { + "epoch": 0.249357590708192, + "grad_norm": 1.1478793621063232, + "learning_rate": 8.790466046074193e-06, + "loss": 0.8044, + "step": 4852 + }, + { + "epoch": 0.24940898345153664, + "grad_norm": 1.1383388042449951, + "learning_rate": 8.789923242227389e-06, + "loss": 0.7851, + "step": 4853 + }, + { + "epoch": 0.2494603761948813, + "grad_norm": 1.0272635221481323, + "learning_rate": 8.789380333377451e-06, + "loss": 0.797, + "step": 4854 + }, + { + "epoch": 0.24951176893822594, + "grad_norm": 1.1973938941955566, + "learning_rate": 8.788837319539421e-06, + "loss": 0.8078, + "step": 4855 + }, + { + "epoch": 0.24956316168157056, + "grad_norm": 1.0882850885391235, + "learning_rate": 8.788294200728345e-06, + "loss": 0.768, + "step": 4856 + }, + { + "epoch": 0.2496145544249152, + "grad_norm": 1.124497652053833, + "learning_rate": 8.78775097695927e-06, + "loss": 0.7577, + "step": 4857 + }, + { + "epoch": 0.24966594716825985, + "grad_norm": 1.048090934753418, + "learning_rate": 8.787207648247249e-06, + "loss": 0.7636, + "step": 4858 + }, + { + "epoch": 0.24971733991160447, + "grad_norm": 1.0921497344970703, + "learning_rate": 8.78666421460733e-06, + "loss": 0.85, + "step": 4859 + }, + { + "epoch": 0.24976873265494912, + "grad_norm": 1.1429721117019653, + "learning_rate": 8.786120676054573e-06, + "loss": 0.7988, + "step": 4860 + }, + { + "epoch": 0.24982012539829376, + "grad_norm": 1.210495114326477, + "learning_rate": 8.785577032604036e-06, + "loss": 0.7913, + "step": 4861 + }, + { + "epoch": 0.2498715181416384, + "grad_norm": 1.0442157983779907, + "learning_rate": 8.785033284270783e-06, + "loss": 0.8144, + "step": 4862 + }, + { + "epoch": 0.24992291088498303, + "grad_norm": 1.0624934434890747, + "learning_rate": 8.784489431069878e-06, + "loss": 0.8, + "step": 4863 + }, + { + "epoch": 0.24997430362832768, + "grad_norm": 1.1329983472824097, + "learning_rate": 8.783945473016387e-06, + "loss": 0.7648, + "step": 4864 + }, + { + "epoch": 0.2500256963716723, + "grad_norm": 1.2176034450531006, + "learning_rate": 8.783401410125383e-06, + "loss": 0.7952, + "step": 4865 + }, + { + "epoch": 0.25007708911501697, + "grad_norm": 0.9558403491973877, + "learning_rate": 8.782857242411937e-06, + "loss": 0.7442, + "step": 4866 + }, + { + "epoch": 0.2501284818583616, + "grad_norm": 0.9302815794944763, + "learning_rate": 8.782312969891131e-06, + "loss": 0.7091, + "step": 4867 + }, + { + "epoch": 0.25017987460170626, + "grad_norm": 1.12680184841156, + "learning_rate": 8.78176859257804e-06, + "loss": 0.7044, + "step": 4868 + }, + { + "epoch": 0.25023126734505086, + "grad_norm": 1.1850327253341675, + "learning_rate": 8.781224110487747e-06, + "loss": 0.7586, + "step": 4869 + }, + { + "epoch": 0.2502826600883955, + "grad_norm": 1.1618678569793701, + "learning_rate": 8.780679523635339e-06, + "loss": 0.7799, + "step": 4870 + }, + { + "epoch": 0.25033405283174015, + "grad_norm": 1.1340609788894653, + "learning_rate": 8.780134832035904e-06, + "loss": 0.7964, + "step": 4871 + }, + { + "epoch": 0.2503854455750848, + "grad_norm": 1.1002004146575928, + "learning_rate": 8.77959003570453e-06, + "loss": 0.8099, + "step": 4872 + }, + { + "epoch": 0.25043683831842944, + "grad_norm": 1.0841517448425293, + "learning_rate": 8.779045134656317e-06, + "loss": 0.7599, + "step": 4873 + }, + { + "epoch": 0.2504882310617741, + "grad_norm": 1.073439598083496, + "learning_rate": 8.778500128906355e-06, + "loss": 0.7929, + "step": 4874 + }, + { + "epoch": 0.25053962380511874, + "grad_norm": 1.0940415859222412, + "learning_rate": 8.77795501846975e-06, + "loss": 0.8059, + "step": 4875 + }, + { + "epoch": 0.25059101654846333, + "grad_norm": 1.0908809900283813, + "learning_rate": 8.7774098033616e-06, + "loss": 0.7435, + "step": 4876 + }, + { + "epoch": 0.250642409291808, + "grad_norm": 1.0483298301696777, + "learning_rate": 8.776864483597014e-06, + "loss": 0.7276, + "step": 4877 + }, + { + "epoch": 0.2506938020351526, + "grad_norm": 1.0982178449630737, + "learning_rate": 8.7763190591911e-06, + "loss": 0.7697, + "step": 4878 + }, + { + "epoch": 0.25074519477849727, + "grad_norm": 1.3911322355270386, + "learning_rate": 8.775773530158968e-06, + "loss": 0.6958, + "step": 4879 + }, + { + "epoch": 0.2507965875218419, + "grad_norm": 0.7102989554405212, + "learning_rate": 8.775227896515734e-06, + "loss": 0.673, + "step": 4880 + }, + { + "epoch": 0.25084798026518657, + "grad_norm": 1.1448079347610474, + "learning_rate": 8.774682158276512e-06, + "loss": 0.8116, + "step": 4881 + }, + { + "epoch": 0.2508993730085312, + "grad_norm": 1.0189813375473022, + "learning_rate": 8.774136315456428e-06, + "loss": 0.7397, + "step": 4882 + }, + { + "epoch": 0.25095076575187586, + "grad_norm": 1.1340067386627197, + "learning_rate": 8.7735903680706e-06, + "loss": 0.8496, + "step": 4883 + }, + { + "epoch": 0.25100215849522045, + "grad_norm": 1.1618138551712036, + "learning_rate": 8.773044316134156e-06, + "loss": 0.751, + "step": 4884 + }, + { + "epoch": 0.2510535512385651, + "grad_norm": 1.035233974456787, + "learning_rate": 8.772498159662223e-06, + "loss": 0.705, + "step": 4885 + }, + { + "epoch": 0.25110494398190975, + "grad_norm": 1.0860227346420288, + "learning_rate": 8.771951898669935e-06, + "loss": 0.7664, + "step": 4886 + }, + { + "epoch": 0.2511563367252544, + "grad_norm": 1.0964267253875732, + "learning_rate": 8.771405533172426e-06, + "loss": 0.7827, + "step": 4887 + }, + { + "epoch": 0.25120772946859904, + "grad_norm": 1.1427735090255737, + "learning_rate": 8.770859063184833e-06, + "loss": 0.7852, + "step": 4888 + }, + { + "epoch": 0.2512591222119437, + "grad_norm": 1.029692530632019, + "learning_rate": 8.770312488722297e-06, + "loss": 0.7579, + "step": 4889 + }, + { + "epoch": 0.25131051495528833, + "grad_norm": 1.0979682207107544, + "learning_rate": 8.769765809799962e-06, + "loss": 0.7934, + "step": 4890 + }, + { + "epoch": 0.251361907698633, + "grad_norm": 1.119998574256897, + "learning_rate": 8.769219026432974e-06, + "loss": 0.8121, + "step": 4891 + }, + { + "epoch": 0.25141330044197757, + "grad_norm": 1.0282078981399536, + "learning_rate": 8.768672138636477e-06, + "loss": 0.7592, + "step": 4892 + }, + { + "epoch": 0.2514646931853222, + "grad_norm": 1.1124991178512573, + "learning_rate": 8.768125146425632e-06, + "loss": 0.8187, + "step": 4893 + }, + { + "epoch": 0.25151608592866687, + "grad_norm": 1.0637891292572021, + "learning_rate": 8.767578049815589e-06, + "loss": 0.7697, + "step": 4894 + }, + { + "epoch": 0.2515674786720115, + "grad_norm": 1.0996404886245728, + "learning_rate": 8.767030848821503e-06, + "loss": 0.7549, + "step": 4895 + }, + { + "epoch": 0.25161887141535616, + "grad_norm": 1.0600802898406982, + "learning_rate": 8.76648354345854e-06, + "loss": 0.7675, + "step": 4896 + }, + { + "epoch": 0.2516702641587008, + "grad_norm": 1.13198983669281, + "learning_rate": 8.765936133741861e-06, + "loss": 0.805, + "step": 4897 + }, + { + "epoch": 0.25172165690204545, + "grad_norm": 1.0183762311935425, + "learning_rate": 8.765388619686632e-06, + "loss": 0.7344, + "step": 4898 + }, + { + "epoch": 0.25177304964539005, + "grad_norm": 1.1054775714874268, + "learning_rate": 8.764841001308024e-06, + "loss": 0.8151, + "step": 4899 + }, + { + "epoch": 0.2518244423887347, + "grad_norm": 1.2429449558258057, + "learning_rate": 8.76429327862121e-06, + "loss": 0.7952, + "step": 4900 + }, + { + "epoch": 0.25187583513207934, + "grad_norm": 1.1846113204956055, + "learning_rate": 8.763745451641361e-06, + "loss": 0.818, + "step": 4901 + }, + { + "epoch": 0.251927227875424, + "grad_norm": 0.9064440727233887, + "learning_rate": 8.763197520383659e-06, + "loss": 0.7484, + "step": 4902 + }, + { + "epoch": 0.25197862061876863, + "grad_norm": 0.8786363005638123, + "learning_rate": 8.762649484863284e-06, + "loss": 0.6768, + "step": 4903 + }, + { + "epoch": 0.2520300133621133, + "grad_norm": 1.1978445053100586, + "learning_rate": 8.762101345095417e-06, + "loss": 0.7347, + "step": 4904 + }, + { + "epoch": 0.25208140610545793, + "grad_norm": 1.0382858514785767, + "learning_rate": 8.76155310109525e-06, + "loss": 0.7723, + "step": 4905 + }, + { + "epoch": 0.2521327988488026, + "grad_norm": 1.1391074657440186, + "learning_rate": 8.761004752877967e-06, + "loss": 0.8323, + "step": 4906 + }, + { + "epoch": 0.25218419159214717, + "grad_norm": 1.0844844579696655, + "learning_rate": 8.760456300458765e-06, + "loss": 0.8273, + "step": 4907 + }, + { + "epoch": 0.2522355843354918, + "grad_norm": 1.124345064163208, + "learning_rate": 8.759907743852836e-06, + "loss": 0.8255, + "step": 4908 + }, + { + "epoch": 0.25228697707883646, + "grad_norm": 1.1418821811676025, + "learning_rate": 8.759359083075381e-06, + "loss": 0.7624, + "step": 4909 + }, + { + "epoch": 0.2523383698221811, + "grad_norm": 1.0390063524246216, + "learning_rate": 8.758810318141598e-06, + "loss": 0.8392, + "step": 4910 + }, + { + "epoch": 0.25238976256552575, + "grad_norm": 1.0535207986831665, + "learning_rate": 8.758261449066694e-06, + "loss": 0.785, + "step": 4911 + }, + { + "epoch": 0.2524411553088704, + "grad_norm": 1.0583621263504028, + "learning_rate": 8.757712475865873e-06, + "loss": 0.7098, + "step": 4912 + }, + { + "epoch": 0.25249254805221505, + "grad_norm": 1.0995343923568726, + "learning_rate": 8.757163398554349e-06, + "loss": 0.7531, + "step": 4913 + }, + { + "epoch": 0.25254394079555964, + "grad_norm": 1.0213854312896729, + "learning_rate": 8.75661421714733e-06, + "loss": 0.7664, + "step": 4914 + }, + { + "epoch": 0.2525953335389043, + "grad_norm": 1.1030197143554688, + "learning_rate": 8.756064931660035e-06, + "loss": 0.838, + "step": 4915 + }, + { + "epoch": 0.25264672628224893, + "grad_norm": 1.116595983505249, + "learning_rate": 8.755515542107682e-06, + "loss": 0.7127, + "step": 4916 + }, + { + "epoch": 0.2526981190255936, + "grad_norm": 1.089133858680725, + "learning_rate": 8.75496604850549e-06, + "loss": 0.769, + "step": 4917 + }, + { + "epoch": 0.25274951176893823, + "grad_norm": 1.0371425151824951, + "learning_rate": 8.754416450868683e-06, + "loss": 0.7073, + "step": 4918 + }, + { + "epoch": 0.2528009045122829, + "grad_norm": 1.168327808380127, + "learning_rate": 8.753866749212491e-06, + "loss": 0.8087, + "step": 4919 + }, + { + "epoch": 0.2528522972556275, + "grad_norm": 1.1146843433380127, + "learning_rate": 8.753316943552143e-06, + "loss": 0.739, + "step": 4920 + }, + { + "epoch": 0.25290368999897217, + "grad_norm": 0.7569361329078674, + "learning_rate": 8.752767033902873e-06, + "loss": 0.7263, + "step": 4921 + }, + { + "epoch": 0.25295508274231676, + "grad_norm": 1.0661402940750122, + "learning_rate": 8.752217020279914e-06, + "loss": 0.7507, + "step": 4922 + }, + { + "epoch": 0.2530064754856614, + "grad_norm": 1.0629280805587769, + "learning_rate": 8.751666902698506e-06, + "loss": 0.7684, + "step": 4923 + }, + { + "epoch": 0.25305786822900606, + "grad_norm": 1.0731984376907349, + "learning_rate": 8.75111668117389e-06, + "loss": 0.7975, + "step": 4924 + }, + { + "epoch": 0.2531092609723507, + "grad_norm": 1.0546234846115112, + "learning_rate": 8.750566355721311e-06, + "loss": 0.8127, + "step": 4925 + }, + { + "epoch": 0.25316065371569535, + "grad_norm": 1.1837005615234375, + "learning_rate": 8.750015926356014e-06, + "loss": 0.8075, + "step": 4926 + }, + { + "epoch": 0.25321204645904, + "grad_norm": 1.0947976112365723, + "learning_rate": 8.749465393093253e-06, + "loss": 0.8067, + "step": 4927 + }, + { + "epoch": 0.25326343920238464, + "grad_norm": 1.197335958480835, + "learning_rate": 8.74891475594828e-06, + "loss": 0.8037, + "step": 4928 + }, + { + "epoch": 0.2533148319457293, + "grad_norm": 1.0454286336898804, + "learning_rate": 8.74836401493635e-06, + "loss": 0.7186, + "step": 4929 + }, + { + "epoch": 0.2533662246890739, + "grad_norm": 0.9589943885803223, + "learning_rate": 8.74781317007272e-06, + "loss": 0.7115, + "step": 4930 + }, + { + "epoch": 0.25341761743241853, + "grad_norm": 1.107591986656189, + "learning_rate": 8.747262221372653e-06, + "loss": 0.7425, + "step": 4931 + }, + { + "epoch": 0.2534690101757632, + "grad_norm": 1.0696320533752441, + "learning_rate": 8.746711168851416e-06, + "loss": 0.768, + "step": 4932 + }, + { + "epoch": 0.2535204029191078, + "grad_norm": 1.2163894176483154, + "learning_rate": 8.746160012524273e-06, + "loss": 0.7873, + "step": 4933 + }, + { + "epoch": 0.25357179566245247, + "grad_norm": 1.149474024772644, + "learning_rate": 8.745608752406496e-06, + "loss": 0.8166, + "step": 4934 + }, + { + "epoch": 0.2536231884057971, + "grad_norm": 0.8342404365539551, + "learning_rate": 8.745057388513357e-06, + "loss": 0.658, + "step": 4935 + }, + { + "epoch": 0.25367458114914176, + "grad_norm": 1.1002564430236816, + "learning_rate": 8.744505920860133e-06, + "loss": 0.7489, + "step": 4936 + }, + { + "epoch": 0.25372597389248636, + "grad_norm": 0.7716323137283325, + "learning_rate": 8.743954349462103e-06, + "loss": 0.7094, + "step": 4937 + }, + { + "epoch": 0.253777366635831, + "grad_norm": 1.1159147024154663, + "learning_rate": 8.743402674334548e-06, + "loss": 0.8203, + "step": 4938 + }, + { + "epoch": 0.25382875937917565, + "grad_norm": 1.0739567279815674, + "learning_rate": 8.742850895492753e-06, + "loss": 0.7985, + "step": 4939 + }, + { + "epoch": 0.2538801521225203, + "grad_norm": 0.8378993272781372, + "learning_rate": 8.742299012952006e-06, + "loss": 0.7256, + "step": 4940 + }, + { + "epoch": 0.25393154486586494, + "grad_norm": 1.1590425968170166, + "learning_rate": 8.741747026727596e-06, + "loss": 0.8107, + "step": 4941 + }, + { + "epoch": 0.2539829376092096, + "grad_norm": 1.1324280500411987, + "learning_rate": 8.741194936834818e-06, + "loss": 0.7977, + "step": 4942 + }, + { + "epoch": 0.25403433035255424, + "grad_norm": 1.1204556226730347, + "learning_rate": 8.740642743288966e-06, + "loss": 0.78, + "step": 4943 + }, + { + "epoch": 0.2540857230958989, + "grad_norm": 1.0953364372253418, + "learning_rate": 8.740090446105342e-06, + "loss": 0.7656, + "step": 4944 + }, + { + "epoch": 0.2541371158392435, + "grad_norm": 1.0907549858093262, + "learning_rate": 8.739538045299245e-06, + "loss": 0.7689, + "step": 4945 + }, + { + "epoch": 0.2541885085825881, + "grad_norm": 1.0318487882614136, + "learning_rate": 8.738985540885981e-06, + "loss": 0.7512, + "step": 4946 + }, + { + "epoch": 0.25423990132593277, + "grad_norm": 1.1583892107009888, + "learning_rate": 8.738432932880858e-06, + "loss": 0.8172, + "step": 4947 + }, + { + "epoch": 0.2542912940692774, + "grad_norm": 1.1423659324645996, + "learning_rate": 8.737880221299187e-06, + "loss": 0.7866, + "step": 4948 + }, + { + "epoch": 0.25434268681262207, + "grad_norm": 1.1041771173477173, + "learning_rate": 8.73732740615628e-06, + "loss": 0.8195, + "step": 4949 + }, + { + "epoch": 0.2543940795559667, + "grad_norm": 1.1157997846603394, + "learning_rate": 8.736774487467452e-06, + "loss": 0.8101, + "step": 4950 + }, + { + "epoch": 0.25444547229931136, + "grad_norm": 1.0511146783828735, + "learning_rate": 8.736221465248025e-06, + "loss": 0.7203, + "step": 4951 + }, + { + "epoch": 0.25449686504265595, + "grad_norm": 1.0837618112564087, + "learning_rate": 8.735668339513319e-06, + "loss": 0.8002, + "step": 4952 + }, + { + "epoch": 0.2545482577860006, + "grad_norm": 1.0301893949508667, + "learning_rate": 8.73511511027866e-06, + "loss": 0.7427, + "step": 4953 + }, + { + "epoch": 0.25459965052934524, + "grad_norm": 1.1394309997558594, + "learning_rate": 8.734561777559375e-06, + "loss": 0.76, + "step": 4954 + }, + { + "epoch": 0.2546510432726899, + "grad_norm": 0.8104747533798218, + "learning_rate": 8.734008341370795e-06, + "loss": 0.6917, + "step": 4955 + }, + { + "epoch": 0.25470243601603454, + "grad_norm": 1.1251262426376343, + "learning_rate": 8.733454801728253e-06, + "loss": 0.7651, + "step": 4956 + }, + { + "epoch": 0.2547538287593792, + "grad_norm": 1.0606492757797241, + "learning_rate": 8.732901158647084e-06, + "loss": 0.8331, + "step": 4957 + }, + { + "epoch": 0.25480522150272383, + "grad_norm": 1.0583362579345703, + "learning_rate": 8.732347412142632e-06, + "loss": 0.7876, + "step": 4958 + }, + { + "epoch": 0.2548566142460685, + "grad_norm": 0.989799439907074, + "learning_rate": 8.731793562230232e-06, + "loss": 0.747, + "step": 4959 + }, + { + "epoch": 0.25490800698941307, + "grad_norm": 0.7415286898612976, + "learning_rate": 8.731239608925235e-06, + "loss": 0.6395, + "step": 4960 + }, + { + "epoch": 0.2549593997327577, + "grad_norm": 1.0102417469024658, + "learning_rate": 8.730685552242986e-06, + "loss": 0.8068, + "step": 4961 + }, + { + "epoch": 0.25501079247610237, + "grad_norm": 1.1203761100769043, + "learning_rate": 8.730131392198836e-06, + "loss": 0.8247, + "step": 4962 + }, + { + "epoch": 0.255062185219447, + "grad_norm": 1.1466277837753296, + "learning_rate": 8.729577128808138e-06, + "loss": 0.7664, + "step": 4963 + }, + { + "epoch": 0.25511357796279166, + "grad_norm": 0.718056321144104, + "learning_rate": 8.72902276208625e-06, + "loss": 0.7195, + "step": 4964 + }, + { + "epoch": 0.2551649707061363, + "grad_norm": 0.7646077275276184, + "learning_rate": 8.72846829204853e-06, + "loss": 0.7061, + "step": 4965 + }, + { + "epoch": 0.25521636344948095, + "grad_norm": 1.080056071281433, + "learning_rate": 8.727913718710338e-06, + "loss": 0.7759, + "step": 4966 + }, + { + "epoch": 0.25526775619282555, + "grad_norm": 1.0603599548339844, + "learning_rate": 8.727359042087043e-06, + "loss": 0.752, + "step": 4967 + }, + { + "epoch": 0.2553191489361702, + "grad_norm": 1.0147136449813843, + "learning_rate": 8.726804262194009e-06, + "loss": 0.7794, + "step": 4968 + }, + { + "epoch": 0.25537054167951484, + "grad_norm": 1.2840293645858765, + "learning_rate": 8.726249379046609e-06, + "loss": 0.7425, + "step": 4969 + }, + { + "epoch": 0.2554219344228595, + "grad_norm": 1.090493083000183, + "learning_rate": 8.725694392660217e-06, + "loss": 0.7547, + "step": 4970 + }, + { + "epoch": 0.25547332716620413, + "grad_norm": 1.0992645025253296, + "learning_rate": 8.72513930305021e-06, + "loss": 0.8339, + "step": 4971 + }, + { + "epoch": 0.2555247199095488, + "grad_norm": 1.0836302042007446, + "learning_rate": 8.724584110231962e-06, + "loss": 0.7487, + "step": 4972 + }, + { + "epoch": 0.25557611265289343, + "grad_norm": 0.8223841786384583, + "learning_rate": 8.724028814220863e-06, + "loss": 0.7373, + "step": 4973 + }, + { + "epoch": 0.2556275053962381, + "grad_norm": 1.6133450269699097, + "learning_rate": 8.723473415032288e-06, + "loss": 0.7086, + "step": 4974 + }, + { + "epoch": 0.25567889813958267, + "grad_norm": 1.1400054693222046, + "learning_rate": 8.722917912681635e-06, + "loss": 0.7844, + "step": 4975 + }, + { + "epoch": 0.2557302908829273, + "grad_norm": 1.12342369556427, + "learning_rate": 8.722362307184288e-06, + "loss": 0.7324, + "step": 4976 + }, + { + "epoch": 0.25578168362627196, + "grad_norm": 1.0867410898208618, + "learning_rate": 8.721806598555644e-06, + "loss": 0.7731, + "step": 4977 + }, + { + "epoch": 0.2558330763696166, + "grad_norm": 0.8389273881912231, + "learning_rate": 8.721250786811099e-06, + "loss": 0.6945, + "step": 4978 + }, + { + "epoch": 0.25588446911296125, + "grad_norm": 1.1234270334243774, + "learning_rate": 8.72069487196605e-06, + "loss": 0.7571, + "step": 4979 + }, + { + "epoch": 0.2559358618563059, + "grad_norm": 1.1643598079681396, + "learning_rate": 8.7201388540359e-06, + "loss": 0.7864, + "step": 4980 + }, + { + "epoch": 0.25598725459965055, + "grad_norm": 0.7769237756729126, + "learning_rate": 8.719582733036056e-06, + "loss": 0.6761, + "step": 4981 + }, + { + "epoch": 0.2560386473429952, + "grad_norm": 1.110906720161438, + "learning_rate": 8.719026508981922e-06, + "loss": 0.7742, + "step": 4982 + }, + { + "epoch": 0.2560900400863398, + "grad_norm": 1.195372462272644, + "learning_rate": 8.718470181888913e-06, + "loss": 0.855, + "step": 4983 + }, + { + "epoch": 0.25614143282968443, + "grad_norm": 1.1559065580368042, + "learning_rate": 8.717913751772441e-06, + "loss": 0.8368, + "step": 4984 + }, + { + "epoch": 0.2561928255730291, + "grad_norm": 1.0620718002319336, + "learning_rate": 8.717357218647921e-06, + "loss": 0.7777, + "step": 4985 + }, + { + "epoch": 0.25624421831637373, + "grad_norm": 1.0265811681747437, + "learning_rate": 8.716800582530773e-06, + "loss": 0.779, + "step": 4986 + }, + { + "epoch": 0.2562956110597184, + "grad_norm": 1.0878773927688599, + "learning_rate": 8.716243843436419e-06, + "loss": 0.786, + "step": 4987 + }, + { + "epoch": 0.256347003803063, + "grad_norm": 1.1171302795410156, + "learning_rate": 8.715687001380284e-06, + "loss": 0.7597, + "step": 4988 + }, + { + "epoch": 0.25639839654640767, + "grad_norm": 1.1406511068344116, + "learning_rate": 8.715130056377796e-06, + "loss": 0.738, + "step": 4989 + }, + { + "epoch": 0.25644978928975226, + "grad_norm": 1.052254557609558, + "learning_rate": 8.714573008444384e-06, + "loss": 0.8281, + "step": 4990 + }, + { + "epoch": 0.2565011820330969, + "grad_norm": 1.089924931526184, + "learning_rate": 8.714015857595486e-06, + "loss": 0.7863, + "step": 4991 + }, + { + "epoch": 0.25655257477644156, + "grad_norm": 1.038926124572754, + "learning_rate": 8.713458603846534e-06, + "loss": 0.7337, + "step": 4992 + }, + { + "epoch": 0.2566039675197862, + "grad_norm": 1.222162127494812, + "learning_rate": 8.71290124721297e-06, + "loss": 0.7727, + "step": 4993 + }, + { + "epoch": 0.25665536026313085, + "grad_norm": 0.7234727144241333, + "learning_rate": 8.712343787710233e-06, + "loss": 0.7213, + "step": 4994 + }, + { + "epoch": 0.2567067530064755, + "grad_norm": 1.1149122714996338, + "learning_rate": 8.71178622535377e-06, + "loss": 0.7795, + "step": 4995 + }, + { + "epoch": 0.25675814574982014, + "grad_norm": 0.762754499912262, + "learning_rate": 8.711228560159028e-06, + "loss": 0.7041, + "step": 4996 + }, + { + "epoch": 0.2568095384931648, + "grad_norm": 1.1941490173339844, + "learning_rate": 8.710670792141457e-06, + "loss": 0.781, + "step": 4997 + }, + { + "epoch": 0.2568609312365094, + "grad_norm": 1.1050397157669067, + "learning_rate": 8.710112921316513e-06, + "loss": 0.7821, + "step": 4998 + }, + { + "epoch": 0.25691232397985403, + "grad_norm": 1.0956331491470337, + "learning_rate": 8.709554947699651e-06, + "loss": 0.7783, + "step": 4999 + }, + { + "epoch": 0.2569637167231987, + "grad_norm": 1.0385510921478271, + "learning_rate": 8.70899687130633e-06, + "loss": 0.7627, + "step": 5000 + }, + { + "epoch": 0.2570151094665433, + "grad_norm": 1.1730506420135498, + "learning_rate": 8.70843869215201e-06, + "loss": 0.7567, + "step": 5001 + }, + { + "epoch": 0.25706650220988797, + "grad_norm": 0.813791036605835, + "learning_rate": 8.70788041025216e-06, + "loss": 0.6647, + "step": 5002 + }, + { + "epoch": 0.2571178949532326, + "grad_norm": 1.2626097202301025, + "learning_rate": 8.707322025622244e-06, + "loss": 0.6425, + "step": 5003 + }, + { + "epoch": 0.25716928769657726, + "grad_norm": 1.4615684747695923, + "learning_rate": 8.706763538277734e-06, + "loss": 0.8106, + "step": 5004 + }, + { + "epoch": 0.25722068043992186, + "grad_norm": 1.0810116529464722, + "learning_rate": 8.706204948234103e-06, + "loss": 0.8044, + "step": 5005 + }, + { + "epoch": 0.2572720731832665, + "grad_norm": 1.1453149318695068, + "learning_rate": 8.705646255506827e-06, + "loss": 0.8038, + "step": 5006 + }, + { + "epoch": 0.25732346592661115, + "grad_norm": 1.0877200365066528, + "learning_rate": 8.705087460111387e-06, + "loss": 0.7832, + "step": 5007 + }, + { + "epoch": 0.2573748586699558, + "grad_norm": 1.072830319404602, + "learning_rate": 8.704528562063264e-06, + "loss": 0.7436, + "step": 5008 + }, + { + "epoch": 0.25742625141330044, + "grad_norm": 1.3051509857177734, + "learning_rate": 8.70396956137794e-06, + "loss": 0.7607, + "step": 5009 + }, + { + "epoch": 0.2574776441566451, + "grad_norm": 1.1476794481277466, + "learning_rate": 8.703410458070906e-06, + "loss": 0.777, + "step": 5010 + }, + { + "epoch": 0.25752903689998974, + "grad_norm": 0.7591148018836975, + "learning_rate": 8.702851252157651e-06, + "loss": 0.6938, + "step": 5011 + }, + { + "epoch": 0.2575804296433344, + "grad_norm": 1.0850828886032104, + "learning_rate": 8.702291943653669e-06, + "loss": 0.7332, + "step": 5012 + }, + { + "epoch": 0.257631822386679, + "grad_norm": 1.0773557424545288, + "learning_rate": 8.701732532574453e-06, + "loss": 0.8087, + "step": 5013 + }, + { + "epoch": 0.2576832151300236, + "grad_norm": 1.158715844154358, + "learning_rate": 8.70117301893551e-06, + "loss": 0.8394, + "step": 5014 + }, + { + "epoch": 0.25773460787336827, + "grad_norm": 1.02519953250885, + "learning_rate": 8.700613402752332e-06, + "loss": 0.8025, + "step": 5015 + }, + { + "epoch": 0.2577860006167129, + "grad_norm": 1.1967909336090088, + "learning_rate": 8.70005368404043e-06, + "loss": 0.7651, + "step": 5016 + }, + { + "epoch": 0.25783739336005757, + "grad_norm": 1.0611220598220825, + "learning_rate": 8.699493862815308e-06, + "loss": 0.801, + "step": 5017 + }, + { + "epoch": 0.2578887861034022, + "grad_norm": 1.1413936614990234, + "learning_rate": 8.69893393909248e-06, + "loss": 0.8026, + "step": 5018 + }, + { + "epoch": 0.25794017884674686, + "grad_norm": 1.3112047910690308, + "learning_rate": 8.698373912887457e-06, + "loss": 0.7915, + "step": 5019 + }, + { + "epoch": 0.2579915715900915, + "grad_norm": 1.1287792921066284, + "learning_rate": 8.697813784215755e-06, + "loss": 0.7579, + "step": 5020 + }, + { + "epoch": 0.2580429643334361, + "grad_norm": 1.0979455709457397, + "learning_rate": 8.697253553092893e-06, + "loss": 0.7549, + "step": 5021 + }, + { + "epoch": 0.25809435707678074, + "grad_norm": 1.2231143712997437, + "learning_rate": 8.69669321953439e-06, + "loss": 0.8331, + "step": 5022 + }, + { + "epoch": 0.2581457498201254, + "grad_norm": 1.136443018913269, + "learning_rate": 8.696132783555777e-06, + "loss": 0.7266, + "step": 5023 + }, + { + "epoch": 0.25819714256347004, + "grad_norm": 1.1581809520721436, + "learning_rate": 8.695572245172577e-06, + "loss": 0.8048, + "step": 5024 + }, + { + "epoch": 0.2582485353068147, + "grad_norm": 1.0799040794372559, + "learning_rate": 8.695011604400318e-06, + "loss": 0.6908, + "step": 5025 + }, + { + "epoch": 0.25829992805015933, + "grad_norm": 1.0844402313232422, + "learning_rate": 8.69445086125454e-06, + "loss": 0.813, + "step": 5026 + }, + { + "epoch": 0.258351320793504, + "grad_norm": 1.1042795181274414, + "learning_rate": 8.693890015750772e-06, + "loss": 0.827, + "step": 5027 + }, + { + "epoch": 0.25840271353684857, + "grad_norm": 1.0872057676315308, + "learning_rate": 8.693329067904556e-06, + "loss": 0.7743, + "step": 5028 + }, + { + "epoch": 0.2584541062801932, + "grad_norm": 0.788754940032959, + "learning_rate": 8.692768017731432e-06, + "loss": 0.7389, + "step": 5029 + }, + { + "epoch": 0.25850549902353787, + "grad_norm": 1.173190951347351, + "learning_rate": 8.692206865246947e-06, + "loss": 0.8211, + "step": 5030 + }, + { + "epoch": 0.2585568917668825, + "grad_norm": 1.1367353200912476, + "learning_rate": 8.691645610466647e-06, + "loss": 0.7815, + "step": 5031 + }, + { + "epoch": 0.25860828451022716, + "grad_norm": 1.2713085412979126, + "learning_rate": 8.69108425340608e-06, + "loss": 0.7401, + "step": 5032 + }, + { + "epoch": 0.2586596772535718, + "grad_norm": 1.0603816509246826, + "learning_rate": 8.6905227940808e-06, + "loss": 0.7759, + "step": 5033 + }, + { + "epoch": 0.25871106999691645, + "grad_norm": 1.0356656312942505, + "learning_rate": 8.689961232506365e-06, + "loss": 0.6597, + "step": 5034 + }, + { + "epoch": 0.2587624627402611, + "grad_norm": 0.810200572013855, + "learning_rate": 8.689399568698332e-06, + "loss": 0.6643, + "step": 5035 + }, + { + "epoch": 0.2588138554836057, + "grad_norm": 1.0437504053115845, + "learning_rate": 8.688837802672262e-06, + "loss": 0.7397, + "step": 5036 + }, + { + "epoch": 0.25886524822695034, + "grad_norm": 1.1028997898101807, + "learning_rate": 8.688275934443718e-06, + "loss": 0.8189, + "step": 5037 + }, + { + "epoch": 0.258916640970295, + "grad_norm": 1.0486176013946533, + "learning_rate": 8.687713964028269e-06, + "loss": 0.7322, + "step": 5038 + }, + { + "epoch": 0.25896803371363963, + "grad_norm": 0.8827479481697083, + "learning_rate": 8.687151891441485e-06, + "loss": 0.732, + "step": 5039 + }, + { + "epoch": 0.2590194264569843, + "grad_norm": 1.0800729990005493, + "learning_rate": 8.686589716698938e-06, + "loss": 0.8007, + "step": 5040 + }, + { + "epoch": 0.25907081920032893, + "grad_norm": 1.0989967584609985, + "learning_rate": 8.686027439816203e-06, + "loss": 0.7402, + "step": 5041 + }, + { + "epoch": 0.2591222119436736, + "grad_norm": 1.1183170080184937, + "learning_rate": 8.68546506080886e-06, + "loss": 0.8842, + "step": 5042 + }, + { + "epoch": 0.25917360468701817, + "grad_norm": 0.7797960638999939, + "learning_rate": 8.684902579692487e-06, + "loss": 0.727, + "step": 5043 + }, + { + "epoch": 0.2592249974303628, + "grad_norm": 1.0695255994796753, + "learning_rate": 8.684339996482672e-06, + "loss": 0.7888, + "step": 5044 + }, + { + "epoch": 0.25927639017370746, + "grad_norm": 1.0753449201583862, + "learning_rate": 8.683777311195e-06, + "loss": 0.8243, + "step": 5045 + }, + { + "epoch": 0.2593277829170521, + "grad_norm": 1.0878175497055054, + "learning_rate": 8.68321452384506e-06, + "loss": 0.7643, + "step": 5046 + }, + { + "epoch": 0.25937917566039675, + "grad_norm": 1.0748659372329712, + "learning_rate": 8.682651634448447e-06, + "loss": 0.7614, + "step": 5047 + }, + { + "epoch": 0.2594305684037414, + "grad_norm": 1.0884603261947632, + "learning_rate": 8.682088643020754e-06, + "loss": 0.7311, + "step": 5048 + }, + { + "epoch": 0.25948196114708605, + "grad_norm": 0.8031647205352783, + "learning_rate": 8.681525549577578e-06, + "loss": 0.6888, + "step": 5049 + }, + { + "epoch": 0.2595333538904307, + "grad_norm": 1.0524901151657104, + "learning_rate": 8.680962354134524e-06, + "loss": 0.7752, + "step": 5050 + }, + { + "epoch": 0.2595847466337753, + "grad_norm": 0.6999702453613281, + "learning_rate": 8.680399056707192e-06, + "loss": 0.6595, + "step": 5051 + }, + { + "epoch": 0.25963613937711993, + "grad_norm": 1.1733062267303467, + "learning_rate": 8.679835657311191e-06, + "loss": 0.7802, + "step": 5052 + }, + { + "epoch": 0.2596875321204646, + "grad_norm": 0.9867770671844482, + "learning_rate": 8.67927215596213e-06, + "loss": 0.7245, + "step": 5053 + }, + { + "epoch": 0.25973892486380923, + "grad_norm": 1.10702383518219, + "learning_rate": 8.67870855267562e-06, + "loss": 0.7867, + "step": 5054 + }, + { + "epoch": 0.2597903176071539, + "grad_norm": 0.7846687436103821, + "learning_rate": 8.678144847467278e-06, + "loss": 0.7008, + "step": 5055 + }, + { + "epoch": 0.2598417103504985, + "grad_norm": 1.0470688343048096, + "learning_rate": 8.67758104035272e-06, + "loss": 0.7566, + "step": 5056 + }, + { + "epoch": 0.25989310309384317, + "grad_norm": 1.096498966217041, + "learning_rate": 8.677017131347571e-06, + "loss": 0.7461, + "step": 5057 + }, + { + "epoch": 0.2599444958371878, + "grad_norm": 1.0553582906723022, + "learning_rate": 8.676453120467449e-06, + "loss": 0.7382, + "step": 5058 + }, + { + "epoch": 0.2599958885805324, + "grad_norm": 1.1455203294754028, + "learning_rate": 8.675889007727982e-06, + "loss": 0.8325, + "step": 5059 + }, + { + "epoch": 0.26004728132387706, + "grad_norm": 1.11444091796875, + "learning_rate": 8.675324793144801e-06, + "loss": 0.7885, + "step": 5060 + }, + { + "epoch": 0.2600986740672217, + "grad_norm": 0.8875847458839417, + "learning_rate": 8.674760476733537e-06, + "loss": 0.699, + "step": 5061 + }, + { + "epoch": 0.26015006681056635, + "grad_norm": 0.7678377628326416, + "learning_rate": 8.674196058509826e-06, + "loss": 0.7154, + "step": 5062 + }, + { + "epoch": 0.260201459553911, + "grad_norm": 0.9734421968460083, + "learning_rate": 8.673631538489305e-06, + "loss": 0.7798, + "step": 5063 + }, + { + "epoch": 0.26025285229725564, + "grad_norm": 1.1335232257843018, + "learning_rate": 8.673066916687613e-06, + "loss": 0.7203, + "step": 5064 + }, + { + "epoch": 0.2603042450406003, + "grad_norm": 1.07967209815979, + "learning_rate": 8.672502193120394e-06, + "loss": 0.8198, + "step": 5065 + }, + { + "epoch": 0.2603556377839449, + "grad_norm": 1.126824975013733, + "learning_rate": 8.671937367803295e-06, + "loss": 0.8024, + "step": 5066 + }, + { + "epoch": 0.26040703052728953, + "grad_norm": 0.7854825258255005, + "learning_rate": 8.671372440751966e-06, + "loss": 0.6872, + "step": 5067 + }, + { + "epoch": 0.2604584232706342, + "grad_norm": 0.7319926023483276, + "learning_rate": 8.670807411982056e-06, + "loss": 0.6964, + "step": 5068 + }, + { + "epoch": 0.2605098160139788, + "grad_norm": 1.024675965309143, + "learning_rate": 8.670242281509222e-06, + "loss": 0.8014, + "step": 5069 + }, + { + "epoch": 0.26056120875732347, + "grad_norm": 0.9250428080558777, + "learning_rate": 8.669677049349122e-06, + "loss": 0.6704, + "step": 5070 + }, + { + "epoch": 0.2606126015006681, + "grad_norm": 1.0948251485824585, + "learning_rate": 8.669111715517413e-06, + "loss": 0.8217, + "step": 5071 + }, + { + "epoch": 0.26066399424401276, + "grad_norm": 0.777854323387146, + "learning_rate": 8.668546280029759e-06, + "loss": 0.6905, + "step": 5072 + }, + { + "epoch": 0.2607153869873574, + "grad_norm": 1.0761494636535645, + "learning_rate": 8.66798074290183e-06, + "loss": 0.8133, + "step": 5073 + }, + { + "epoch": 0.260766779730702, + "grad_norm": 1.0734977722167969, + "learning_rate": 8.667415104149288e-06, + "loss": 0.755, + "step": 5074 + }, + { + "epoch": 0.26081817247404665, + "grad_norm": 1.1395795345306396, + "learning_rate": 8.66684936378781e-06, + "loss": 0.84, + "step": 5075 + }, + { + "epoch": 0.2608695652173913, + "grad_norm": 0.794562816619873, + "learning_rate": 8.666283521833067e-06, + "loss": 0.6791, + "step": 5076 + }, + { + "epoch": 0.26092095796073594, + "grad_norm": 1.1529595851898193, + "learning_rate": 8.665717578300739e-06, + "loss": 0.7896, + "step": 5077 + }, + { + "epoch": 0.2609723507040806, + "grad_norm": 0.7578186392784119, + "learning_rate": 8.665151533206504e-06, + "loss": 0.7024, + "step": 5078 + }, + { + "epoch": 0.26102374344742524, + "grad_norm": 1.1673104763031006, + "learning_rate": 8.664585386566046e-06, + "loss": 0.8382, + "step": 5079 + }, + { + "epoch": 0.2610751361907699, + "grad_norm": 1.203487515449524, + "learning_rate": 8.664019138395047e-06, + "loss": 0.8381, + "step": 5080 + }, + { + "epoch": 0.2611265289341145, + "grad_norm": 1.1061145067214966, + "learning_rate": 8.6634527887092e-06, + "loss": 0.7673, + "step": 5081 + }, + { + "epoch": 0.2611779216774591, + "grad_norm": 0.8297333717346191, + "learning_rate": 8.662886337524196e-06, + "loss": 0.6849, + "step": 5082 + }, + { + "epoch": 0.26122931442080377, + "grad_norm": 1.1618329286575317, + "learning_rate": 8.662319784855727e-06, + "loss": 0.8178, + "step": 5083 + }, + { + "epoch": 0.2612807071641484, + "grad_norm": 0.8649482131004333, + "learning_rate": 8.661753130719488e-06, + "loss": 0.6944, + "step": 5084 + }, + { + "epoch": 0.26133209990749307, + "grad_norm": 1.0497634410858154, + "learning_rate": 8.661186375131181e-06, + "loss": 0.7729, + "step": 5085 + }, + { + "epoch": 0.2613834926508377, + "grad_norm": 0.761749804019928, + "learning_rate": 8.66061951810651e-06, + "loss": 0.7245, + "step": 5086 + }, + { + "epoch": 0.26143488539418236, + "grad_norm": 1.1066533327102661, + "learning_rate": 8.660052559661178e-06, + "loss": 0.798, + "step": 5087 + }, + { + "epoch": 0.261486278137527, + "grad_norm": 1.1950860023498535, + "learning_rate": 8.659485499810894e-06, + "loss": 0.7683, + "step": 5088 + }, + { + "epoch": 0.2615376708808716, + "grad_norm": 1.0164082050323486, + "learning_rate": 8.658918338571367e-06, + "loss": 0.7085, + "step": 5089 + }, + { + "epoch": 0.26158906362421624, + "grad_norm": 1.0267949104309082, + "learning_rate": 8.658351075958314e-06, + "loss": 0.7453, + "step": 5090 + }, + { + "epoch": 0.2616404563675609, + "grad_norm": 1.1727787256240845, + "learning_rate": 8.65778371198745e-06, + "loss": 0.7814, + "step": 5091 + }, + { + "epoch": 0.26169184911090554, + "grad_norm": 1.1793876886367798, + "learning_rate": 8.657216246674493e-06, + "loss": 0.8194, + "step": 5092 + }, + { + "epoch": 0.2617432418542502, + "grad_norm": 1.0998691320419312, + "learning_rate": 8.656648680035166e-06, + "loss": 0.7992, + "step": 5093 + }, + { + "epoch": 0.26179463459759483, + "grad_norm": 0.8477131724357605, + "learning_rate": 8.656081012085196e-06, + "loss": 0.751, + "step": 5094 + }, + { + "epoch": 0.2618460273409395, + "grad_norm": 1.113799810409546, + "learning_rate": 8.655513242840308e-06, + "loss": 0.7602, + "step": 5095 + }, + { + "epoch": 0.26189742008428407, + "grad_norm": 1.1180912256240845, + "learning_rate": 8.654945372316233e-06, + "loss": 0.7779, + "step": 5096 + }, + { + "epoch": 0.2619488128276287, + "grad_norm": 1.2480909824371338, + "learning_rate": 8.654377400528706e-06, + "loss": 0.8325, + "step": 5097 + }, + { + "epoch": 0.26200020557097337, + "grad_norm": 1.0494873523712158, + "learning_rate": 8.65380932749346e-06, + "loss": 0.7941, + "step": 5098 + }, + { + "epoch": 0.262051598314318, + "grad_norm": 0.7988775372505188, + "learning_rate": 8.653241153226237e-06, + "loss": 0.7066, + "step": 5099 + }, + { + "epoch": 0.26210299105766266, + "grad_norm": 0.7774263024330139, + "learning_rate": 8.652672877742778e-06, + "loss": 0.6591, + "step": 5100 + }, + { + "epoch": 0.2621543838010073, + "grad_norm": 0.8363752961158752, + "learning_rate": 8.652104501058827e-06, + "loss": 0.7104, + "step": 5101 + }, + { + "epoch": 0.26220577654435195, + "grad_norm": 1.057090163230896, + "learning_rate": 8.651536023190131e-06, + "loss": 0.8097, + "step": 5102 + }, + { + "epoch": 0.2622571692876966, + "grad_norm": 1.4543486833572388, + "learning_rate": 8.650967444152441e-06, + "loss": 0.7698, + "step": 5103 + }, + { + "epoch": 0.2623085620310412, + "grad_norm": 1.0873017311096191, + "learning_rate": 8.650398763961511e-06, + "loss": 0.7985, + "step": 5104 + }, + { + "epoch": 0.26235995477438584, + "grad_norm": 1.0244823694229126, + "learning_rate": 8.649829982633094e-06, + "loss": 0.737, + "step": 5105 + }, + { + "epoch": 0.2624113475177305, + "grad_norm": 1.105635643005371, + "learning_rate": 8.649261100182951e-06, + "loss": 0.7347, + "step": 5106 + }, + { + "epoch": 0.26246274026107513, + "grad_norm": 1.021653652191162, + "learning_rate": 8.648692116626842e-06, + "loss": 0.7414, + "step": 5107 + }, + { + "epoch": 0.2625141330044198, + "grad_norm": 1.1400269269943237, + "learning_rate": 8.648123031980533e-06, + "loss": 0.7245, + "step": 5108 + }, + { + "epoch": 0.26256552574776443, + "grad_norm": 1.0821542739868164, + "learning_rate": 8.647553846259789e-06, + "loss": 0.7688, + "step": 5109 + }, + { + "epoch": 0.2626169184911091, + "grad_norm": 0.8694726824760437, + "learning_rate": 8.64698455948038e-06, + "loss": 0.7083, + "step": 5110 + }, + { + "epoch": 0.2626683112344537, + "grad_norm": 1.0345886945724487, + "learning_rate": 8.646415171658078e-06, + "loss": 0.7441, + "step": 5111 + }, + { + "epoch": 0.2627197039777983, + "grad_norm": 1.1652065515518188, + "learning_rate": 8.64584568280866e-06, + "loss": 0.7594, + "step": 5112 + }, + { + "epoch": 0.26277109672114296, + "grad_norm": 1.1456356048583984, + "learning_rate": 8.645276092947906e-06, + "loss": 0.7828, + "step": 5113 + }, + { + "epoch": 0.2628224894644876, + "grad_norm": 1.012272596359253, + "learning_rate": 8.644706402091594e-06, + "loss": 0.7396, + "step": 5114 + }, + { + "epoch": 0.26287388220783225, + "grad_norm": 1.0662199258804321, + "learning_rate": 8.644136610255506e-06, + "loss": 0.7493, + "step": 5115 + }, + { + "epoch": 0.2629252749511769, + "grad_norm": 1.0539804697036743, + "learning_rate": 8.643566717455433e-06, + "loss": 0.7198, + "step": 5116 + }, + { + "epoch": 0.26297666769452155, + "grad_norm": 1.1356357336044312, + "learning_rate": 8.642996723707162e-06, + "loss": 0.8406, + "step": 5117 + }, + { + "epoch": 0.2630280604378662, + "grad_norm": 1.1133993864059448, + "learning_rate": 8.642426629026485e-06, + "loss": 0.7375, + "step": 5118 + }, + { + "epoch": 0.2630794531812108, + "grad_norm": 1.1521869897842407, + "learning_rate": 8.6418564334292e-06, + "loss": 0.8206, + "step": 5119 + }, + { + "epoch": 0.26313084592455543, + "grad_norm": 1.0873502492904663, + "learning_rate": 8.641286136931097e-06, + "loss": 0.8057, + "step": 5120 + }, + { + "epoch": 0.2631822386679001, + "grad_norm": 1.2929216623306274, + "learning_rate": 8.640715739547986e-06, + "loss": 0.8423, + "step": 5121 + }, + { + "epoch": 0.26323363141124473, + "grad_norm": 0.7494547367095947, + "learning_rate": 8.640145241295665e-06, + "loss": 0.6868, + "step": 5122 + }, + { + "epoch": 0.2632850241545894, + "grad_norm": 1.157834768295288, + "learning_rate": 8.639574642189942e-06, + "loss": 0.7931, + "step": 5123 + }, + { + "epoch": 0.263336416897934, + "grad_norm": 1.177663803100586, + "learning_rate": 8.639003942246625e-06, + "loss": 0.8395, + "step": 5124 + }, + { + "epoch": 0.26338780964127867, + "grad_norm": 1.0290026664733887, + "learning_rate": 8.638433141481526e-06, + "loss": 0.6914, + "step": 5125 + }, + { + "epoch": 0.2634392023846233, + "grad_norm": 1.0879486799240112, + "learning_rate": 8.63786223991046e-06, + "loss": 0.7715, + "step": 5126 + }, + { + "epoch": 0.2634905951279679, + "grad_norm": 1.1543580293655396, + "learning_rate": 8.637291237549243e-06, + "loss": 0.7774, + "step": 5127 + }, + { + "epoch": 0.26354198787131256, + "grad_norm": 1.1650959253311157, + "learning_rate": 8.636720134413696e-06, + "loss": 0.7929, + "step": 5128 + }, + { + "epoch": 0.2635933806146572, + "grad_norm": 1.182424545288086, + "learning_rate": 8.636148930519642e-06, + "loss": 0.7495, + "step": 5129 + }, + { + "epoch": 0.26364477335800185, + "grad_norm": 0.9837983250617981, + "learning_rate": 8.635577625882907e-06, + "loss": 0.7398, + "step": 5130 + }, + { + "epoch": 0.2636961661013465, + "grad_norm": 1.0453872680664062, + "learning_rate": 8.63500622051932e-06, + "loss": 0.755, + "step": 5131 + }, + { + "epoch": 0.26374755884469114, + "grad_norm": 1.0032366514205933, + "learning_rate": 8.634434714444711e-06, + "loss": 0.7382, + "step": 5132 + }, + { + "epoch": 0.2637989515880358, + "grad_norm": 1.1037267446517944, + "learning_rate": 8.633863107674914e-06, + "loss": 0.7449, + "step": 5133 + }, + { + "epoch": 0.2638503443313804, + "grad_norm": 1.072238564491272, + "learning_rate": 8.633291400225766e-06, + "loss": 0.7802, + "step": 5134 + }, + { + "epoch": 0.26390173707472503, + "grad_norm": 1.1269937753677368, + "learning_rate": 8.632719592113106e-06, + "loss": 0.7634, + "step": 5135 + }, + { + "epoch": 0.2639531298180697, + "grad_norm": 0.7631784677505493, + "learning_rate": 8.63214768335278e-06, + "loss": 0.7382, + "step": 5136 + }, + { + "epoch": 0.2640045225614143, + "grad_norm": 1.1025582551956177, + "learning_rate": 8.631575673960628e-06, + "loss": 0.8277, + "step": 5137 + }, + { + "epoch": 0.26405591530475897, + "grad_norm": 0.7933250069618225, + "learning_rate": 8.631003563952504e-06, + "loss": 0.6967, + "step": 5138 + }, + { + "epoch": 0.2641073080481036, + "grad_norm": 1.1932233572006226, + "learning_rate": 8.630431353344254e-06, + "loss": 0.813, + "step": 5139 + }, + { + "epoch": 0.26415870079144826, + "grad_norm": 1.1918531656265259, + "learning_rate": 8.629859042151732e-06, + "loss": 0.7922, + "step": 5140 + }, + { + "epoch": 0.2642100935347929, + "grad_norm": 1.0909969806671143, + "learning_rate": 8.629286630390797e-06, + "loss": 0.7623, + "step": 5141 + }, + { + "epoch": 0.2642614862781375, + "grad_norm": 1.0843340158462524, + "learning_rate": 8.628714118077305e-06, + "loss": 0.7488, + "step": 5142 + }, + { + "epoch": 0.26431287902148215, + "grad_norm": 1.1961742639541626, + "learning_rate": 8.628141505227121e-06, + "loss": 0.7505, + "step": 5143 + }, + { + "epoch": 0.2643642717648268, + "grad_norm": 1.1419634819030762, + "learning_rate": 8.627568791856107e-06, + "loss": 0.8349, + "step": 5144 + }, + { + "epoch": 0.26441566450817144, + "grad_norm": 1.1175090074539185, + "learning_rate": 8.626995977980133e-06, + "loss": 0.7927, + "step": 5145 + }, + { + "epoch": 0.2644670572515161, + "grad_norm": 1.0712730884552002, + "learning_rate": 8.626423063615068e-06, + "loss": 0.7784, + "step": 5146 + }, + { + "epoch": 0.26451844999486074, + "grad_norm": 1.1190199851989746, + "learning_rate": 8.625850048776785e-06, + "loss": 0.8551, + "step": 5147 + }, + { + "epoch": 0.2645698427382054, + "grad_norm": 1.1434930562973022, + "learning_rate": 8.625276933481162e-06, + "loss": 0.815, + "step": 5148 + }, + { + "epoch": 0.26462123548155003, + "grad_norm": 1.1858470439910889, + "learning_rate": 8.624703717744073e-06, + "loss": 0.7797, + "step": 5149 + }, + { + "epoch": 0.2646726282248946, + "grad_norm": 0.7807711362838745, + "learning_rate": 8.624130401581403e-06, + "loss": 0.6667, + "step": 5150 + }, + { + "epoch": 0.26472402096823927, + "grad_norm": 1.0951123237609863, + "learning_rate": 8.623556985009035e-06, + "loss": 0.7942, + "step": 5151 + }, + { + "epoch": 0.2647754137115839, + "grad_norm": 0.747201681137085, + "learning_rate": 8.622983468042858e-06, + "loss": 0.7058, + "step": 5152 + }, + { + "epoch": 0.26482680645492856, + "grad_norm": 1.1744269132614136, + "learning_rate": 8.62240985069876e-06, + "loss": 0.7728, + "step": 5153 + }, + { + "epoch": 0.2648781991982732, + "grad_norm": 1.1512211561203003, + "learning_rate": 8.621836132992634e-06, + "loss": 0.8197, + "step": 5154 + }, + { + "epoch": 0.26492959194161786, + "grad_norm": 1.1541978120803833, + "learning_rate": 8.621262314940375e-06, + "loss": 0.7131, + "step": 5155 + }, + { + "epoch": 0.2649809846849625, + "grad_norm": 0.7653104662895203, + "learning_rate": 8.62068839655788e-06, + "loss": 0.7284, + "step": 5156 + }, + { + "epoch": 0.2650323774283071, + "grad_norm": 1.1041626930236816, + "learning_rate": 8.620114377861053e-06, + "loss": 0.8126, + "step": 5157 + }, + { + "epoch": 0.26508377017165174, + "grad_norm": 1.0981786251068115, + "learning_rate": 8.619540258865795e-06, + "loss": 0.8056, + "step": 5158 + }, + { + "epoch": 0.2651351629149964, + "grad_norm": 1.1062973737716675, + "learning_rate": 8.618966039588015e-06, + "loss": 0.8203, + "step": 5159 + }, + { + "epoch": 0.26518655565834104, + "grad_norm": 1.0797981023788452, + "learning_rate": 8.618391720043619e-06, + "loss": 0.7644, + "step": 5160 + }, + { + "epoch": 0.2652379484016857, + "grad_norm": 0.9855642914772034, + "learning_rate": 8.61781730024852e-06, + "loss": 0.7819, + "step": 5161 + }, + { + "epoch": 0.26528934114503033, + "grad_norm": 1.036364197731018, + "learning_rate": 8.617242780218634e-06, + "loss": 0.7873, + "step": 5162 + }, + { + "epoch": 0.265340733888375, + "grad_norm": 1.1648101806640625, + "learning_rate": 8.61666815996988e-06, + "loss": 0.8164, + "step": 5163 + }, + { + "epoch": 0.2653921266317196, + "grad_norm": 0.7500884532928467, + "learning_rate": 8.616093439518174e-06, + "loss": 0.6845, + "step": 5164 + }, + { + "epoch": 0.2654435193750642, + "grad_norm": 0.8555760383605957, + "learning_rate": 8.61551861887944e-06, + "loss": 0.6706, + "step": 5165 + }, + { + "epoch": 0.26549491211840887, + "grad_norm": 1.122339129447937, + "learning_rate": 8.61494369806961e-06, + "loss": 0.7486, + "step": 5166 + }, + { + "epoch": 0.2655463048617535, + "grad_norm": 1.2896727323532104, + "learning_rate": 8.614368677104605e-06, + "loss": 0.7495, + "step": 5167 + }, + { + "epoch": 0.26559769760509816, + "grad_norm": 0.7060507535934448, + "learning_rate": 8.613793556000361e-06, + "loss": 0.7243, + "step": 5168 + }, + { + "epoch": 0.2656490903484428, + "grad_norm": 1.0945807695388794, + "learning_rate": 8.61321833477281e-06, + "loss": 0.7809, + "step": 5169 + }, + { + "epoch": 0.26570048309178745, + "grad_norm": 1.1773170232772827, + "learning_rate": 8.61264301343789e-06, + "loss": 0.7957, + "step": 5170 + }, + { + "epoch": 0.2657518758351321, + "grad_norm": 1.1937668323516846, + "learning_rate": 8.612067592011542e-06, + "loss": 0.8343, + "step": 5171 + }, + { + "epoch": 0.2658032685784767, + "grad_norm": 1.1287274360656738, + "learning_rate": 8.611492070509704e-06, + "loss": 0.8351, + "step": 5172 + }, + { + "epoch": 0.26585466132182134, + "grad_norm": 1.0422691106796265, + "learning_rate": 8.610916448948326e-06, + "loss": 0.7549, + "step": 5173 + }, + { + "epoch": 0.265906054065166, + "grad_norm": 1.053804636001587, + "learning_rate": 8.610340727343355e-06, + "loss": 0.7362, + "step": 5174 + }, + { + "epoch": 0.26595744680851063, + "grad_norm": 1.0331788063049316, + "learning_rate": 8.609764905710743e-06, + "loss": 0.7673, + "step": 5175 + }, + { + "epoch": 0.2660088395518553, + "grad_norm": 1.1716344356536865, + "learning_rate": 8.609188984066438e-06, + "loss": 0.7855, + "step": 5176 + }, + { + "epoch": 0.2660602322951999, + "grad_norm": 1.093544363975525, + "learning_rate": 8.608612962426404e-06, + "loss": 0.7802, + "step": 5177 + }, + { + "epoch": 0.2661116250385446, + "grad_norm": 1.080235242843628, + "learning_rate": 8.608036840806596e-06, + "loss": 0.7464, + "step": 5178 + }, + { + "epoch": 0.2661630177818892, + "grad_norm": 1.0788439512252808, + "learning_rate": 8.607460619222976e-06, + "loss": 0.7617, + "step": 5179 + }, + { + "epoch": 0.2662144105252338, + "grad_norm": 1.0717482566833496, + "learning_rate": 8.606884297691508e-06, + "loss": 0.811, + "step": 5180 + }, + { + "epoch": 0.26626580326857846, + "grad_norm": 0.7974849939346313, + "learning_rate": 8.606307876228164e-06, + "loss": 0.7467, + "step": 5181 + }, + { + "epoch": 0.2663171960119231, + "grad_norm": 1.110707402229309, + "learning_rate": 8.60573135484891e-06, + "loss": 0.7343, + "step": 5182 + }, + { + "epoch": 0.26636858875526775, + "grad_norm": 1.0326586961746216, + "learning_rate": 8.605154733569719e-06, + "loss": 0.6968, + "step": 5183 + }, + { + "epoch": 0.2664199814986124, + "grad_norm": 1.1575734615325928, + "learning_rate": 8.604578012406568e-06, + "loss": 0.8542, + "step": 5184 + }, + { + "epoch": 0.26647137424195705, + "grad_norm": 0.8235086798667908, + "learning_rate": 8.604001191375436e-06, + "loss": 0.6815, + "step": 5185 + }, + { + "epoch": 0.2665227669853017, + "grad_norm": 0.7503124475479126, + "learning_rate": 8.603424270492305e-06, + "loss": 0.701, + "step": 5186 + }, + { + "epoch": 0.26657415972864634, + "grad_norm": 1.07439124584198, + "learning_rate": 8.602847249773157e-06, + "loss": 0.7795, + "step": 5187 + }, + { + "epoch": 0.26662555247199093, + "grad_norm": 1.0742294788360596, + "learning_rate": 8.602270129233979e-06, + "loss": 0.7283, + "step": 5188 + }, + { + "epoch": 0.2666769452153356, + "grad_norm": 1.0845779180526733, + "learning_rate": 8.601692908890761e-06, + "loss": 0.8082, + "step": 5189 + }, + { + "epoch": 0.26672833795868023, + "grad_norm": 1.169121265411377, + "learning_rate": 8.601115588759497e-06, + "loss": 0.7409, + "step": 5190 + }, + { + "epoch": 0.2667797307020249, + "grad_norm": 1.1016429662704468, + "learning_rate": 8.600538168856181e-06, + "loss": 0.6637, + "step": 5191 + }, + { + "epoch": 0.2668311234453695, + "grad_norm": 1.0152069330215454, + "learning_rate": 8.59996064919681e-06, + "loss": 0.7655, + "step": 5192 + }, + { + "epoch": 0.26688251618871417, + "grad_norm": 1.1455893516540527, + "learning_rate": 8.599383029797385e-06, + "loss": 0.7382, + "step": 5193 + }, + { + "epoch": 0.2669339089320588, + "grad_norm": 1.1317106485366821, + "learning_rate": 8.598805310673913e-06, + "loss": 0.8099, + "step": 5194 + }, + { + "epoch": 0.2669853016754034, + "grad_norm": 1.1316012144088745, + "learning_rate": 8.598227491842395e-06, + "loss": 0.7898, + "step": 5195 + }, + { + "epoch": 0.26703669441874806, + "grad_norm": 1.0742870569229126, + "learning_rate": 8.597649573318842e-06, + "loss": 0.7666, + "step": 5196 + }, + { + "epoch": 0.2670880871620927, + "grad_norm": 1.0907400846481323, + "learning_rate": 8.597071555119268e-06, + "loss": 0.8118, + "step": 5197 + }, + { + "epoch": 0.26713947990543735, + "grad_norm": 1.0511053800582886, + "learning_rate": 8.596493437259684e-06, + "loss": 0.7121, + "step": 5198 + }, + { + "epoch": 0.267190872648782, + "grad_norm": 1.049952507019043, + "learning_rate": 8.595915219756108e-06, + "loss": 0.7571, + "step": 5199 + }, + { + "epoch": 0.26724226539212664, + "grad_norm": 1.2749274969100952, + "learning_rate": 8.595336902624562e-06, + "loss": 0.7701, + "step": 5200 + }, + { + "epoch": 0.2672936581354713, + "grad_norm": 0.8104772567749023, + "learning_rate": 8.594758485881066e-06, + "loss": 0.7115, + "step": 5201 + }, + { + "epoch": 0.26734505087881594, + "grad_norm": 1.1337356567382812, + "learning_rate": 8.594179969541649e-06, + "loss": 0.7462, + "step": 5202 + }, + { + "epoch": 0.26739644362216053, + "grad_norm": 0.8976438641548157, + "learning_rate": 8.593601353622337e-06, + "loss": 0.7139, + "step": 5203 + }, + { + "epoch": 0.2674478363655052, + "grad_norm": 1.150750756263733, + "learning_rate": 8.59302263813916e-06, + "loss": 0.8114, + "step": 5204 + }, + { + "epoch": 0.2674992291088498, + "grad_norm": 1.183227777481079, + "learning_rate": 8.592443823108155e-06, + "loss": 0.8059, + "step": 5205 + }, + { + "epoch": 0.26755062185219447, + "grad_norm": 0.6917653679847717, + "learning_rate": 8.591864908545355e-06, + "loss": 0.6633, + "step": 5206 + }, + { + "epoch": 0.2676020145955391, + "grad_norm": 1.0715919733047485, + "learning_rate": 8.591285894466802e-06, + "loss": 0.7681, + "step": 5207 + }, + { + "epoch": 0.26765340733888376, + "grad_norm": 1.0410206317901611, + "learning_rate": 8.590706780888536e-06, + "loss": 0.7709, + "step": 5208 + }, + { + "epoch": 0.2677048000822284, + "grad_norm": 1.1177297830581665, + "learning_rate": 8.590127567826605e-06, + "loss": 0.7949, + "step": 5209 + }, + { + "epoch": 0.267756192825573, + "grad_norm": 1.0460163354873657, + "learning_rate": 8.589548255297053e-06, + "loss": 0.7188, + "step": 5210 + }, + { + "epoch": 0.26780758556891765, + "grad_norm": 1.1022695302963257, + "learning_rate": 8.588968843315934e-06, + "loss": 0.7839, + "step": 5211 + }, + { + "epoch": 0.2678589783122623, + "grad_norm": 0.85365229845047, + "learning_rate": 8.588389331899298e-06, + "loss": 0.695, + "step": 5212 + }, + { + "epoch": 0.26791037105560694, + "grad_norm": 0.7649239301681519, + "learning_rate": 8.587809721063202e-06, + "loss": 0.6645, + "step": 5213 + }, + { + "epoch": 0.2679617637989516, + "grad_norm": 1.0407639741897583, + "learning_rate": 8.587230010823704e-06, + "loss": 0.7223, + "step": 5214 + }, + { + "epoch": 0.26801315654229624, + "grad_norm": 1.111070990562439, + "learning_rate": 8.586650201196866e-06, + "loss": 0.8047, + "step": 5215 + }, + { + "epoch": 0.2680645492856409, + "grad_norm": 1.070738434791565, + "learning_rate": 8.586070292198754e-06, + "loss": 0.7869, + "step": 5216 + }, + { + "epoch": 0.26811594202898553, + "grad_norm": 1.0527263879776, + "learning_rate": 8.585490283845432e-06, + "loss": 0.7646, + "step": 5217 + }, + { + "epoch": 0.2681673347723301, + "grad_norm": 1.1252719163894653, + "learning_rate": 8.58491017615297e-06, + "loss": 0.8296, + "step": 5218 + }, + { + "epoch": 0.26821872751567477, + "grad_norm": 1.064440369606018, + "learning_rate": 8.584329969137442e-06, + "loss": 0.7831, + "step": 5219 + }, + { + "epoch": 0.2682701202590194, + "grad_norm": 1.135534644126892, + "learning_rate": 8.583749662814922e-06, + "loss": 0.7966, + "step": 5220 + }, + { + "epoch": 0.26832151300236406, + "grad_norm": 1.0979188680648804, + "learning_rate": 8.58316925720149e-06, + "loss": 0.7799, + "step": 5221 + }, + { + "epoch": 0.2683729057457087, + "grad_norm": 0.8376423716545105, + "learning_rate": 8.58258875231322e-06, + "loss": 0.6975, + "step": 5222 + }, + { + "epoch": 0.26842429848905336, + "grad_norm": 1.1467187404632568, + "learning_rate": 8.582008148166204e-06, + "loss": 0.7253, + "step": 5223 + }, + { + "epoch": 0.268475691232398, + "grad_norm": 1.1565247774124146, + "learning_rate": 8.581427444776524e-06, + "loss": 0.7909, + "step": 5224 + }, + { + "epoch": 0.2685270839757426, + "grad_norm": 1.0919477939605713, + "learning_rate": 8.580846642160268e-06, + "loss": 0.7911, + "step": 5225 + }, + { + "epoch": 0.26857847671908724, + "grad_norm": 0.7637118101119995, + "learning_rate": 8.58026574033353e-06, + "loss": 0.6743, + "step": 5226 + }, + { + "epoch": 0.2686298694624319, + "grad_norm": 0.6978450417518616, + "learning_rate": 8.579684739312401e-06, + "loss": 0.7074, + "step": 5227 + }, + { + "epoch": 0.26868126220577654, + "grad_norm": 0.7102873921394348, + "learning_rate": 8.579103639112983e-06, + "loss": 0.6827, + "step": 5228 + }, + { + "epoch": 0.2687326549491212, + "grad_norm": 1.1391888856887817, + "learning_rate": 8.57852243975137e-06, + "loss": 0.7582, + "step": 5229 + }, + { + "epoch": 0.26878404769246583, + "grad_norm": 1.0433030128479004, + "learning_rate": 8.577941141243672e-06, + "loss": 0.7798, + "step": 5230 + }, + { + "epoch": 0.2688354404358105, + "grad_norm": 1.1109044551849365, + "learning_rate": 8.577359743605989e-06, + "loss": 0.8187, + "step": 5231 + }, + { + "epoch": 0.2688868331791551, + "grad_norm": 0.7943516969680786, + "learning_rate": 8.576778246854429e-06, + "loss": 0.6777, + "step": 5232 + }, + { + "epoch": 0.2689382259224997, + "grad_norm": 1.088222861289978, + "learning_rate": 8.576196651005105e-06, + "loss": 0.7823, + "step": 5233 + }, + { + "epoch": 0.26898961866584437, + "grad_norm": 1.1497323513031006, + "learning_rate": 8.575614956074129e-06, + "loss": 0.7857, + "step": 5234 + }, + { + "epoch": 0.269041011409189, + "grad_norm": 1.1060123443603516, + "learning_rate": 8.57503316207762e-06, + "loss": 0.7232, + "step": 5235 + }, + { + "epoch": 0.26909240415253366, + "grad_norm": 1.0698469877243042, + "learning_rate": 8.574451269031694e-06, + "loss": 0.7686, + "step": 5236 + }, + { + "epoch": 0.2691437968958783, + "grad_norm": 1.1172964572906494, + "learning_rate": 8.573869276952475e-06, + "loss": 0.7963, + "step": 5237 + }, + { + "epoch": 0.26919518963922295, + "grad_norm": 1.1459288597106934, + "learning_rate": 8.573287185856085e-06, + "loss": 0.7425, + "step": 5238 + }, + { + "epoch": 0.2692465823825676, + "grad_norm": 1.1439753770828247, + "learning_rate": 8.572704995758656e-06, + "loss": 0.8356, + "step": 5239 + }, + { + "epoch": 0.26929797512591225, + "grad_norm": 1.1226779222488403, + "learning_rate": 8.572122706676314e-06, + "loss": 0.859, + "step": 5240 + }, + { + "epoch": 0.26934936786925684, + "grad_norm": 1.0330973863601685, + "learning_rate": 8.571540318625192e-06, + "loss": 0.7626, + "step": 5241 + }, + { + "epoch": 0.2694007606126015, + "grad_norm": 1.102895736694336, + "learning_rate": 8.570957831621429e-06, + "loss": 0.8574, + "step": 5242 + }, + { + "epoch": 0.26945215335594613, + "grad_norm": 1.0460255146026611, + "learning_rate": 8.57037524568116e-06, + "loss": 0.7875, + "step": 5243 + }, + { + "epoch": 0.2695035460992908, + "grad_norm": 1.0452920198440552, + "learning_rate": 8.569792560820525e-06, + "loss": 0.7903, + "step": 5244 + }, + { + "epoch": 0.2695549388426354, + "grad_norm": 1.0433146953582764, + "learning_rate": 8.569209777055671e-06, + "loss": 0.7232, + "step": 5245 + }, + { + "epoch": 0.2696063315859801, + "grad_norm": 1.107867956161499, + "learning_rate": 8.568626894402744e-06, + "loss": 0.8027, + "step": 5246 + }, + { + "epoch": 0.2696577243293247, + "grad_norm": 1.1847078800201416, + "learning_rate": 8.568043912877893e-06, + "loss": 0.8256, + "step": 5247 + }, + { + "epoch": 0.2697091170726693, + "grad_norm": 0.8863545060157776, + "learning_rate": 8.567460832497268e-06, + "loss": 0.7405, + "step": 5248 + }, + { + "epoch": 0.26976050981601396, + "grad_norm": 1.001041293144226, + "learning_rate": 8.566877653277027e-06, + "loss": 0.7763, + "step": 5249 + }, + { + "epoch": 0.2698119025593586, + "grad_norm": 1.0426985025405884, + "learning_rate": 8.566294375233325e-06, + "loss": 0.8406, + "step": 5250 + }, + { + "epoch": 0.26986329530270325, + "grad_norm": 1.037388563156128, + "learning_rate": 8.565710998382325e-06, + "loss": 0.7776, + "step": 5251 + }, + { + "epoch": 0.2699146880460479, + "grad_norm": 1.0881850719451904, + "learning_rate": 8.565127522740184e-06, + "loss": 0.805, + "step": 5252 + }, + { + "epoch": 0.26996608078939255, + "grad_norm": 1.0541712045669556, + "learning_rate": 8.564543948323073e-06, + "loss": 0.79, + "step": 5253 + }, + { + "epoch": 0.2700174735327372, + "grad_norm": 1.0583751201629639, + "learning_rate": 8.563960275147162e-06, + "loss": 0.7638, + "step": 5254 + }, + { + "epoch": 0.27006886627608184, + "grad_norm": 1.1146904230117798, + "learning_rate": 8.563376503228618e-06, + "loss": 0.7554, + "step": 5255 + }, + { + "epoch": 0.27012025901942643, + "grad_norm": 1.114192247390747, + "learning_rate": 8.562792632583616e-06, + "loss": 0.7395, + "step": 5256 + }, + { + "epoch": 0.2701716517627711, + "grad_norm": 0.8435994982719421, + "learning_rate": 8.562208663228334e-06, + "loss": 0.7073, + "step": 5257 + }, + { + "epoch": 0.27022304450611573, + "grad_norm": 1.1012240648269653, + "learning_rate": 8.561624595178947e-06, + "loss": 0.8356, + "step": 5258 + }, + { + "epoch": 0.2702744372494604, + "grad_norm": 1.1703585386276245, + "learning_rate": 8.561040428451644e-06, + "loss": 0.8053, + "step": 5259 + }, + { + "epoch": 0.270325829992805, + "grad_norm": 1.1016672849655151, + "learning_rate": 8.560456163062604e-06, + "loss": 0.8081, + "step": 5260 + }, + { + "epoch": 0.27037722273614967, + "grad_norm": 1.152122139930725, + "learning_rate": 8.559871799028017e-06, + "loss": 0.7346, + "step": 5261 + }, + { + "epoch": 0.2704286154794943, + "grad_norm": 1.1270984411239624, + "learning_rate": 8.559287336364075e-06, + "loss": 0.8063, + "step": 5262 + }, + { + "epoch": 0.2704800082228389, + "grad_norm": 0.803991436958313, + "learning_rate": 8.558702775086969e-06, + "loss": 0.7281, + "step": 5263 + }, + { + "epoch": 0.27053140096618356, + "grad_norm": 0.7978885173797607, + "learning_rate": 8.558118115212892e-06, + "loss": 0.6921, + "step": 5264 + }, + { + "epoch": 0.2705827937095282, + "grad_norm": 1.1450138092041016, + "learning_rate": 8.557533356758049e-06, + "loss": 0.7398, + "step": 5265 + }, + { + "epoch": 0.27063418645287285, + "grad_norm": 1.167701005935669, + "learning_rate": 8.556948499738635e-06, + "loss": 0.769, + "step": 5266 + }, + { + "epoch": 0.2706855791962175, + "grad_norm": 1.3588557243347168, + "learning_rate": 8.556363544170859e-06, + "loss": 0.6969, + "step": 5267 + }, + { + "epoch": 0.27073697193956214, + "grad_norm": 1.1220605373382568, + "learning_rate": 8.555778490070925e-06, + "loss": 0.7478, + "step": 5268 + }, + { + "epoch": 0.2707883646829068, + "grad_norm": 1.1549495458602905, + "learning_rate": 8.555193337455043e-06, + "loss": 0.7526, + "step": 5269 + }, + { + "epoch": 0.27083975742625144, + "grad_norm": 0.8924218416213989, + "learning_rate": 8.554608086339425e-06, + "loss": 0.6692, + "step": 5270 + }, + { + "epoch": 0.27089115016959603, + "grad_norm": 1.0983549356460571, + "learning_rate": 8.554022736740284e-06, + "loss": 0.7464, + "step": 5271 + }, + { + "epoch": 0.2709425429129407, + "grad_norm": 1.115360975265503, + "learning_rate": 8.55343728867384e-06, + "loss": 0.752, + "step": 5272 + }, + { + "epoch": 0.2709939356562853, + "grad_norm": 0.7294501662254333, + "learning_rate": 8.552851742156314e-06, + "loss": 0.6485, + "step": 5273 + }, + { + "epoch": 0.27104532839962997, + "grad_norm": 1.089875340461731, + "learning_rate": 8.55226609720393e-06, + "loss": 0.7347, + "step": 5274 + }, + { + "epoch": 0.2710967211429746, + "grad_norm": 1.076592206954956, + "learning_rate": 8.551680353832908e-06, + "loss": 0.789, + "step": 5275 + }, + { + "epoch": 0.27114811388631926, + "grad_norm": 1.1637295484542847, + "learning_rate": 8.551094512059483e-06, + "loss": 0.8085, + "step": 5276 + }, + { + "epoch": 0.2711995066296639, + "grad_norm": 1.0136363506317139, + "learning_rate": 8.550508571899882e-06, + "loss": 0.744, + "step": 5277 + }, + { + "epoch": 0.27125089937300856, + "grad_norm": 1.1101983785629272, + "learning_rate": 8.549922533370341e-06, + "loss": 0.7885, + "step": 5278 + }, + { + "epoch": 0.27130229211635315, + "grad_norm": 1.0831009149551392, + "learning_rate": 8.549336396487096e-06, + "loss": 0.8321, + "step": 5279 + }, + { + "epoch": 0.2713536848596978, + "grad_norm": 1.0593751668930054, + "learning_rate": 8.548750161266388e-06, + "loss": 0.7146, + "step": 5280 + }, + { + "epoch": 0.27140507760304244, + "grad_norm": 1.0701123476028442, + "learning_rate": 8.548163827724456e-06, + "loss": 0.7725, + "step": 5281 + }, + { + "epoch": 0.2714564703463871, + "grad_norm": 1.0847185850143433, + "learning_rate": 8.547577395877548e-06, + "loss": 0.8096, + "step": 5282 + }, + { + "epoch": 0.27150786308973174, + "grad_norm": 1.0586906671524048, + "learning_rate": 8.54699086574191e-06, + "loss": 0.7954, + "step": 5283 + }, + { + "epoch": 0.2715592558330764, + "grad_norm": 0.9394407868385315, + "learning_rate": 8.546404237333793e-06, + "loss": 0.7246, + "step": 5284 + }, + { + "epoch": 0.27161064857642103, + "grad_norm": 1.0791479349136353, + "learning_rate": 8.545817510669449e-06, + "loss": 0.7418, + "step": 5285 + }, + { + "epoch": 0.2716620413197656, + "grad_norm": 1.1013513803482056, + "learning_rate": 8.545230685765134e-06, + "loss": 0.8055, + "step": 5286 + }, + { + "epoch": 0.27171343406311027, + "grad_norm": 0.7479370832443237, + "learning_rate": 8.544643762637109e-06, + "loss": 0.6781, + "step": 5287 + }, + { + "epoch": 0.2717648268064549, + "grad_norm": 1.0581074953079224, + "learning_rate": 8.544056741301632e-06, + "loss": 0.8666, + "step": 5288 + }, + { + "epoch": 0.27181621954979956, + "grad_norm": 1.062928318977356, + "learning_rate": 8.543469621774968e-06, + "loss": 0.7564, + "step": 5289 + }, + { + "epoch": 0.2718676122931442, + "grad_norm": 1.145674705505371, + "learning_rate": 8.542882404073384e-06, + "loss": 0.7747, + "step": 5290 + }, + { + "epoch": 0.27191900503648886, + "grad_norm": 1.059910535812378, + "learning_rate": 8.54229508821315e-06, + "loss": 0.8131, + "step": 5291 + }, + { + "epoch": 0.2719703977798335, + "grad_norm": 1.0476080179214478, + "learning_rate": 8.541707674210536e-06, + "loss": 0.7652, + "step": 5292 + }, + { + "epoch": 0.27202179052317815, + "grad_norm": 1.0923998355865479, + "learning_rate": 8.541120162081818e-06, + "loss": 0.8069, + "step": 5293 + }, + { + "epoch": 0.27207318326652274, + "grad_norm": 0.9357771277427673, + "learning_rate": 8.540532551843274e-06, + "loss": 0.6962, + "step": 5294 + }, + { + "epoch": 0.2721245760098674, + "grad_norm": 1.0638911724090576, + "learning_rate": 8.539944843511186e-06, + "loss": 0.7443, + "step": 5295 + }, + { + "epoch": 0.27217596875321204, + "grad_norm": 1.110261082649231, + "learning_rate": 8.539357037101833e-06, + "loss": 0.775, + "step": 5296 + }, + { + "epoch": 0.2722273614965567, + "grad_norm": 1.137700080871582, + "learning_rate": 8.538769132631501e-06, + "loss": 0.7799, + "step": 5297 + }, + { + "epoch": 0.27227875423990133, + "grad_norm": 1.0177677869796753, + "learning_rate": 8.538181130116484e-06, + "loss": 0.7313, + "step": 5298 + }, + { + "epoch": 0.272330146983246, + "grad_norm": 1.0921170711517334, + "learning_rate": 8.537593029573066e-06, + "loss": 0.8394, + "step": 5299 + }, + { + "epoch": 0.2723815397265906, + "grad_norm": 1.4290047883987427, + "learning_rate": 8.537004831017544e-06, + "loss": 0.7282, + "step": 5300 + }, + { + "epoch": 0.2724329324699352, + "grad_norm": 1.0766286849975586, + "learning_rate": 8.536416534466215e-06, + "loss": 0.7806, + "step": 5301 + }, + { + "epoch": 0.27248432521327987, + "grad_norm": 1.1013396978378296, + "learning_rate": 8.535828139935378e-06, + "loss": 0.7552, + "step": 5302 + }, + { + "epoch": 0.2725357179566245, + "grad_norm": 0.8312982320785522, + "learning_rate": 8.535239647441335e-06, + "loss": 0.6766, + "step": 5303 + }, + { + "epoch": 0.27258711069996916, + "grad_norm": 1.0789040327072144, + "learning_rate": 8.53465105700039e-06, + "loss": 0.7712, + "step": 5304 + }, + { + "epoch": 0.2726385034433138, + "grad_norm": 1.102656602859497, + "learning_rate": 8.53406236862885e-06, + "loss": 0.7403, + "step": 5305 + }, + { + "epoch": 0.27268989618665845, + "grad_norm": 0.7600048780441284, + "learning_rate": 8.533473582343029e-06, + "loss": 0.6674, + "step": 5306 + }, + { + "epoch": 0.2727412889300031, + "grad_norm": 1.037812352180481, + "learning_rate": 8.532884698159233e-06, + "loss": 0.7506, + "step": 5307 + }, + { + "epoch": 0.27279268167334775, + "grad_norm": 1.1774437427520752, + "learning_rate": 8.532295716093784e-06, + "loss": 0.7656, + "step": 5308 + }, + { + "epoch": 0.27284407441669234, + "grad_norm": 0.7507709860801697, + "learning_rate": 8.531706636162997e-06, + "loss": 0.7033, + "step": 5309 + }, + { + "epoch": 0.272895467160037, + "grad_norm": 1.014439344406128, + "learning_rate": 8.531117458383194e-06, + "loss": 0.7521, + "step": 5310 + }, + { + "epoch": 0.27294685990338163, + "grad_norm": 0.7373960614204407, + "learning_rate": 8.530528182770696e-06, + "loss": 0.701, + "step": 5311 + }, + { + "epoch": 0.2729982526467263, + "grad_norm": 0.798416793346405, + "learning_rate": 8.529938809341833e-06, + "loss": 0.6688, + "step": 5312 + }, + { + "epoch": 0.2730496453900709, + "grad_norm": 0.8056088089942932, + "learning_rate": 8.529349338112934e-06, + "loss": 0.7222, + "step": 5313 + }, + { + "epoch": 0.2731010381334156, + "grad_norm": 0.8066539168357849, + "learning_rate": 8.528759769100329e-06, + "loss": 0.6882, + "step": 5314 + }, + { + "epoch": 0.2731524308767602, + "grad_norm": 0.8207864165306091, + "learning_rate": 8.528170102320352e-06, + "loss": 0.7567, + "step": 5315 + }, + { + "epoch": 0.27320382362010487, + "grad_norm": 1.0950533151626587, + "learning_rate": 8.527580337789343e-06, + "loss": 0.7921, + "step": 5316 + }, + { + "epoch": 0.27325521636344946, + "grad_norm": 0.7688065767288208, + "learning_rate": 8.52699047552364e-06, + "loss": 0.6609, + "step": 5317 + }, + { + "epoch": 0.2733066091067941, + "grad_norm": 1.0789706707000732, + "learning_rate": 8.526400515539585e-06, + "loss": 0.7268, + "step": 5318 + }, + { + "epoch": 0.27335800185013875, + "grad_norm": 1.0623565912246704, + "learning_rate": 8.525810457853525e-06, + "loss": 0.8116, + "step": 5319 + }, + { + "epoch": 0.2734093945934834, + "grad_norm": 0.8834909796714783, + "learning_rate": 8.525220302481807e-06, + "loss": 0.6576, + "step": 5320 + }, + { + "epoch": 0.27346078733682805, + "grad_norm": 1.0651026964187622, + "learning_rate": 8.524630049440783e-06, + "loss": 0.704, + "step": 5321 + }, + { + "epoch": 0.2735121800801727, + "grad_norm": 1.0169562101364136, + "learning_rate": 8.524039698746804e-06, + "loss": 0.6895, + "step": 5322 + }, + { + "epoch": 0.27356357282351734, + "grad_norm": 1.0979901552200317, + "learning_rate": 8.523449250416229e-06, + "loss": 0.8008, + "step": 5323 + }, + { + "epoch": 0.27361496556686193, + "grad_norm": 1.0751806497573853, + "learning_rate": 8.522858704465416e-06, + "loss": 0.7647, + "step": 5324 + }, + { + "epoch": 0.2736663583102066, + "grad_norm": 0.8188237547874451, + "learning_rate": 8.522268060910726e-06, + "loss": 0.7143, + "step": 5325 + }, + { + "epoch": 0.27371775105355123, + "grad_norm": 1.1386761665344238, + "learning_rate": 8.521677319768523e-06, + "loss": 0.7909, + "step": 5326 + }, + { + "epoch": 0.2737691437968959, + "grad_norm": 0.829815149307251, + "learning_rate": 8.521086481055175e-06, + "loss": 0.7065, + "step": 5327 + }, + { + "epoch": 0.2738205365402405, + "grad_norm": 1.119255781173706, + "learning_rate": 8.52049554478705e-06, + "loss": 0.7297, + "step": 5328 + }, + { + "epoch": 0.27387192928358517, + "grad_norm": 1.049949288368225, + "learning_rate": 8.519904510980524e-06, + "loss": 0.789, + "step": 5329 + }, + { + "epoch": 0.2739233220269298, + "grad_norm": 0.7242515683174133, + "learning_rate": 8.519313379651968e-06, + "loss": 0.6972, + "step": 5330 + }, + { + "epoch": 0.27397471477027446, + "grad_norm": 1.0899670124053955, + "learning_rate": 8.518722150817762e-06, + "loss": 0.7836, + "step": 5331 + }, + { + "epoch": 0.27402610751361905, + "grad_norm": 1.2884798049926758, + "learning_rate": 8.518130824494286e-06, + "loss": 0.7318, + "step": 5332 + }, + { + "epoch": 0.2740775002569637, + "grad_norm": 0.7555671334266663, + "learning_rate": 8.517539400697924e-06, + "loss": 0.7017, + "step": 5333 + }, + { + "epoch": 0.27412889300030835, + "grad_norm": 1.0647447109222412, + "learning_rate": 8.516947879445061e-06, + "loss": 0.7661, + "step": 5334 + }, + { + "epoch": 0.274180285743653, + "grad_norm": 1.06218421459198, + "learning_rate": 8.516356260752086e-06, + "loss": 0.7841, + "step": 5335 + }, + { + "epoch": 0.27423167848699764, + "grad_norm": 0.7147724032402039, + "learning_rate": 8.515764544635389e-06, + "loss": 0.7299, + "step": 5336 + }, + { + "epoch": 0.2742830712303423, + "grad_norm": 0.7570116519927979, + "learning_rate": 8.515172731111367e-06, + "loss": 0.7034, + "step": 5337 + }, + { + "epoch": 0.27433446397368694, + "grad_norm": 1.0586297512054443, + "learning_rate": 8.514580820196414e-06, + "loss": 0.7234, + "step": 5338 + }, + { + "epoch": 0.27438585671703153, + "grad_norm": 0.7304074764251709, + "learning_rate": 8.51398881190693e-06, + "loss": 0.6942, + "step": 5339 + }, + { + "epoch": 0.2744372494603762, + "grad_norm": 0.9326439499855042, + "learning_rate": 8.513396706259319e-06, + "loss": 0.7073, + "step": 5340 + }, + { + "epoch": 0.2744886422037208, + "grad_norm": 1.1090595722198486, + "learning_rate": 8.51280450326998e-06, + "loss": 0.7647, + "step": 5341 + }, + { + "epoch": 0.27454003494706547, + "grad_norm": 1.241669774055481, + "learning_rate": 8.512212202955329e-06, + "loss": 0.827, + "step": 5342 + }, + { + "epoch": 0.2745914276904101, + "grad_norm": 1.0170283317565918, + "learning_rate": 8.51161980533177e-06, + "loss": 0.8398, + "step": 5343 + }, + { + "epoch": 0.27464282043375476, + "grad_norm": 1.3315719366073608, + "learning_rate": 8.511027310415718e-06, + "loss": 0.8119, + "step": 5344 + }, + { + "epoch": 0.2746942131770994, + "grad_norm": 0.9917698502540588, + "learning_rate": 8.51043471822359e-06, + "loss": 0.7266, + "step": 5345 + }, + { + "epoch": 0.27474560592044406, + "grad_norm": 0.7620207071304321, + "learning_rate": 8.5098420287718e-06, + "loss": 0.6947, + "step": 5346 + }, + { + "epoch": 0.27479699866378865, + "grad_norm": 1.072868824005127, + "learning_rate": 8.509249242076774e-06, + "loss": 0.7461, + "step": 5347 + }, + { + "epoch": 0.2748483914071333, + "grad_norm": 1.074194073677063, + "learning_rate": 8.508656358154932e-06, + "loss": 0.7864, + "step": 5348 + }, + { + "epoch": 0.27489978415047794, + "grad_norm": 1.122197151184082, + "learning_rate": 8.5080633770227e-06, + "loss": 0.8152, + "step": 5349 + }, + { + "epoch": 0.2749511768938226, + "grad_norm": 1.094781517982483, + "learning_rate": 8.507470298696512e-06, + "loss": 0.849, + "step": 5350 + }, + { + "epoch": 0.27500256963716724, + "grad_norm": 1.1370644569396973, + "learning_rate": 8.506877123192796e-06, + "loss": 0.7829, + "step": 5351 + }, + { + "epoch": 0.2750539623805119, + "grad_norm": 1.0682247877120972, + "learning_rate": 8.506283850527985e-06, + "loss": 0.7511, + "step": 5352 + }, + { + "epoch": 0.27510535512385653, + "grad_norm": 0.916619598865509, + "learning_rate": 8.505690480718521e-06, + "loss": 0.6929, + "step": 5353 + }, + { + "epoch": 0.2751567478672011, + "grad_norm": 0.8062255382537842, + "learning_rate": 8.505097013780837e-06, + "loss": 0.6804, + "step": 5354 + }, + { + "epoch": 0.27520814061054577, + "grad_norm": 1.1527599096298218, + "learning_rate": 8.504503449731382e-06, + "loss": 0.8282, + "step": 5355 + }, + { + "epoch": 0.2752595333538904, + "grad_norm": 1.1587036848068237, + "learning_rate": 8.503909788586598e-06, + "loss": 0.7733, + "step": 5356 + }, + { + "epoch": 0.27531092609723506, + "grad_norm": 1.1360194683074951, + "learning_rate": 8.503316030362934e-06, + "loss": 0.789, + "step": 5357 + }, + { + "epoch": 0.2753623188405797, + "grad_norm": 1.1783379316329956, + "learning_rate": 8.50272217507684e-06, + "loss": 0.7551, + "step": 5358 + }, + { + "epoch": 0.27541371158392436, + "grad_norm": 1.0723165273666382, + "learning_rate": 8.50212822274477e-06, + "loss": 0.7856, + "step": 5359 + }, + { + "epoch": 0.275465104327269, + "grad_norm": 1.0904268026351929, + "learning_rate": 8.501534173383178e-06, + "loss": 0.7922, + "step": 5360 + }, + { + "epoch": 0.27551649707061365, + "grad_norm": 1.19148588180542, + "learning_rate": 8.500940027008524e-06, + "loss": 0.7012, + "step": 5361 + }, + { + "epoch": 0.27556788981395824, + "grad_norm": 1.1049919128417969, + "learning_rate": 8.50034578363727e-06, + "loss": 0.7397, + "step": 5362 + }, + { + "epoch": 0.2756192825573029, + "grad_norm": 0.9565017819404602, + "learning_rate": 8.49975144328588e-06, + "loss": 0.6937, + "step": 5363 + }, + { + "epoch": 0.27567067530064754, + "grad_norm": 0.8546234965324402, + "learning_rate": 8.499157005970819e-06, + "loss": 0.7046, + "step": 5364 + }, + { + "epoch": 0.2757220680439922, + "grad_norm": 0.7992849349975586, + "learning_rate": 8.498562471708558e-06, + "loss": 0.7107, + "step": 5365 + }, + { + "epoch": 0.27577346078733683, + "grad_norm": 1.1289910078048706, + "learning_rate": 8.49796784051557e-06, + "loss": 0.7702, + "step": 5366 + }, + { + "epoch": 0.2758248535306815, + "grad_norm": 1.22629714012146, + "learning_rate": 8.497373112408327e-06, + "loss": 0.8207, + "step": 5367 + }, + { + "epoch": 0.2758762462740261, + "grad_norm": 0.8818389177322388, + "learning_rate": 8.496778287403308e-06, + "loss": 0.7082, + "step": 5368 + }, + { + "epoch": 0.2759276390173708, + "grad_norm": 1.2009376287460327, + "learning_rate": 8.496183365516992e-06, + "loss": 0.8156, + "step": 5369 + }, + { + "epoch": 0.27597903176071537, + "grad_norm": 1.110411524772644, + "learning_rate": 8.495588346765864e-06, + "loss": 0.7846, + "step": 5370 + }, + { + "epoch": 0.27603042450406, + "grad_norm": 1.100922703742981, + "learning_rate": 8.494993231166408e-06, + "loss": 0.8622, + "step": 5371 + }, + { + "epoch": 0.27608181724740466, + "grad_norm": 1.1176868677139282, + "learning_rate": 8.494398018735113e-06, + "loss": 0.7837, + "step": 5372 + }, + { + "epoch": 0.2761332099907493, + "grad_norm": 1.2481036186218262, + "learning_rate": 8.49380270948847e-06, + "loss": 0.7691, + "step": 5373 + }, + { + "epoch": 0.27618460273409395, + "grad_norm": 1.0315886735916138, + "learning_rate": 8.493207303442971e-06, + "loss": 0.7736, + "step": 5374 + }, + { + "epoch": 0.2762359954774386, + "grad_norm": 0.9144653081893921, + "learning_rate": 8.492611800615114e-06, + "loss": 0.733, + "step": 5375 + }, + { + "epoch": 0.27628738822078325, + "grad_norm": 0.8169595003128052, + "learning_rate": 8.492016201021396e-06, + "loss": 0.6648, + "step": 5376 + }, + { + "epoch": 0.27633878096412784, + "grad_norm": 1.0692890882492065, + "learning_rate": 8.49142050467832e-06, + "loss": 0.7419, + "step": 5377 + }, + { + "epoch": 0.2763901737074725, + "grad_norm": 1.1379425525665283, + "learning_rate": 8.49082471160239e-06, + "loss": 0.7409, + "step": 5378 + }, + { + "epoch": 0.27644156645081713, + "grad_norm": 0.9054638147354126, + "learning_rate": 8.490228821810114e-06, + "loss": 0.707, + "step": 5379 + }, + { + "epoch": 0.2764929591941618, + "grad_norm": 1.0645204782485962, + "learning_rate": 8.489632835318e-06, + "loss": 0.7533, + "step": 5380 + }, + { + "epoch": 0.2765443519375064, + "grad_norm": 0.7317367792129517, + "learning_rate": 8.489036752142561e-06, + "loss": 0.7131, + "step": 5381 + }, + { + "epoch": 0.2765957446808511, + "grad_norm": 0.967462956905365, + "learning_rate": 8.488440572300312e-06, + "loss": 0.6899, + "step": 5382 + }, + { + "epoch": 0.2766471374241957, + "grad_norm": 1.082800030708313, + "learning_rate": 8.48784429580777e-06, + "loss": 0.7569, + "step": 5383 + }, + { + "epoch": 0.27669853016754037, + "grad_norm": 0.7163935899734497, + "learning_rate": 8.487247922681459e-06, + "loss": 0.6803, + "step": 5384 + }, + { + "epoch": 0.27674992291088496, + "grad_norm": 1.061950445175171, + "learning_rate": 8.486651452937896e-06, + "loss": 0.7694, + "step": 5385 + }, + { + "epoch": 0.2768013156542296, + "grad_norm": 1.1027023792266846, + "learning_rate": 8.486054886593612e-06, + "loss": 0.7393, + "step": 5386 + }, + { + "epoch": 0.27685270839757425, + "grad_norm": 1.1072064638137817, + "learning_rate": 8.48545822366513e-06, + "loss": 0.788, + "step": 5387 + }, + { + "epoch": 0.2769041011409189, + "grad_norm": 1.1185179948806763, + "learning_rate": 8.484861464168987e-06, + "loss": 0.8333, + "step": 5388 + }, + { + "epoch": 0.27695549388426355, + "grad_norm": 1.0440915822982788, + "learning_rate": 8.484264608121713e-06, + "loss": 0.7648, + "step": 5389 + }, + { + "epoch": 0.2770068866276082, + "grad_norm": 1.0296357870101929, + "learning_rate": 8.483667655539846e-06, + "loss": 0.7531, + "step": 5390 + }, + { + "epoch": 0.27705827937095284, + "grad_norm": 1.048190951347351, + "learning_rate": 8.483070606439923e-06, + "loss": 0.8223, + "step": 5391 + }, + { + "epoch": 0.27710967211429743, + "grad_norm": 0.9317312836647034, + "learning_rate": 8.48247346083849e-06, + "loss": 0.6566, + "step": 5392 + }, + { + "epoch": 0.2771610648576421, + "grad_norm": 0.7316219210624695, + "learning_rate": 8.481876218752085e-06, + "loss": 0.6595, + "step": 5393 + }, + { + "epoch": 0.27721245760098673, + "grad_norm": 1.1095342636108398, + "learning_rate": 8.481278880197261e-06, + "loss": 0.771, + "step": 5394 + }, + { + "epoch": 0.2772638503443314, + "grad_norm": 1.0935989618301392, + "learning_rate": 8.480681445190566e-06, + "loss": 0.7435, + "step": 5395 + }, + { + "epoch": 0.277315243087676, + "grad_norm": 0.7224438190460205, + "learning_rate": 8.480083913748551e-06, + "loss": 0.708, + "step": 5396 + }, + { + "epoch": 0.27736663583102067, + "grad_norm": 0.7135837078094482, + "learning_rate": 8.479486285887774e-06, + "loss": 0.6797, + "step": 5397 + }, + { + "epoch": 0.2774180285743653, + "grad_norm": 1.2090249061584473, + "learning_rate": 8.478888561624789e-06, + "loss": 0.8155, + "step": 5398 + }, + { + "epoch": 0.27746942131770996, + "grad_norm": 1.0543020963668823, + "learning_rate": 8.478290740976161e-06, + "loss": 0.7858, + "step": 5399 + }, + { + "epoch": 0.27752081406105455, + "grad_norm": 1.0511952638626099, + "learning_rate": 8.477692823958448e-06, + "loss": 0.7264, + "step": 5400 + }, + { + "epoch": 0.2775722068043992, + "grad_norm": 1.1163196563720703, + "learning_rate": 8.477094810588219e-06, + "loss": 0.7905, + "step": 5401 + }, + { + "epoch": 0.27762359954774385, + "grad_norm": 1.0773780345916748, + "learning_rate": 8.476496700882042e-06, + "loss": 0.8054, + "step": 5402 + }, + { + "epoch": 0.2776749922910885, + "grad_norm": 1.1471275091171265, + "learning_rate": 8.47589849485649e-06, + "loss": 0.798, + "step": 5403 + }, + { + "epoch": 0.27772638503443314, + "grad_norm": 1.1065164804458618, + "learning_rate": 8.475300192528132e-06, + "loss": 0.8049, + "step": 5404 + }, + { + "epoch": 0.2777777777777778, + "grad_norm": 0.7866690754890442, + "learning_rate": 8.47470179391355e-06, + "loss": 0.6966, + "step": 5405 + }, + { + "epoch": 0.27782917052112244, + "grad_norm": 1.0428229570388794, + "learning_rate": 8.47410329902932e-06, + "loss": 0.7944, + "step": 5406 + }, + { + "epoch": 0.2778805632644671, + "grad_norm": 1.0897939205169678, + "learning_rate": 8.473504707892021e-06, + "loss": 0.8426, + "step": 5407 + }, + { + "epoch": 0.2779319560078117, + "grad_norm": 1.0835639238357544, + "learning_rate": 8.472906020518243e-06, + "loss": 0.7974, + "step": 5408 + }, + { + "epoch": 0.2779833487511563, + "grad_norm": 1.0952891111373901, + "learning_rate": 8.472307236924573e-06, + "loss": 0.7558, + "step": 5409 + }, + { + "epoch": 0.27803474149450097, + "grad_norm": 1.1048285961151123, + "learning_rate": 8.471708357127597e-06, + "loss": 0.7846, + "step": 5410 + }, + { + "epoch": 0.2780861342378456, + "grad_norm": 0.75752192735672, + "learning_rate": 8.47110938114391e-06, + "loss": 0.7368, + "step": 5411 + }, + { + "epoch": 0.27813752698119026, + "grad_norm": 1.2553702592849731, + "learning_rate": 8.470510308990105e-06, + "loss": 0.7968, + "step": 5412 + }, + { + "epoch": 0.2781889197245349, + "grad_norm": 1.0701875686645508, + "learning_rate": 8.469911140682782e-06, + "loss": 0.8207, + "step": 5413 + }, + { + "epoch": 0.27824031246787956, + "grad_norm": 1.1648060083389282, + "learning_rate": 8.469311876238542e-06, + "loss": 0.8424, + "step": 5414 + }, + { + "epoch": 0.27829170521122415, + "grad_norm": 1.2138426303863525, + "learning_rate": 8.468712515673985e-06, + "loss": 0.7975, + "step": 5415 + }, + { + "epoch": 0.2783430979545688, + "grad_norm": 1.0524147748947144, + "learning_rate": 8.46811305900572e-06, + "loss": 0.7646, + "step": 5416 + }, + { + "epoch": 0.27839449069791344, + "grad_norm": 0.7821248769760132, + "learning_rate": 8.467513506250354e-06, + "loss": 0.7003, + "step": 5417 + }, + { + "epoch": 0.2784458834412581, + "grad_norm": 1.1876329183578491, + "learning_rate": 8.466913857424499e-06, + "loss": 0.8045, + "step": 5418 + }, + { + "epoch": 0.27849727618460274, + "grad_norm": 1.0438202619552612, + "learning_rate": 8.466314112544767e-06, + "loss": 0.7365, + "step": 5419 + }, + { + "epoch": 0.2785486689279474, + "grad_norm": 1.0742578506469727, + "learning_rate": 8.465714271627777e-06, + "loss": 0.7974, + "step": 5420 + }, + { + "epoch": 0.27860006167129203, + "grad_norm": 0.7598520517349243, + "learning_rate": 8.465114334690146e-06, + "loss": 0.6677, + "step": 5421 + }, + { + "epoch": 0.2786514544146367, + "grad_norm": 1.092868447303772, + "learning_rate": 8.464514301748496e-06, + "loss": 0.7995, + "step": 5422 + }, + { + "epoch": 0.27870284715798127, + "grad_norm": 1.0680253505706787, + "learning_rate": 8.463914172819452e-06, + "loss": 0.7613, + "step": 5423 + }, + { + "epoch": 0.2787542399013259, + "grad_norm": 1.1098958253860474, + "learning_rate": 8.46331394791964e-06, + "loss": 0.7286, + "step": 5424 + }, + { + "epoch": 0.27880563264467056, + "grad_norm": 1.123618483543396, + "learning_rate": 8.46271362706569e-06, + "loss": 0.7662, + "step": 5425 + }, + { + "epoch": 0.2788570253880152, + "grad_norm": 1.1878520250320435, + "learning_rate": 8.462113210274239e-06, + "loss": 0.8002, + "step": 5426 + }, + { + "epoch": 0.27890841813135986, + "grad_norm": 1.0876141786575317, + "learning_rate": 8.461512697561915e-06, + "loss": 0.7332, + "step": 5427 + }, + { + "epoch": 0.2789598108747045, + "grad_norm": 0.7556460499763489, + "learning_rate": 8.460912088945361e-06, + "loss": 0.7154, + "step": 5428 + }, + { + "epoch": 0.27901120361804915, + "grad_norm": 0.780170738697052, + "learning_rate": 8.460311384441215e-06, + "loss": 0.7298, + "step": 5429 + }, + { + "epoch": 0.27906259636139374, + "grad_norm": 1.0849303007125854, + "learning_rate": 8.45971058406612e-06, + "loss": 0.7705, + "step": 5430 + }, + { + "epoch": 0.2791139891047384, + "grad_norm": 0.7247140407562256, + "learning_rate": 8.459109687836721e-06, + "loss": 0.6882, + "step": 5431 + }, + { + "epoch": 0.27916538184808304, + "grad_norm": 0.7308477163314819, + "learning_rate": 8.458508695769669e-06, + "loss": 0.7294, + "step": 5432 + }, + { + "epoch": 0.2792167745914277, + "grad_norm": 0.800254225730896, + "learning_rate": 8.457907607881612e-06, + "loss": 0.6964, + "step": 5433 + }, + { + "epoch": 0.27926816733477233, + "grad_norm": 1.088930606842041, + "learning_rate": 8.457306424189207e-06, + "loss": 0.6773, + "step": 5434 + }, + { + "epoch": 0.279319560078117, + "grad_norm": 1.1602481603622437, + "learning_rate": 8.456705144709108e-06, + "loss": 0.7528, + "step": 5435 + }, + { + "epoch": 0.2793709528214616, + "grad_norm": 1.5446277856826782, + "learning_rate": 8.456103769457974e-06, + "loss": 0.7796, + "step": 5436 + }, + { + "epoch": 0.2794223455648063, + "grad_norm": 1.1176252365112305, + "learning_rate": 8.455502298452467e-06, + "loss": 0.7225, + "step": 5437 + }, + { + "epoch": 0.27947373830815087, + "grad_norm": 1.0958378314971924, + "learning_rate": 8.45490073170925e-06, + "loss": 0.803, + "step": 5438 + }, + { + "epoch": 0.2795251310514955, + "grad_norm": 0.7505422234535217, + "learning_rate": 8.454299069244993e-06, + "loss": 0.7058, + "step": 5439 + }, + { + "epoch": 0.27957652379484016, + "grad_norm": 0.8660080432891846, + "learning_rate": 8.453697311076364e-06, + "loss": 0.6874, + "step": 5440 + }, + { + "epoch": 0.2796279165381848, + "grad_norm": 1.0632009506225586, + "learning_rate": 8.453095457220033e-06, + "loss": 0.7674, + "step": 5441 + }, + { + "epoch": 0.27967930928152945, + "grad_norm": 1.084378719329834, + "learning_rate": 8.45249350769268e-06, + "loss": 0.7459, + "step": 5442 + }, + { + "epoch": 0.2797307020248741, + "grad_norm": 0.7546572685241699, + "learning_rate": 8.451891462510977e-06, + "loss": 0.7398, + "step": 5443 + }, + { + "epoch": 0.27978209476821875, + "grad_norm": 1.0199382305145264, + "learning_rate": 8.451289321691609e-06, + "loss": 0.801, + "step": 5444 + }, + { + "epoch": 0.2798334875115634, + "grad_norm": 1.1750742197036743, + "learning_rate": 8.450687085251255e-06, + "loss": 0.8125, + "step": 5445 + }, + { + "epoch": 0.279884880254908, + "grad_norm": 0.7520389556884766, + "learning_rate": 8.450084753206601e-06, + "loss": 0.7157, + "step": 5446 + }, + { + "epoch": 0.27993627299825263, + "grad_norm": 0.850633442401886, + "learning_rate": 8.449482325574339e-06, + "loss": 0.6856, + "step": 5447 + }, + { + "epoch": 0.2799876657415973, + "grad_norm": 1.1524409055709839, + "learning_rate": 8.448879802371155e-06, + "loss": 0.7676, + "step": 5448 + }, + { + "epoch": 0.2800390584849419, + "grad_norm": 1.1859307289123535, + "learning_rate": 8.448277183613743e-06, + "loss": 0.7898, + "step": 5449 + }, + { + "epoch": 0.2800904512282866, + "grad_norm": 0.8038631677627563, + "learning_rate": 8.447674469318802e-06, + "loss": 0.7195, + "step": 5450 + }, + { + "epoch": 0.2801418439716312, + "grad_norm": 1.1111232042312622, + "learning_rate": 8.44707165950303e-06, + "loss": 0.7946, + "step": 5451 + }, + { + "epoch": 0.28019323671497587, + "grad_norm": 0.7855945229530334, + "learning_rate": 8.446468754183125e-06, + "loss": 0.6936, + "step": 5452 + }, + { + "epoch": 0.28024462945832046, + "grad_norm": 1.0613653659820557, + "learning_rate": 8.445865753375797e-06, + "loss": 0.7517, + "step": 5453 + }, + { + "epoch": 0.2802960222016651, + "grad_norm": 1.114047646522522, + "learning_rate": 8.445262657097748e-06, + "loss": 0.7919, + "step": 5454 + }, + { + "epoch": 0.28034741494500975, + "grad_norm": 1.1228047609329224, + "learning_rate": 8.444659465365688e-06, + "loss": 0.8, + "step": 5455 + }, + { + "epoch": 0.2803988076883544, + "grad_norm": 1.032792568206787, + "learning_rate": 8.444056178196329e-06, + "loss": 0.7221, + "step": 5456 + }, + { + "epoch": 0.28045020043169905, + "grad_norm": 1.1268510818481445, + "learning_rate": 8.443452795606385e-06, + "loss": 0.7749, + "step": 5457 + }, + { + "epoch": 0.2805015931750437, + "grad_norm": 1.1430174112319946, + "learning_rate": 8.442849317612578e-06, + "loss": 0.7949, + "step": 5458 + }, + { + "epoch": 0.28055298591838834, + "grad_norm": 1.0804526805877686, + "learning_rate": 8.442245744231621e-06, + "loss": 0.8009, + "step": 5459 + }, + { + "epoch": 0.280604378661733, + "grad_norm": 0.8016070127487183, + "learning_rate": 8.441642075480241e-06, + "loss": 0.7135, + "step": 5460 + }, + { + "epoch": 0.2806557714050776, + "grad_norm": 1.0725575685501099, + "learning_rate": 8.441038311375163e-06, + "loss": 0.7327, + "step": 5461 + }, + { + "epoch": 0.28070716414842223, + "grad_norm": 1.0554770231246948, + "learning_rate": 8.440434451933112e-06, + "loss": 0.804, + "step": 5462 + }, + { + "epoch": 0.2807585568917669, + "grad_norm": 1.995858907699585, + "learning_rate": 8.43983049717082e-06, + "loss": 0.7411, + "step": 5463 + }, + { + "epoch": 0.2808099496351115, + "grad_norm": 1.1054331064224243, + "learning_rate": 8.439226447105021e-06, + "loss": 0.8082, + "step": 5464 + }, + { + "epoch": 0.28086134237845617, + "grad_norm": 1.0908995866775513, + "learning_rate": 8.438622301752451e-06, + "loss": 0.774, + "step": 5465 + }, + { + "epoch": 0.2809127351218008, + "grad_norm": 1.0509769916534424, + "learning_rate": 8.438018061129846e-06, + "loss": 0.7433, + "step": 5466 + }, + { + "epoch": 0.28096412786514546, + "grad_norm": 0.8322144150733948, + "learning_rate": 8.437413725253949e-06, + "loss": 0.6506, + "step": 5467 + }, + { + "epoch": 0.28101552060849005, + "grad_norm": 1.1251063346862793, + "learning_rate": 8.436809294141503e-06, + "loss": 0.7111, + "step": 5468 + }, + { + "epoch": 0.2810669133518347, + "grad_norm": 1.106492519378662, + "learning_rate": 8.436204767809254e-06, + "loss": 0.8089, + "step": 5469 + }, + { + "epoch": 0.28111830609517935, + "grad_norm": 0.7839671969413757, + "learning_rate": 8.435600146273953e-06, + "loss": 0.6875, + "step": 5470 + }, + { + "epoch": 0.281169698838524, + "grad_norm": 1.10338294506073, + "learning_rate": 8.434995429552347e-06, + "loss": 0.7929, + "step": 5471 + }, + { + "epoch": 0.28122109158186864, + "grad_norm": 0.8638939261436462, + "learning_rate": 8.434390617661195e-06, + "loss": 0.6829, + "step": 5472 + }, + { + "epoch": 0.2812724843252133, + "grad_norm": 0.6777721643447876, + "learning_rate": 8.433785710617249e-06, + "loss": 0.6698, + "step": 5473 + }, + { + "epoch": 0.28132387706855794, + "grad_norm": 1.1163380146026611, + "learning_rate": 8.433180708437274e-06, + "loss": 0.7898, + "step": 5474 + }, + { + "epoch": 0.2813752698119026, + "grad_norm": 0.8966031670570374, + "learning_rate": 8.43257561113803e-06, + "loss": 0.6901, + "step": 5475 + }, + { + "epoch": 0.2814266625552472, + "grad_norm": 1.3003183603286743, + "learning_rate": 8.43197041873628e-06, + "loss": 0.751, + "step": 5476 + }, + { + "epoch": 0.2814780552985918, + "grad_norm": 1.0738472938537598, + "learning_rate": 8.431365131248791e-06, + "loss": 0.7405, + "step": 5477 + }, + { + "epoch": 0.28152944804193647, + "grad_norm": 1.0920639038085938, + "learning_rate": 8.430759748692336e-06, + "loss": 0.773, + "step": 5478 + }, + { + "epoch": 0.2815808407852811, + "grad_norm": 1.0732194185256958, + "learning_rate": 8.430154271083688e-06, + "loss": 0.8153, + "step": 5479 + }, + { + "epoch": 0.28163223352862576, + "grad_norm": 1.2458409070968628, + "learning_rate": 8.42954869843962e-06, + "loss": 0.7318, + "step": 5480 + }, + { + "epoch": 0.2816836262719704, + "grad_norm": 1.157037377357483, + "learning_rate": 8.428943030776907e-06, + "loss": 0.8012, + "step": 5481 + }, + { + "epoch": 0.28173501901531506, + "grad_norm": 1.1143583059310913, + "learning_rate": 8.428337268112338e-06, + "loss": 0.7583, + "step": 5482 + }, + { + "epoch": 0.28178641175865965, + "grad_norm": 1.1703667640686035, + "learning_rate": 8.42773141046269e-06, + "loss": 0.7513, + "step": 5483 + }, + { + "epoch": 0.2818378045020043, + "grad_norm": 1.0678144693374634, + "learning_rate": 8.427125457844746e-06, + "loss": 0.7577, + "step": 5484 + }, + { + "epoch": 0.28188919724534894, + "grad_norm": 0.7678773999214172, + "learning_rate": 8.426519410275304e-06, + "loss": 0.6921, + "step": 5485 + }, + { + "epoch": 0.2819405899886936, + "grad_norm": 1.1912566423416138, + "learning_rate": 8.425913267771146e-06, + "loss": 0.8636, + "step": 5486 + }, + { + "epoch": 0.28199198273203824, + "grad_norm": 1.189429759979248, + "learning_rate": 8.42530703034907e-06, + "loss": 0.7913, + "step": 5487 + }, + { + "epoch": 0.2820433754753829, + "grad_norm": 1.1005582809448242, + "learning_rate": 8.424700698025873e-06, + "loss": 0.8107, + "step": 5488 + }, + { + "epoch": 0.28209476821872753, + "grad_norm": 1.049407720565796, + "learning_rate": 8.42409427081835e-06, + "loss": 0.7916, + "step": 5489 + }, + { + "epoch": 0.2821461609620722, + "grad_norm": 1.205552577972412, + "learning_rate": 8.423487748743306e-06, + "loss": 0.7698, + "step": 5490 + }, + { + "epoch": 0.28219755370541677, + "grad_norm": 1.2440963983535767, + "learning_rate": 8.422881131817546e-06, + "loss": 0.7554, + "step": 5491 + }, + { + "epoch": 0.2822489464487614, + "grad_norm": 0.7267575860023499, + "learning_rate": 8.422274420057875e-06, + "loss": 0.7106, + "step": 5492 + }, + { + "epoch": 0.28230033919210606, + "grad_norm": 1.0136288404464722, + "learning_rate": 8.421667613481102e-06, + "loss": 0.7958, + "step": 5493 + }, + { + "epoch": 0.2823517319354507, + "grad_norm": 1.067198395729065, + "learning_rate": 8.421060712104038e-06, + "loss": 0.7648, + "step": 5494 + }, + { + "epoch": 0.28240312467879536, + "grad_norm": 1.1421473026275635, + "learning_rate": 8.420453715943502e-06, + "loss": 0.7602, + "step": 5495 + }, + { + "epoch": 0.28245451742214, + "grad_norm": 1.1569751501083374, + "learning_rate": 8.419846625016307e-06, + "loss": 0.7703, + "step": 5496 + }, + { + "epoch": 0.28250591016548465, + "grad_norm": 0.794883668422699, + "learning_rate": 8.419239439339277e-06, + "loss": 0.7213, + "step": 5497 + }, + { + "epoch": 0.2825573029088293, + "grad_norm": 1.2750657796859741, + "learning_rate": 8.418632158929233e-06, + "loss": 0.7643, + "step": 5498 + }, + { + "epoch": 0.2826086956521739, + "grad_norm": 1.0822252035140991, + "learning_rate": 8.418024783802999e-06, + "loss": 0.7712, + "step": 5499 + }, + { + "epoch": 0.28266008839551854, + "grad_norm": 1.223413109779358, + "learning_rate": 8.417417313977402e-06, + "loss": 0.7898, + "step": 5500 + }, + { + "epoch": 0.2827114811388632, + "grad_norm": 1.0950260162353516, + "learning_rate": 8.416809749469275e-06, + "loss": 0.7906, + "step": 5501 + }, + { + "epoch": 0.28276287388220783, + "grad_norm": 1.1743396520614624, + "learning_rate": 8.416202090295448e-06, + "loss": 0.8077, + "step": 5502 + }, + { + "epoch": 0.2828142666255525, + "grad_norm": 1.1662472486495972, + "learning_rate": 8.415594336472764e-06, + "loss": 0.8005, + "step": 5503 + }, + { + "epoch": 0.2828656593688971, + "grad_norm": 0.7794203162193298, + "learning_rate": 8.414986488018053e-06, + "loss": 0.6802, + "step": 5504 + }, + { + "epoch": 0.2829170521122418, + "grad_norm": 1.1030915975570679, + "learning_rate": 8.414378544948159e-06, + "loss": 0.7047, + "step": 5505 + }, + { + "epoch": 0.28296844485558637, + "grad_norm": 1.120401382446289, + "learning_rate": 8.413770507279926e-06, + "loss": 0.7845, + "step": 5506 + }, + { + "epoch": 0.283019837598931, + "grad_norm": 1.0885928869247437, + "learning_rate": 8.413162375030202e-06, + "loss": 0.7565, + "step": 5507 + }, + { + "epoch": 0.28307123034227566, + "grad_norm": 1.0733895301818848, + "learning_rate": 8.41255414821583e-06, + "loss": 0.726, + "step": 5508 + }, + { + "epoch": 0.2831226230856203, + "grad_norm": 1.1310372352600098, + "learning_rate": 8.41194582685367e-06, + "loss": 0.7094, + "step": 5509 + }, + { + "epoch": 0.28317401582896495, + "grad_norm": 1.1021548509597778, + "learning_rate": 8.411337410960567e-06, + "loss": 0.7621, + "step": 5510 + }, + { + "epoch": 0.2832254085723096, + "grad_norm": 0.7907571792602539, + "learning_rate": 8.410728900553384e-06, + "loss": 0.7, + "step": 5511 + }, + { + "epoch": 0.28327680131565425, + "grad_norm": 0.8956877589225769, + "learning_rate": 8.41012029564898e-06, + "loss": 0.6343, + "step": 5512 + }, + { + "epoch": 0.2833281940589989, + "grad_norm": 0.7475018501281738, + "learning_rate": 8.409511596264213e-06, + "loss": 0.6756, + "step": 5513 + }, + { + "epoch": 0.2833795868023435, + "grad_norm": 1.1505030393600464, + "learning_rate": 8.408902802415951e-06, + "loss": 0.7693, + "step": 5514 + }, + { + "epoch": 0.28343097954568813, + "grad_norm": 1.1221455335617065, + "learning_rate": 8.40829391412106e-06, + "loss": 0.7891, + "step": 5515 + }, + { + "epoch": 0.2834823722890328, + "grad_norm": 1.0632977485656738, + "learning_rate": 8.40768493139641e-06, + "loss": 0.746, + "step": 5516 + }, + { + "epoch": 0.2835337650323774, + "grad_norm": 1.0695126056671143, + "learning_rate": 8.407075854258873e-06, + "loss": 0.7757, + "step": 5517 + }, + { + "epoch": 0.2835851577757221, + "grad_norm": 0.7279176115989685, + "learning_rate": 8.406466682725324e-06, + "loss": 0.6869, + "step": 5518 + }, + { + "epoch": 0.2836365505190667, + "grad_norm": 1.1311094760894775, + "learning_rate": 8.40585741681264e-06, + "loss": 0.8046, + "step": 5519 + }, + { + "epoch": 0.28368794326241137, + "grad_norm": 1.1183536052703857, + "learning_rate": 8.405248056537704e-06, + "loss": 0.7964, + "step": 5520 + }, + { + "epoch": 0.28373933600575596, + "grad_norm": 1.1579886674880981, + "learning_rate": 8.404638601917396e-06, + "loss": 0.7759, + "step": 5521 + }, + { + "epoch": 0.2837907287491006, + "grad_norm": 1.131960153579712, + "learning_rate": 8.404029052968603e-06, + "loss": 0.7978, + "step": 5522 + }, + { + "epoch": 0.28384212149244525, + "grad_norm": 0.9828609824180603, + "learning_rate": 8.403419409708214e-06, + "loss": 0.6789, + "step": 5523 + }, + { + "epoch": 0.2838935142357899, + "grad_norm": 1.1015490293502808, + "learning_rate": 8.402809672153115e-06, + "loss": 0.8088, + "step": 5524 + }, + { + "epoch": 0.28394490697913455, + "grad_norm": 1.073251485824585, + "learning_rate": 8.402199840320204e-06, + "loss": 0.7815, + "step": 5525 + }, + { + "epoch": 0.2839962997224792, + "grad_norm": 0.7691653966903687, + "learning_rate": 8.401589914226376e-06, + "loss": 0.6764, + "step": 5526 + }, + { + "epoch": 0.28404769246582384, + "grad_norm": 6.130173206329346, + "learning_rate": 8.400979893888529e-06, + "loss": 0.8631, + "step": 5527 + }, + { + "epoch": 0.2840990852091685, + "grad_norm": 0.7439635992050171, + "learning_rate": 8.400369779323563e-06, + "loss": 0.6851, + "step": 5528 + }, + { + "epoch": 0.2841504779525131, + "grad_norm": 1.007220983505249, + "learning_rate": 8.399759570548383e-06, + "loss": 0.7609, + "step": 5529 + }, + { + "epoch": 0.28420187069585773, + "grad_norm": 1.109399437904358, + "learning_rate": 8.399149267579896e-06, + "loss": 0.8075, + "step": 5530 + }, + { + "epoch": 0.2842532634392024, + "grad_norm": 1.1456400156021118, + "learning_rate": 8.39853887043501e-06, + "loss": 0.7849, + "step": 5531 + }, + { + "epoch": 0.284304656182547, + "grad_norm": 0.851325511932373, + "learning_rate": 8.397928379130637e-06, + "loss": 0.6856, + "step": 5532 + }, + { + "epoch": 0.28435604892589167, + "grad_norm": 1.0858800411224365, + "learning_rate": 8.39731779368369e-06, + "loss": 0.7338, + "step": 5533 + }, + { + "epoch": 0.2844074416692363, + "grad_norm": 1.124840497970581, + "learning_rate": 8.396707114111089e-06, + "loss": 0.8116, + "step": 5534 + }, + { + "epoch": 0.28445883441258096, + "grad_norm": 1.015531301498413, + "learning_rate": 8.39609634042975e-06, + "loss": 0.6974, + "step": 5535 + }, + { + "epoch": 0.2845102271559256, + "grad_norm": 1.022371530532837, + "learning_rate": 8.395485472656596e-06, + "loss": 0.7132, + "step": 5536 + }, + { + "epoch": 0.2845616198992702, + "grad_norm": 1.0348109006881714, + "learning_rate": 8.394874510808552e-06, + "loss": 0.76, + "step": 5537 + }, + { + "epoch": 0.28461301264261485, + "grad_norm": 0.8034092783927917, + "learning_rate": 8.394263454902545e-06, + "loss": 0.7276, + "step": 5538 + }, + { + "epoch": 0.2846644053859595, + "grad_norm": 1.0559484958648682, + "learning_rate": 8.393652304955506e-06, + "loss": 0.815, + "step": 5539 + }, + { + "epoch": 0.28471579812930414, + "grad_norm": 1.0767971277236938, + "learning_rate": 8.393041060984366e-06, + "loss": 0.7517, + "step": 5540 + }, + { + "epoch": 0.2847671908726488, + "grad_norm": 1.0510344505310059, + "learning_rate": 8.392429723006059e-06, + "loss": 0.7939, + "step": 5541 + }, + { + "epoch": 0.28481858361599344, + "grad_norm": 1.095042109489441, + "learning_rate": 8.391818291037526e-06, + "loss": 0.7789, + "step": 5542 + }, + { + "epoch": 0.2848699763593381, + "grad_norm": 1.271406650543213, + "learning_rate": 8.391206765095705e-06, + "loss": 0.7416, + "step": 5543 + }, + { + "epoch": 0.2849213691026827, + "grad_norm": 1.087684988975525, + "learning_rate": 8.39059514519754e-06, + "loss": 0.7577, + "step": 5544 + }, + { + "epoch": 0.2849727618460273, + "grad_norm": 0.8637363910675049, + "learning_rate": 8.389983431359973e-06, + "loss": 0.6961, + "step": 5545 + }, + { + "epoch": 0.28502415458937197, + "grad_norm": 0.8796000480651855, + "learning_rate": 8.389371623599956e-06, + "loss": 0.6626, + "step": 5546 + }, + { + "epoch": 0.2850755473327166, + "grad_norm": 1.0837634801864624, + "learning_rate": 8.388759721934439e-06, + "loss": 0.7923, + "step": 5547 + }, + { + "epoch": 0.28512694007606126, + "grad_norm": 1.1532115936279297, + "learning_rate": 8.388147726380374e-06, + "loss": 0.8316, + "step": 5548 + }, + { + "epoch": 0.2851783328194059, + "grad_norm": 0.8441208004951477, + "learning_rate": 8.387535636954719e-06, + "loss": 0.712, + "step": 5549 + }, + { + "epoch": 0.28522972556275056, + "grad_norm": 1.100807785987854, + "learning_rate": 8.38692345367443e-06, + "loss": 0.7576, + "step": 5550 + }, + { + "epoch": 0.2852811183060952, + "grad_norm": 1.1495287418365479, + "learning_rate": 8.386311176556467e-06, + "loss": 0.7829, + "step": 5551 + }, + { + "epoch": 0.2853325110494398, + "grad_norm": 0.7996395826339722, + "learning_rate": 8.3856988056178e-06, + "loss": 0.6936, + "step": 5552 + }, + { + "epoch": 0.28538390379278444, + "grad_norm": 1.0803638696670532, + "learning_rate": 8.385086340875388e-06, + "loss": 0.8106, + "step": 5553 + }, + { + "epoch": 0.2854352965361291, + "grad_norm": 1.097886085510254, + "learning_rate": 8.384473782346203e-06, + "loss": 0.7908, + "step": 5554 + }, + { + "epoch": 0.28548668927947374, + "grad_norm": 1.0587661266326904, + "learning_rate": 8.383861130047218e-06, + "loss": 0.7735, + "step": 5555 + }, + { + "epoch": 0.2855380820228184, + "grad_norm": 1.1672956943511963, + "learning_rate": 8.383248383995405e-06, + "loss": 0.7644, + "step": 5556 + }, + { + "epoch": 0.28558947476616303, + "grad_norm": 1.1685290336608887, + "learning_rate": 8.38263554420774e-06, + "loss": 0.7508, + "step": 5557 + }, + { + "epoch": 0.2856408675095077, + "grad_norm": 1.0458850860595703, + "learning_rate": 8.382022610701204e-06, + "loss": 0.74, + "step": 5558 + }, + { + "epoch": 0.28569226025285227, + "grad_norm": 1.0270891189575195, + "learning_rate": 8.38140958349278e-06, + "loss": 0.7676, + "step": 5559 + }, + { + "epoch": 0.2857436529961969, + "grad_norm": 1.0821629762649536, + "learning_rate": 8.380796462599448e-06, + "loss": 0.796, + "step": 5560 + }, + { + "epoch": 0.28579504573954156, + "grad_norm": 1.111987829208374, + "learning_rate": 8.380183248038198e-06, + "loss": 0.7464, + "step": 5561 + }, + { + "epoch": 0.2858464384828862, + "grad_norm": 1.0526288747787476, + "learning_rate": 8.379569939826022e-06, + "loss": 0.7472, + "step": 5562 + }, + { + "epoch": 0.28589783122623086, + "grad_norm": 1.2051901817321777, + "learning_rate": 8.378956537979907e-06, + "loss": 0.7756, + "step": 5563 + }, + { + "epoch": 0.2859492239695755, + "grad_norm": 1.0770734548568726, + "learning_rate": 8.378343042516853e-06, + "loss": 0.7754, + "step": 5564 + }, + { + "epoch": 0.28600061671292015, + "grad_norm": 1.1310136318206787, + "learning_rate": 8.377729453453852e-06, + "loss": 0.7881, + "step": 5565 + }, + { + "epoch": 0.2860520094562648, + "grad_norm": 1.1231906414031982, + "learning_rate": 8.37711577080791e-06, + "loss": 0.8021, + "step": 5566 + }, + { + "epoch": 0.2861034021996094, + "grad_norm": 1.0788301229476929, + "learning_rate": 8.376501994596022e-06, + "loss": 0.7912, + "step": 5567 + }, + { + "epoch": 0.28615479494295404, + "grad_norm": 1.1053603887557983, + "learning_rate": 8.3758881248352e-06, + "loss": 0.7447, + "step": 5568 + }, + { + "epoch": 0.2862061876862987, + "grad_norm": 1.2205684185028076, + "learning_rate": 8.37527416154245e-06, + "loss": 0.8371, + "step": 5569 + }, + { + "epoch": 0.28625758042964333, + "grad_norm": 0.9917560815811157, + "learning_rate": 8.374660104734784e-06, + "loss": 0.7161, + "step": 5570 + }, + { + "epoch": 0.286308973172988, + "grad_norm": 1.0688197612762451, + "learning_rate": 8.374045954429211e-06, + "loss": 0.822, + "step": 5571 + }, + { + "epoch": 0.2863603659163326, + "grad_norm": 1.2498170137405396, + "learning_rate": 8.373431710642748e-06, + "loss": 0.7807, + "step": 5572 + }, + { + "epoch": 0.2864117586596773, + "grad_norm": 1.055712342262268, + "learning_rate": 8.372817373392412e-06, + "loss": 0.7837, + "step": 5573 + }, + { + "epoch": 0.2864631514030219, + "grad_norm": 0.8229730129241943, + "learning_rate": 8.372202942695228e-06, + "loss": 0.684, + "step": 5574 + }, + { + "epoch": 0.2865145441463665, + "grad_norm": 0.7589721083641052, + "learning_rate": 8.371588418568216e-06, + "loss": 0.6801, + "step": 5575 + }, + { + "epoch": 0.28656593688971116, + "grad_norm": 1.1124353408813477, + "learning_rate": 8.370973801028404e-06, + "loss": 0.8072, + "step": 5576 + }, + { + "epoch": 0.2866173296330558, + "grad_norm": 1.1101818084716797, + "learning_rate": 8.370359090092816e-06, + "loss": 0.8049, + "step": 5577 + }, + { + "epoch": 0.28666872237640045, + "grad_norm": 0.929706871509552, + "learning_rate": 8.369744285778489e-06, + "loss": 0.6492, + "step": 5578 + }, + { + "epoch": 0.2867201151197451, + "grad_norm": 1.1461102962493896, + "learning_rate": 8.369129388102453e-06, + "loss": 0.7328, + "step": 5579 + }, + { + "epoch": 0.28677150786308975, + "grad_norm": 1.1354528665542603, + "learning_rate": 8.368514397081744e-06, + "loss": 0.8171, + "step": 5580 + }, + { + "epoch": 0.2868229006064344, + "grad_norm": 1.067921757698059, + "learning_rate": 8.367899312733404e-06, + "loss": 0.7386, + "step": 5581 + }, + { + "epoch": 0.286874293349779, + "grad_norm": 1.1526296138763428, + "learning_rate": 8.367284135074472e-06, + "loss": 0.7819, + "step": 5582 + }, + { + "epoch": 0.28692568609312363, + "grad_norm": 1.0225430727005005, + "learning_rate": 8.366668864121991e-06, + "loss": 0.8147, + "step": 5583 + }, + { + "epoch": 0.2869770788364683, + "grad_norm": 1.0895295143127441, + "learning_rate": 8.366053499893012e-06, + "loss": 0.7885, + "step": 5584 + }, + { + "epoch": 0.2870284715798129, + "grad_norm": 1.1318414211273193, + "learning_rate": 8.36543804240458e-06, + "loss": 0.7876, + "step": 5585 + }, + { + "epoch": 0.2870798643231576, + "grad_norm": 0.7978352904319763, + "learning_rate": 8.364822491673749e-06, + "loss": 0.7149, + "step": 5586 + }, + { + "epoch": 0.2871312570665022, + "grad_norm": 1.0664278268814087, + "learning_rate": 8.36420684771757e-06, + "loss": 0.6915, + "step": 5587 + }, + { + "epoch": 0.28718264980984687, + "grad_norm": 1.0693354606628418, + "learning_rate": 8.363591110553105e-06, + "loss": 0.7747, + "step": 5588 + }, + { + "epoch": 0.2872340425531915, + "grad_norm": 1.101609468460083, + "learning_rate": 8.36297528019741e-06, + "loss": 0.7749, + "step": 5589 + }, + { + "epoch": 0.2872854352965361, + "grad_norm": 0.784743070602417, + "learning_rate": 8.362359356667548e-06, + "loss": 0.7076, + "step": 5590 + }, + { + "epoch": 0.28733682803988075, + "grad_norm": 1.1941373348236084, + "learning_rate": 8.361743339980586e-06, + "loss": 0.7885, + "step": 5591 + }, + { + "epoch": 0.2873882207832254, + "grad_norm": 1.1541401147842407, + "learning_rate": 8.361127230153588e-06, + "loss": 0.8146, + "step": 5592 + }, + { + "epoch": 0.28743961352657005, + "grad_norm": 1.043007731437683, + "learning_rate": 8.360511027203624e-06, + "loss": 0.7392, + "step": 5593 + }, + { + "epoch": 0.2874910062699147, + "grad_norm": 1.027769923210144, + "learning_rate": 8.359894731147767e-06, + "loss": 0.7828, + "step": 5594 + }, + { + "epoch": 0.28754239901325934, + "grad_norm": 1.0957672595977783, + "learning_rate": 8.359278342003094e-06, + "loss": 0.7927, + "step": 5595 + }, + { + "epoch": 0.287593791756604, + "grad_norm": 1.103031039237976, + "learning_rate": 8.35866185978668e-06, + "loss": 0.8367, + "step": 5596 + }, + { + "epoch": 0.2876451844999486, + "grad_norm": 1.0995343923568726, + "learning_rate": 8.358045284515607e-06, + "loss": 0.7902, + "step": 5597 + }, + { + "epoch": 0.28769657724329323, + "grad_norm": 1.044679045677185, + "learning_rate": 8.357428616206958e-06, + "loss": 0.7699, + "step": 5598 + }, + { + "epoch": 0.2877479699866379, + "grad_norm": 1.236358642578125, + "learning_rate": 8.356811854877815e-06, + "loss": 0.8296, + "step": 5599 + }, + { + "epoch": 0.2877993627299825, + "grad_norm": 1.036624550819397, + "learning_rate": 8.35619500054527e-06, + "loss": 0.7063, + "step": 5600 + }, + { + "epoch": 0.28785075547332717, + "grad_norm": 1.1533541679382324, + "learning_rate": 8.35557805322641e-06, + "loss": 0.7591, + "step": 5601 + }, + { + "epoch": 0.2879021482166718, + "grad_norm": 1.0949684381484985, + "learning_rate": 8.354961012938332e-06, + "loss": 0.7883, + "step": 5602 + }, + { + "epoch": 0.28795354096001646, + "grad_norm": 1.1274027824401855, + "learning_rate": 8.354343879698127e-06, + "loss": 0.8172, + "step": 5603 + }, + { + "epoch": 0.2880049337033611, + "grad_norm": 1.0572094917297363, + "learning_rate": 8.353726653522897e-06, + "loss": 0.7866, + "step": 5604 + }, + { + "epoch": 0.2880563264467057, + "grad_norm": 1.0444684028625488, + "learning_rate": 8.353109334429742e-06, + "loss": 0.7383, + "step": 5605 + }, + { + "epoch": 0.28810771919005035, + "grad_norm": 0.824914813041687, + "learning_rate": 8.352491922435763e-06, + "loss": 0.6817, + "step": 5606 + }, + { + "epoch": 0.288159111933395, + "grad_norm": 1.0018022060394287, + "learning_rate": 8.35187441755807e-06, + "loss": 0.7131, + "step": 5607 + }, + { + "epoch": 0.28821050467673964, + "grad_norm": 1.1074885129928589, + "learning_rate": 8.35125681981377e-06, + "loss": 0.7719, + "step": 5608 + }, + { + "epoch": 0.2882618974200843, + "grad_norm": 1.0992136001586914, + "learning_rate": 8.35063912921997e-06, + "loss": 0.8299, + "step": 5609 + }, + { + "epoch": 0.28831329016342894, + "grad_norm": 1.117929220199585, + "learning_rate": 8.350021345793788e-06, + "loss": 0.7939, + "step": 5610 + }, + { + "epoch": 0.2883646829067736, + "grad_norm": 1.1543155908584595, + "learning_rate": 8.34940346955234e-06, + "loss": 0.8063, + "step": 5611 + }, + { + "epoch": 0.28841607565011823, + "grad_norm": 1.1072852611541748, + "learning_rate": 8.348785500512744e-06, + "loss": 0.7475, + "step": 5612 + }, + { + "epoch": 0.2884674683934628, + "grad_norm": 0.8498141169548035, + "learning_rate": 8.348167438692121e-06, + "loss": 0.681, + "step": 5613 + }, + { + "epoch": 0.28851886113680747, + "grad_norm": 1.1013530492782593, + "learning_rate": 8.347549284107595e-06, + "loss": 0.7977, + "step": 5614 + }, + { + "epoch": 0.2885702538801521, + "grad_norm": 1.0776612758636475, + "learning_rate": 8.346931036776293e-06, + "loss": 0.7545, + "step": 5615 + }, + { + "epoch": 0.28862164662349676, + "grad_norm": 1.1438453197479248, + "learning_rate": 8.346312696715346e-06, + "loss": 0.7399, + "step": 5616 + }, + { + "epoch": 0.2886730393668414, + "grad_norm": 1.0392910242080688, + "learning_rate": 8.34569426394188e-06, + "loss": 0.7671, + "step": 5617 + }, + { + "epoch": 0.28872443211018606, + "grad_norm": 1.5211209058761597, + "learning_rate": 8.345075738473036e-06, + "loss": 0.8003, + "step": 5618 + }, + { + "epoch": 0.2887758248535307, + "grad_norm": 1.0443077087402344, + "learning_rate": 8.344457120325947e-06, + "loss": 0.746, + "step": 5619 + }, + { + "epoch": 0.2888272175968753, + "grad_norm": 1.0974020957946777, + "learning_rate": 8.343838409517752e-06, + "loss": 0.7881, + "step": 5620 + }, + { + "epoch": 0.28887861034021994, + "grad_norm": 1.1144496202468872, + "learning_rate": 8.343219606065594e-06, + "loss": 0.7889, + "step": 5621 + }, + { + "epoch": 0.2889300030835646, + "grad_norm": 1.0476267337799072, + "learning_rate": 8.342600709986617e-06, + "loss": 0.7922, + "step": 5622 + }, + { + "epoch": 0.28898139582690924, + "grad_norm": 1.1764726638793945, + "learning_rate": 8.34198172129797e-06, + "loss": 0.7736, + "step": 5623 + }, + { + "epoch": 0.2890327885702539, + "grad_norm": 1.1430237293243408, + "learning_rate": 8.3413626400168e-06, + "loss": 0.7675, + "step": 5624 + }, + { + "epoch": 0.28908418131359853, + "grad_norm": 1.0761466026306152, + "learning_rate": 8.34074346616026e-06, + "loss": 0.7455, + "step": 5625 + }, + { + "epoch": 0.2891355740569432, + "grad_norm": 1.0496340990066528, + "learning_rate": 8.340124199745504e-06, + "loss": 0.7798, + "step": 5626 + }, + { + "epoch": 0.2891869668002878, + "grad_norm": 1.0536208152770996, + "learning_rate": 8.339504840789692e-06, + "loss": 0.7647, + "step": 5627 + }, + { + "epoch": 0.2892383595436324, + "grad_norm": 1.0972557067871094, + "learning_rate": 8.338885389309983e-06, + "loss": 0.7946, + "step": 5628 + }, + { + "epoch": 0.28928975228697706, + "grad_norm": 1.084070086479187, + "learning_rate": 8.338265845323537e-06, + "loss": 0.8029, + "step": 5629 + }, + { + "epoch": 0.2893411450303217, + "grad_norm": 1.0093731880187988, + "learning_rate": 8.33764620884752e-06, + "loss": 0.7366, + "step": 5630 + }, + { + "epoch": 0.28939253777366636, + "grad_norm": 0.7974849343299866, + "learning_rate": 8.337026479899103e-06, + "loss": 0.7087, + "step": 5631 + }, + { + "epoch": 0.289443930517011, + "grad_norm": 1.091705083847046, + "learning_rate": 8.336406658495451e-06, + "loss": 0.7712, + "step": 5632 + }, + { + "epoch": 0.28949532326035565, + "grad_norm": 1.121004343032837, + "learning_rate": 8.33578674465374e-06, + "loss": 0.7659, + "step": 5633 + }, + { + "epoch": 0.2895467160037003, + "grad_norm": 1.0774955749511719, + "learning_rate": 8.335166738391143e-06, + "loss": 0.8366, + "step": 5634 + }, + { + "epoch": 0.2895981087470449, + "grad_norm": 1.1118353605270386, + "learning_rate": 8.334546639724839e-06, + "loss": 0.7641, + "step": 5635 + }, + { + "epoch": 0.28964950149038954, + "grad_norm": 1.0571403503417969, + "learning_rate": 8.33392644867201e-06, + "loss": 0.7592, + "step": 5636 + }, + { + "epoch": 0.2897008942337342, + "grad_norm": 1.1711331605911255, + "learning_rate": 8.333306165249836e-06, + "loss": 0.7847, + "step": 5637 + }, + { + "epoch": 0.28975228697707883, + "grad_norm": 1.0693656206130981, + "learning_rate": 8.332685789475505e-06, + "loss": 0.7621, + "step": 5638 + }, + { + "epoch": 0.2898036797204235, + "grad_norm": 1.0910197496414185, + "learning_rate": 8.332065321366205e-06, + "loss": 0.7364, + "step": 5639 + }, + { + "epoch": 0.2898550724637681, + "grad_norm": 1.2406820058822632, + "learning_rate": 8.331444760939124e-06, + "loss": 0.8178, + "step": 5640 + }, + { + "epoch": 0.2899064652071128, + "grad_norm": 1.08094322681427, + "learning_rate": 8.330824108211456e-06, + "loss": 0.7003, + "step": 5641 + }, + { + "epoch": 0.2899578579504574, + "grad_norm": 1.0541576147079468, + "learning_rate": 8.3302033632004e-06, + "loss": 0.7594, + "step": 5642 + }, + { + "epoch": 0.290009250693802, + "grad_norm": 1.1146278381347656, + "learning_rate": 8.32958252592315e-06, + "loss": 0.7401, + "step": 5643 + }, + { + "epoch": 0.29006064343714666, + "grad_norm": 1.0300171375274658, + "learning_rate": 8.328961596396909e-06, + "loss": 0.7563, + "step": 5644 + }, + { + "epoch": 0.2901120361804913, + "grad_norm": 1.0906977653503418, + "learning_rate": 8.32834057463888e-06, + "loss": 0.7982, + "step": 5645 + }, + { + "epoch": 0.29016342892383595, + "grad_norm": 0.7406985759735107, + "learning_rate": 8.327719460666268e-06, + "loss": 0.7098, + "step": 5646 + }, + { + "epoch": 0.2902148216671806, + "grad_norm": 1.0802584886550903, + "learning_rate": 8.327098254496283e-06, + "loss": 0.8108, + "step": 5647 + }, + { + "epoch": 0.29026621441052525, + "grad_norm": 1.0948731899261475, + "learning_rate": 8.326476956146137e-06, + "loss": 0.8159, + "step": 5648 + }, + { + "epoch": 0.2903176071538699, + "grad_norm": 1.1170322895050049, + "learning_rate": 8.32585556563304e-06, + "loss": 0.7929, + "step": 5649 + }, + { + "epoch": 0.2903689998972145, + "grad_norm": 1.10662043094635, + "learning_rate": 8.32523408297421e-06, + "loss": 0.7909, + "step": 5650 + }, + { + "epoch": 0.29042039264055913, + "grad_norm": 1.0914491415023804, + "learning_rate": 8.324612508186867e-06, + "loss": 0.8208, + "step": 5651 + }, + { + "epoch": 0.2904717853839038, + "grad_norm": 1.0864827632904053, + "learning_rate": 8.323990841288232e-06, + "loss": 0.7454, + "step": 5652 + }, + { + "epoch": 0.2905231781272484, + "grad_norm": 1.0954060554504395, + "learning_rate": 8.323369082295526e-06, + "loss": 0.8062, + "step": 5653 + }, + { + "epoch": 0.2905745708705931, + "grad_norm": 1.1018965244293213, + "learning_rate": 8.32274723122598e-06, + "loss": 0.8131, + "step": 5654 + }, + { + "epoch": 0.2906259636139377, + "grad_norm": 1.1031262874603271, + "learning_rate": 8.322125288096818e-06, + "loss": 0.7304, + "step": 5655 + }, + { + "epoch": 0.29067735635728237, + "grad_norm": 1.0805996656417847, + "learning_rate": 8.321503252925276e-06, + "loss": 0.7531, + "step": 5656 + }, + { + "epoch": 0.290728749100627, + "grad_norm": 1.2027499675750732, + "learning_rate": 8.320881125728585e-06, + "loss": 0.771, + "step": 5657 + }, + { + "epoch": 0.2907801418439716, + "grad_norm": 0.7643687129020691, + "learning_rate": 8.320258906523983e-06, + "loss": 0.6457, + "step": 5658 + }, + { + "epoch": 0.29083153458731625, + "grad_norm": 1.1028028726577759, + "learning_rate": 8.319636595328709e-06, + "loss": 0.819, + "step": 5659 + }, + { + "epoch": 0.2908829273306609, + "grad_norm": 1.1677026748657227, + "learning_rate": 8.319014192160001e-06, + "loss": 0.7573, + "step": 5660 + }, + { + "epoch": 0.29093432007400555, + "grad_norm": 1.1305981874465942, + "learning_rate": 8.31839169703511e-06, + "loss": 0.7485, + "step": 5661 + }, + { + "epoch": 0.2909857128173502, + "grad_norm": 1.188927412033081, + "learning_rate": 8.317769109971277e-06, + "loss": 0.8072, + "step": 5662 + }, + { + "epoch": 0.29103710556069484, + "grad_norm": 0.8874804973602295, + "learning_rate": 8.317146430985757e-06, + "loss": 0.7183, + "step": 5663 + }, + { + "epoch": 0.2910884983040395, + "grad_norm": 1.0651243925094604, + "learning_rate": 8.316523660095795e-06, + "loss": 0.7513, + "step": 5664 + }, + { + "epoch": 0.29113989104738414, + "grad_norm": 1.0614503622055054, + "learning_rate": 8.31590079731865e-06, + "loss": 0.7364, + "step": 5665 + }, + { + "epoch": 0.2911912837907287, + "grad_norm": 0.7790494561195374, + "learning_rate": 8.315277842671578e-06, + "loss": 0.7104, + "step": 5666 + }, + { + "epoch": 0.2912426765340734, + "grad_norm": 1.0102440118789673, + "learning_rate": 8.314654796171837e-06, + "loss": 0.7188, + "step": 5667 + }, + { + "epoch": 0.291294069277418, + "grad_norm": 1.0722849369049072, + "learning_rate": 8.314031657836692e-06, + "loss": 0.765, + "step": 5668 + }, + { + "epoch": 0.29134546202076267, + "grad_norm": 1.060153841972351, + "learning_rate": 8.313408427683406e-06, + "loss": 0.7128, + "step": 5669 + }, + { + "epoch": 0.2913968547641073, + "grad_norm": 0.6985536217689514, + "learning_rate": 8.312785105729244e-06, + "loss": 0.7427, + "step": 5670 + }, + { + "epoch": 0.29144824750745196, + "grad_norm": 1.0739047527313232, + "learning_rate": 8.31216169199148e-06, + "loss": 0.7983, + "step": 5671 + }, + { + "epoch": 0.2914996402507966, + "grad_norm": 1.0389541387557983, + "learning_rate": 8.311538186487384e-06, + "loss": 0.751, + "step": 5672 + }, + { + "epoch": 0.2915510329941412, + "grad_norm": 1.0273922681808472, + "learning_rate": 8.31091458923423e-06, + "loss": 0.7358, + "step": 5673 + }, + { + "epoch": 0.29160242573748585, + "grad_norm": 1.0913567543029785, + "learning_rate": 8.310290900249297e-06, + "loss": 0.7897, + "step": 5674 + }, + { + "epoch": 0.2916538184808305, + "grad_norm": 1.0414783954620361, + "learning_rate": 8.309667119549862e-06, + "loss": 0.7703, + "step": 5675 + }, + { + "epoch": 0.29170521122417514, + "grad_norm": 1.1161731481552124, + "learning_rate": 8.30904324715321e-06, + "loss": 0.7682, + "step": 5676 + }, + { + "epoch": 0.2917566039675198, + "grad_norm": 1.082031011581421, + "learning_rate": 8.308419283076626e-06, + "loss": 0.7834, + "step": 5677 + }, + { + "epoch": 0.29180799671086444, + "grad_norm": 1.0049506425857544, + "learning_rate": 8.307795227337397e-06, + "loss": 0.7429, + "step": 5678 + }, + { + "epoch": 0.2918593894542091, + "grad_norm": 1.1536693572998047, + "learning_rate": 8.307171079952812e-06, + "loss": 0.797, + "step": 5679 + }, + { + "epoch": 0.29191078219755373, + "grad_norm": 1.1051832437515259, + "learning_rate": 8.306546840940165e-06, + "loss": 0.7571, + "step": 5680 + }, + { + "epoch": 0.2919621749408983, + "grad_norm": 1.1587854623794556, + "learning_rate": 8.305922510316749e-06, + "loss": 0.7965, + "step": 5681 + }, + { + "epoch": 0.29201356768424297, + "grad_norm": 1.0823333263397217, + "learning_rate": 8.305298088099864e-06, + "loss": 0.7235, + "step": 5682 + }, + { + "epoch": 0.2920649604275876, + "grad_norm": 1.1792079210281372, + "learning_rate": 8.304673574306807e-06, + "loss": 0.8006, + "step": 5683 + }, + { + "epoch": 0.29211635317093226, + "grad_norm": 1.0556005239486694, + "learning_rate": 8.304048968954885e-06, + "loss": 0.7532, + "step": 5684 + }, + { + "epoch": 0.2921677459142769, + "grad_norm": 0.747546911239624, + "learning_rate": 8.303424272061402e-06, + "loss": 0.6977, + "step": 5685 + }, + { + "epoch": 0.29221913865762156, + "grad_norm": 1.049843668937683, + "learning_rate": 8.302799483643661e-06, + "loss": 0.7504, + "step": 5686 + }, + { + "epoch": 0.2922705314009662, + "grad_norm": 1.18901789188385, + "learning_rate": 8.302174603718981e-06, + "loss": 0.8433, + "step": 5687 + }, + { + "epoch": 0.2923219241443108, + "grad_norm": 1.1371957063674927, + "learning_rate": 8.301549632304668e-06, + "loss": 0.8193, + "step": 5688 + }, + { + "epoch": 0.29237331688765544, + "grad_norm": 0.9709553122520447, + "learning_rate": 8.300924569418039e-06, + "loss": 0.7664, + "step": 5689 + }, + { + "epoch": 0.2924247096310001, + "grad_norm": 1.0759268999099731, + "learning_rate": 8.300299415076412e-06, + "loss": 0.8017, + "step": 5690 + }, + { + "epoch": 0.29247610237434474, + "grad_norm": 1.1185113191604614, + "learning_rate": 8.299674169297107e-06, + "loss": 0.8092, + "step": 5691 + }, + { + "epoch": 0.2925274951176894, + "grad_norm": 1.0796475410461426, + "learning_rate": 8.29904883209745e-06, + "loss": 0.7633, + "step": 5692 + }, + { + "epoch": 0.29257888786103403, + "grad_norm": 1.038589596748352, + "learning_rate": 8.298423403494764e-06, + "loss": 0.7614, + "step": 5693 + }, + { + "epoch": 0.2926302806043787, + "grad_norm": 1.0312401056289673, + "learning_rate": 8.297797883506376e-06, + "loss": 0.7376, + "step": 5694 + }, + { + "epoch": 0.2926816733477233, + "grad_norm": 1.0788819789886475, + "learning_rate": 8.297172272149618e-06, + "loss": 0.8343, + "step": 5695 + }, + { + "epoch": 0.2927330660910679, + "grad_norm": 0.7674497365951538, + "learning_rate": 8.296546569441824e-06, + "loss": 0.6587, + "step": 5696 + }, + { + "epoch": 0.29278445883441256, + "grad_norm": 1.0895041227340698, + "learning_rate": 8.295920775400326e-06, + "loss": 0.76, + "step": 5697 + }, + { + "epoch": 0.2928358515777572, + "grad_norm": 1.0605055093765259, + "learning_rate": 8.295294890042467e-06, + "loss": 0.7833, + "step": 5698 + }, + { + "epoch": 0.29288724432110186, + "grad_norm": 1.0856248140335083, + "learning_rate": 8.294668913385584e-06, + "loss": 0.7653, + "step": 5699 + }, + { + "epoch": 0.2929386370644465, + "grad_norm": 1.0853033065795898, + "learning_rate": 8.294042845447024e-06, + "loss": 0.7933, + "step": 5700 + }, + { + "epoch": 0.29299002980779115, + "grad_norm": 1.080798864364624, + "learning_rate": 8.293416686244128e-06, + "loss": 0.7273, + "step": 5701 + }, + { + "epoch": 0.2930414225511358, + "grad_norm": 1.074588656425476, + "learning_rate": 8.292790435794249e-06, + "loss": 0.8113, + "step": 5702 + }, + { + "epoch": 0.29309281529448045, + "grad_norm": 0.8278897404670715, + "learning_rate": 8.292164094114734e-06, + "loss": 0.6833, + "step": 5703 + }, + { + "epoch": 0.29314420803782504, + "grad_norm": 1.0970269441604614, + "learning_rate": 8.29153766122294e-06, + "loss": 0.7886, + "step": 5704 + }, + { + "epoch": 0.2931956007811697, + "grad_norm": 1.2030894756317139, + "learning_rate": 8.290911137136219e-06, + "loss": 0.7804, + "step": 5705 + }, + { + "epoch": 0.29324699352451433, + "grad_norm": 1.0756796598434448, + "learning_rate": 8.29028452187193e-06, + "loss": 0.7666, + "step": 5706 + }, + { + "epoch": 0.293298386267859, + "grad_norm": 0.8275964856147766, + "learning_rate": 8.289657815447436e-06, + "loss": 0.679, + "step": 5707 + }, + { + "epoch": 0.2933497790112036, + "grad_norm": 0.7493463158607483, + "learning_rate": 8.289031017880101e-06, + "loss": 0.685, + "step": 5708 + }, + { + "epoch": 0.2934011717545483, + "grad_norm": 1.1303842067718506, + "learning_rate": 8.28840412918729e-06, + "loss": 0.777, + "step": 5709 + }, + { + "epoch": 0.2934525644978929, + "grad_norm": 0.7323131561279297, + "learning_rate": 8.287777149386369e-06, + "loss": 0.6752, + "step": 5710 + }, + { + "epoch": 0.2935039572412375, + "grad_norm": 1.0850731134414673, + "learning_rate": 8.28715007849471e-06, + "loss": 0.7356, + "step": 5711 + }, + { + "epoch": 0.29355534998458216, + "grad_norm": 1.1178058385849, + "learning_rate": 8.28652291652969e-06, + "loss": 0.7879, + "step": 5712 + }, + { + "epoch": 0.2936067427279268, + "grad_norm": 1.0428707599639893, + "learning_rate": 8.285895663508681e-06, + "loss": 0.7542, + "step": 5713 + }, + { + "epoch": 0.29365813547127145, + "grad_norm": 1.1091639995574951, + "learning_rate": 8.285268319449066e-06, + "loss": 0.7433, + "step": 5714 + }, + { + "epoch": 0.2937095282146161, + "grad_norm": 1.0691372156143188, + "learning_rate": 8.28464088436822e-06, + "loss": 0.7508, + "step": 5715 + }, + { + "epoch": 0.29376092095796075, + "grad_norm": 1.0697696208953857, + "learning_rate": 8.284013358283532e-06, + "loss": 0.7236, + "step": 5716 + }, + { + "epoch": 0.2938123137013054, + "grad_norm": 0.7605612874031067, + "learning_rate": 8.283385741212386e-06, + "loss": 0.6525, + "step": 5717 + }, + { + "epoch": 0.29386370644465004, + "grad_norm": 0.9314214587211609, + "learning_rate": 8.28275803317217e-06, + "loss": 0.6916, + "step": 5718 + }, + { + "epoch": 0.29391509918799463, + "grad_norm": 1.143584132194519, + "learning_rate": 8.282130234180277e-06, + "loss": 0.8481, + "step": 5719 + }, + { + "epoch": 0.2939664919313393, + "grad_norm": 1.0635656118392944, + "learning_rate": 8.281502344254099e-06, + "loss": 0.7283, + "step": 5720 + }, + { + "epoch": 0.2940178846746839, + "grad_norm": 0.78325355052948, + "learning_rate": 8.280874363411034e-06, + "loss": 0.6878, + "step": 5721 + }, + { + "epoch": 0.2940692774180286, + "grad_norm": 1.1425780057907104, + "learning_rate": 8.280246291668479e-06, + "loss": 0.7958, + "step": 5722 + }, + { + "epoch": 0.2941206701613732, + "grad_norm": 0.8000555634498596, + "learning_rate": 8.279618129043837e-06, + "loss": 0.6833, + "step": 5723 + }, + { + "epoch": 0.29417206290471787, + "grad_norm": 1.1261862516403198, + "learning_rate": 8.278989875554508e-06, + "loss": 0.7936, + "step": 5724 + }, + { + "epoch": 0.2942234556480625, + "grad_norm": 1.1115299463272095, + "learning_rate": 8.278361531217904e-06, + "loss": 0.7379, + "step": 5725 + }, + { + "epoch": 0.2942748483914071, + "grad_norm": 0.8913065791130066, + "learning_rate": 8.27773309605143e-06, + "loss": 0.6833, + "step": 5726 + }, + { + "epoch": 0.29432624113475175, + "grad_norm": 0.9982072710990906, + "learning_rate": 8.277104570072498e-06, + "loss": 0.8078, + "step": 5727 + }, + { + "epoch": 0.2943776338780964, + "grad_norm": 0.6892127394676208, + "learning_rate": 8.276475953298522e-06, + "loss": 0.6543, + "step": 5728 + }, + { + "epoch": 0.29442902662144105, + "grad_norm": 1.0854095220565796, + "learning_rate": 8.27584724574692e-06, + "loss": 0.7948, + "step": 5729 + }, + { + "epoch": 0.2944804193647857, + "grad_norm": 1.1071066856384277, + "learning_rate": 8.27521844743511e-06, + "loss": 0.7527, + "step": 5730 + }, + { + "epoch": 0.29453181210813034, + "grad_norm": 1.0253219604492188, + "learning_rate": 8.274589558380511e-06, + "loss": 0.6996, + "step": 5731 + }, + { + "epoch": 0.294583204851475, + "grad_norm": 1.0593576431274414, + "learning_rate": 8.273960578600549e-06, + "loss": 0.7827, + "step": 5732 + }, + { + "epoch": 0.29463459759481964, + "grad_norm": 0.7113011479377747, + "learning_rate": 8.27333150811265e-06, + "loss": 0.6749, + "step": 5733 + }, + { + "epoch": 0.2946859903381642, + "grad_norm": 1.2593320608139038, + "learning_rate": 8.272702346934243e-06, + "loss": 0.7625, + "step": 5734 + }, + { + "epoch": 0.2947373830815089, + "grad_norm": 0.9264829754829407, + "learning_rate": 8.272073095082761e-06, + "loss": 0.7198, + "step": 5735 + }, + { + "epoch": 0.2947887758248535, + "grad_norm": 1.0380034446716309, + "learning_rate": 8.271443752575636e-06, + "loss": 0.7223, + "step": 5736 + }, + { + "epoch": 0.29484016856819817, + "grad_norm": 1.0233192443847656, + "learning_rate": 8.270814319430304e-06, + "loss": 0.7809, + "step": 5737 + }, + { + "epoch": 0.2948915613115428, + "grad_norm": 1.0644688606262207, + "learning_rate": 8.270184795664206e-06, + "loss": 0.7714, + "step": 5738 + }, + { + "epoch": 0.29494295405488746, + "grad_norm": 1.343535304069519, + "learning_rate": 8.269555181294781e-06, + "loss": 0.7498, + "step": 5739 + }, + { + "epoch": 0.2949943467982321, + "grad_norm": 1.086153507232666, + "learning_rate": 8.268925476339477e-06, + "loss": 0.7688, + "step": 5740 + }, + { + "epoch": 0.29504573954157676, + "grad_norm": 1.0465667247772217, + "learning_rate": 8.268295680815738e-06, + "loss": 0.7777, + "step": 5741 + }, + { + "epoch": 0.29509713228492135, + "grad_norm": 1.045179843902588, + "learning_rate": 8.26766579474101e-06, + "loss": 0.7345, + "step": 5742 + }, + { + "epoch": 0.295148525028266, + "grad_norm": 1.2030725479125977, + "learning_rate": 8.267035818132752e-06, + "loss": 0.8281, + "step": 5743 + }, + { + "epoch": 0.29519991777161064, + "grad_norm": 1.1048808097839355, + "learning_rate": 8.266405751008411e-06, + "loss": 0.7141, + "step": 5744 + }, + { + "epoch": 0.2952513105149553, + "grad_norm": 1.0474354028701782, + "learning_rate": 8.265775593385448e-06, + "loss": 0.7936, + "step": 5745 + }, + { + "epoch": 0.29530270325829994, + "grad_norm": 1.1040276288986206, + "learning_rate": 8.265145345281319e-06, + "loss": 0.8437, + "step": 5746 + }, + { + "epoch": 0.2953540960016446, + "grad_norm": 1.2018976211547852, + "learning_rate": 8.264515006713488e-06, + "loss": 0.7376, + "step": 5747 + }, + { + "epoch": 0.29540548874498923, + "grad_norm": 1.1449804306030273, + "learning_rate": 8.263884577699416e-06, + "loss": 0.7612, + "step": 5748 + }, + { + "epoch": 0.2954568814883338, + "grad_norm": 1.0426193475723267, + "learning_rate": 8.263254058256573e-06, + "loss": 0.7609, + "step": 5749 + }, + { + "epoch": 0.29550827423167847, + "grad_norm": 1.060174822807312, + "learning_rate": 8.262623448402428e-06, + "loss": 0.7692, + "step": 5750 + }, + { + "epoch": 0.2955596669750231, + "grad_norm": 1.0850762128829956, + "learning_rate": 8.261992748154449e-06, + "loss": 0.7679, + "step": 5751 + }, + { + "epoch": 0.29561105971836776, + "grad_norm": 1.2262543439865112, + "learning_rate": 8.261361957530113e-06, + "loss": 0.7612, + "step": 5752 + }, + { + "epoch": 0.2956624524617124, + "grad_norm": 1.140245795249939, + "learning_rate": 8.260731076546898e-06, + "loss": 0.7625, + "step": 5753 + }, + { + "epoch": 0.29571384520505706, + "grad_norm": 1.0653430223464966, + "learning_rate": 8.260100105222279e-06, + "loss": 0.7685, + "step": 5754 + }, + { + "epoch": 0.2957652379484017, + "grad_norm": 1.0963032245635986, + "learning_rate": 8.25946904357374e-06, + "loss": 0.7677, + "step": 5755 + }, + { + "epoch": 0.29581663069174635, + "grad_norm": 1.1433473825454712, + "learning_rate": 8.258837891618765e-06, + "loss": 0.7283, + "step": 5756 + }, + { + "epoch": 0.29586802343509094, + "grad_norm": 0.863707959651947, + "learning_rate": 8.25820664937484e-06, + "loss": 0.7059, + "step": 5757 + }, + { + "epoch": 0.2959194161784356, + "grad_norm": 1.0429195165634155, + "learning_rate": 8.257575316859454e-06, + "loss": 0.7783, + "step": 5758 + }, + { + "epoch": 0.29597080892178024, + "grad_norm": 1.2195403575897217, + "learning_rate": 8.256943894090101e-06, + "loss": 0.8092, + "step": 5759 + }, + { + "epoch": 0.2960222016651249, + "grad_norm": 1.0325510501861572, + "learning_rate": 8.256312381084272e-06, + "loss": 0.7635, + "step": 5760 + }, + { + "epoch": 0.29607359440846953, + "grad_norm": 1.2305238246917725, + "learning_rate": 8.255680777859466e-06, + "loss": 0.7523, + "step": 5761 + }, + { + "epoch": 0.2961249871518142, + "grad_norm": 0.6854696869850159, + "learning_rate": 8.25504908443318e-06, + "loss": 0.7004, + "step": 5762 + }, + { + "epoch": 0.2961763798951588, + "grad_norm": 1.1869467496871948, + "learning_rate": 8.254417300822916e-06, + "loss": 0.7778, + "step": 5763 + }, + { + "epoch": 0.2962277726385034, + "grad_norm": 1.0577789545059204, + "learning_rate": 8.253785427046181e-06, + "loss": 0.7938, + "step": 5764 + }, + { + "epoch": 0.29627916538184806, + "grad_norm": 0.6905539631843567, + "learning_rate": 8.25315346312048e-06, + "loss": 0.6804, + "step": 5765 + }, + { + "epoch": 0.2963305581251927, + "grad_norm": 1.1074823141098022, + "learning_rate": 8.252521409063318e-06, + "loss": 0.7471, + "step": 5766 + }, + { + "epoch": 0.29638195086853736, + "grad_norm": 1.092867136001587, + "learning_rate": 8.251889264892212e-06, + "loss": 0.778, + "step": 5767 + }, + { + "epoch": 0.296433343611882, + "grad_norm": 1.1158491373062134, + "learning_rate": 8.251257030624675e-06, + "loss": 0.7623, + "step": 5768 + }, + { + "epoch": 0.29648473635522665, + "grad_norm": 1.2056506872177124, + "learning_rate": 8.250624706278222e-06, + "loss": 0.7727, + "step": 5769 + }, + { + "epoch": 0.2965361290985713, + "grad_norm": 0.7627761960029602, + "learning_rate": 8.249992291870373e-06, + "loss": 0.7172, + "step": 5770 + }, + { + "epoch": 0.29658752184191595, + "grad_norm": 1.0663686990737915, + "learning_rate": 8.249359787418649e-06, + "loss": 0.7612, + "step": 5771 + }, + { + "epoch": 0.29663891458526054, + "grad_norm": 1.1841156482696533, + "learning_rate": 8.248727192940575e-06, + "loss": 0.7992, + "step": 5772 + }, + { + "epoch": 0.2966903073286052, + "grad_norm": 0.788904070854187, + "learning_rate": 8.248094508453678e-06, + "loss": 0.6657, + "step": 5773 + }, + { + "epoch": 0.29674170007194983, + "grad_norm": 1.2766172885894775, + "learning_rate": 8.247461733975486e-06, + "loss": 0.7917, + "step": 5774 + }, + { + "epoch": 0.2967930928152945, + "grad_norm": 1.116125464439392, + "learning_rate": 8.24682886952353e-06, + "loss": 0.7292, + "step": 5775 + }, + { + "epoch": 0.2968444855586391, + "grad_norm": 1.2301216125488281, + "learning_rate": 8.246195915115345e-06, + "loss": 0.8084, + "step": 5776 + }, + { + "epoch": 0.2968958783019838, + "grad_norm": 1.4403924942016602, + "learning_rate": 8.24556287076847e-06, + "loss": 0.7705, + "step": 5777 + }, + { + "epoch": 0.2969472710453284, + "grad_norm": 1.063284993171692, + "learning_rate": 8.244929736500437e-06, + "loss": 0.7522, + "step": 5778 + }, + { + "epoch": 0.296998663788673, + "grad_norm": 1.1716357469558716, + "learning_rate": 8.244296512328794e-06, + "loss": 0.856, + "step": 5779 + }, + { + "epoch": 0.29705005653201766, + "grad_norm": 1.045871376991272, + "learning_rate": 8.243663198271083e-06, + "loss": 0.7752, + "step": 5780 + }, + { + "epoch": 0.2971014492753623, + "grad_norm": 1.0875658988952637, + "learning_rate": 8.243029794344852e-06, + "loss": 0.7783, + "step": 5781 + }, + { + "epoch": 0.29715284201870695, + "grad_norm": 1.1296182870864868, + "learning_rate": 8.242396300567647e-06, + "loss": 0.7557, + "step": 5782 + }, + { + "epoch": 0.2972042347620516, + "grad_norm": 1.066383957862854, + "learning_rate": 8.241762716957022e-06, + "loss": 0.8204, + "step": 5783 + }, + { + "epoch": 0.29725562750539625, + "grad_norm": 1.0765166282653809, + "learning_rate": 8.24112904353053e-06, + "loss": 0.7812, + "step": 5784 + }, + { + "epoch": 0.2973070202487409, + "grad_norm": 1.1533211469650269, + "learning_rate": 8.240495280305726e-06, + "loss": 0.7504, + "step": 5785 + }, + { + "epoch": 0.29735841299208554, + "grad_norm": 1.0487736463546753, + "learning_rate": 8.239861427300174e-06, + "loss": 0.7948, + "step": 5786 + }, + { + "epoch": 0.29740980573543013, + "grad_norm": 1.1606242656707764, + "learning_rate": 8.239227484531428e-06, + "loss": 0.7742, + "step": 5787 + }, + { + "epoch": 0.2974611984787748, + "grad_norm": 1.0776646137237549, + "learning_rate": 8.238593452017057e-06, + "loss": 0.7647, + "step": 5788 + }, + { + "epoch": 0.2975125912221194, + "grad_norm": 1.055606484413147, + "learning_rate": 8.237959329774627e-06, + "loss": 0.7366, + "step": 5789 + }, + { + "epoch": 0.2975639839654641, + "grad_norm": 1.1487818956375122, + "learning_rate": 8.237325117821707e-06, + "loss": 0.7463, + "step": 5790 + }, + { + "epoch": 0.2976153767088087, + "grad_norm": 1.061753511428833, + "learning_rate": 8.236690816175867e-06, + "loss": 0.7447, + "step": 5791 + }, + { + "epoch": 0.29766676945215337, + "grad_norm": 1.1628844738006592, + "learning_rate": 8.236056424854681e-06, + "loss": 0.8055, + "step": 5792 + }, + { + "epoch": 0.297718162195498, + "grad_norm": 1.045771837234497, + "learning_rate": 8.235421943875725e-06, + "loss": 0.7178, + "step": 5793 + }, + { + "epoch": 0.29776955493884266, + "grad_norm": 1.1070959568023682, + "learning_rate": 8.23478737325658e-06, + "loss": 0.7857, + "step": 5794 + }, + { + "epoch": 0.29782094768218725, + "grad_norm": 1.1098008155822754, + "learning_rate": 8.234152713014827e-06, + "loss": 0.7615, + "step": 5795 + }, + { + "epoch": 0.2978723404255319, + "grad_norm": 0.9443713426589966, + "learning_rate": 8.233517963168047e-06, + "loss": 0.7576, + "step": 5796 + }, + { + "epoch": 0.29792373316887655, + "grad_norm": 1.1075993776321411, + "learning_rate": 8.23288312373383e-06, + "loss": 0.815, + "step": 5797 + }, + { + "epoch": 0.2979751259122212, + "grad_norm": 1.0640676021575928, + "learning_rate": 8.232248194729761e-06, + "loss": 0.7556, + "step": 5798 + }, + { + "epoch": 0.29802651865556584, + "grad_norm": 1.086224913597107, + "learning_rate": 8.231613176173436e-06, + "loss": 0.7857, + "step": 5799 + }, + { + "epoch": 0.2980779113989105, + "grad_norm": 1.1312247514724731, + "learning_rate": 8.230978068082443e-06, + "loss": 0.8036, + "step": 5800 + }, + { + "epoch": 0.29812930414225514, + "grad_norm": 1.1406044960021973, + "learning_rate": 8.230342870474384e-06, + "loss": 0.8042, + "step": 5801 + }, + { + "epoch": 0.2981806968855997, + "grad_norm": 1.0455833673477173, + "learning_rate": 8.229707583366852e-06, + "loss": 0.7252, + "step": 5802 + }, + { + "epoch": 0.2982320896289444, + "grad_norm": 1.103022575378418, + "learning_rate": 8.229072206777453e-06, + "loss": 0.7537, + "step": 5803 + }, + { + "epoch": 0.298283482372289, + "grad_norm": 1.0311105251312256, + "learning_rate": 8.22843674072379e-06, + "loss": 0.6781, + "step": 5804 + }, + { + "epoch": 0.29833487511563367, + "grad_norm": 1.0067039728164673, + "learning_rate": 8.227801185223465e-06, + "loss": 0.7053, + "step": 5805 + }, + { + "epoch": 0.2983862678589783, + "grad_norm": 1.1207455396652222, + "learning_rate": 8.227165540294088e-06, + "loss": 0.8304, + "step": 5806 + }, + { + "epoch": 0.29843766060232296, + "grad_norm": 0.739044725894928, + "learning_rate": 8.226529805953277e-06, + "loss": 0.7051, + "step": 5807 + }, + { + "epoch": 0.2984890533456676, + "grad_norm": 1.0492382049560547, + "learning_rate": 8.225893982218636e-06, + "loss": 0.7924, + "step": 5808 + }, + { + "epoch": 0.29854044608901226, + "grad_norm": 1.1446664333343506, + "learning_rate": 8.225258069107786e-06, + "loss": 0.732, + "step": 5809 + }, + { + "epoch": 0.29859183883235685, + "grad_norm": 1.0315366983413696, + "learning_rate": 8.224622066638346e-06, + "loss": 0.7694, + "step": 5810 + }, + { + "epoch": 0.2986432315757015, + "grad_norm": 1.0689424276351929, + "learning_rate": 8.223985974827935e-06, + "loss": 0.7548, + "step": 5811 + }, + { + "epoch": 0.29869462431904614, + "grad_norm": 1.1001231670379639, + "learning_rate": 8.223349793694177e-06, + "loss": 0.8299, + "step": 5812 + }, + { + "epoch": 0.2987460170623908, + "grad_norm": 1.1299155950546265, + "learning_rate": 8.222713523254699e-06, + "loss": 0.7607, + "step": 5813 + }, + { + "epoch": 0.29879740980573544, + "grad_norm": 1.1189833879470825, + "learning_rate": 8.222077163527128e-06, + "loss": 0.7758, + "step": 5814 + }, + { + "epoch": 0.2988488025490801, + "grad_norm": 0.7642161250114441, + "learning_rate": 8.221440714529096e-06, + "loss": 0.6578, + "step": 5815 + }, + { + "epoch": 0.29890019529242473, + "grad_norm": 1.0617568492889404, + "learning_rate": 8.220804176278234e-06, + "loss": 0.737, + "step": 5816 + }, + { + "epoch": 0.2989515880357693, + "grad_norm": 1.0783023834228516, + "learning_rate": 8.220167548792183e-06, + "loss": 0.7509, + "step": 5817 + }, + { + "epoch": 0.29900298077911397, + "grad_norm": 1.1129320859909058, + "learning_rate": 8.219530832088576e-06, + "loss": 0.7894, + "step": 5818 + }, + { + "epoch": 0.2990543735224586, + "grad_norm": 0.7372670769691467, + "learning_rate": 8.218894026185055e-06, + "loss": 0.7061, + "step": 5819 + }, + { + "epoch": 0.29910576626580326, + "grad_norm": 1.0834494829177856, + "learning_rate": 8.218257131099267e-06, + "loss": 0.7377, + "step": 5820 + }, + { + "epoch": 0.2991571590091479, + "grad_norm": 1.0054423809051514, + "learning_rate": 8.217620146848852e-06, + "loss": 0.7521, + "step": 5821 + }, + { + "epoch": 0.29920855175249256, + "grad_norm": 0.9185977578163147, + "learning_rate": 8.216983073451463e-06, + "loss": 0.6563, + "step": 5822 + }, + { + "epoch": 0.2992599444958372, + "grad_norm": 0.7151365876197815, + "learning_rate": 8.216345910924749e-06, + "loss": 0.7017, + "step": 5823 + }, + { + "epoch": 0.29931133723918185, + "grad_norm": 0.7121673822402954, + "learning_rate": 8.215708659286362e-06, + "loss": 0.6917, + "step": 5824 + }, + { + "epoch": 0.29936272998252644, + "grad_norm": 1.1187169551849365, + "learning_rate": 8.215071318553958e-06, + "loss": 0.7897, + "step": 5825 + }, + { + "epoch": 0.2994141227258711, + "grad_norm": 1.115398645401001, + "learning_rate": 8.214433888745196e-06, + "loss": 0.8009, + "step": 5826 + }, + { + "epoch": 0.29946551546921574, + "grad_norm": 1.0566033124923706, + "learning_rate": 8.213796369877736e-06, + "loss": 0.7621, + "step": 5827 + }, + { + "epoch": 0.2995169082125604, + "grad_norm": 1.3706203699111938, + "learning_rate": 8.213158761969241e-06, + "loss": 0.8194, + "step": 5828 + }, + { + "epoch": 0.29956830095590503, + "grad_norm": 0.796194851398468, + "learning_rate": 8.212521065037377e-06, + "loss": 0.7286, + "step": 5829 + }, + { + "epoch": 0.2996196936992497, + "grad_norm": 1.0713847875595093, + "learning_rate": 8.211883279099811e-06, + "loss": 0.7256, + "step": 5830 + }, + { + "epoch": 0.2996710864425943, + "grad_norm": 1.009265661239624, + "learning_rate": 8.211245404174215e-06, + "loss": 0.6832, + "step": 5831 + }, + { + "epoch": 0.299722479185939, + "grad_norm": 1.0439300537109375, + "learning_rate": 8.21060744027826e-06, + "loss": 0.7978, + "step": 5832 + }, + { + "epoch": 0.29977387192928356, + "grad_norm": 1.142016887664795, + "learning_rate": 8.209969387429624e-06, + "loss": 0.7922, + "step": 5833 + }, + { + "epoch": 0.2998252646726282, + "grad_norm": 1.0432748794555664, + "learning_rate": 8.209331245645981e-06, + "loss": 0.7843, + "step": 5834 + }, + { + "epoch": 0.29987665741597286, + "grad_norm": 1.1137516498565674, + "learning_rate": 8.208693014945013e-06, + "loss": 0.8004, + "step": 5835 + }, + { + "epoch": 0.2999280501593175, + "grad_norm": 1.073835015296936, + "learning_rate": 8.208054695344404e-06, + "loss": 0.7787, + "step": 5836 + }, + { + "epoch": 0.29997944290266215, + "grad_norm": 1.143459439277649, + "learning_rate": 8.207416286861839e-06, + "loss": 0.7483, + "step": 5837 + }, + { + "epoch": 0.3000308356460068, + "grad_norm": 1.1468329429626465, + "learning_rate": 8.206777789515003e-06, + "loss": 0.8289, + "step": 5838 + }, + { + "epoch": 0.30008222838935145, + "grad_norm": 1.0970426797866821, + "learning_rate": 8.20613920332159e-06, + "loss": 0.7422, + "step": 5839 + }, + { + "epoch": 0.30013362113269604, + "grad_norm": 1.094024419784546, + "learning_rate": 8.205500528299289e-06, + "loss": 0.7995, + "step": 5840 + }, + { + "epoch": 0.3001850138760407, + "grad_norm": 1.0331776142120361, + "learning_rate": 8.204861764465798e-06, + "loss": 0.7193, + "step": 5841 + }, + { + "epoch": 0.30023640661938533, + "grad_norm": 1.0264288187026978, + "learning_rate": 8.204222911838813e-06, + "loss": 0.7479, + "step": 5842 + }, + { + "epoch": 0.30028779936273, + "grad_norm": 1.0783313512802124, + "learning_rate": 8.203583970436033e-06, + "loss": 0.7224, + "step": 5843 + }, + { + "epoch": 0.3003391921060746, + "grad_norm": 1.0672835111618042, + "learning_rate": 8.202944940275164e-06, + "loss": 0.7589, + "step": 5844 + }, + { + "epoch": 0.3003905848494193, + "grad_norm": 0.9001355171203613, + "learning_rate": 8.202305821373907e-06, + "loss": 0.7032, + "step": 5845 + }, + { + "epoch": 0.3004419775927639, + "grad_norm": 0.9846897721290588, + "learning_rate": 8.20166661374997e-06, + "loss": 0.752, + "step": 5846 + }, + { + "epoch": 0.30049337033610857, + "grad_norm": 0.7710789442062378, + "learning_rate": 8.201027317421064e-06, + "loss": 0.7183, + "step": 5847 + }, + { + "epoch": 0.30054476307945316, + "grad_norm": 1.2258596420288086, + "learning_rate": 8.200387932404901e-06, + "loss": 0.7818, + "step": 5848 + }, + { + "epoch": 0.3005961558227978, + "grad_norm": 1.0297565460205078, + "learning_rate": 8.199748458719197e-06, + "loss": 0.784, + "step": 5849 + }, + { + "epoch": 0.30064754856614245, + "grad_norm": 1.1144261360168457, + "learning_rate": 8.199108896381667e-06, + "loss": 0.7241, + "step": 5850 + }, + { + "epoch": 0.3006989413094871, + "grad_norm": 1.1186150312423706, + "learning_rate": 8.19846924541003e-06, + "loss": 0.796, + "step": 5851 + }, + { + "epoch": 0.30075033405283175, + "grad_norm": 1.088149905204773, + "learning_rate": 8.197829505822012e-06, + "loss": 0.7912, + "step": 5852 + }, + { + "epoch": 0.3008017267961764, + "grad_norm": 1.044201374053955, + "learning_rate": 8.197189677635333e-06, + "loss": 0.7433, + "step": 5853 + }, + { + "epoch": 0.30085311953952104, + "grad_norm": 0.8247054219245911, + "learning_rate": 8.196549760867722e-06, + "loss": 0.6375, + "step": 5854 + }, + { + "epoch": 0.30090451228286563, + "grad_norm": 1.0789122581481934, + "learning_rate": 8.195909755536911e-06, + "loss": 0.7611, + "step": 5855 + }, + { + "epoch": 0.3009559050262103, + "grad_norm": 1.0973631143569946, + "learning_rate": 8.195269661660625e-06, + "loss": 0.714, + "step": 5856 + }, + { + "epoch": 0.3010072977695549, + "grad_norm": 1.0778968334197998, + "learning_rate": 8.194629479256605e-06, + "loss": 0.7458, + "step": 5857 + }, + { + "epoch": 0.3010586905128996, + "grad_norm": 0.7376991510391235, + "learning_rate": 8.193989208342588e-06, + "loss": 0.6779, + "step": 5858 + }, + { + "epoch": 0.3011100832562442, + "grad_norm": 1.1209033727645874, + "learning_rate": 8.19334884893631e-06, + "loss": 0.8002, + "step": 5859 + }, + { + "epoch": 0.30116147599958887, + "grad_norm": 1.1218087673187256, + "learning_rate": 8.192708401055512e-06, + "loss": 0.7158, + "step": 5860 + }, + { + "epoch": 0.3012128687429335, + "grad_norm": 0.9160116910934448, + "learning_rate": 8.192067864717939e-06, + "loss": 0.7167, + "step": 5861 + }, + { + "epoch": 0.30126426148627816, + "grad_norm": 1.0480130910873413, + "learning_rate": 8.19142723994134e-06, + "loss": 0.7643, + "step": 5862 + }, + { + "epoch": 0.30131565422962275, + "grad_norm": 1.0397722721099854, + "learning_rate": 8.190786526743462e-06, + "loss": 0.7577, + "step": 5863 + }, + { + "epoch": 0.3013670469729674, + "grad_norm": 1.0381280183792114, + "learning_rate": 8.190145725142056e-06, + "loss": 0.7628, + "step": 5864 + }, + { + "epoch": 0.30141843971631205, + "grad_norm": 0.7517771124839783, + "learning_rate": 8.189504835154878e-06, + "loss": 0.6787, + "step": 5865 + }, + { + "epoch": 0.3014698324596567, + "grad_norm": 0.7399656772613525, + "learning_rate": 8.188863856799685e-06, + "loss": 0.6964, + "step": 5866 + }, + { + "epoch": 0.30152122520300134, + "grad_norm": 0.8144989609718323, + "learning_rate": 8.188222790094231e-06, + "loss": 0.7233, + "step": 5867 + }, + { + "epoch": 0.301572617946346, + "grad_norm": 1.0153788328170776, + "learning_rate": 8.187581635056282e-06, + "loss": 0.7209, + "step": 5868 + }, + { + "epoch": 0.30162401068969064, + "grad_norm": 1.0634764432907104, + "learning_rate": 8.186940391703599e-06, + "loss": 0.7435, + "step": 5869 + }, + { + "epoch": 0.3016754034330353, + "grad_norm": 1.3272348642349243, + "learning_rate": 8.186299060053952e-06, + "loss": 0.7764, + "step": 5870 + }, + { + "epoch": 0.3017267961763799, + "grad_norm": 1.098400592803955, + "learning_rate": 8.185657640125105e-06, + "loss": 0.8083, + "step": 5871 + }, + { + "epoch": 0.3017781889197245, + "grad_norm": 1.0837249755859375, + "learning_rate": 8.185016131934832e-06, + "loss": 0.8055, + "step": 5872 + }, + { + "epoch": 0.30182958166306917, + "grad_norm": 1.107259750366211, + "learning_rate": 8.184374535500907e-06, + "loss": 0.7647, + "step": 5873 + }, + { + "epoch": 0.3018809744064138, + "grad_norm": 0.7389606833457947, + "learning_rate": 8.183732850841102e-06, + "loss": 0.6647, + "step": 5874 + }, + { + "epoch": 0.30193236714975846, + "grad_norm": 1.1562050580978394, + "learning_rate": 8.1830910779732e-06, + "loss": 0.7618, + "step": 5875 + }, + { + "epoch": 0.3019837598931031, + "grad_norm": 0.8052880167961121, + "learning_rate": 8.18244921691498e-06, + "loss": 0.6818, + "step": 5876 + }, + { + "epoch": 0.30203515263644776, + "grad_norm": 1.1951496601104736, + "learning_rate": 8.181807267684225e-06, + "loss": 0.7318, + "step": 5877 + }, + { + "epoch": 0.30208654537979235, + "grad_norm": 1.139418601989746, + "learning_rate": 8.18116523029872e-06, + "loss": 0.7292, + "step": 5878 + }, + { + "epoch": 0.302137938123137, + "grad_norm": 1.0954374074935913, + "learning_rate": 8.180523104776256e-06, + "loss": 0.7899, + "step": 5879 + }, + { + "epoch": 0.30218933086648164, + "grad_norm": 1.0914121866226196, + "learning_rate": 8.179880891134622e-06, + "loss": 0.8042, + "step": 5880 + }, + { + "epoch": 0.3022407236098263, + "grad_norm": 1.0850141048431396, + "learning_rate": 8.179238589391612e-06, + "loss": 0.7275, + "step": 5881 + }, + { + "epoch": 0.30229211635317094, + "grad_norm": 0.7990648746490479, + "learning_rate": 8.178596199565019e-06, + "loss": 0.6706, + "step": 5882 + }, + { + "epoch": 0.3023435090965156, + "grad_norm": 1.0986249446868896, + "learning_rate": 8.177953721672642e-06, + "loss": 0.7477, + "step": 5883 + }, + { + "epoch": 0.30239490183986023, + "grad_norm": 1.0733144283294678, + "learning_rate": 8.177311155732283e-06, + "loss": 0.7647, + "step": 5884 + }, + { + "epoch": 0.3024462945832049, + "grad_norm": 1.0199863910675049, + "learning_rate": 8.176668501761743e-06, + "loss": 0.7784, + "step": 5885 + }, + { + "epoch": 0.30249768732654947, + "grad_norm": 1.0245037078857422, + "learning_rate": 8.176025759778829e-06, + "loss": 0.7605, + "step": 5886 + }, + { + "epoch": 0.3025490800698941, + "grad_norm": 1.0109847784042358, + "learning_rate": 8.17538292980135e-06, + "loss": 0.715, + "step": 5887 + }, + { + "epoch": 0.30260047281323876, + "grad_norm": 0.8150759935379028, + "learning_rate": 8.17474001184711e-06, + "loss": 0.6899, + "step": 5888 + }, + { + "epoch": 0.3026518655565834, + "grad_norm": 0.7515466809272766, + "learning_rate": 8.17409700593393e-06, + "loss": 0.6788, + "step": 5889 + }, + { + "epoch": 0.30270325829992806, + "grad_norm": 1.0849751234054565, + "learning_rate": 8.173453912079618e-06, + "loss": 0.749, + "step": 5890 + }, + { + "epoch": 0.3027546510432727, + "grad_norm": 1.090773582458496, + "learning_rate": 8.172810730301993e-06, + "loss": 0.7742, + "step": 5891 + }, + { + "epoch": 0.30280604378661735, + "grad_norm": 1.112127661705017, + "learning_rate": 8.172167460618879e-06, + "loss": 0.6885, + "step": 5892 + }, + { + "epoch": 0.30285743652996194, + "grad_norm": 0.7266148924827576, + "learning_rate": 8.171524103048092e-06, + "loss": 0.7432, + "step": 5893 + }, + { + "epoch": 0.3029088292733066, + "grad_norm": 1.216835379600525, + "learning_rate": 8.170880657607463e-06, + "loss": 0.7806, + "step": 5894 + }, + { + "epoch": 0.30296022201665124, + "grad_norm": 1.0476655960083008, + "learning_rate": 8.170237124314816e-06, + "loss": 0.7107, + "step": 5895 + }, + { + "epoch": 0.3030116147599959, + "grad_norm": 1.1506412029266357, + "learning_rate": 8.16959350318798e-06, + "loss": 0.7893, + "step": 5896 + }, + { + "epoch": 0.30306300750334053, + "grad_norm": 1.1130180358886719, + "learning_rate": 8.16894979424479e-06, + "loss": 0.7546, + "step": 5897 + }, + { + "epoch": 0.3031144002466852, + "grad_norm": 1.1210126876831055, + "learning_rate": 8.168305997503076e-06, + "loss": 0.7673, + "step": 5898 + }, + { + "epoch": 0.3031657929900298, + "grad_norm": 1.1421161890029907, + "learning_rate": 8.16766211298068e-06, + "loss": 0.7993, + "step": 5899 + }, + { + "epoch": 0.3032171857333745, + "grad_norm": 1.119368314743042, + "learning_rate": 8.167018140695438e-06, + "loss": 0.7861, + "step": 5900 + }, + { + "epoch": 0.30326857847671906, + "grad_norm": 1.0474625825881958, + "learning_rate": 8.166374080665193e-06, + "loss": 0.7166, + "step": 5901 + }, + { + "epoch": 0.3033199712200637, + "grad_norm": 1.1310187578201294, + "learning_rate": 8.165729932907788e-06, + "loss": 0.7694, + "step": 5902 + }, + { + "epoch": 0.30337136396340836, + "grad_norm": 1.2724446058273315, + "learning_rate": 8.165085697441074e-06, + "loss": 0.7849, + "step": 5903 + }, + { + "epoch": 0.303422756706753, + "grad_norm": 1.1535512208938599, + "learning_rate": 8.164441374282895e-06, + "loss": 0.7356, + "step": 5904 + }, + { + "epoch": 0.30347414945009765, + "grad_norm": 1.0342637300491333, + "learning_rate": 8.163796963451102e-06, + "loss": 0.7604, + "step": 5905 + }, + { + "epoch": 0.3035255421934423, + "grad_norm": 0.7419325709342957, + "learning_rate": 8.163152464963555e-06, + "loss": 0.679, + "step": 5906 + }, + { + "epoch": 0.30357693493678695, + "grad_norm": 0.7999743819236755, + "learning_rate": 8.162507878838104e-06, + "loss": 0.7291, + "step": 5907 + }, + { + "epoch": 0.30362832768013154, + "grad_norm": 1.0854853391647339, + "learning_rate": 8.161863205092612e-06, + "loss": 0.7354, + "step": 5908 + }, + { + "epoch": 0.3036797204234762, + "grad_norm": 0.726539134979248, + "learning_rate": 8.161218443744938e-06, + "loss": 0.6625, + "step": 5909 + }, + { + "epoch": 0.30373111316682083, + "grad_norm": 1.0884379148483276, + "learning_rate": 8.160573594812948e-06, + "loss": 0.7671, + "step": 5910 + }, + { + "epoch": 0.3037825059101655, + "grad_norm": 1.0739367008209229, + "learning_rate": 8.159928658314506e-06, + "loss": 0.7471, + "step": 5911 + }, + { + "epoch": 0.3038338986535101, + "grad_norm": 1.1041159629821777, + "learning_rate": 8.159283634267482e-06, + "loss": 0.8212, + "step": 5912 + }, + { + "epoch": 0.3038852913968548, + "grad_norm": 0.7195490598678589, + "learning_rate": 8.158638522689743e-06, + "loss": 0.6969, + "step": 5913 + }, + { + "epoch": 0.3039366841401994, + "grad_norm": 1.0414634943008423, + "learning_rate": 8.15799332359917e-06, + "loss": 0.799, + "step": 5914 + }, + { + "epoch": 0.30398807688354407, + "grad_norm": 1.1665323972702026, + "learning_rate": 8.15734803701363e-06, + "loss": 0.7462, + "step": 5915 + }, + { + "epoch": 0.30403946962688866, + "grad_norm": 1.1425188779830933, + "learning_rate": 8.156702662951007e-06, + "loss": 0.7346, + "step": 5916 + }, + { + "epoch": 0.3040908623702333, + "grad_norm": 1.0286660194396973, + "learning_rate": 8.156057201429179e-06, + "loss": 0.779, + "step": 5917 + }, + { + "epoch": 0.30414225511357795, + "grad_norm": 1.0954350233078003, + "learning_rate": 8.155411652466032e-06, + "loss": 0.7662, + "step": 5918 + }, + { + "epoch": 0.3041936478569226, + "grad_norm": 1.0761384963989258, + "learning_rate": 8.15476601607945e-06, + "loss": 0.7564, + "step": 5919 + }, + { + "epoch": 0.30424504060026725, + "grad_norm": 1.0697237253189087, + "learning_rate": 8.15412029228732e-06, + "loss": 0.7193, + "step": 5920 + }, + { + "epoch": 0.3042964333436119, + "grad_norm": 1.0740307569503784, + "learning_rate": 8.153474481107533e-06, + "loss": 0.7818, + "step": 5921 + }, + { + "epoch": 0.30434782608695654, + "grad_norm": 1.149929165840149, + "learning_rate": 8.152828582557983e-06, + "loss": 0.7952, + "step": 5922 + }, + { + "epoch": 0.3043992188303012, + "grad_norm": 1.093737006187439, + "learning_rate": 8.152182596656561e-06, + "loss": 0.8202, + "step": 5923 + }, + { + "epoch": 0.3044506115736458, + "grad_norm": 1.11074697971344, + "learning_rate": 8.15153652342117e-06, + "loss": 0.7485, + "step": 5924 + }, + { + "epoch": 0.3045020043169904, + "grad_norm": 0.7818731665611267, + "learning_rate": 8.150890362869705e-06, + "loss": 0.6728, + "step": 5925 + }, + { + "epoch": 0.3045533970603351, + "grad_norm": 1.2714394330978394, + "learning_rate": 8.150244115020073e-06, + "loss": 0.8073, + "step": 5926 + }, + { + "epoch": 0.3046047898036797, + "grad_norm": 1.0588757991790771, + "learning_rate": 8.149597779890178e-06, + "loss": 0.7474, + "step": 5927 + }, + { + "epoch": 0.30465618254702437, + "grad_norm": 1.0225762128829956, + "learning_rate": 8.148951357497925e-06, + "loss": 0.7189, + "step": 5928 + }, + { + "epoch": 0.304707575290369, + "grad_norm": 0.8176491260528564, + "learning_rate": 8.148304847861224e-06, + "loss": 0.7421, + "step": 5929 + }, + { + "epoch": 0.30475896803371366, + "grad_norm": 0.7753500938415527, + "learning_rate": 8.14765825099799e-06, + "loss": 0.6843, + "step": 5930 + }, + { + "epoch": 0.30481036077705825, + "grad_norm": 1.0372161865234375, + "learning_rate": 8.147011566926133e-06, + "loss": 0.6853, + "step": 5931 + }, + { + "epoch": 0.3048617535204029, + "grad_norm": 1.0844743251800537, + "learning_rate": 8.146364795663572e-06, + "loss": 0.7505, + "step": 5932 + }, + { + "epoch": 0.30491314626374755, + "grad_norm": 0.845554530620575, + "learning_rate": 8.145717937228228e-06, + "loss": 0.6814, + "step": 5933 + }, + { + "epoch": 0.3049645390070922, + "grad_norm": 1.1065099239349365, + "learning_rate": 8.145070991638023e-06, + "loss": 0.7903, + "step": 5934 + }, + { + "epoch": 0.30501593175043684, + "grad_norm": 1.0979385375976562, + "learning_rate": 8.144423958910879e-06, + "loss": 0.7481, + "step": 5935 + }, + { + "epoch": 0.3050673244937815, + "grad_norm": 1.0313923358917236, + "learning_rate": 8.143776839064723e-06, + "loss": 0.7358, + "step": 5936 + }, + { + "epoch": 0.30511871723712614, + "grad_norm": 1.1388959884643555, + "learning_rate": 8.143129632117484e-06, + "loss": 0.8216, + "step": 5937 + }, + { + "epoch": 0.3051701099804708, + "grad_norm": 1.0782533884048462, + "learning_rate": 8.142482338087095e-06, + "loss": 0.7833, + "step": 5938 + }, + { + "epoch": 0.3052215027238154, + "grad_norm": 0.9653657078742981, + "learning_rate": 8.141834956991485e-06, + "loss": 0.6611, + "step": 5939 + }, + { + "epoch": 0.30527289546716, + "grad_norm": 1.1193424463272095, + "learning_rate": 8.141187488848598e-06, + "loss": 0.8046, + "step": 5940 + }, + { + "epoch": 0.30532428821050467, + "grad_norm": 1.147467851638794, + "learning_rate": 8.140539933676369e-06, + "loss": 0.7573, + "step": 5941 + }, + { + "epoch": 0.3053756809538493, + "grad_norm": 0.7805129885673523, + "learning_rate": 8.139892291492734e-06, + "loss": 0.716, + "step": 5942 + }, + { + "epoch": 0.30542707369719396, + "grad_norm": 1.0494109392166138, + "learning_rate": 8.139244562315646e-06, + "loss": 0.7045, + "step": 5943 + }, + { + "epoch": 0.3054784664405386, + "grad_norm": 1.2248508930206299, + "learning_rate": 8.138596746163044e-06, + "loss": 0.7926, + "step": 5944 + }, + { + "epoch": 0.30552985918388326, + "grad_norm": 0.7430875897407532, + "learning_rate": 8.137948843052878e-06, + "loss": 0.6913, + "step": 5945 + }, + { + "epoch": 0.30558125192722785, + "grad_norm": 1.1389179229736328, + "learning_rate": 8.1373008530031e-06, + "loss": 0.7451, + "step": 5946 + }, + { + "epoch": 0.3056326446705725, + "grad_norm": 1.1152384281158447, + "learning_rate": 8.136652776031662e-06, + "loss": 0.7845, + "step": 5947 + }, + { + "epoch": 0.30568403741391714, + "grad_norm": 1.0958131551742554, + "learning_rate": 8.13600461215652e-06, + "loss": 0.7963, + "step": 5948 + }, + { + "epoch": 0.3057354301572618, + "grad_norm": 1.1137341260910034, + "learning_rate": 8.135356361395633e-06, + "loss": 0.7385, + "step": 5949 + }, + { + "epoch": 0.30578682290060644, + "grad_norm": 1.1153043508529663, + "learning_rate": 8.134708023766957e-06, + "loss": 0.7761, + "step": 5950 + }, + { + "epoch": 0.3058382156439511, + "grad_norm": 0.7776283025741577, + "learning_rate": 8.13405959928846e-06, + "loss": 0.6913, + "step": 5951 + }, + { + "epoch": 0.30588960838729573, + "grad_norm": 1.1174592971801758, + "learning_rate": 8.133411087978105e-06, + "loss": 0.7452, + "step": 5952 + }, + { + "epoch": 0.3059410011306404, + "grad_norm": 0.8260684013366699, + "learning_rate": 8.132762489853858e-06, + "loss": 0.654, + "step": 5953 + }, + { + "epoch": 0.30599239387398497, + "grad_norm": 0.9808185696601868, + "learning_rate": 8.13211380493369e-06, + "loss": 0.746, + "step": 5954 + }, + { + "epoch": 0.3060437866173296, + "grad_norm": 1.0762827396392822, + "learning_rate": 8.131465033235576e-06, + "loss": 0.7517, + "step": 5955 + }, + { + "epoch": 0.30609517936067426, + "grad_norm": 1.0400757789611816, + "learning_rate": 8.130816174777489e-06, + "loss": 0.7216, + "step": 5956 + }, + { + "epoch": 0.3061465721040189, + "grad_norm": 1.1839991807937622, + "learning_rate": 8.130167229577406e-06, + "loss": 0.8049, + "step": 5957 + }, + { + "epoch": 0.30619796484736356, + "grad_norm": 1.0235670804977417, + "learning_rate": 8.129518197653305e-06, + "loss": 0.7577, + "step": 5958 + }, + { + "epoch": 0.3062493575907082, + "grad_norm": 1.0928524732589722, + "learning_rate": 8.128869079023171e-06, + "loss": 0.7483, + "step": 5959 + }, + { + "epoch": 0.30630075033405285, + "grad_norm": 1.0802463293075562, + "learning_rate": 8.128219873704986e-06, + "loss": 0.7401, + "step": 5960 + }, + { + "epoch": 0.3063521430773975, + "grad_norm": 1.1580710411071777, + "learning_rate": 8.127570581716737e-06, + "loss": 0.8207, + "step": 5961 + }, + { + "epoch": 0.3064035358207421, + "grad_norm": 0.7668749690055847, + "learning_rate": 8.126921203076414e-06, + "loss": 0.6601, + "step": 5962 + }, + { + "epoch": 0.30645492856408674, + "grad_norm": 1.0720173120498657, + "learning_rate": 8.126271737802009e-06, + "loss": 0.7512, + "step": 5963 + }, + { + "epoch": 0.3065063213074314, + "grad_norm": 1.0890666246414185, + "learning_rate": 8.125622185911517e-06, + "loss": 0.7771, + "step": 5964 + }, + { + "epoch": 0.30655771405077603, + "grad_norm": 1.1244418621063232, + "learning_rate": 8.124972547422931e-06, + "loss": 0.796, + "step": 5965 + }, + { + "epoch": 0.3066091067941207, + "grad_norm": 1.1394412517547607, + "learning_rate": 8.124322822354252e-06, + "loss": 0.7174, + "step": 5966 + }, + { + "epoch": 0.3066604995374653, + "grad_norm": 1.168843150138855, + "learning_rate": 8.123673010723482e-06, + "loss": 0.7377, + "step": 5967 + }, + { + "epoch": 0.30671189228081, + "grad_norm": 1.0686968564987183, + "learning_rate": 8.123023112548623e-06, + "loss": 0.7667, + "step": 5968 + }, + { + "epoch": 0.30676328502415456, + "grad_norm": 0.8895867466926575, + "learning_rate": 8.12237312784768e-06, + "loss": 0.7133, + "step": 5969 + }, + { + "epoch": 0.3068146777674992, + "grad_norm": 8.128183364868164, + "learning_rate": 8.121723056638664e-06, + "loss": 0.7229, + "step": 5970 + }, + { + "epoch": 0.30686607051084386, + "grad_norm": 1.076133131980896, + "learning_rate": 8.121072898939586e-06, + "loss": 0.7478, + "step": 5971 + }, + { + "epoch": 0.3069174632541885, + "grad_norm": 0.7858735918998718, + "learning_rate": 8.120422654768457e-06, + "loss": 0.6872, + "step": 5972 + }, + { + "epoch": 0.30696885599753315, + "grad_norm": 1.1371923685073853, + "learning_rate": 8.119772324143293e-06, + "loss": 0.7145, + "step": 5973 + }, + { + "epoch": 0.3070202487408778, + "grad_norm": 1.0432298183441162, + "learning_rate": 8.119121907082112e-06, + "loss": 0.7632, + "step": 5974 + }, + { + "epoch": 0.30707164148422245, + "grad_norm": 0.7751417756080627, + "learning_rate": 8.118471403602935e-06, + "loss": 0.6984, + "step": 5975 + }, + { + "epoch": 0.3071230342275671, + "grad_norm": 1.1347055435180664, + "learning_rate": 8.117820813723785e-06, + "loss": 0.7753, + "step": 5976 + }, + { + "epoch": 0.3071744269709117, + "grad_norm": 1.1113218069076538, + "learning_rate": 8.117170137462687e-06, + "loss": 0.8097, + "step": 5977 + }, + { + "epoch": 0.30722581971425633, + "grad_norm": 1.1859724521636963, + "learning_rate": 8.116519374837668e-06, + "loss": 0.7525, + "step": 5978 + }, + { + "epoch": 0.307277212457601, + "grad_norm": 1.1094385385513306, + "learning_rate": 8.115868525866758e-06, + "loss": 0.6916, + "step": 5979 + }, + { + "epoch": 0.3073286052009456, + "grad_norm": 1.0491571426391602, + "learning_rate": 8.11521759056799e-06, + "loss": 0.7682, + "step": 5980 + }, + { + "epoch": 0.3073799979442903, + "grad_norm": 1.083126187324524, + "learning_rate": 8.114566568959397e-06, + "loss": 0.7599, + "step": 5981 + }, + { + "epoch": 0.3074313906876349, + "grad_norm": 1.207197904586792, + "learning_rate": 8.11391546105902e-06, + "loss": 0.7854, + "step": 5982 + }, + { + "epoch": 0.30748278343097957, + "grad_norm": 1.1165351867675781, + "learning_rate": 8.113264266884895e-06, + "loss": 0.7414, + "step": 5983 + }, + { + "epoch": 0.30753417617432416, + "grad_norm": 1.1018052101135254, + "learning_rate": 8.112612986455064e-06, + "loss": 0.7336, + "step": 5984 + }, + { + "epoch": 0.3075855689176688, + "grad_norm": 1.0903675556182861, + "learning_rate": 8.111961619787571e-06, + "loss": 0.807, + "step": 5985 + }, + { + "epoch": 0.30763696166101345, + "grad_norm": 2.0057973861694336, + "learning_rate": 8.111310166900465e-06, + "loss": 0.793, + "step": 5986 + }, + { + "epoch": 0.3076883544043581, + "grad_norm": 1.1542638540267944, + "learning_rate": 8.110658627811796e-06, + "loss": 0.7872, + "step": 5987 + }, + { + "epoch": 0.30773974714770275, + "grad_norm": 1.0234479904174805, + "learning_rate": 8.110007002539611e-06, + "loss": 0.7329, + "step": 5988 + }, + { + "epoch": 0.3077911398910474, + "grad_norm": 1.092002034187317, + "learning_rate": 8.109355291101965e-06, + "loss": 0.763, + "step": 5989 + }, + { + "epoch": 0.30784253263439204, + "grad_norm": 1.1079283952713013, + "learning_rate": 8.108703493516919e-06, + "loss": 0.7866, + "step": 5990 + }, + { + "epoch": 0.3078939253777367, + "grad_norm": 1.048746109008789, + "learning_rate": 8.108051609802526e-06, + "loss": 0.791, + "step": 5991 + }, + { + "epoch": 0.3079453181210813, + "grad_norm": 1.0536657571792603, + "learning_rate": 8.10739963997685e-06, + "loss": 0.8016, + "step": 5992 + }, + { + "epoch": 0.3079967108644259, + "grad_norm": 1.053540825843811, + "learning_rate": 8.106747584057952e-06, + "loss": 0.7657, + "step": 5993 + }, + { + "epoch": 0.3080481036077706, + "grad_norm": 1.0464996099472046, + "learning_rate": 8.1060954420639e-06, + "loss": 0.733, + "step": 5994 + }, + { + "epoch": 0.3080994963511152, + "grad_norm": 1.1036494970321655, + "learning_rate": 8.105443214012762e-06, + "loss": 0.7752, + "step": 5995 + }, + { + "epoch": 0.30815088909445987, + "grad_norm": 1.1091340780258179, + "learning_rate": 8.104790899922608e-06, + "loss": 0.805, + "step": 5996 + }, + { + "epoch": 0.3082022818378045, + "grad_norm": 0.9466354250907898, + "learning_rate": 8.10413849981151e-06, + "loss": 0.7175, + "step": 5997 + }, + { + "epoch": 0.30825367458114916, + "grad_norm": 1.1139405965805054, + "learning_rate": 8.103486013697544e-06, + "loss": 0.7918, + "step": 5998 + }, + { + "epoch": 0.3083050673244938, + "grad_norm": 1.1200132369995117, + "learning_rate": 8.102833441598787e-06, + "loss": 0.8174, + "step": 5999 + }, + { + "epoch": 0.3083564600678384, + "grad_norm": 1.2123644351959229, + "learning_rate": 8.102180783533322e-06, + "loss": 0.792, + "step": 6000 + }, + { + "epoch": 0.30840785281118305, + "grad_norm": 1.0958315134048462, + "learning_rate": 8.101528039519229e-06, + "loss": 0.7897, + "step": 6001 + }, + { + "epoch": 0.3084592455545277, + "grad_norm": 1.1540733575820923, + "learning_rate": 8.100875209574592e-06, + "loss": 0.8121, + "step": 6002 + }, + { + "epoch": 0.30851063829787234, + "grad_norm": 0.8598061203956604, + "learning_rate": 8.100222293717502e-06, + "loss": 0.7041, + "step": 6003 + }, + { + "epoch": 0.308562031041217, + "grad_norm": 1.0622655153274536, + "learning_rate": 8.099569291966044e-06, + "loss": 0.8124, + "step": 6004 + }, + { + "epoch": 0.30861342378456164, + "grad_norm": 1.0091347694396973, + "learning_rate": 8.098916204338311e-06, + "loss": 0.7461, + "step": 6005 + }, + { + "epoch": 0.3086648165279063, + "grad_norm": 1.0971156358718872, + "learning_rate": 8.098263030852403e-06, + "loss": 0.7217, + "step": 6006 + }, + { + "epoch": 0.3087162092712509, + "grad_norm": 1.2319941520690918, + "learning_rate": 8.097609771526409e-06, + "loss": 0.7748, + "step": 6007 + }, + { + "epoch": 0.3087676020145955, + "grad_norm": 1.0875362157821655, + "learning_rate": 8.09695642637843e-06, + "loss": 0.7745, + "step": 6008 + }, + { + "epoch": 0.30881899475794017, + "grad_norm": 1.0700194835662842, + "learning_rate": 8.09630299542657e-06, + "loss": 0.7432, + "step": 6009 + }, + { + "epoch": 0.3088703875012848, + "grad_norm": 1.0887404680252075, + "learning_rate": 8.095649478688934e-06, + "loss": 0.7575, + "step": 6010 + }, + { + "epoch": 0.30892178024462946, + "grad_norm": 1.0383274555206299, + "learning_rate": 8.094995876183623e-06, + "loss": 0.7441, + "step": 6011 + }, + { + "epoch": 0.3089731729879741, + "grad_norm": 1.1790608167648315, + "learning_rate": 8.09434218792875e-06, + "loss": 0.8092, + "step": 6012 + }, + { + "epoch": 0.30902456573131876, + "grad_norm": 0.8077702522277832, + "learning_rate": 8.093688413942424e-06, + "loss": 0.6484, + "step": 6013 + }, + { + "epoch": 0.3090759584746634, + "grad_norm": 1.0213290452957153, + "learning_rate": 8.09303455424276e-06, + "loss": 0.8183, + "step": 6014 + }, + { + "epoch": 0.309127351218008, + "grad_norm": 0.6674919128417969, + "learning_rate": 8.092380608847871e-06, + "loss": 0.7314, + "step": 6015 + }, + { + "epoch": 0.30917874396135264, + "grad_norm": 1.076224684715271, + "learning_rate": 8.091726577775878e-06, + "loss": 0.7477, + "step": 6016 + }, + { + "epoch": 0.3092301367046973, + "grad_norm": 0.8392216563224792, + "learning_rate": 8.0910724610449e-06, + "loss": 0.7284, + "step": 6017 + }, + { + "epoch": 0.30928152944804194, + "grad_norm": 1.079715609550476, + "learning_rate": 8.090418258673061e-06, + "loss": 0.836, + "step": 6018 + }, + { + "epoch": 0.3093329221913866, + "grad_norm": 1.1012918949127197, + "learning_rate": 8.089763970678484e-06, + "loss": 0.7506, + "step": 6019 + }, + { + "epoch": 0.30938431493473123, + "grad_norm": 1.0374441146850586, + "learning_rate": 8.0891095970793e-06, + "loss": 0.7295, + "step": 6020 + }, + { + "epoch": 0.3094357076780759, + "grad_norm": 1.0798447132110596, + "learning_rate": 8.088455137893634e-06, + "loss": 0.7435, + "step": 6021 + }, + { + "epoch": 0.30948710042142047, + "grad_norm": 0.7154061794281006, + "learning_rate": 8.087800593139623e-06, + "loss": 0.7008, + "step": 6022 + }, + { + "epoch": 0.3095384931647651, + "grad_norm": 1.0409945249557495, + "learning_rate": 8.087145962835401e-06, + "loss": 0.7346, + "step": 6023 + }, + { + "epoch": 0.30958988590810976, + "grad_norm": 1.182163953781128, + "learning_rate": 8.086491246999104e-06, + "loss": 0.7545, + "step": 6024 + }, + { + "epoch": 0.3096412786514544, + "grad_norm": 1.0903769731521606, + "learning_rate": 8.085836445648872e-06, + "loss": 0.7564, + "step": 6025 + }, + { + "epoch": 0.30969267139479906, + "grad_norm": 1.1197534799575806, + "learning_rate": 8.085181558802844e-06, + "loss": 0.7752, + "step": 6026 + }, + { + "epoch": 0.3097440641381437, + "grad_norm": 1.044586420059204, + "learning_rate": 8.08452658647917e-06, + "loss": 0.7841, + "step": 6027 + }, + { + "epoch": 0.30979545688148835, + "grad_norm": 0.7822510004043579, + "learning_rate": 8.083871528695992e-06, + "loss": 0.6583, + "step": 6028 + }, + { + "epoch": 0.309846849624833, + "grad_norm": 1.0788698196411133, + "learning_rate": 8.083216385471462e-06, + "loss": 0.7984, + "step": 6029 + }, + { + "epoch": 0.3098982423681776, + "grad_norm": 1.1136059761047363, + "learning_rate": 8.082561156823726e-06, + "loss": 0.7849, + "step": 6030 + }, + { + "epoch": 0.30994963511152224, + "grad_norm": 1.1260932683944702, + "learning_rate": 8.081905842770945e-06, + "loss": 0.758, + "step": 6031 + }, + { + "epoch": 0.3100010278548669, + "grad_norm": 1.029131293296814, + "learning_rate": 8.08125044333127e-06, + "loss": 0.724, + "step": 6032 + }, + { + "epoch": 0.31005242059821153, + "grad_norm": 1.1441344022750854, + "learning_rate": 8.08059495852286e-06, + "loss": 0.76, + "step": 6033 + }, + { + "epoch": 0.3101038133415562, + "grad_norm": 1.0803885459899902, + "learning_rate": 8.079939388363878e-06, + "loss": 0.7489, + "step": 6034 + }, + { + "epoch": 0.3101552060849008, + "grad_norm": 1.1191673278808594, + "learning_rate": 8.079283732872483e-06, + "loss": 0.7948, + "step": 6035 + }, + { + "epoch": 0.3102065988282455, + "grad_norm": 1.0153367519378662, + "learning_rate": 8.078627992066846e-06, + "loss": 0.7649, + "step": 6036 + }, + { + "epoch": 0.31025799157159006, + "grad_norm": 1.0923380851745605, + "learning_rate": 8.077972165965131e-06, + "loss": 0.748, + "step": 6037 + }, + { + "epoch": 0.3103093843149347, + "grad_norm": 1.0747849941253662, + "learning_rate": 8.07731625458551e-06, + "loss": 0.7497, + "step": 6038 + }, + { + "epoch": 0.31036077705827936, + "grad_norm": 1.110371708869934, + "learning_rate": 8.076660257946155e-06, + "loss": 0.7853, + "step": 6039 + }, + { + "epoch": 0.310412169801624, + "grad_norm": 1.027322769165039, + "learning_rate": 8.07600417606524e-06, + "loss": 0.6806, + "step": 6040 + }, + { + "epoch": 0.31046356254496865, + "grad_norm": 1.0725265741348267, + "learning_rate": 8.075348008960945e-06, + "loss": 0.7653, + "step": 6041 + }, + { + "epoch": 0.3105149552883133, + "grad_norm": 1.0926538705825806, + "learning_rate": 8.074691756651445e-06, + "loss": 0.7472, + "step": 6042 + }, + { + "epoch": 0.31056634803165795, + "grad_norm": 1.1099355220794678, + "learning_rate": 8.074035419154927e-06, + "loss": 0.7637, + "step": 6043 + }, + { + "epoch": 0.3106177407750026, + "grad_norm": 1.0330449342727661, + "learning_rate": 8.07337899648957e-06, + "loss": 0.7512, + "step": 6044 + }, + { + "epoch": 0.3106691335183472, + "grad_norm": 1.14008367061615, + "learning_rate": 8.072722488673568e-06, + "loss": 0.7696, + "step": 6045 + }, + { + "epoch": 0.31072052626169183, + "grad_norm": 1.211603045463562, + "learning_rate": 8.072065895725105e-06, + "loss": 0.8355, + "step": 6046 + }, + { + "epoch": 0.3107719190050365, + "grad_norm": 1.139074683189392, + "learning_rate": 8.071409217662373e-06, + "loss": 0.75, + "step": 6047 + }, + { + "epoch": 0.3108233117483811, + "grad_norm": 1.1317499876022339, + "learning_rate": 8.070752454503567e-06, + "loss": 0.773, + "step": 6048 + }, + { + "epoch": 0.3108747044917258, + "grad_norm": 0.700289249420166, + "learning_rate": 8.070095606266885e-06, + "loss": 0.6899, + "step": 6049 + }, + { + "epoch": 0.3109260972350704, + "grad_norm": 1.1059043407440186, + "learning_rate": 8.06943867297052e-06, + "loss": 0.7965, + "step": 6050 + }, + { + "epoch": 0.31097748997841507, + "grad_norm": 1.0818907022476196, + "learning_rate": 8.068781654632679e-06, + "loss": 0.7901, + "step": 6051 + }, + { + "epoch": 0.3110288827217597, + "grad_norm": 1.1344012022018433, + "learning_rate": 8.06812455127156e-06, + "loss": 0.7626, + "step": 6052 + }, + { + "epoch": 0.3110802754651043, + "grad_norm": 0.7358098030090332, + "learning_rate": 8.067467362905373e-06, + "loss": 0.6474, + "step": 6053 + }, + { + "epoch": 0.31113166820844895, + "grad_norm": 1.1475454568862915, + "learning_rate": 8.066810089552322e-06, + "loss": 0.7716, + "step": 6054 + }, + { + "epoch": 0.3111830609517936, + "grad_norm": 0.840649425983429, + "learning_rate": 8.06615273123062e-06, + "loss": 0.7182, + "step": 6055 + }, + { + "epoch": 0.31123445369513825, + "grad_norm": 1.0517313480377197, + "learning_rate": 8.06549528795848e-06, + "loss": 0.7631, + "step": 6056 + }, + { + "epoch": 0.3112858464384829, + "grad_norm": 1.101258397102356, + "learning_rate": 8.064837759754116e-06, + "loss": 0.7621, + "step": 6057 + }, + { + "epoch": 0.31133723918182754, + "grad_norm": 1.0718002319335938, + "learning_rate": 8.064180146635745e-06, + "loss": 0.7571, + "step": 6058 + }, + { + "epoch": 0.3113886319251722, + "grad_norm": 1.0681408643722534, + "learning_rate": 8.063522448621588e-06, + "loss": 0.7742, + "step": 6059 + }, + { + "epoch": 0.3114400246685168, + "grad_norm": 1.1885234117507935, + "learning_rate": 8.062864665729865e-06, + "loss": 0.8138, + "step": 6060 + }, + { + "epoch": 0.3114914174118614, + "grad_norm": 1.0517560243606567, + "learning_rate": 8.062206797978803e-06, + "loss": 0.7319, + "step": 6061 + }, + { + "epoch": 0.3115428101552061, + "grad_norm": 1.0204495191574097, + "learning_rate": 8.061548845386627e-06, + "loss": 0.7673, + "step": 6062 + }, + { + "epoch": 0.3115942028985507, + "grad_norm": 1.0379875898361206, + "learning_rate": 8.060890807971566e-06, + "loss": 0.7486, + "step": 6063 + }, + { + "epoch": 0.31164559564189537, + "grad_norm": 1.0938879251480103, + "learning_rate": 8.060232685751854e-06, + "loss": 0.8022, + "step": 6064 + }, + { + "epoch": 0.31169698838524, + "grad_norm": 1.041452169418335, + "learning_rate": 8.059574478745723e-06, + "loss": 0.8404, + "step": 6065 + }, + { + "epoch": 0.31174838112858466, + "grad_norm": 1.1018478870391846, + "learning_rate": 8.058916186971407e-06, + "loss": 0.7744, + "step": 6066 + }, + { + "epoch": 0.3117997738719293, + "grad_norm": 1.0761829614639282, + "learning_rate": 8.05825781044715e-06, + "loss": 0.8018, + "step": 6067 + }, + { + "epoch": 0.3118511666152739, + "grad_norm": 1.131446361541748, + "learning_rate": 8.057599349191187e-06, + "loss": 0.7954, + "step": 6068 + }, + { + "epoch": 0.31190255935861855, + "grad_norm": 1.0285425186157227, + "learning_rate": 8.056940803221767e-06, + "loss": 0.7106, + "step": 6069 + }, + { + "epoch": 0.3119539521019632, + "grad_norm": 0.9851360321044922, + "learning_rate": 8.05628217255713e-06, + "loss": 0.7396, + "step": 6070 + }, + { + "epoch": 0.31200534484530784, + "grad_norm": 1.158008098602295, + "learning_rate": 8.055623457215526e-06, + "loss": 0.8469, + "step": 6071 + }, + { + "epoch": 0.3120567375886525, + "grad_norm": 1.0554207563400269, + "learning_rate": 8.054964657215208e-06, + "loss": 0.7781, + "step": 6072 + }, + { + "epoch": 0.31210813033199714, + "grad_norm": 1.1226186752319336, + "learning_rate": 8.054305772574427e-06, + "loss": 0.7817, + "step": 6073 + }, + { + "epoch": 0.3121595230753418, + "grad_norm": 0.7736796736717224, + "learning_rate": 8.053646803311435e-06, + "loss": 0.708, + "step": 6074 + }, + { + "epoch": 0.3122109158186864, + "grad_norm": 0.7992336750030518, + "learning_rate": 8.052987749444493e-06, + "loss": 0.6865, + "step": 6075 + }, + { + "epoch": 0.312262308562031, + "grad_norm": 1.025526523590088, + "learning_rate": 8.05232861099186e-06, + "loss": 0.7436, + "step": 6076 + }, + { + "epoch": 0.31231370130537567, + "grad_norm": 1.123650312423706, + "learning_rate": 8.051669387971796e-06, + "loss": 0.7597, + "step": 6077 + }, + { + "epoch": 0.3123650940487203, + "grad_norm": 0.835012674331665, + "learning_rate": 8.051010080402568e-06, + "loss": 0.6625, + "step": 6078 + }, + { + "epoch": 0.31241648679206496, + "grad_norm": 1.0923599004745483, + "learning_rate": 8.050350688302439e-06, + "loss": 0.7649, + "step": 6079 + }, + { + "epoch": 0.3124678795354096, + "grad_norm": 1.101789951324463, + "learning_rate": 8.049691211689684e-06, + "loss": 0.8024, + "step": 6080 + }, + { + "epoch": 0.31251927227875426, + "grad_norm": 1.0039594173431396, + "learning_rate": 8.049031650582568e-06, + "loss": 0.7408, + "step": 6081 + }, + { + "epoch": 0.3125706650220989, + "grad_norm": 1.1349210739135742, + "learning_rate": 8.048372004999369e-06, + "loss": 0.8075, + "step": 6082 + }, + { + "epoch": 0.3126220577654435, + "grad_norm": 1.2421382665634155, + "learning_rate": 8.047712274958362e-06, + "loss": 0.7652, + "step": 6083 + }, + { + "epoch": 0.31267345050878814, + "grad_norm": 1.1897873878479004, + "learning_rate": 8.047052460477825e-06, + "loss": 0.853, + "step": 6084 + }, + { + "epoch": 0.3127248432521328, + "grad_norm": 1.069273829460144, + "learning_rate": 8.046392561576037e-06, + "loss": 0.7878, + "step": 6085 + }, + { + "epoch": 0.31277623599547744, + "grad_norm": 1.1286972761154175, + "learning_rate": 8.045732578271284e-06, + "loss": 0.7312, + "step": 6086 + }, + { + "epoch": 0.3128276287388221, + "grad_norm": 1.0797669887542725, + "learning_rate": 8.04507251058185e-06, + "loss": 0.7904, + "step": 6087 + }, + { + "epoch": 0.31287902148216673, + "grad_norm": 0.9267917275428772, + "learning_rate": 8.044412358526024e-06, + "loss": 0.7048, + "step": 6088 + }, + { + "epoch": 0.3129304142255114, + "grad_norm": 1.1157392263412476, + "learning_rate": 8.043752122122094e-06, + "loss": 0.75, + "step": 6089 + }, + { + "epoch": 0.312981806968856, + "grad_norm": 1.1277902126312256, + "learning_rate": 8.043091801388354e-06, + "loss": 0.7483, + "step": 6090 + }, + { + "epoch": 0.3130331997122006, + "grad_norm": 1.1129240989685059, + "learning_rate": 8.042431396343099e-06, + "loss": 0.7391, + "step": 6091 + }, + { + "epoch": 0.31308459245554526, + "grad_norm": 1.1318491697311401, + "learning_rate": 8.041770907004624e-06, + "loss": 0.7111, + "step": 6092 + }, + { + "epoch": 0.3131359851988899, + "grad_norm": 1.0705219507217407, + "learning_rate": 8.041110333391229e-06, + "loss": 0.8057, + "step": 6093 + }, + { + "epoch": 0.31318737794223456, + "grad_norm": 0.8475239276885986, + "learning_rate": 8.04044967552122e-06, + "loss": 0.6906, + "step": 6094 + }, + { + "epoch": 0.3132387706855792, + "grad_norm": 0.8920000195503235, + "learning_rate": 8.039788933412894e-06, + "loss": 0.6755, + "step": 6095 + }, + { + "epoch": 0.31329016342892385, + "grad_norm": 0.8327194452285767, + "learning_rate": 8.039128107084562e-06, + "loss": 0.6801, + "step": 6096 + }, + { + "epoch": 0.3133415561722685, + "grad_norm": 1.0118904113769531, + "learning_rate": 8.038467196554533e-06, + "loss": 0.7339, + "step": 6097 + }, + { + "epoch": 0.3133929489156131, + "grad_norm": 1.0632312297821045, + "learning_rate": 8.037806201841117e-06, + "loss": 0.7574, + "step": 6098 + }, + { + "epoch": 0.31344434165895774, + "grad_norm": 1.0491844415664673, + "learning_rate": 8.037145122962627e-06, + "loss": 0.7582, + "step": 6099 + }, + { + "epoch": 0.3134957344023024, + "grad_norm": 1.0903195142745972, + "learning_rate": 8.036483959937381e-06, + "loss": 0.7408, + "step": 6100 + }, + { + "epoch": 0.31354712714564703, + "grad_norm": 0.8597413897514343, + "learning_rate": 8.035822712783692e-06, + "loss": 0.6869, + "step": 6101 + }, + { + "epoch": 0.3135985198889917, + "grad_norm": 1.0731096267700195, + "learning_rate": 8.035161381519884e-06, + "loss": 0.7635, + "step": 6102 + }, + { + "epoch": 0.3136499126323363, + "grad_norm": 1.0981554985046387, + "learning_rate": 8.034499966164282e-06, + "loss": 0.7607, + "step": 6103 + }, + { + "epoch": 0.31370130537568097, + "grad_norm": 1.0279661417007446, + "learning_rate": 8.033838466735208e-06, + "loss": 0.7496, + "step": 6104 + }, + { + "epoch": 0.3137526981190256, + "grad_norm": 1.0758126974105835, + "learning_rate": 8.033176883250989e-06, + "loss": 0.7595, + "step": 6105 + }, + { + "epoch": 0.3138040908623702, + "grad_norm": 1.069772481918335, + "learning_rate": 8.032515215729956e-06, + "loss": 0.7998, + "step": 6106 + }, + { + "epoch": 0.31385548360571486, + "grad_norm": 1.0834616422653198, + "learning_rate": 8.031853464190442e-06, + "loss": 0.7995, + "step": 6107 + }, + { + "epoch": 0.3139068763490595, + "grad_norm": 1.0511776208877563, + "learning_rate": 8.031191628650778e-06, + "loss": 0.8146, + "step": 6108 + }, + { + "epoch": 0.31395826909240415, + "grad_norm": 1.0922143459320068, + "learning_rate": 8.030529709129305e-06, + "loss": 0.8326, + "step": 6109 + }, + { + "epoch": 0.3140096618357488, + "grad_norm": 0.8175050020217896, + "learning_rate": 8.029867705644358e-06, + "loss": 0.7226, + "step": 6110 + }, + { + "epoch": 0.31406105457909345, + "grad_norm": 0.8077910542488098, + "learning_rate": 8.029205618214282e-06, + "loss": 0.6643, + "step": 6111 + }, + { + "epoch": 0.3141124473224381, + "grad_norm": 1.1243703365325928, + "learning_rate": 8.028543446857419e-06, + "loss": 0.7477, + "step": 6112 + }, + { + "epoch": 0.3141638400657827, + "grad_norm": 1.075564980506897, + "learning_rate": 8.027881191592115e-06, + "loss": 0.7632, + "step": 6113 + }, + { + "epoch": 0.31421523280912733, + "grad_norm": 1.0410335063934326, + "learning_rate": 8.027218852436719e-06, + "loss": 0.8005, + "step": 6114 + }, + { + "epoch": 0.314266625552472, + "grad_norm": 0.9974504709243774, + "learning_rate": 8.02655642940958e-06, + "loss": 0.727, + "step": 6115 + }, + { + "epoch": 0.3143180182958166, + "grad_norm": 1.1410577297210693, + "learning_rate": 8.025893922529053e-06, + "loss": 0.7229, + "step": 6116 + }, + { + "epoch": 0.3143694110391613, + "grad_norm": 1.0655590295791626, + "learning_rate": 8.02523133181349e-06, + "loss": 0.7395, + "step": 6117 + }, + { + "epoch": 0.3144208037825059, + "grad_norm": 1.0955928564071655, + "learning_rate": 8.024568657281255e-06, + "loss": 0.71, + "step": 6118 + }, + { + "epoch": 0.31447219652585057, + "grad_norm": 1.0123881101608276, + "learning_rate": 8.023905898950702e-06, + "loss": 0.7566, + "step": 6119 + }, + { + "epoch": 0.3145235892691952, + "grad_norm": 1.0509437322616577, + "learning_rate": 8.023243056840196e-06, + "loss": 0.7698, + "step": 6120 + }, + { + "epoch": 0.3145749820125398, + "grad_norm": 1.032643437385559, + "learning_rate": 8.0225801309681e-06, + "loss": 0.7659, + "step": 6121 + }, + { + "epoch": 0.31462637475588445, + "grad_norm": 1.0893285274505615, + "learning_rate": 8.021917121352784e-06, + "loss": 0.8137, + "step": 6122 + }, + { + "epoch": 0.3146777674992291, + "grad_norm": 1.0889554023742676, + "learning_rate": 8.021254028012614e-06, + "loss": 0.7452, + "step": 6123 + }, + { + "epoch": 0.31472916024257375, + "grad_norm": 1.229623794555664, + "learning_rate": 8.020590850965965e-06, + "loss": 0.7653, + "step": 6124 + }, + { + "epoch": 0.3147805529859184, + "grad_norm": 1.0645302534103394, + "learning_rate": 8.019927590231207e-06, + "loss": 0.7546, + "step": 6125 + }, + { + "epoch": 0.31483194572926304, + "grad_norm": 0.9680023789405823, + "learning_rate": 8.019264245826718e-06, + "loss": 0.6873, + "step": 6126 + }, + { + "epoch": 0.3148833384726077, + "grad_norm": 1.074756383895874, + "learning_rate": 8.018600817770876e-06, + "loss": 0.7568, + "step": 6127 + }, + { + "epoch": 0.31493473121595233, + "grad_norm": 1.1985946893692017, + "learning_rate": 8.017937306082062e-06, + "loss": 0.7334, + "step": 6128 + }, + { + "epoch": 0.3149861239592969, + "grad_norm": 1.1500622034072876, + "learning_rate": 8.017273710778661e-06, + "loss": 0.7592, + "step": 6129 + }, + { + "epoch": 0.3150375167026416, + "grad_norm": 1.0453654527664185, + "learning_rate": 8.016610031879057e-06, + "loss": 0.7347, + "step": 6130 + }, + { + "epoch": 0.3150889094459862, + "grad_norm": 1.0796889066696167, + "learning_rate": 8.015946269401637e-06, + "loss": 0.7883, + "step": 6131 + }, + { + "epoch": 0.31514030218933087, + "grad_norm": 1.0624734163284302, + "learning_rate": 8.015282423364792e-06, + "loss": 0.791, + "step": 6132 + }, + { + "epoch": 0.3151916949326755, + "grad_norm": 1.0314422845840454, + "learning_rate": 8.014618493786913e-06, + "loss": 0.7209, + "step": 6133 + }, + { + "epoch": 0.31524308767602016, + "grad_norm": 1.0661484003067017, + "learning_rate": 8.0139544806864e-06, + "loss": 0.7803, + "step": 6134 + }, + { + "epoch": 0.3152944804193648, + "grad_norm": 0.9232274889945984, + "learning_rate": 8.013290384081643e-06, + "loss": 0.664, + "step": 6135 + }, + { + "epoch": 0.3153458731627094, + "grad_norm": 0.8029962182044983, + "learning_rate": 8.012626203991046e-06, + "loss": 0.7226, + "step": 6136 + }, + { + "epoch": 0.31539726590605405, + "grad_norm": 1.1550087928771973, + "learning_rate": 8.01196194043301e-06, + "loss": 0.8251, + "step": 6137 + }, + { + "epoch": 0.3154486586493987, + "grad_norm": 1.0771244764328003, + "learning_rate": 8.011297593425936e-06, + "loss": 0.7586, + "step": 6138 + }, + { + "epoch": 0.31550005139274334, + "grad_norm": 1.117776870727539, + "learning_rate": 8.010633162988234e-06, + "loss": 0.8063, + "step": 6139 + }, + { + "epoch": 0.315551444136088, + "grad_norm": 1.0716899633407593, + "learning_rate": 8.00996864913831e-06, + "loss": 0.7758, + "step": 6140 + }, + { + "epoch": 0.31560283687943264, + "grad_norm": 0.8726239204406738, + "learning_rate": 8.009304051894577e-06, + "loss": 0.6857, + "step": 6141 + }, + { + "epoch": 0.3156542296227773, + "grad_norm": 1.0879548788070679, + "learning_rate": 8.008639371275449e-06, + "loss": 0.8012, + "step": 6142 + }, + { + "epoch": 0.31570562236612193, + "grad_norm": 3.052990436553955, + "learning_rate": 8.007974607299337e-06, + "loss": 0.7581, + "step": 6143 + }, + { + "epoch": 0.3157570151094665, + "grad_norm": 1.054518461227417, + "learning_rate": 8.007309759984664e-06, + "loss": 0.7348, + "step": 6144 + }, + { + "epoch": 0.31580840785281117, + "grad_norm": 1.1291894912719727, + "learning_rate": 8.006644829349848e-06, + "loss": 0.8026, + "step": 6145 + }, + { + "epoch": 0.3158598005961558, + "grad_norm": 1.045858383178711, + "learning_rate": 8.005979815413309e-06, + "loss": 0.7565, + "step": 6146 + }, + { + "epoch": 0.31591119333950046, + "grad_norm": 1.1505354642868042, + "learning_rate": 8.005314718193476e-06, + "loss": 0.813, + "step": 6147 + }, + { + "epoch": 0.3159625860828451, + "grad_norm": 1.0502619743347168, + "learning_rate": 8.004649537708775e-06, + "loss": 0.7511, + "step": 6148 + }, + { + "epoch": 0.31601397882618976, + "grad_norm": 1.0704747438430786, + "learning_rate": 8.003984273977634e-06, + "loss": 0.7931, + "step": 6149 + }, + { + "epoch": 0.3160653715695344, + "grad_norm": 1.0849039554595947, + "learning_rate": 8.003318927018485e-06, + "loss": 0.7664, + "step": 6150 + }, + { + "epoch": 0.316116764312879, + "grad_norm": 1.0384345054626465, + "learning_rate": 8.002653496849762e-06, + "loss": 0.7167, + "step": 6151 + }, + { + "epoch": 0.31616815705622364, + "grad_norm": 0.9973111152648926, + "learning_rate": 8.001987983489904e-06, + "loss": 0.7296, + "step": 6152 + }, + { + "epoch": 0.3162195497995683, + "grad_norm": 0.8917011618614197, + "learning_rate": 8.001322386957347e-06, + "loss": 0.7105, + "step": 6153 + }, + { + "epoch": 0.31627094254291294, + "grad_norm": 1.1230117082595825, + "learning_rate": 8.000656707270531e-06, + "loss": 0.7281, + "step": 6154 + }, + { + "epoch": 0.3163223352862576, + "grad_norm": 1.0961412191390991, + "learning_rate": 7.999990944447902e-06, + "loss": 0.7737, + "step": 6155 + }, + { + "epoch": 0.31637372802960223, + "grad_norm": 0.789203941822052, + "learning_rate": 7.999325098507902e-06, + "loss": 0.6927, + "step": 6156 + }, + { + "epoch": 0.3164251207729469, + "grad_norm": 1.0528838634490967, + "learning_rate": 7.998659169468983e-06, + "loss": 0.7696, + "step": 6157 + }, + { + "epoch": 0.3164765135162915, + "grad_norm": 1.1201075315475464, + "learning_rate": 7.997993157349593e-06, + "loss": 0.7573, + "step": 6158 + }, + { + "epoch": 0.3165279062596361, + "grad_norm": 0.7588522434234619, + "learning_rate": 7.997327062168182e-06, + "loss": 0.6951, + "step": 6159 + }, + { + "epoch": 0.31657929900298076, + "grad_norm": 1.0413274765014648, + "learning_rate": 7.99666088394321e-06, + "loss": 0.7421, + "step": 6160 + }, + { + "epoch": 0.3166306917463254, + "grad_norm": 0.7666056156158447, + "learning_rate": 7.995994622693133e-06, + "loss": 0.7008, + "step": 6161 + }, + { + "epoch": 0.31668208448967006, + "grad_norm": 1.1226341724395752, + "learning_rate": 7.995328278436406e-06, + "loss": 0.7148, + "step": 6162 + }, + { + "epoch": 0.3167334772330147, + "grad_norm": 1.0278602838516235, + "learning_rate": 7.994661851191494e-06, + "loss": 0.7526, + "step": 6163 + }, + { + "epoch": 0.31678486997635935, + "grad_norm": 1.1021512746810913, + "learning_rate": 7.99399534097686e-06, + "loss": 0.751, + "step": 6164 + }, + { + "epoch": 0.316836262719704, + "grad_norm": 1.0913143157958984, + "learning_rate": 7.993328747810971e-06, + "loss": 0.7255, + "step": 6165 + }, + { + "epoch": 0.31688765546304865, + "grad_norm": 1.0669944286346436, + "learning_rate": 7.992662071712296e-06, + "loss": 0.7594, + "step": 6166 + }, + { + "epoch": 0.31693904820639324, + "grad_norm": 1.1349221467971802, + "learning_rate": 7.991995312699303e-06, + "loss": 0.766, + "step": 6167 + }, + { + "epoch": 0.3169904409497379, + "grad_norm": 1.0363006591796875, + "learning_rate": 7.991328470790467e-06, + "loss": 0.7405, + "step": 6168 + }, + { + "epoch": 0.31704183369308253, + "grad_norm": 1.0679699182510376, + "learning_rate": 7.990661546004266e-06, + "loss": 0.738, + "step": 6169 + }, + { + "epoch": 0.3170932264364272, + "grad_norm": 1.1096373796463013, + "learning_rate": 7.989994538359174e-06, + "loss": 0.7304, + "step": 6170 + }, + { + "epoch": 0.3171446191797718, + "grad_norm": 1.102096676826477, + "learning_rate": 7.989327447873672e-06, + "loss": 0.7528, + "step": 6171 + }, + { + "epoch": 0.31719601192311647, + "grad_norm": 1.0745854377746582, + "learning_rate": 7.988660274566244e-06, + "loss": 0.7517, + "step": 6172 + }, + { + "epoch": 0.3172474046664611, + "grad_norm": 1.0453765392303467, + "learning_rate": 7.98799301845537e-06, + "loss": 0.7523, + "step": 6173 + }, + { + "epoch": 0.3172987974098057, + "grad_norm": 1.0133247375488281, + "learning_rate": 7.987325679559545e-06, + "loss": 0.7375, + "step": 6174 + }, + { + "epoch": 0.31735019015315036, + "grad_norm": 1.053646445274353, + "learning_rate": 7.986658257897249e-06, + "loss": 0.7727, + "step": 6175 + }, + { + "epoch": 0.317401582896495, + "grad_norm": 1.1851238012313843, + "learning_rate": 7.985990753486981e-06, + "loss": 0.8222, + "step": 6176 + }, + { + "epoch": 0.31745297563983965, + "grad_norm": 1.016196846961975, + "learning_rate": 7.985323166347231e-06, + "loss": 0.7326, + "step": 6177 + }, + { + "epoch": 0.3175043683831843, + "grad_norm": 1.0063202381134033, + "learning_rate": 7.984655496496494e-06, + "loss": 0.7664, + "step": 6178 + }, + { + "epoch": 0.31755576112652895, + "grad_norm": 1.0691715478897095, + "learning_rate": 7.983987743953272e-06, + "loss": 0.834, + "step": 6179 + }, + { + "epoch": 0.3176071538698736, + "grad_norm": 1.1008692979812622, + "learning_rate": 7.983319908736062e-06, + "loss": 0.7434, + "step": 6180 + }, + { + "epoch": 0.31765854661321824, + "grad_norm": 1.0929378271102905, + "learning_rate": 7.98265199086337e-06, + "loss": 0.7449, + "step": 6181 + }, + { + "epoch": 0.31770993935656283, + "grad_norm": 1.1153590679168701, + "learning_rate": 7.981983990353701e-06, + "loss": 0.8249, + "step": 6182 + }, + { + "epoch": 0.3177613320999075, + "grad_norm": 1.0066204071044922, + "learning_rate": 7.98131590722556e-06, + "loss": 0.7507, + "step": 6183 + }, + { + "epoch": 0.3178127248432521, + "grad_norm": 1.1370506286621094, + "learning_rate": 7.980647741497458e-06, + "loss": 0.8231, + "step": 6184 + }, + { + "epoch": 0.3178641175865968, + "grad_norm": 0.791109561920166, + "learning_rate": 7.979979493187909e-06, + "loss": 0.6683, + "step": 6185 + }, + { + "epoch": 0.3179155103299414, + "grad_norm": 1.0686885118484497, + "learning_rate": 7.979311162315425e-06, + "loss": 0.764, + "step": 6186 + }, + { + "epoch": 0.31796690307328607, + "grad_norm": 1.1198740005493164, + "learning_rate": 7.978642748898524e-06, + "loss": 0.7418, + "step": 6187 + }, + { + "epoch": 0.3180182958166307, + "grad_norm": 1.1276917457580566, + "learning_rate": 7.977974252955725e-06, + "loss": 0.7943, + "step": 6188 + }, + { + "epoch": 0.3180696885599753, + "grad_norm": 1.0539357662200928, + "learning_rate": 7.977305674505547e-06, + "loss": 0.7692, + "step": 6189 + }, + { + "epoch": 0.31812108130331995, + "grad_norm": 1.0832836627960205, + "learning_rate": 7.976637013566517e-06, + "loss": 0.7834, + "step": 6190 + }, + { + "epoch": 0.3181724740466646, + "grad_norm": 1.096444845199585, + "learning_rate": 7.97596827015716e-06, + "loss": 0.7756, + "step": 6191 + }, + { + "epoch": 0.31822386679000925, + "grad_norm": 1.1156165599822998, + "learning_rate": 7.975299444296001e-06, + "loss": 0.8029, + "step": 6192 + }, + { + "epoch": 0.3182752595333539, + "grad_norm": 1.0446009635925293, + "learning_rate": 7.974630536001573e-06, + "loss": 0.7969, + "step": 6193 + }, + { + "epoch": 0.31832665227669854, + "grad_norm": 1.0631943941116333, + "learning_rate": 7.97396154529241e-06, + "loss": 0.7212, + "step": 6194 + }, + { + "epoch": 0.3183780450200432, + "grad_norm": 1.1738412380218506, + "learning_rate": 7.973292472187045e-06, + "loss": 0.7881, + "step": 6195 + }, + { + "epoch": 0.31842943776338783, + "grad_norm": 1.0756568908691406, + "learning_rate": 7.972623316704015e-06, + "loss": 0.7769, + "step": 6196 + }, + { + "epoch": 0.3184808305067324, + "grad_norm": 1.0694456100463867, + "learning_rate": 7.97195407886186e-06, + "loss": 0.7027, + "step": 6197 + }, + { + "epoch": 0.3185322232500771, + "grad_norm": 1.0254652500152588, + "learning_rate": 7.971284758679122e-06, + "loss": 0.6879, + "step": 6198 + }, + { + "epoch": 0.3185836159934217, + "grad_norm": 1.163947343826294, + "learning_rate": 7.970615356174348e-06, + "loss": 0.7698, + "step": 6199 + }, + { + "epoch": 0.31863500873676637, + "grad_norm": 1.053846001625061, + "learning_rate": 7.969945871366079e-06, + "loss": 0.8041, + "step": 6200 + }, + { + "epoch": 0.318686401480111, + "grad_norm": 1.0574476718902588, + "learning_rate": 7.969276304272865e-06, + "loss": 0.7585, + "step": 6201 + }, + { + "epoch": 0.31873779422345566, + "grad_norm": 0.7400321960449219, + "learning_rate": 7.968606654913259e-06, + "loss": 0.693, + "step": 6202 + }, + { + "epoch": 0.3187891869668003, + "grad_norm": 1.1234692335128784, + "learning_rate": 7.967936923305815e-06, + "loss": 0.7862, + "step": 6203 + }, + { + "epoch": 0.3188405797101449, + "grad_norm": 0.8033209443092346, + "learning_rate": 7.967267109469085e-06, + "loss": 0.7019, + "step": 6204 + }, + { + "epoch": 0.31889197245348955, + "grad_norm": 0.7713354825973511, + "learning_rate": 7.96659721342163e-06, + "loss": 0.6811, + "step": 6205 + }, + { + "epoch": 0.3189433651968342, + "grad_norm": 0.7624207735061646, + "learning_rate": 7.965927235182008e-06, + "loss": 0.6901, + "step": 6206 + }, + { + "epoch": 0.31899475794017884, + "grad_norm": 0.7091054320335388, + "learning_rate": 7.96525717476878e-06, + "loss": 0.6948, + "step": 6207 + }, + { + "epoch": 0.3190461506835235, + "grad_norm": 1.1390208005905151, + "learning_rate": 7.964587032200514e-06, + "loss": 0.7632, + "step": 6208 + }, + { + "epoch": 0.31909754342686814, + "grad_norm": 1.0724012851715088, + "learning_rate": 7.963916807495776e-06, + "loss": 0.7864, + "step": 6209 + }, + { + "epoch": 0.3191489361702128, + "grad_norm": 1.094946265220642, + "learning_rate": 7.963246500673134e-06, + "loss": 0.7352, + "step": 6210 + }, + { + "epoch": 0.31920032891355743, + "grad_norm": 1.1584030389785767, + "learning_rate": 7.96257611175116e-06, + "loss": 0.7632, + "step": 6211 + }, + { + "epoch": 0.319251721656902, + "grad_norm": 1.1413359642028809, + "learning_rate": 7.961905640748428e-06, + "loss": 0.8289, + "step": 6212 + }, + { + "epoch": 0.31930311440024667, + "grad_norm": 0.7515755295753479, + "learning_rate": 7.961235087683514e-06, + "loss": 0.6572, + "step": 6213 + }, + { + "epoch": 0.3193545071435913, + "grad_norm": 1.4042295217514038, + "learning_rate": 7.960564452574997e-06, + "loss": 0.7716, + "step": 6214 + }, + { + "epoch": 0.31940589988693596, + "grad_norm": 0.7963113188743591, + "learning_rate": 7.959893735441455e-06, + "loss": 0.6629, + "step": 6215 + }, + { + "epoch": 0.3194572926302806, + "grad_norm": 0.8841925859451294, + "learning_rate": 7.959222936301472e-06, + "loss": 0.7294, + "step": 6216 + }, + { + "epoch": 0.31950868537362526, + "grad_norm": 1.0700139999389648, + "learning_rate": 7.958552055173635e-06, + "loss": 0.8318, + "step": 6217 + }, + { + "epoch": 0.3195600781169699, + "grad_norm": 0.7045684456825256, + "learning_rate": 7.957881092076529e-06, + "loss": 0.7228, + "step": 6218 + }, + { + "epoch": 0.31961147086031455, + "grad_norm": 1.0172303915023804, + "learning_rate": 7.957210047028744e-06, + "loss": 0.7973, + "step": 6219 + }, + { + "epoch": 0.31966286360365914, + "grad_norm": 1.0464023351669312, + "learning_rate": 7.956538920048873e-06, + "loss": 0.7503, + "step": 6220 + }, + { + "epoch": 0.3197142563470038, + "grad_norm": 1.0726381540298462, + "learning_rate": 7.955867711155509e-06, + "loss": 0.7176, + "step": 6221 + }, + { + "epoch": 0.31976564909034844, + "grad_norm": 1.0692014694213867, + "learning_rate": 7.95519642036725e-06, + "loss": 0.791, + "step": 6222 + }, + { + "epoch": 0.3198170418336931, + "grad_norm": 1.1003714799880981, + "learning_rate": 7.954525047702691e-06, + "loss": 0.7874, + "step": 6223 + }, + { + "epoch": 0.31986843457703773, + "grad_norm": 1.0203440189361572, + "learning_rate": 7.953853593180439e-06, + "loss": 0.7148, + "step": 6224 + }, + { + "epoch": 0.3199198273203824, + "grad_norm": 0.7499158978462219, + "learning_rate": 7.953182056819092e-06, + "loss": 0.6843, + "step": 6225 + }, + { + "epoch": 0.319971220063727, + "grad_norm": 1.0867787599563599, + "learning_rate": 7.952510438637255e-06, + "loss": 0.7566, + "step": 6226 + }, + { + "epoch": 0.3200226128070716, + "grad_norm": 1.1037946939468384, + "learning_rate": 7.951838738653541e-06, + "loss": 0.7288, + "step": 6227 + }, + { + "epoch": 0.32007400555041626, + "grad_norm": 1.0909851789474487, + "learning_rate": 7.951166956886556e-06, + "loss": 0.804, + "step": 6228 + }, + { + "epoch": 0.3201253982937609, + "grad_norm": 1.1042238473892212, + "learning_rate": 7.950495093354914e-06, + "loss": 0.8448, + "step": 6229 + }, + { + "epoch": 0.32017679103710556, + "grad_norm": 1.1097403764724731, + "learning_rate": 7.949823148077228e-06, + "loss": 0.7715, + "step": 6230 + }, + { + "epoch": 0.3202281837804502, + "grad_norm": 1.0580353736877441, + "learning_rate": 7.949151121072116e-06, + "loss": 0.7293, + "step": 6231 + }, + { + "epoch": 0.32027957652379485, + "grad_norm": 0.7942004203796387, + "learning_rate": 7.948479012358198e-06, + "loss": 0.6698, + "step": 6232 + }, + { + "epoch": 0.3203309692671395, + "grad_norm": 1.2243528366088867, + "learning_rate": 7.947806821954094e-06, + "loss": 0.7246, + "step": 6233 + }, + { + "epoch": 0.32038236201048415, + "grad_norm": 1.0543429851531982, + "learning_rate": 7.947134549878426e-06, + "loss": 0.8022, + "step": 6234 + }, + { + "epoch": 0.32043375475382874, + "grad_norm": 0.6779053807258606, + "learning_rate": 7.946462196149824e-06, + "loss": 0.6862, + "step": 6235 + }, + { + "epoch": 0.3204851474971734, + "grad_norm": 0.8913925886154175, + "learning_rate": 7.945789760786912e-06, + "loss": 0.7275, + "step": 6236 + }, + { + "epoch": 0.32053654024051803, + "grad_norm": 1.1080243587493896, + "learning_rate": 7.945117243808323e-06, + "loss": 0.7687, + "step": 6237 + }, + { + "epoch": 0.3205879329838627, + "grad_norm": 1.0549356937408447, + "learning_rate": 7.944444645232689e-06, + "loss": 0.7302, + "step": 6238 + }, + { + "epoch": 0.3206393257272073, + "grad_norm": 1.1131651401519775, + "learning_rate": 7.943771965078646e-06, + "loss": 0.7619, + "step": 6239 + }, + { + "epoch": 0.32069071847055197, + "grad_norm": 1.1166050434112549, + "learning_rate": 7.943099203364827e-06, + "loss": 0.7842, + "step": 6240 + }, + { + "epoch": 0.3207421112138966, + "grad_norm": 1.0342165231704712, + "learning_rate": 7.942426360109879e-06, + "loss": 0.7508, + "step": 6241 + }, + { + "epoch": 0.3207935039572412, + "grad_norm": 1.0975227355957031, + "learning_rate": 7.941753435332435e-06, + "loss": 0.7799, + "step": 6242 + }, + { + "epoch": 0.32084489670058586, + "grad_norm": 0.7095525860786438, + "learning_rate": 7.941080429051147e-06, + "loss": 0.6434, + "step": 6243 + }, + { + "epoch": 0.3208962894439305, + "grad_norm": 0.8280468583106995, + "learning_rate": 7.940407341284654e-06, + "loss": 0.7301, + "step": 6244 + }, + { + "epoch": 0.32094768218727515, + "grad_norm": 1.074492335319519, + "learning_rate": 7.93973417205161e-06, + "loss": 0.7429, + "step": 6245 + }, + { + "epoch": 0.3209990749306198, + "grad_norm": 0.7331387400627136, + "learning_rate": 7.939060921370663e-06, + "loss": 0.6729, + "step": 6246 + }, + { + "epoch": 0.32105046767396445, + "grad_norm": 1.237428903579712, + "learning_rate": 7.938387589260465e-06, + "loss": 0.8511, + "step": 6247 + }, + { + "epoch": 0.3211018604173091, + "grad_norm": 1.053725004196167, + "learning_rate": 7.937714175739677e-06, + "loss": 0.7609, + "step": 6248 + }, + { + "epoch": 0.32115325316065374, + "grad_norm": 1.0612396001815796, + "learning_rate": 7.937040680826948e-06, + "loss": 0.7878, + "step": 6249 + }, + { + "epoch": 0.32120464590399833, + "grad_norm": 1.149339199066162, + "learning_rate": 7.936367104540946e-06, + "loss": 0.8547, + "step": 6250 + }, + { + "epoch": 0.321256038647343, + "grad_norm": 1.0942442417144775, + "learning_rate": 7.935693446900326e-06, + "loss": 0.8101, + "step": 6251 + }, + { + "epoch": 0.3213074313906876, + "grad_norm": 1.146078109741211, + "learning_rate": 7.935019707923757e-06, + "loss": 0.8229, + "step": 6252 + }, + { + "epoch": 0.3213588241340323, + "grad_norm": 0.7818619608879089, + "learning_rate": 7.934345887629904e-06, + "loss": 0.6865, + "step": 6253 + }, + { + "epoch": 0.3214102168773769, + "grad_norm": 1.0790176391601562, + "learning_rate": 7.933671986037436e-06, + "loss": 0.7889, + "step": 6254 + }, + { + "epoch": 0.32146160962072157, + "grad_norm": 1.0117785930633545, + "learning_rate": 7.932998003165025e-06, + "loss": 0.7698, + "step": 6255 + }, + { + "epoch": 0.3215130023640662, + "grad_norm": 1.024689793586731, + "learning_rate": 7.93232393903134e-06, + "loss": 0.7457, + "step": 6256 + }, + { + "epoch": 0.32156439510741086, + "grad_norm": 1.006550908088684, + "learning_rate": 7.931649793655062e-06, + "loss": 0.6864, + "step": 6257 + }, + { + "epoch": 0.32161578785075545, + "grad_norm": 1.0390712022781372, + "learning_rate": 7.930975567054866e-06, + "loss": 0.72, + "step": 6258 + }, + { + "epoch": 0.3216671805941001, + "grad_norm": 1.0208220481872559, + "learning_rate": 7.930301259249431e-06, + "loss": 0.736, + "step": 6259 + }, + { + "epoch": 0.32171857333744475, + "grad_norm": 0.8802581429481506, + "learning_rate": 7.929626870257443e-06, + "loss": 0.6492, + "step": 6260 + }, + { + "epoch": 0.3217699660807894, + "grad_norm": 1.1177890300750732, + "learning_rate": 7.928952400097583e-06, + "loss": 0.7245, + "step": 6261 + }, + { + "epoch": 0.32182135882413404, + "grad_norm": 1.0833348035812378, + "learning_rate": 7.928277848788539e-06, + "loss": 0.7925, + "step": 6262 + }, + { + "epoch": 0.3218727515674787, + "grad_norm": 1.086914300918579, + "learning_rate": 7.927603216349e-06, + "loss": 0.8045, + "step": 6263 + }, + { + "epoch": 0.32192414431082333, + "grad_norm": 1.0966742038726807, + "learning_rate": 7.926928502797657e-06, + "loss": 0.7519, + "step": 6264 + }, + { + "epoch": 0.3219755370541679, + "grad_norm": 1.0158556699752808, + "learning_rate": 7.926253708153205e-06, + "loss": 0.7391, + "step": 6265 + }, + { + "epoch": 0.3220269297975126, + "grad_norm": 1.2897666692733765, + "learning_rate": 7.925578832434337e-06, + "loss": 0.771, + "step": 6266 + }, + { + "epoch": 0.3220783225408572, + "grad_norm": 1.0462756156921387, + "learning_rate": 7.924903875659755e-06, + "loss": 0.7503, + "step": 6267 + }, + { + "epoch": 0.32212971528420187, + "grad_norm": 1.1187732219696045, + "learning_rate": 7.924228837848155e-06, + "loss": 0.7585, + "step": 6268 + }, + { + "epoch": 0.3221811080275465, + "grad_norm": 1.010280966758728, + "learning_rate": 7.923553719018244e-06, + "loss": 0.7341, + "step": 6269 + }, + { + "epoch": 0.32223250077089116, + "grad_norm": 1.013115406036377, + "learning_rate": 7.922878519188723e-06, + "loss": 0.6999, + "step": 6270 + }, + { + "epoch": 0.3222838935142358, + "grad_norm": 1.1518288850784302, + "learning_rate": 7.922203238378299e-06, + "loss": 0.761, + "step": 6271 + }, + { + "epoch": 0.32233528625758046, + "grad_norm": 1.0540426969528198, + "learning_rate": 7.921527876605684e-06, + "loss": 0.7776, + "step": 6272 + }, + { + "epoch": 0.32238667900092505, + "grad_norm": 0.8120995163917542, + "learning_rate": 7.920852433889587e-06, + "loss": 0.6461, + "step": 6273 + }, + { + "epoch": 0.3224380717442697, + "grad_norm": 0.8062677383422852, + "learning_rate": 7.920176910248722e-06, + "loss": 0.6663, + "step": 6274 + }, + { + "epoch": 0.32248946448761434, + "grad_norm": 1.1233443021774292, + "learning_rate": 7.919501305701808e-06, + "loss": 0.7915, + "step": 6275 + }, + { + "epoch": 0.322540857230959, + "grad_norm": 1.1094499826431274, + "learning_rate": 7.91882562026756e-06, + "loss": 0.8091, + "step": 6276 + }, + { + "epoch": 0.32259224997430364, + "grad_norm": 1.0231685638427734, + "learning_rate": 7.918149853964698e-06, + "loss": 0.8293, + "step": 6277 + }, + { + "epoch": 0.3226436427176483, + "grad_norm": 1.0143455266952515, + "learning_rate": 7.917474006811948e-06, + "loss": 0.7527, + "step": 6278 + }, + { + "epoch": 0.32269503546099293, + "grad_norm": 1.097053050994873, + "learning_rate": 7.916798078828032e-06, + "loss": 0.7312, + "step": 6279 + }, + { + "epoch": 0.3227464282043375, + "grad_norm": 1.010754942893982, + "learning_rate": 7.916122070031678e-06, + "loss": 0.7178, + "step": 6280 + }, + { + "epoch": 0.32279782094768217, + "grad_norm": 1.1419605016708374, + "learning_rate": 7.915445980441616e-06, + "loss": 0.7353, + "step": 6281 + }, + { + "epoch": 0.3228492136910268, + "grad_norm": 1.4826608896255493, + "learning_rate": 7.914769810076577e-06, + "loss": 0.7681, + "step": 6282 + }, + { + "epoch": 0.32290060643437146, + "grad_norm": 1.065619707107544, + "learning_rate": 7.914093558955296e-06, + "loss": 0.7373, + "step": 6283 + }, + { + "epoch": 0.3229519991777161, + "grad_norm": 0.7846266627311707, + "learning_rate": 7.913417227096505e-06, + "loss": 0.6418, + "step": 6284 + }, + { + "epoch": 0.32300339192106076, + "grad_norm": 1.079285740852356, + "learning_rate": 7.91274081451895e-06, + "loss": 0.797, + "step": 6285 + }, + { + "epoch": 0.3230547846644054, + "grad_norm": 0.7065392136573792, + "learning_rate": 7.912064321241363e-06, + "loss": 0.6448, + "step": 6286 + }, + { + "epoch": 0.32310617740775005, + "grad_norm": 1.0687423944473267, + "learning_rate": 7.911387747282493e-06, + "loss": 0.7902, + "step": 6287 + }, + { + "epoch": 0.32315757015109464, + "grad_norm": 1.0653470754623413, + "learning_rate": 7.910711092661085e-06, + "loss": 0.7601, + "step": 6288 + }, + { + "epoch": 0.3232089628944393, + "grad_norm": 0.7815491557121277, + "learning_rate": 7.910034357395883e-06, + "loss": 0.6921, + "step": 6289 + }, + { + "epoch": 0.32326035563778394, + "grad_norm": 1.1885305643081665, + "learning_rate": 7.909357541505637e-06, + "loss": 0.775, + "step": 6290 + }, + { + "epoch": 0.3233117483811286, + "grad_norm": 1.0751159191131592, + "learning_rate": 7.908680645009099e-06, + "loss": 0.7729, + "step": 6291 + }, + { + "epoch": 0.32336314112447323, + "grad_norm": 1.161222219467163, + "learning_rate": 7.908003667925026e-06, + "loss": 0.7533, + "step": 6292 + }, + { + "epoch": 0.3234145338678179, + "grad_norm": 1.1146621704101562, + "learning_rate": 7.90732661027217e-06, + "loss": 0.7693, + "step": 6293 + }, + { + "epoch": 0.3234659266111625, + "grad_norm": 1.1772619485855103, + "learning_rate": 7.906649472069294e-06, + "loss": 0.8412, + "step": 6294 + }, + { + "epoch": 0.32351731935450717, + "grad_norm": 1.008787989616394, + "learning_rate": 7.905972253335154e-06, + "loss": 0.7347, + "step": 6295 + }, + { + "epoch": 0.32356871209785176, + "grad_norm": 1.1262861490249634, + "learning_rate": 7.905294954088515e-06, + "loss": 0.7875, + "step": 6296 + }, + { + "epoch": 0.3236201048411964, + "grad_norm": 1.0279282331466675, + "learning_rate": 7.904617574348144e-06, + "loss": 0.7042, + "step": 6297 + }, + { + "epoch": 0.32367149758454106, + "grad_norm": 1.1229885816574097, + "learning_rate": 7.903940114132806e-06, + "loss": 0.7736, + "step": 6298 + }, + { + "epoch": 0.3237228903278857, + "grad_norm": 1.104676365852356, + "learning_rate": 7.90326257346127e-06, + "loss": 0.7734, + "step": 6299 + }, + { + "epoch": 0.32377428307123035, + "grad_norm": 1.0531375408172607, + "learning_rate": 7.90258495235231e-06, + "loss": 0.7442, + "step": 6300 + }, + { + "epoch": 0.323825675814575, + "grad_norm": 1.1484031677246094, + "learning_rate": 7.901907250824701e-06, + "loss": 0.7827, + "step": 6301 + }, + { + "epoch": 0.32387706855791965, + "grad_norm": 0.8167237639427185, + "learning_rate": 7.901229468897215e-06, + "loss": 0.6459, + "step": 6302 + }, + { + "epoch": 0.32392846130126424, + "grad_norm": 1.0975357294082642, + "learning_rate": 7.900551606588633e-06, + "loss": 0.7271, + "step": 6303 + }, + { + "epoch": 0.3239798540446089, + "grad_norm": 1.0882256031036377, + "learning_rate": 7.89987366391774e-06, + "loss": 0.7253, + "step": 6304 + }, + { + "epoch": 0.32403124678795353, + "grad_norm": 1.136600136756897, + "learning_rate": 7.89919564090331e-06, + "loss": 0.7981, + "step": 6305 + }, + { + "epoch": 0.3240826395312982, + "grad_norm": 1.0951790809631348, + "learning_rate": 7.898517537564134e-06, + "loss": 0.7874, + "step": 6306 + }, + { + "epoch": 0.3241340322746428, + "grad_norm": 1.1364099979400635, + "learning_rate": 7.897839353919e-06, + "loss": 0.7662, + "step": 6307 + }, + { + "epoch": 0.32418542501798747, + "grad_norm": 0.8488683104515076, + "learning_rate": 7.897161089986696e-06, + "loss": 0.6872, + "step": 6308 + }, + { + "epoch": 0.3242368177613321, + "grad_norm": 0.796160876750946, + "learning_rate": 7.896482745786014e-06, + "loss": 0.7036, + "step": 6309 + }, + { + "epoch": 0.32428821050467677, + "grad_norm": 1.0552352666854858, + "learning_rate": 7.895804321335748e-06, + "loss": 0.7113, + "step": 6310 + }, + { + "epoch": 0.32433960324802136, + "grad_norm": 0.772499144077301, + "learning_rate": 7.895125816654693e-06, + "loss": 0.6841, + "step": 6311 + }, + { + "epoch": 0.324390995991366, + "grad_norm": 1.1797508001327515, + "learning_rate": 7.89444723176165e-06, + "loss": 0.7356, + "step": 6312 + }, + { + "epoch": 0.32444238873471065, + "grad_norm": 1.0207335948944092, + "learning_rate": 7.89376856667542e-06, + "loss": 0.7452, + "step": 6313 + }, + { + "epoch": 0.3244937814780553, + "grad_norm": 1.0907418727874756, + "learning_rate": 7.893089821414804e-06, + "loss": 0.7626, + "step": 6314 + }, + { + "epoch": 0.32454517422139995, + "grad_norm": 0.9334765672683716, + "learning_rate": 7.892410995998609e-06, + "loss": 0.7115, + "step": 6315 + }, + { + "epoch": 0.3245965669647446, + "grad_norm": 6.051718235015869, + "learning_rate": 7.891732090445641e-06, + "loss": 0.9259, + "step": 6316 + }, + { + "epoch": 0.32464795970808924, + "grad_norm": 1.096279263496399, + "learning_rate": 7.891053104774709e-06, + "loss": 0.729, + "step": 6317 + }, + { + "epoch": 0.32469935245143383, + "grad_norm": 0.7192781567573547, + "learning_rate": 7.890374039004628e-06, + "loss": 0.6712, + "step": 6318 + }, + { + "epoch": 0.3247507451947785, + "grad_norm": 0.8034277558326721, + "learning_rate": 7.88969489315421e-06, + "loss": 0.679, + "step": 6319 + }, + { + "epoch": 0.3248021379381231, + "grad_norm": 1.3794063329696655, + "learning_rate": 7.88901566724227e-06, + "loss": 0.7519, + "step": 6320 + }, + { + "epoch": 0.3248535306814678, + "grad_norm": 1.125765085220337, + "learning_rate": 7.88833636128763e-06, + "loss": 0.7518, + "step": 6321 + }, + { + "epoch": 0.3249049234248124, + "grad_norm": 1.070083737373352, + "learning_rate": 7.887656975309106e-06, + "loss": 0.7688, + "step": 6322 + }, + { + "epoch": 0.32495631616815707, + "grad_norm": 1.0824249982833862, + "learning_rate": 7.886977509325527e-06, + "loss": 0.7803, + "step": 6323 + }, + { + "epoch": 0.3250077089115017, + "grad_norm": 0.8180423378944397, + "learning_rate": 7.886297963355714e-06, + "loss": 0.6868, + "step": 6324 + }, + { + "epoch": 0.32505910165484636, + "grad_norm": 1.103896141052246, + "learning_rate": 7.885618337418496e-06, + "loss": 0.6936, + "step": 6325 + }, + { + "epoch": 0.32511049439819095, + "grad_norm": 1.1351890563964844, + "learning_rate": 7.884938631532702e-06, + "loss": 0.7511, + "step": 6326 + }, + { + "epoch": 0.3251618871415356, + "grad_norm": 0.8250926733016968, + "learning_rate": 7.884258845717163e-06, + "loss": 0.7118, + "step": 6327 + }, + { + "epoch": 0.32521327988488025, + "grad_norm": 0.7486550807952881, + "learning_rate": 7.883578979990715e-06, + "loss": 0.648, + "step": 6328 + }, + { + "epoch": 0.3252646726282249, + "grad_norm": 1.0689102411270142, + "learning_rate": 7.882899034372193e-06, + "loss": 0.7299, + "step": 6329 + }, + { + "epoch": 0.32531606537156954, + "grad_norm": 1.0918580293655396, + "learning_rate": 7.882219008880435e-06, + "loss": 0.7562, + "step": 6330 + }, + { + "epoch": 0.3253674581149142, + "grad_norm": 0.7655850052833557, + "learning_rate": 7.881538903534283e-06, + "loss": 0.7082, + "step": 6331 + }, + { + "epoch": 0.32541885085825883, + "grad_norm": 1.088153600692749, + "learning_rate": 7.880858718352579e-06, + "loss": 0.8159, + "step": 6332 + }, + { + "epoch": 0.3254702436016034, + "grad_norm": 0.8767863512039185, + "learning_rate": 7.88017845335417e-06, + "loss": 0.6692, + "step": 6333 + }, + { + "epoch": 0.3255216363449481, + "grad_norm": 1.0915653705596924, + "learning_rate": 7.879498108557902e-06, + "loss": 0.7674, + "step": 6334 + }, + { + "epoch": 0.3255730290882927, + "grad_norm": 1.0005794763565063, + "learning_rate": 7.878817683982623e-06, + "loss": 0.763, + "step": 6335 + }, + { + "epoch": 0.32562442183163737, + "grad_norm": 1.0348544120788574, + "learning_rate": 7.878137179647187e-06, + "loss": 0.748, + "step": 6336 + }, + { + "epoch": 0.325675814574982, + "grad_norm": 1.0506395101547241, + "learning_rate": 7.877456595570446e-06, + "loss": 0.7537, + "step": 6337 + }, + { + "epoch": 0.32572720731832666, + "grad_norm": 1.082154393196106, + "learning_rate": 7.876775931771257e-06, + "loss": 0.762, + "step": 6338 + }, + { + "epoch": 0.3257786000616713, + "grad_norm": 1.1421531438827515, + "learning_rate": 7.87609518826848e-06, + "loss": 0.8107, + "step": 6339 + }, + { + "epoch": 0.32582999280501596, + "grad_norm": 1.1036674976348877, + "learning_rate": 7.875414365080974e-06, + "loss": 0.7507, + "step": 6340 + }, + { + "epoch": 0.32588138554836055, + "grad_norm": 1.0783600807189941, + "learning_rate": 7.874733462227602e-06, + "loss": 0.7466, + "step": 6341 + }, + { + "epoch": 0.3259327782917052, + "grad_norm": 1.0899938344955444, + "learning_rate": 7.874052479727228e-06, + "loss": 0.7836, + "step": 6342 + }, + { + "epoch": 0.32598417103504984, + "grad_norm": 1.085042119026184, + "learning_rate": 7.87337141759872e-06, + "loss": 0.7645, + "step": 6343 + }, + { + "epoch": 0.3260355637783945, + "grad_norm": 1.0419930219650269, + "learning_rate": 7.872690275860951e-06, + "loss": 0.7524, + "step": 6344 + }, + { + "epoch": 0.32608695652173914, + "grad_norm": 0.9993297457695007, + "learning_rate": 7.872009054532787e-06, + "loss": 0.756, + "step": 6345 + }, + { + "epoch": 0.3261383492650838, + "grad_norm": 0.9247534275054932, + "learning_rate": 7.871327753633105e-06, + "loss": 0.7107, + "step": 6346 + }, + { + "epoch": 0.32618974200842843, + "grad_norm": 0.8019363880157471, + "learning_rate": 7.87064637318078e-06, + "loss": 0.6757, + "step": 6347 + }, + { + "epoch": 0.3262411347517731, + "grad_norm": 0.8444534540176392, + "learning_rate": 7.86996491319469e-06, + "loss": 0.6968, + "step": 6348 + }, + { + "epoch": 0.32629252749511767, + "grad_norm": 1.0891953706741333, + "learning_rate": 7.869283373693717e-06, + "loss": 0.7516, + "step": 6349 + }, + { + "epoch": 0.3263439202384623, + "grad_norm": 0.7442377209663391, + "learning_rate": 7.868601754696741e-06, + "loss": 0.6909, + "step": 6350 + }, + { + "epoch": 0.32639531298180696, + "grad_norm": 1.0952770709991455, + "learning_rate": 7.867920056222648e-06, + "loss": 0.7663, + "step": 6351 + }, + { + "epoch": 0.3264467057251516, + "grad_norm": 1.1148467063903809, + "learning_rate": 7.867238278290327e-06, + "loss": 0.7757, + "step": 6352 + }, + { + "epoch": 0.32649809846849626, + "grad_norm": 1.0050616264343262, + "learning_rate": 7.866556420918666e-06, + "loss": 0.7134, + "step": 6353 + }, + { + "epoch": 0.3265494912118409, + "grad_norm": 1.083299160003662, + "learning_rate": 7.865874484126554e-06, + "loss": 0.7625, + "step": 6354 + }, + { + "epoch": 0.32660088395518555, + "grad_norm": 1.0797746181488037, + "learning_rate": 7.86519246793289e-06, + "loss": 0.7415, + "step": 6355 + }, + { + "epoch": 0.32665227669853014, + "grad_norm": 0.979505717754364, + "learning_rate": 7.864510372356564e-06, + "loss": 0.7267, + "step": 6356 + }, + { + "epoch": 0.3267036694418748, + "grad_norm": 0.776856541633606, + "learning_rate": 7.863828197416479e-06, + "loss": 0.6932, + "step": 6357 + }, + { + "epoch": 0.32675506218521944, + "grad_norm": 1.1441924571990967, + "learning_rate": 7.863145943131531e-06, + "loss": 0.8434, + "step": 6358 + }, + { + "epoch": 0.3268064549285641, + "grad_norm": 1.0159094333648682, + "learning_rate": 7.862463609520625e-06, + "loss": 0.746, + "step": 6359 + }, + { + "epoch": 0.32685784767190873, + "grad_norm": 1.0966925621032715, + "learning_rate": 7.861781196602665e-06, + "loss": 0.7595, + "step": 6360 + }, + { + "epoch": 0.3269092404152534, + "grad_norm": 1.0946656465530396, + "learning_rate": 7.861098704396557e-06, + "loss": 0.7521, + "step": 6361 + }, + { + "epoch": 0.326960633158598, + "grad_norm": 1.1001232862472534, + "learning_rate": 7.860416132921213e-06, + "loss": 0.7894, + "step": 6362 + }, + { + "epoch": 0.32701202590194267, + "grad_norm": 1.1131579875946045, + "learning_rate": 7.85973348219554e-06, + "loss": 0.8359, + "step": 6363 + }, + { + "epoch": 0.32706341864528726, + "grad_norm": 0.9947192668914795, + "learning_rate": 7.859050752238455e-06, + "loss": 0.7125, + "step": 6364 + }, + { + "epoch": 0.3271148113886319, + "grad_norm": 1.0519870519638062, + "learning_rate": 7.858367943068873e-06, + "loss": 0.7246, + "step": 6365 + }, + { + "epoch": 0.32716620413197656, + "grad_norm": 1.0918675661087036, + "learning_rate": 7.857685054705711e-06, + "loss": 0.7328, + "step": 6366 + }, + { + "epoch": 0.3272175968753212, + "grad_norm": 1.1316570043563843, + "learning_rate": 7.857002087167886e-06, + "loss": 0.7632, + "step": 6367 + }, + { + "epoch": 0.32726898961866585, + "grad_norm": 1.135451316833496, + "learning_rate": 7.856319040474326e-06, + "loss": 0.7821, + "step": 6368 + }, + { + "epoch": 0.3273203823620105, + "grad_norm": 1.0485374927520752, + "learning_rate": 7.855635914643953e-06, + "loss": 0.7228, + "step": 6369 + }, + { + "epoch": 0.32737177510535515, + "grad_norm": 1.0320537090301514, + "learning_rate": 7.854952709695692e-06, + "loss": 0.8066, + "step": 6370 + }, + { + "epoch": 0.32742316784869974, + "grad_norm": 1.0669602155685425, + "learning_rate": 7.854269425648475e-06, + "loss": 0.7378, + "step": 6371 + }, + { + "epoch": 0.3274745605920444, + "grad_norm": 1.1363108158111572, + "learning_rate": 7.853586062521229e-06, + "loss": 0.7668, + "step": 6372 + }, + { + "epoch": 0.32752595333538903, + "grad_norm": 1.0675972700119019, + "learning_rate": 7.85290262033289e-06, + "loss": 0.7882, + "step": 6373 + }, + { + "epoch": 0.3275773460787337, + "grad_norm": 1.0326762199401855, + "learning_rate": 7.852219099102392e-06, + "loss": 0.7373, + "step": 6374 + }, + { + "epoch": 0.3276287388220783, + "grad_norm": 1.1321542263031006, + "learning_rate": 7.851535498848674e-06, + "loss": 0.6985, + "step": 6375 + }, + { + "epoch": 0.32768013156542297, + "grad_norm": 1.1553641557693481, + "learning_rate": 7.850851819590675e-06, + "loss": 0.8503, + "step": 6376 + }, + { + "epoch": 0.3277315243087676, + "grad_norm": 1.089906096458435, + "learning_rate": 7.850168061347336e-06, + "loss": 0.7126, + "step": 6377 + }, + { + "epoch": 0.32778291705211227, + "grad_norm": 1.146667242050171, + "learning_rate": 7.849484224137603e-06, + "loss": 0.8311, + "step": 6378 + }, + { + "epoch": 0.32783430979545686, + "grad_norm": 1.105252981185913, + "learning_rate": 7.84880030798042e-06, + "loss": 0.6919, + "step": 6379 + }, + { + "epoch": 0.3278857025388015, + "grad_norm": 1.1272486448287964, + "learning_rate": 7.848116312894737e-06, + "loss": 0.7897, + "step": 6380 + }, + { + "epoch": 0.32793709528214615, + "grad_norm": 1.0277893543243408, + "learning_rate": 7.847432238899507e-06, + "loss": 0.7866, + "step": 6381 + }, + { + "epoch": 0.3279884880254908, + "grad_norm": 1.1312599182128906, + "learning_rate": 7.846748086013679e-06, + "loss": 0.8072, + "step": 6382 + }, + { + "epoch": 0.32803988076883545, + "grad_norm": 1.1221188306808472, + "learning_rate": 7.846063854256207e-06, + "loss": 0.7906, + "step": 6383 + }, + { + "epoch": 0.3280912735121801, + "grad_norm": 1.190596342086792, + "learning_rate": 7.845379543646053e-06, + "loss": 0.7597, + "step": 6384 + }, + { + "epoch": 0.32814266625552474, + "grad_norm": 1.1072745323181152, + "learning_rate": 7.844695154202173e-06, + "loss": 0.823, + "step": 6385 + }, + { + "epoch": 0.3281940589988694, + "grad_norm": 1.0647591352462769, + "learning_rate": 7.84401068594353e-06, + "loss": 0.7572, + "step": 6386 + }, + { + "epoch": 0.328245451742214, + "grad_norm": 1.1113308668136597, + "learning_rate": 7.843326138889088e-06, + "loss": 0.7667, + "step": 6387 + }, + { + "epoch": 0.3282968444855586, + "grad_norm": 0.7089352607727051, + "learning_rate": 7.842641513057811e-06, + "loss": 0.6799, + "step": 6388 + }, + { + "epoch": 0.3283482372289033, + "grad_norm": 1.0567331314086914, + "learning_rate": 7.84195680846867e-06, + "loss": 0.7672, + "step": 6389 + }, + { + "epoch": 0.3283996299722479, + "grad_norm": 1.1223642826080322, + "learning_rate": 7.841272025140633e-06, + "loss": 0.7921, + "step": 6390 + }, + { + "epoch": 0.32845102271559257, + "grad_norm": 1.1001906394958496, + "learning_rate": 7.840587163092674e-06, + "loss": 0.7513, + "step": 6391 + }, + { + "epoch": 0.3285024154589372, + "grad_norm": 1.064583420753479, + "learning_rate": 7.839902222343767e-06, + "loss": 0.7636, + "step": 6392 + }, + { + "epoch": 0.32855380820228186, + "grad_norm": 0.695277214050293, + "learning_rate": 7.83921720291289e-06, + "loss": 0.6708, + "step": 6393 + }, + { + "epoch": 0.32860520094562645, + "grad_norm": 1.039265513420105, + "learning_rate": 7.838532104819018e-06, + "loss": 0.7441, + "step": 6394 + }, + { + "epoch": 0.3286565936889711, + "grad_norm": 0.7285810112953186, + "learning_rate": 7.837846928081138e-06, + "loss": 0.6646, + "step": 6395 + }, + { + "epoch": 0.32870798643231575, + "grad_norm": 1.0592541694641113, + "learning_rate": 7.83716167271823e-06, + "loss": 0.7316, + "step": 6396 + }, + { + "epoch": 0.3287593791756604, + "grad_norm": 1.15116548538208, + "learning_rate": 7.836476338749282e-06, + "loss": 0.7952, + "step": 6397 + }, + { + "epoch": 0.32881077191900504, + "grad_norm": 1.1327732801437378, + "learning_rate": 7.835790926193277e-06, + "loss": 0.8228, + "step": 6398 + }, + { + "epoch": 0.3288621646623497, + "grad_norm": 1.070181965827942, + "learning_rate": 7.83510543506921e-06, + "loss": 0.7814, + "step": 6399 + }, + { + "epoch": 0.32891355740569433, + "grad_norm": 0.8484385013580322, + "learning_rate": 7.83441986539607e-06, + "loss": 0.7367, + "step": 6400 + }, + { + "epoch": 0.328964950149039, + "grad_norm": 1.1784979104995728, + "learning_rate": 7.833734217192852e-06, + "loss": 0.702, + "step": 6401 + }, + { + "epoch": 0.3290163428923836, + "grad_norm": 0.7750094532966614, + "learning_rate": 7.833048490478556e-06, + "loss": 0.6754, + "step": 6402 + }, + { + "epoch": 0.3290677356357282, + "grad_norm": 0.7292079925537109, + "learning_rate": 7.832362685272176e-06, + "loss": 0.6794, + "step": 6403 + }, + { + "epoch": 0.32911912837907287, + "grad_norm": 1.128514289855957, + "learning_rate": 7.831676801592714e-06, + "loss": 0.7626, + "step": 6404 + }, + { + "epoch": 0.3291705211224175, + "grad_norm": 1.0972299575805664, + "learning_rate": 7.830990839459174e-06, + "loss": 0.7216, + "step": 6405 + }, + { + "epoch": 0.32922191386576216, + "grad_norm": 0.9930311441421509, + "learning_rate": 7.83030479889056e-06, + "loss": 0.7308, + "step": 6406 + }, + { + "epoch": 0.3292733066091068, + "grad_norm": 1.0696057081222534, + "learning_rate": 7.829618679905879e-06, + "loss": 0.7831, + "step": 6407 + }, + { + "epoch": 0.32932469935245146, + "grad_norm": 1.1358466148376465, + "learning_rate": 7.828932482524143e-06, + "loss": 0.739, + "step": 6408 + }, + { + "epoch": 0.32937609209579605, + "grad_norm": 1.0338650941848755, + "learning_rate": 7.828246206764363e-06, + "loss": 0.7882, + "step": 6409 + }, + { + "epoch": 0.3294274848391407, + "grad_norm": 1.081601619720459, + "learning_rate": 7.827559852645549e-06, + "loss": 0.7375, + "step": 6410 + }, + { + "epoch": 0.32947887758248534, + "grad_norm": 1.0984724760055542, + "learning_rate": 7.826873420186725e-06, + "loss": 0.7758, + "step": 6411 + }, + { + "epoch": 0.32953027032583, + "grad_norm": 1.1807386875152588, + "learning_rate": 7.826186909406901e-06, + "loss": 0.7888, + "step": 6412 + }, + { + "epoch": 0.32958166306917464, + "grad_norm": 1.1002141237258911, + "learning_rate": 7.8255003203251e-06, + "loss": 0.781, + "step": 6413 + }, + { + "epoch": 0.3296330558125193, + "grad_norm": 1.0721930265426636, + "learning_rate": 7.824813652960347e-06, + "loss": 0.799, + "step": 6414 + }, + { + "epoch": 0.32968444855586393, + "grad_norm": 1.0996019840240479, + "learning_rate": 7.824126907331666e-06, + "loss": 0.7208, + "step": 6415 + }, + { + "epoch": 0.3297358412992086, + "grad_norm": 1.1063165664672852, + "learning_rate": 7.82344008345808e-06, + "loss": 0.7584, + "step": 6416 + }, + { + "epoch": 0.32978723404255317, + "grad_norm": 1.0351561307907104, + "learning_rate": 7.822753181358623e-06, + "loss": 0.7222, + "step": 6417 + }, + { + "epoch": 0.3298386267858978, + "grad_norm": 1.056374430656433, + "learning_rate": 7.822066201052324e-06, + "loss": 0.7485, + "step": 6418 + }, + { + "epoch": 0.32989001952924246, + "grad_norm": 1.028637409210205, + "learning_rate": 7.821379142558217e-06, + "loss": 0.7249, + "step": 6419 + }, + { + "epoch": 0.3299414122725871, + "grad_norm": 1.0313010215759277, + "learning_rate": 7.820692005895335e-06, + "loss": 0.7259, + "step": 6420 + }, + { + "epoch": 0.32999280501593176, + "grad_norm": 1.0892776250839233, + "learning_rate": 7.82000479108272e-06, + "loss": 0.7456, + "step": 6421 + }, + { + "epoch": 0.3300441977592764, + "grad_norm": 1.0584865808486938, + "learning_rate": 7.819317498139407e-06, + "loss": 0.801, + "step": 6422 + }, + { + "epoch": 0.33009559050262105, + "grad_norm": 0.7319139838218689, + "learning_rate": 7.818630127084443e-06, + "loss": 0.7185, + "step": 6423 + }, + { + "epoch": 0.3301469832459657, + "grad_norm": 1.071027398109436, + "learning_rate": 7.81794267793687e-06, + "loss": 0.788, + "step": 6424 + }, + { + "epoch": 0.3301983759893103, + "grad_norm": 1.078505516052246, + "learning_rate": 7.817255150715734e-06, + "loss": 0.7896, + "step": 6425 + }, + { + "epoch": 0.33024976873265494, + "grad_norm": 0.7777031064033508, + "learning_rate": 7.816567545440084e-06, + "loss": 0.6669, + "step": 6426 + }, + { + "epoch": 0.3303011614759996, + "grad_norm": 0.7845080494880676, + "learning_rate": 7.815879862128972e-06, + "loss": 0.6928, + "step": 6427 + }, + { + "epoch": 0.33035255421934423, + "grad_norm": 1.0688804388046265, + "learning_rate": 7.815192100801446e-06, + "loss": 0.744, + "step": 6428 + }, + { + "epoch": 0.3304039469626889, + "grad_norm": 1.1140738725662231, + "learning_rate": 7.814504261476567e-06, + "loss": 0.7594, + "step": 6429 + }, + { + "epoch": 0.3304553397060335, + "grad_norm": 1.0074553489685059, + "learning_rate": 7.813816344173388e-06, + "loss": 0.7566, + "step": 6430 + }, + { + "epoch": 0.33050673244937817, + "grad_norm": 0.8538505434989929, + "learning_rate": 7.813128348910972e-06, + "loss": 0.718, + "step": 6431 + }, + { + "epoch": 0.33055812519272276, + "grad_norm": 1.0806833505630493, + "learning_rate": 7.812440275708377e-06, + "loss": 0.7082, + "step": 6432 + }, + { + "epoch": 0.3306095179360674, + "grad_norm": 1.1388258934020996, + "learning_rate": 7.81175212458467e-06, + "loss": 0.7251, + "step": 6433 + }, + { + "epoch": 0.33066091067941206, + "grad_norm": 1.0561637878417969, + "learning_rate": 7.811063895558913e-06, + "loss": 0.7394, + "step": 6434 + }, + { + "epoch": 0.3307123034227567, + "grad_norm": 1.701047658920288, + "learning_rate": 7.810375588650177e-06, + "loss": 0.7975, + "step": 6435 + }, + { + "epoch": 0.33076369616610135, + "grad_norm": 1.039394497871399, + "learning_rate": 7.809687203877532e-06, + "loss": 0.6964, + "step": 6436 + }, + { + "epoch": 0.330815088909446, + "grad_norm": 0.9989181160926819, + "learning_rate": 7.808998741260047e-06, + "loss": 0.7726, + "step": 6437 + }, + { + "epoch": 0.33086648165279064, + "grad_norm": 1.0428011417388916, + "learning_rate": 7.8083102008168e-06, + "loss": 0.7043, + "step": 6438 + }, + { + "epoch": 0.3309178743961353, + "grad_norm": 1.0471765995025635, + "learning_rate": 7.80762158256687e-06, + "loss": 0.7559, + "step": 6439 + }, + { + "epoch": 0.3309692671394799, + "grad_norm": 0.8309373259544373, + "learning_rate": 7.806932886529328e-06, + "loss": 0.6748, + "step": 6440 + }, + { + "epoch": 0.33102065988282453, + "grad_norm": 1.0479280948638916, + "learning_rate": 7.80624411272326e-06, + "loss": 0.798, + "step": 6441 + }, + { + "epoch": 0.3310720526261692, + "grad_norm": 1.1494965553283691, + "learning_rate": 7.80555526116775e-06, + "loss": 0.7014, + "step": 6442 + }, + { + "epoch": 0.3311234453695138, + "grad_norm": 1.1162176132202148, + "learning_rate": 7.80486633188188e-06, + "loss": 0.8006, + "step": 6443 + }, + { + "epoch": 0.33117483811285847, + "grad_norm": 1.1520477533340454, + "learning_rate": 7.80417732488474e-06, + "loss": 0.7906, + "step": 6444 + }, + { + "epoch": 0.3312262308562031, + "grad_norm": 0.8363535404205322, + "learning_rate": 7.803488240195418e-06, + "loss": 0.6661, + "step": 6445 + }, + { + "epoch": 0.33127762359954777, + "grad_norm": 1.0864309072494507, + "learning_rate": 7.802799077833005e-06, + "loss": 0.7539, + "step": 6446 + }, + { + "epoch": 0.33132901634289236, + "grad_norm": 1.0707521438598633, + "learning_rate": 7.802109837816598e-06, + "loss": 0.7461, + "step": 6447 + }, + { + "epoch": 0.331380409086237, + "grad_norm": 1.1968246698379517, + "learning_rate": 7.80142052016529e-06, + "loss": 0.7807, + "step": 6448 + }, + { + "epoch": 0.33143180182958165, + "grad_norm": 1.0774612426757812, + "learning_rate": 7.80073112489818e-06, + "loss": 0.7502, + "step": 6449 + }, + { + "epoch": 0.3314831945729263, + "grad_norm": 1.079773187637329, + "learning_rate": 7.800041652034369e-06, + "loss": 0.7572, + "step": 6450 + }, + { + "epoch": 0.33153458731627095, + "grad_norm": 1.1255322694778442, + "learning_rate": 7.799352101592958e-06, + "loss": 0.812, + "step": 6451 + }, + { + "epoch": 0.3315859800596156, + "grad_norm": 1.0406098365783691, + "learning_rate": 7.798662473593054e-06, + "loss": 0.738, + "step": 6452 + }, + { + "epoch": 0.33163737280296024, + "grad_norm": 0.8371950387954712, + "learning_rate": 7.797972768053761e-06, + "loss": 0.6757, + "step": 6453 + }, + { + "epoch": 0.3316887655463049, + "grad_norm": 1.1555914878845215, + "learning_rate": 7.79728298499419e-06, + "loss": 0.7337, + "step": 6454 + }, + { + "epoch": 0.3317401582896495, + "grad_norm": 1.041063904762268, + "learning_rate": 7.796593124433451e-06, + "loss": 0.8317, + "step": 6455 + }, + { + "epoch": 0.3317915510329941, + "grad_norm": 0.7440322637557983, + "learning_rate": 7.795903186390656e-06, + "loss": 0.7236, + "step": 6456 + }, + { + "epoch": 0.3318429437763388, + "grad_norm": 1.1185075044631958, + "learning_rate": 7.795213170884923e-06, + "loss": 0.7827, + "step": 6457 + }, + { + "epoch": 0.3318943365196834, + "grad_norm": 1.0305125713348389, + "learning_rate": 7.79452307793537e-06, + "loss": 0.7497, + "step": 6458 + }, + { + "epoch": 0.33194572926302807, + "grad_norm": 1.0552159547805786, + "learning_rate": 7.793832907561113e-06, + "loss": 0.7932, + "step": 6459 + }, + { + "epoch": 0.3319971220063727, + "grad_norm": 0.8533280491828918, + "learning_rate": 7.793142659781274e-06, + "loss": 0.7078, + "step": 6460 + }, + { + "epoch": 0.33204851474971736, + "grad_norm": 0.72590571641922, + "learning_rate": 7.792452334614981e-06, + "loss": 0.7149, + "step": 6461 + }, + { + "epoch": 0.33209990749306195, + "grad_norm": 1.1133724451065063, + "learning_rate": 7.791761932081356e-06, + "loss": 0.7286, + "step": 6462 + }, + { + "epoch": 0.3321513002364066, + "grad_norm": 1.209977388381958, + "learning_rate": 7.79107145219953e-06, + "loss": 0.8303, + "step": 6463 + }, + { + "epoch": 0.33220269297975125, + "grad_norm": 0.7041590213775635, + "learning_rate": 7.790380894988632e-06, + "loss": 0.698, + "step": 6464 + }, + { + "epoch": 0.3322540857230959, + "grad_norm": 1.0389889478683472, + "learning_rate": 7.789690260467793e-06, + "loss": 0.6747, + "step": 6465 + }, + { + "epoch": 0.33230547846644054, + "grad_norm": 1.145019769668579, + "learning_rate": 7.78899954865615e-06, + "loss": 0.734, + "step": 6466 + }, + { + "epoch": 0.3323568712097852, + "grad_norm": 1.0577338933944702, + "learning_rate": 7.78830875957284e-06, + "loss": 0.7551, + "step": 6467 + }, + { + "epoch": 0.33240826395312983, + "grad_norm": 1.0348252058029175, + "learning_rate": 7.787617893237e-06, + "loss": 0.7723, + "step": 6468 + }, + { + "epoch": 0.3324596566964745, + "grad_norm": 1.1440144777297974, + "learning_rate": 7.786926949667773e-06, + "loss": 0.7815, + "step": 6469 + }, + { + "epoch": 0.3325110494398191, + "grad_norm": 0.9907153844833374, + "learning_rate": 7.7862359288843e-06, + "loss": 0.7149, + "step": 6470 + }, + { + "epoch": 0.3325624421831637, + "grad_norm": 1.0473151206970215, + "learning_rate": 7.785544830905729e-06, + "loss": 0.7489, + "step": 6471 + }, + { + "epoch": 0.33261383492650837, + "grad_norm": 1.0405088663101196, + "learning_rate": 7.784853655751205e-06, + "loss": 0.7444, + "step": 6472 + }, + { + "epoch": 0.332665227669853, + "grad_norm": 1.0650453567504883, + "learning_rate": 7.784162403439877e-06, + "loss": 0.7469, + "step": 6473 + }, + { + "epoch": 0.33271662041319766, + "grad_norm": 1.0978261232376099, + "learning_rate": 7.783471073990901e-06, + "loss": 0.7974, + "step": 6474 + }, + { + "epoch": 0.3327680131565423, + "grad_norm": 0.7830113768577576, + "learning_rate": 7.782779667423428e-06, + "loss": 0.6755, + "step": 6475 + }, + { + "epoch": 0.33281940589988696, + "grad_norm": 1.2977112531661987, + "learning_rate": 7.782088183756613e-06, + "loss": 0.8099, + "step": 6476 + }, + { + "epoch": 0.3328707986432316, + "grad_norm": 1.103999376296997, + "learning_rate": 7.781396623009616e-06, + "loss": 0.7321, + "step": 6477 + }, + { + "epoch": 0.3329221913865762, + "grad_norm": 1.0974771976470947, + "learning_rate": 7.780704985201598e-06, + "loss": 0.746, + "step": 6478 + }, + { + "epoch": 0.33297358412992084, + "grad_norm": 1.1084760427474976, + "learning_rate": 7.780013270351718e-06, + "loss": 0.7849, + "step": 6479 + }, + { + "epoch": 0.3330249768732655, + "grad_norm": 1.0800585746765137, + "learning_rate": 7.779321478479143e-06, + "loss": 0.7429, + "step": 6480 + }, + { + "epoch": 0.33307636961661014, + "grad_norm": 1.0699074268341064, + "learning_rate": 7.77862960960304e-06, + "loss": 0.7638, + "step": 6481 + }, + { + "epoch": 0.3331277623599548, + "grad_norm": 1.0960007905960083, + "learning_rate": 7.777937663742578e-06, + "loss": 0.7763, + "step": 6482 + }, + { + "epoch": 0.33317915510329943, + "grad_norm": 1.0531806945800781, + "learning_rate": 7.777245640916926e-06, + "loss": 0.7773, + "step": 6483 + }, + { + "epoch": 0.3332305478466441, + "grad_norm": 0.9032198190689087, + "learning_rate": 7.77655354114526e-06, + "loss": 0.7, + "step": 6484 + }, + { + "epoch": 0.33328194058998867, + "grad_norm": 1.0535330772399902, + "learning_rate": 7.775861364446752e-06, + "loss": 0.7447, + "step": 6485 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 1.073972225189209, + "learning_rate": 7.77516911084058e-06, + "loss": 0.7947, + "step": 6486 + }, + { + "epoch": 0.33338472607667796, + "grad_norm": 0.7861914038658142, + "learning_rate": 7.774476780345926e-06, + "loss": 0.739, + "step": 6487 + }, + { + "epoch": 0.3334361188200226, + "grad_norm": 0.9564030170440674, + "learning_rate": 7.77378437298197e-06, + "loss": 0.742, + "step": 6488 + }, + { + "epoch": 0.33348751156336726, + "grad_norm": 1.1033538579940796, + "learning_rate": 7.773091888767894e-06, + "loss": 0.7681, + "step": 6489 + }, + { + "epoch": 0.3335389043067119, + "grad_norm": 1.0671360492706299, + "learning_rate": 7.772399327722888e-06, + "loss": 0.7397, + "step": 6490 + }, + { + "epoch": 0.33359029705005655, + "grad_norm": 0.7774039506912231, + "learning_rate": 7.771706689866137e-06, + "loss": 0.6806, + "step": 6491 + }, + { + "epoch": 0.3336416897934012, + "grad_norm": 1.1928331851959229, + "learning_rate": 7.77101397521683e-06, + "loss": 0.7744, + "step": 6492 + }, + { + "epoch": 0.3336930825367458, + "grad_norm": 1.020980954170227, + "learning_rate": 7.770321183794164e-06, + "loss": 0.7454, + "step": 6493 + }, + { + "epoch": 0.33374447528009044, + "grad_norm": 0.8944103121757507, + "learning_rate": 7.769628315617328e-06, + "loss": 0.7115, + "step": 6494 + }, + { + "epoch": 0.3337958680234351, + "grad_norm": 0.6898053884506226, + "learning_rate": 7.768935370705523e-06, + "loss": 0.6738, + "step": 6495 + }, + { + "epoch": 0.33384726076677973, + "grad_norm": 1.0660370588302612, + "learning_rate": 7.768242349077944e-06, + "loss": 0.7005, + "step": 6496 + }, + { + "epoch": 0.3338986535101244, + "grad_norm": 1.1497564315795898, + "learning_rate": 7.767549250753793e-06, + "loss": 0.7826, + "step": 6497 + }, + { + "epoch": 0.333950046253469, + "grad_norm": 1.0521914958953857, + "learning_rate": 7.766856075752276e-06, + "loss": 0.7165, + "step": 6498 + }, + { + "epoch": 0.33400143899681367, + "grad_norm": 0.9812231659889221, + "learning_rate": 7.766162824092593e-06, + "loss": 0.6942, + "step": 6499 + }, + { + "epoch": 0.33405283174015826, + "grad_norm": 1.1092708110809326, + "learning_rate": 7.765469495793953e-06, + "loss": 0.7253, + "step": 6500 + }, + { + "epoch": 0.3341042244835029, + "grad_norm": 1.0689315795898438, + "learning_rate": 7.764776090875566e-06, + "loss": 0.7609, + "step": 6501 + }, + { + "epoch": 0.33415561722684756, + "grad_norm": 1.1116224527359009, + "learning_rate": 7.764082609356644e-06, + "loss": 0.8015, + "step": 6502 + }, + { + "epoch": 0.3342070099701922, + "grad_norm": 1.068313717842102, + "learning_rate": 7.763389051256399e-06, + "loss": 0.7782, + "step": 6503 + }, + { + "epoch": 0.33425840271353685, + "grad_norm": 1.153111219406128, + "learning_rate": 7.762695416594048e-06, + "loss": 0.7531, + "step": 6504 + }, + { + "epoch": 0.3343097954568815, + "grad_norm": 0.7740160226821899, + "learning_rate": 7.762001705388807e-06, + "loss": 0.7261, + "step": 6505 + }, + { + "epoch": 0.33436118820022614, + "grad_norm": 1.1248310804367065, + "learning_rate": 7.761307917659897e-06, + "loss": 0.8001, + "step": 6506 + }, + { + "epoch": 0.3344125809435708, + "grad_norm": 1.078467845916748, + "learning_rate": 7.76061405342654e-06, + "loss": 0.7407, + "step": 6507 + }, + { + "epoch": 0.3344639736869154, + "grad_norm": 1.1185346841812134, + "learning_rate": 7.759920112707959e-06, + "loss": 0.7744, + "step": 6508 + }, + { + "epoch": 0.33451536643026003, + "grad_norm": 1.1144089698791504, + "learning_rate": 7.759226095523385e-06, + "loss": 0.7316, + "step": 6509 + }, + { + "epoch": 0.3345667591736047, + "grad_norm": 1.2007198333740234, + "learning_rate": 7.758532001892039e-06, + "loss": 0.7768, + "step": 6510 + }, + { + "epoch": 0.3346181519169493, + "grad_norm": 1.033096432685852, + "learning_rate": 7.757837831833155e-06, + "loss": 0.7943, + "step": 6511 + }, + { + "epoch": 0.33466954466029397, + "grad_norm": 1.1185754537582397, + "learning_rate": 7.757143585365967e-06, + "loss": 0.8029, + "step": 6512 + }, + { + "epoch": 0.3347209374036386, + "grad_norm": 1.06898832321167, + "learning_rate": 7.756449262509706e-06, + "loss": 0.7296, + "step": 6513 + }, + { + "epoch": 0.33477233014698327, + "grad_norm": 1.115201711654663, + "learning_rate": 7.755754863283614e-06, + "loss": 0.7986, + "step": 6514 + }, + { + "epoch": 0.3348237228903279, + "grad_norm": 1.1190185546875, + "learning_rate": 7.755060387706926e-06, + "loss": 0.7463, + "step": 6515 + }, + { + "epoch": 0.3348751156336725, + "grad_norm": 1.111128568649292, + "learning_rate": 7.754365835798884e-06, + "loss": 0.7935, + "step": 6516 + }, + { + "epoch": 0.33492650837701715, + "grad_norm": 1.0540839433670044, + "learning_rate": 7.753671207578731e-06, + "loss": 0.7794, + "step": 6517 + }, + { + "epoch": 0.3349779011203618, + "grad_norm": 1.5448687076568604, + "learning_rate": 7.752976503065713e-06, + "loss": 0.7345, + "step": 6518 + }, + { + "epoch": 0.33502929386370645, + "grad_norm": 1.1589034795761108, + "learning_rate": 7.752281722279077e-06, + "loss": 0.7302, + "step": 6519 + }, + { + "epoch": 0.3350806866070511, + "grad_norm": 1.0417239665985107, + "learning_rate": 7.751586865238073e-06, + "loss": 0.7167, + "step": 6520 + }, + { + "epoch": 0.33513207935039574, + "grad_norm": 1.1238305568695068, + "learning_rate": 7.75089193196195e-06, + "loss": 0.7765, + "step": 6521 + }, + { + "epoch": 0.3351834720937404, + "grad_norm": 0.8735576868057251, + "learning_rate": 7.750196922469966e-06, + "loss": 0.6691, + "step": 6522 + }, + { + "epoch": 0.335234864837085, + "grad_norm": 1.110530138015747, + "learning_rate": 7.749501836781374e-06, + "loss": 0.7944, + "step": 6523 + }, + { + "epoch": 0.3352862575804296, + "grad_norm": 1.03462553024292, + "learning_rate": 7.748806674915433e-06, + "loss": 0.7473, + "step": 6524 + }, + { + "epoch": 0.3353376503237743, + "grad_norm": 0.9000665545463562, + "learning_rate": 7.748111436891401e-06, + "loss": 0.7145, + "step": 6525 + }, + { + "epoch": 0.3353890430671189, + "grad_norm": 1.0227618217468262, + "learning_rate": 7.747416122728544e-06, + "loss": 0.7596, + "step": 6526 + }, + { + "epoch": 0.33544043581046357, + "grad_norm": 1.0274015665054321, + "learning_rate": 7.746720732446123e-06, + "loss": 0.7805, + "step": 6527 + }, + { + "epoch": 0.3354918285538082, + "grad_norm": 1.029577374458313, + "learning_rate": 7.746025266063406e-06, + "loss": 0.7699, + "step": 6528 + }, + { + "epoch": 0.33554322129715286, + "grad_norm": 1.1308836936950684, + "learning_rate": 7.74532972359966e-06, + "loss": 0.7941, + "step": 6529 + }, + { + "epoch": 0.3355946140404975, + "grad_norm": 0.8033400774002075, + "learning_rate": 7.744634105074158e-06, + "loss": 0.7163, + "step": 6530 + }, + { + "epoch": 0.3356460067838421, + "grad_norm": 1.0922857522964478, + "learning_rate": 7.74393841050617e-06, + "loss": 0.7899, + "step": 6531 + }, + { + "epoch": 0.33569739952718675, + "grad_norm": 0.7340012788772583, + "learning_rate": 7.743242639914974e-06, + "loss": 0.6737, + "step": 6532 + }, + { + "epoch": 0.3357487922705314, + "grad_norm": 1.0350805521011353, + "learning_rate": 7.742546793319844e-06, + "loss": 0.7083, + "step": 6533 + }, + { + "epoch": 0.33580018501387604, + "grad_norm": 1.076413631439209, + "learning_rate": 7.74185087074006e-06, + "loss": 0.7588, + "step": 6534 + }, + { + "epoch": 0.3358515777572207, + "grad_norm": 1.0699633359909058, + "learning_rate": 7.741154872194905e-06, + "loss": 0.7328, + "step": 6535 + }, + { + "epoch": 0.33590297050056533, + "grad_norm": 0.7075090408325195, + "learning_rate": 7.740458797703658e-06, + "loss": 0.6978, + "step": 6536 + }, + { + "epoch": 0.33595436324391, + "grad_norm": 1.016798973083496, + "learning_rate": 7.73976264728561e-06, + "loss": 0.733, + "step": 6537 + }, + { + "epoch": 0.3360057559872546, + "grad_norm": 1.0697447061538696, + "learning_rate": 7.739066420960042e-06, + "loss": 0.787, + "step": 6538 + }, + { + "epoch": 0.3360571487305992, + "grad_norm": 1.019687533378601, + "learning_rate": 7.738370118746248e-06, + "loss": 0.7474, + "step": 6539 + }, + { + "epoch": 0.33610854147394387, + "grad_norm": 1.1537458896636963, + "learning_rate": 7.73767374066352e-06, + "loss": 0.7492, + "step": 6540 + }, + { + "epoch": 0.3361599342172885, + "grad_norm": 1.0208557844161987, + "learning_rate": 7.73697728673115e-06, + "loss": 0.7001, + "step": 6541 + }, + { + "epoch": 0.33621132696063316, + "grad_norm": 1.057917833328247, + "learning_rate": 7.736280756968433e-06, + "loss": 0.7946, + "step": 6542 + }, + { + "epoch": 0.3362627197039778, + "grad_norm": 1.049900770187378, + "learning_rate": 7.73558415139467e-06, + "loss": 0.7571, + "step": 6543 + }, + { + "epoch": 0.33631411244732246, + "grad_norm": 1.0712857246398926, + "learning_rate": 7.734887470029157e-06, + "loss": 0.8518, + "step": 6544 + }, + { + "epoch": 0.3363655051906671, + "grad_norm": 1.0285158157348633, + "learning_rate": 7.7341907128912e-06, + "loss": 0.7323, + "step": 6545 + }, + { + "epoch": 0.3364168979340117, + "grad_norm": 1.0724751949310303, + "learning_rate": 7.733493880000102e-06, + "loss": 0.7728, + "step": 6546 + }, + { + "epoch": 0.33646829067735634, + "grad_norm": 1.0794790983200073, + "learning_rate": 7.732796971375167e-06, + "loss": 0.776, + "step": 6547 + }, + { + "epoch": 0.336519683420701, + "grad_norm": 1.0515930652618408, + "learning_rate": 7.732099987035707e-06, + "loss": 0.792, + "step": 6548 + }, + { + "epoch": 0.33657107616404563, + "grad_norm": 1.1038976907730103, + "learning_rate": 7.731402927001032e-06, + "loss": 0.7361, + "step": 6549 + }, + { + "epoch": 0.3366224689073903, + "grad_norm": 0.7011096477508545, + "learning_rate": 7.730705791290452e-06, + "loss": 0.705, + "step": 6550 + }, + { + "epoch": 0.33667386165073493, + "grad_norm": 1.0449178218841553, + "learning_rate": 7.730008579923285e-06, + "loss": 0.7841, + "step": 6551 + }, + { + "epoch": 0.3367252543940796, + "grad_norm": 1.1194484233856201, + "learning_rate": 7.729311292918844e-06, + "loss": 0.8356, + "step": 6552 + }, + { + "epoch": 0.3367766471374242, + "grad_norm": 1.0231194496154785, + "learning_rate": 7.728613930296452e-06, + "loss": 0.7613, + "step": 6553 + }, + { + "epoch": 0.3368280398807688, + "grad_norm": 1.1133205890655518, + "learning_rate": 7.727916492075429e-06, + "loss": 0.7492, + "step": 6554 + }, + { + "epoch": 0.33687943262411346, + "grad_norm": 1.065483808517456, + "learning_rate": 7.727218978275095e-06, + "loss": 0.7644, + "step": 6555 + }, + { + "epoch": 0.3369308253674581, + "grad_norm": 1.0070241689682007, + "learning_rate": 7.72652138891478e-06, + "loss": 0.7781, + "step": 6556 + }, + { + "epoch": 0.33698221811080276, + "grad_norm": 1.058231234550476, + "learning_rate": 7.725823724013808e-06, + "loss": 0.7534, + "step": 6557 + }, + { + "epoch": 0.3370336108541474, + "grad_norm": 1.1050045490264893, + "learning_rate": 7.725125983591511e-06, + "loss": 0.7883, + "step": 6558 + }, + { + "epoch": 0.33708500359749205, + "grad_norm": 1.0163242816925049, + "learning_rate": 7.724428167667216e-06, + "loss": 0.7581, + "step": 6559 + }, + { + "epoch": 0.3371363963408367, + "grad_norm": 1.0442469120025635, + "learning_rate": 7.723730276260262e-06, + "loss": 0.7913, + "step": 6560 + }, + { + "epoch": 0.3371877890841813, + "grad_norm": 1.0541136264801025, + "learning_rate": 7.723032309389981e-06, + "loss": 0.7769, + "step": 6561 + }, + { + "epoch": 0.33723918182752594, + "grad_norm": 1.15898859500885, + "learning_rate": 7.722334267075713e-06, + "loss": 0.8122, + "step": 6562 + }, + { + "epoch": 0.3372905745708706, + "grad_norm": 1.0498425960540771, + "learning_rate": 7.721636149336795e-06, + "loss": 0.8236, + "step": 6563 + }, + { + "epoch": 0.33734196731421523, + "grad_norm": 1.058797836303711, + "learning_rate": 7.72093795619257e-06, + "loss": 0.7305, + "step": 6564 + }, + { + "epoch": 0.3373933600575599, + "grad_norm": 1.0965588092803955, + "learning_rate": 7.720239687662388e-06, + "loss": 0.7691, + "step": 6565 + }, + { + "epoch": 0.3374447528009045, + "grad_norm": 1.0800951719284058, + "learning_rate": 7.719541343765586e-06, + "loss": 0.7693, + "step": 6566 + }, + { + "epoch": 0.33749614554424917, + "grad_norm": 1.0936470031738281, + "learning_rate": 7.718842924521516e-06, + "loss": 0.7813, + "step": 6567 + }, + { + "epoch": 0.3375475382875938, + "grad_norm": 1.066245675086975, + "learning_rate": 7.71814442994953e-06, + "loss": 0.7072, + "step": 6568 + }, + { + "epoch": 0.3375989310309384, + "grad_norm": 1.1159350872039795, + "learning_rate": 7.717445860068979e-06, + "loss": 0.7107, + "step": 6569 + }, + { + "epoch": 0.33765032377428306, + "grad_norm": 1.117701530456543, + "learning_rate": 7.716747214899217e-06, + "loss": 0.6961, + "step": 6570 + }, + { + "epoch": 0.3377017165176277, + "grad_norm": 0.7588281035423279, + "learning_rate": 7.716048494459601e-06, + "loss": 0.6804, + "step": 6571 + }, + { + "epoch": 0.33775310926097235, + "grad_norm": 1.1264597177505493, + "learning_rate": 7.71534969876949e-06, + "loss": 0.7958, + "step": 6572 + }, + { + "epoch": 0.337804502004317, + "grad_norm": 1.1038175821304321, + "learning_rate": 7.714650827848245e-06, + "loss": 0.7376, + "step": 6573 + }, + { + "epoch": 0.33785589474766164, + "grad_norm": 0.7939095497131348, + "learning_rate": 7.713951881715227e-06, + "loss": 0.7041, + "step": 6574 + }, + { + "epoch": 0.3379072874910063, + "grad_norm": 0.7537832856178284, + "learning_rate": 7.713252860389803e-06, + "loss": 0.6624, + "step": 6575 + }, + { + "epoch": 0.3379586802343509, + "grad_norm": 1.0547189712524414, + "learning_rate": 7.71255376389134e-06, + "loss": 0.7439, + "step": 6576 + }, + { + "epoch": 0.33801007297769553, + "grad_norm": 0.7780723571777344, + "learning_rate": 7.711854592239203e-06, + "loss": 0.6928, + "step": 6577 + }, + { + "epoch": 0.3380614657210402, + "grad_norm": 1.0735018253326416, + "learning_rate": 7.711155345452769e-06, + "loss": 0.8039, + "step": 6578 + }, + { + "epoch": 0.3381128584643848, + "grad_norm": 0.7522705793380737, + "learning_rate": 7.710456023551409e-06, + "loss": 0.706, + "step": 6579 + }, + { + "epoch": 0.33816425120772947, + "grad_norm": 1.1249234676361084, + "learning_rate": 7.709756626554496e-06, + "loss": 0.7497, + "step": 6580 + }, + { + "epoch": 0.3382156439510741, + "grad_norm": 1.1204546689987183, + "learning_rate": 7.709057154481413e-06, + "loss": 0.7495, + "step": 6581 + }, + { + "epoch": 0.33826703669441877, + "grad_norm": 1.0404729843139648, + "learning_rate": 7.70835760735153e-06, + "loss": 0.741, + "step": 6582 + }, + { + "epoch": 0.3383184294377634, + "grad_norm": 1.0971697568893433, + "learning_rate": 7.70765798518424e-06, + "loss": 0.752, + "step": 6583 + }, + { + "epoch": 0.338369822181108, + "grad_norm": 1.1227312088012695, + "learning_rate": 7.706958287998918e-06, + "loss": 0.8168, + "step": 6584 + }, + { + "epoch": 0.33842121492445265, + "grad_norm": 1.063349723815918, + "learning_rate": 7.706258515814953e-06, + "loss": 0.8158, + "step": 6585 + }, + { + "epoch": 0.3384726076677973, + "grad_norm": 1.0252267122268677, + "learning_rate": 7.705558668651733e-06, + "loss": 0.7443, + "step": 6586 + }, + { + "epoch": 0.33852400041114195, + "grad_norm": 1.0455039739608765, + "learning_rate": 7.704858746528649e-06, + "loss": 0.7674, + "step": 6587 + }, + { + "epoch": 0.3385753931544866, + "grad_norm": 1.143568754196167, + "learning_rate": 7.70415874946509e-06, + "loss": 0.7628, + "step": 6588 + }, + { + "epoch": 0.33862678589783124, + "grad_norm": 1.0962506532669067, + "learning_rate": 7.703458677480451e-06, + "loss": 0.7511, + "step": 6589 + }, + { + "epoch": 0.3386781786411759, + "grad_norm": 0.7368639707565308, + "learning_rate": 7.70275853059413e-06, + "loss": 0.6692, + "step": 6590 + }, + { + "epoch": 0.3387295713845205, + "grad_norm": 1.1238256692886353, + "learning_rate": 7.702058308825523e-06, + "loss": 0.7117, + "step": 6591 + }, + { + "epoch": 0.3387809641278651, + "grad_norm": 1.0725135803222656, + "learning_rate": 7.701358012194031e-06, + "loss": 0.735, + "step": 6592 + }, + { + "epoch": 0.33883235687120977, + "grad_norm": 1.1122689247131348, + "learning_rate": 7.700657640719055e-06, + "loss": 0.7568, + "step": 6593 + }, + { + "epoch": 0.3388837496145544, + "grad_norm": 1.095253348350525, + "learning_rate": 7.699957194420004e-06, + "loss": 0.7471, + "step": 6594 + }, + { + "epoch": 0.33893514235789907, + "grad_norm": 0.7063501477241516, + "learning_rate": 7.699256673316277e-06, + "loss": 0.7085, + "step": 6595 + }, + { + "epoch": 0.3389865351012437, + "grad_norm": 1.101936936378479, + "learning_rate": 7.698556077427291e-06, + "loss": 0.7814, + "step": 6596 + }, + { + "epoch": 0.33903792784458836, + "grad_norm": 1.0311704874038696, + "learning_rate": 7.69785540677245e-06, + "loss": 0.6829, + "step": 6597 + }, + { + "epoch": 0.339089320587933, + "grad_norm": 1.50922691822052, + "learning_rate": 7.69715466137117e-06, + "loss": 0.7658, + "step": 6598 + }, + { + "epoch": 0.3391407133312776, + "grad_norm": 1.0183446407318115, + "learning_rate": 7.696453841242863e-06, + "loss": 0.7819, + "step": 6599 + }, + { + "epoch": 0.33919210607462225, + "grad_norm": 0.956010103225708, + "learning_rate": 7.69575294640695e-06, + "loss": 0.7294, + "step": 6600 + }, + { + "epoch": 0.3392434988179669, + "grad_norm": 1.0670047998428345, + "learning_rate": 7.695051976882845e-06, + "loss": 0.7691, + "step": 6601 + }, + { + "epoch": 0.33929489156131154, + "grad_norm": 1.0840113162994385, + "learning_rate": 7.694350932689974e-06, + "loss": 0.7746, + "step": 6602 + }, + { + "epoch": 0.3393462843046562, + "grad_norm": 0.7704764604568481, + "learning_rate": 7.693649813847756e-06, + "loss": 0.6765, + "step": 6603 + }, + { + "epoch": 0.33939767704800083, + "grad_norm": 1.0854947566986084, + "learning_rate": 7.69294862037562e-06, + "loss": 0.719, + "step": 6604 + }, + { + "epoch": 0.3394490697913455, + "grad_norm": 1.0851550102233887, + "learning_rate": 7.69224735229299e-06, + "loss": 0.7043, + "step": 6605 + }, + { + "epoch": 0.33950046253469013, + "grad_norm": 1.1060842275619507, + "learning_rate": 7.691546009619292e-06, + "loss": 0.8084, + "step": 6606 + }, + { + "epoch": 0.3395518552780347, + "grad_norm": 1.1430354118347168, + "learning_rate": 7.690844592373967e-06, + "loss": 0.7344, + "step": 6607 + }, + { + "epoch": 0.33960324802137937, + "grad_norm": 0.7923489212989807, + "learning_rate": 7.69014310057644e-06, + "loss": 0.7025, + "step": 6608 + }, + { + "epoch": 0.339654640764724, + "grad_norm": 4.38373327255249, + "learning_rate": 7.68944153424615e-06, + "loss": 0.9484, + "step": 6609 + }, + { + "epoch": 0.33970603350806866, + "grad_norm": 1.0721818208694458, + "learning_rate": 7.688739893402532e-06, + "loss": 0.7739, + "step": 6610 + }, + { + "epoch": 0.3397574262514133, + "grad_norm": 0.8866074085235596, + "learning_rate": 7.688038178065029e-06, + "loss": 0.6952, + "step": 6611 + }, + { + "epoch": 0.33980881899475796, + "grad_norm": 1.0030882358551025, + "learning_rate": 7.687336388253081e-06, + "loss": 0.7384, + "step": 6612 + }, + { + "epoch": 0.3398602117381026, + "grad_norm": 1.0326414108276367, + "learning_rate": 7.686634523986133e-06, + "loss": 0.758, + "step": 6613 + }, + { + "epoch": 0.3399116044814472, + "grad_norm": 0.925369381904602, + "learning_rate": 7.685932585283627e-06, + "loss": 0.6654, + "step": 6614 + }, + { + "epoch": 0.33996299722479184, + "grad_norm": 0.7294013500213623, + "learning_rate": 7.685230572165014e-06, + "loss": 0.6803, + "step": 6615 + }, + { + "epoch": 0.3400143899681365, + "grad_norm": 1.0630218982696533, + "learning_rate": 7.684528484649744e-06, + "loss": 0.765, + "step": 6616 + }, + { + "epoch": 0.34006578271148113, + "grad_norm": 1.0217283964157104, + "learning_rate": 7.683826322757268e-06, + "loss": 0.726, + "step": 6617 + }, + { + "epoch": 0.3401171754548258, + "grad_norm": 1.120306372642517, + "learning_rate": 7.68312408650704e-06, + "loss": 0.7658, + "step": 6618 + }, + { + "epoch": 0.34016856819817043, + "grad_norm": 0.7621231079101562, + "learning_rate": 7.682421775918514e-06, + "loss": 0.6936, + "step": 6619 + }, + { + "epoch": 0.3402199609415151, + "grad_norm": 1.1259628534317017, + "learning_rate": 7.681719391011154e-06, + "loss": 0.7965, + "step": 6620 + }, + { + "epoch": 0.3402713536848597, + "grad_norm": 1.0145872831344604, + "learning_rate": 7.681016931804413e-06, + "loss": 0.8164, + "step": 6621 + }, + { + "epoch": 0.3403227464282043, + "grad_norm": 0.9857984781265259, + "learning_rate": 7.680314398317758e-06, + "loss": 0.7505, + "step": 6622 + }, + { + "epoch": 0.34037413917154896, + "grad_norm": 1.232672095298767, + "learning_rate": 7.679611790570653e-06, + "loss": 0.8069, + "step": 6623 + }, + { + "epoch": 0.3404255319148936, + "grad_norm": 1.0144422054290771, + "learning_rate": 7.678909108582561e-06, + "loss": 0.8147, + "step": 6624 + }, + { + "epoch": 0.34047692465823826, + "grad_norm": 1.0247306823730469, + "learning_rate": 7.678206352372955e-06, + "loss": 0.7364, + "step": 6625 + }, + { + "epoch": 0.3405283174015829, + "grad_norm": 1.0642890930175781, + "learning_rate": 7.677503521961303e-06, + "loss": 0.758, + "step": 6626 + }, + { + "epoch": 0.34057971014492755, + "grad_norm": 1.030613660812378, + "learning_rate": 7.676800617367078e-06, + "loss": 0.7377, + "step": 6627 + }, + { + "epoch": 0.3406311028882722, + "grad_norm": 1.0734517574310303, + "learning_rate": 7.676097638609753e-06, + "loss": 0.7733, + "step": 6628 + }, + { + "epoch": 0.3406824956316168, + "grad_norm": 1.098297119140625, + "learning_rate": 7.675394585708807e-06, + "loss": 0.7939, + "step": 6629 + }, + { + "epoch": 0.34073388837496144, + "grad_norm": 1.0911213159561157, + "learning_rate": 7.674691458683718e-06, + "loss": 0.7609, + "step": 6630 + }, + { + "epoch": 0.3407852811183061, + "grad_norm": 1.0663267374038696, + "learning_rate": 7.673988257553966e-06, + "loss": 0.7948, + "step": 6631 + }, + { + "epoch": 0.34083667386165073, + "grad_norm": 0.6964709162712097, + "learning_rate": 7.673284982339035e-06, + "loss": 0.6704, + "step": 6632 + }, + { + "epoch": 0.3408880666049954, + "grad_norm": 1.0632686614990234, + "learning_rate": 7.672581633058408e-06, + "loss": 0.7707, + "step": 6633 + }, + { + "epoch": 0.34093945934834, + "grad_norm": 1.0994741916656494, + "learning_rate": 7.671878209731573e-06, + "loss": 0.7925, + "step": 6634 + }, + { + "epoch": 0.34099085209168467, + "grad_norm": 1.0407366752624512, + "learning_rate": 7.67117471237802e-06, + "loss": 0.7492, + "step": 6635 + }, + { + "epoch": 0.3410422448350293, + "grad_norm": 1.099822998046875, + "learning_rate": 7.67047114101724e-06, + "loss": 0.7819, + "step": 6636 + }, + { + "epoch": 0.3410936375783739, + "grad_norm": 1.0465624332427979, + "learning_rate": 7.669767495668723e-06, + "loss": 0.692, + "step": 6637 + }, + { + "epoch": 0.34114503032171856, + "grad_norm": 1.0452357530593872, + "learning_rate": 7.669063776351966e-06, + "loss": 0.7312, + "step": 6638 + }, + { + "epoch": 0.3411964230650632, + "grad_norm": 0.8059674501419067, + "learning_rate": 7.668359983086468e-06, + "loss": 0.6994, + "step": 6639 + }, + { + "epoch": 0.34124781580840785, + "grad_norm": 1.024306058883667, + "learning_rate": 7.667656115891726e-06, + "loss": 0.7562, + "step": 6640 + }, + { + "epoch": 0.3412992085517525, + "grad_norm": 0.8661328554153442, + "learning_rate": 7.666952174787241e-06, + "loss": 0.6936, + "step": 6641 + }, + { + "epoch": 0.34135060129509714, + "grad_norm": 0.6932468414306641, + "learning_rate": 7.666248159792517e-06, + "loss": 0.7029, + "step": 6642 + }, + { + "epoch": 0.3414019940384418, + "grad_norm": 1.052911400794983, + "learning_rate": 7.66554407092706e-06, + "loss": 0.825, + "step": 6643 + }, + { + "epoch": 0.34145338678178644, + "grad_norm": 1.0475996732711792, + "learning_rate": 7.664839908210378e-06, + "loss": 0.7404, + "step": 6644 + }, + { + "epoch": 0.34150477952513103, + "grad_norm": 1.0537971258163452, + "learning_rate": 7.664135671661978e-06, + "loss": 0.7326, + "step": 6645 + }, + { + "epoch": 0.3415561722684757, + "grad_norm": 1.0285451412200928, + "learning_rate": 7.663431361301372e-06, + "loss": 0.7462, + "step": 6646 + }, + { + "epoch": 0.3416075650118203, + "grad_norm": 1.0768494606018066, + "learning_rate": 7.662726977148077e-06, + "loss": 0.8133, + "step": 6647 + }, + { + "epoch": 0.34165895775516497, + "grad_norm": 1.0296823978424072, + "learning_rate": 7.662022519221604e-06, + "loss": 0.7481, + "step": 6648 + }, + { + "epoch": 0.3417103504985096, + "grad_norm": 0.7730618715286255, + "learning_rate": 7.661317987541472e-06, + "loss": 0.7083, + "step": 6649 + }, + { + "epoch": 0.34176174324185427, + "grad_norm": 1.1077300310134888, + "learning_rate": 7.660613382127201e-06, + "loss": 0.7341, + "step": 6650 + }, + { + "epoch": 0.3418131359851989, + "grad_norm": 1.0408177375793457, + "learning_rate": 7.659908702998313e-06, + "loss": 0.766, + "step": 6651 + }, + { + "epoch": 0.3418645287285435, + "grad_norm": 1.0484448671340942, + "learning_rate": 7.659203950174333e-06, + "loss": 0.7667, + "step": 6652 + }, + { + "epoch": 0.34191592147188815, + "grad_norm": 1.048295021057129, + "learning_rate": 7.658499123674784e-06, + "loss": 0.7772, + "step": 6653 + }, + { + "epoch": 0.3419673142152328, + "grad_norm": 1.1377633810043335, + "learning_rate": 7.657794223519198e-06, + "loss": 0.7285, + "step": 6654 + }, + { + "epoch": 0.34201870695857745, + "grad_norm": 0.8304348587989807, + "learning_rate": 7.6570892497271e-06, + "loss": 0.6968, + "step": 6655 + }, + { + "epoch": 0.3420700997019221, + "grad_norm": 1.0792810916900635, + "learning_rate": 7.656384202318024e-06, + "loss": 0.7899, + "step": 6656 + }, + { + "epoch": 0.34212149244526674, + "grad_norm": 1.0511394739151, + "learning_rate": 7.655679081311505e-06, + "loss": 0.7919, + "step": 6657 + }, + { + "epoch": 0.3421728851886114, + "grad_norm": 0.8317001461982727, + "learning_rate": 7.654973886727076e-06, + "loss": 0.6945, + "step": 6658 + }, + { + "epoch": 0.34222427793195603, + "grad_norm": 0.8909692764282227, + "learning_rate": 7.654268618584277e-06, + "loss": 0.6842, + "step": 6659 + }, + { + "epoch": 0.3422756706753006, + "grad_norm": 1.1138473749160767, + "learning_rate": 7.653563276902651e-06, + "loss": 0.7422, + "step": 6660 + }, + { + "epoch": 0.34232706341864527, + "grad_norm": 1.1204191446304321, + "learning_rate": 7.652857861701735e-06, + "loss": 0.7375, + "step": 6661 + }, + { + "epoch": 0.3423784561619899, + "grad_norm": 1.0435632467269897, + "learning_rate": 7.652152373001077e-06, + "loss": 0.7576, + "step": 6662 + }, + { + "epoch": 0.34242984890533457, + "grad_norm": 0.8679373860359192, + "learning_rate": 7.65144681082022e-06, + "loss": 0.6911, + "step": 6663 + }, + { + "epoch": 0.3424812416486792, + "grad_norm": 0.7349409461021423, + "learning_rate": 7.650741175178712e-06, + "loss": 0.6712, + "step": 6664 + }, + { + "epoch": 0.34253263439202386, + "grad_norm": 0.8016467690467834, + "learning_rate": 7.650035466096109e-06, + "loss": 0.7141, + "step": 6665 + }, + { + "epoch": 0.3425840271353685, + "grad_norm": 1.0859202146530151, + "learning_rate": 7.649329683591959e-06, + "loss": 0.7475, + "step": 6666 + }, + { + "epoch": 0.3426354198787131, + "grad_norm": 1.0483160018920898, + "learning_rate": 7.648623827685813e-06, + "loss": 0.7195, + "step": 6667 + }, + { + "epoch": 0.34268681262205775, + "grad_norm": 0.7807158827781677, + "learning_rate": 7.647917898397235e-06, + "loss": 0.695, + "step": 6668 + }, + { + "epoch": 0.3427382053654024, + "grad_norm": 1.1061903238296509, + "learning_rate": 7.647211895745777e-06, + "loss": 0.7297, + "step": 6669 + }, + { + "epoch": 0.34278959810874704, + "grad_norm": 0.7125497460365295, + "learning_rate": 7.646505819751e-06, + "loss": 0.6822, + "step": 6670 + }, + { + "epoch": 0.3428409908520917, + "grad_norm": 1.074361801147461, + "learning_rate": 7.645799670432473e-06, + "loss": 0.8248, + "step": 6671 + }, + { + "epoch": 0.34289238359543633, + "grad_norm": 1.0187973976135254, + "learning_rate": 7.645093447809751e-06, + "loss": 0.7552, + "step": 6672 + }, + { + "epoch": 0.342943776338781, + "grad_norm": 1.1492890119552612, + "learning_rate": 7.644387151902408e-06, + "loss": 0.7254, + "step": 6673 + }, + { + "epoch": 0.34299516908212563, + "grad_norm": 1.1094372272491455, + "learning_rate": 7.64368078273001e-06, + "loss": 0.8263, + "step": 6674 + }, + { + "epoch": 0.3430465618254702, + "grad_norm": 1.0237451791763306, + "learning_rate": 7.642974340312126e-06, + "loss": 0.7543, + "step": 6675 + }, + { + "epoch": 0.34309795456881487, + "grad_norm": 1.027280569076538, + "learning_rate": 7.642267824668331e-06, + "loss": 0.7684, + "step": 6676 + }, + { + "epoch": 0.3431493473121595, + "grad_norm": 1.05669105052948, + "learning_rate": 7.641561235818197e-06, + "loss": 0.7646, + "step": 6677 + }, + { + "epoch": 0.34320074005550416, + "grad_norm": 1.0904415845870972, + "learning_rate": 7.640854573781303e-06, + "loss": 0.7659, + "step": 6678 + }, + { + "epoch": 0.3432521327988488, + "grad_norm": 1.1408905982971191, + "learning_rate": 7.640147838577228e-06, + "loss": 0.7244, + "step": 6679 + }, + { + "epoch": 0.34330352554219346, + "grad_norm": 1.0549665689468384, + "learning_rate": 7.63944103022555e-06, + "loss": 0.7508, + "step": 6680 + }, + { + "epoch": 0.3433549182855381, + "grad_norm": 1.075059413909912, + "learning_rate": 7.638734148745855e-06, + "loss": 0.75, + "step": 6681 + }, + { + "epoch": 0.34340631102888275, + "grad_norm": 0.7598670721054077, + "learning_rate": 7.638027194157725e-06, + "loss": 0.6965, + "step": 6682 + }, + { + "epoch": 0.34345770377222734, + "grad_norm": 0.813686192035675, + "learning_rate": 7.637320166480746e-06, + "loss": 0.6938, + "step": 6683 + }, + { + "epoch": 0.343509096515572, + "grad_norm": 1.097036361694336, + "learning_rate": 7.636613065734513e-06, + "loss": 0.737, + "step": 6684 + }, + { + "epoch": 0.34356048925891663, + "grad_norm": 1.075984001159668, + "learning_rate": 7.63590589193861e-06, + "loss": 0.7918, + "step": 6685 + }, + { + "epoch": 0.3436118820022613, + "grad_norm": 1.144142508506775, + "learning_rate": 7.635198645112634e-06, + "loss": 0.7713, + "step": 6686 + }, + { + "epoch": 0.34366327474560593, + "grad_norm": 1.0059248208999634, + "learning_rate": 7.634491325276176e-06, + "loss": 0.7067, + "step": 6687 + }, + { + "epoch": 0.3437146674889506, + "grad_norm": 1.200853943824768, + "learning_rate": 7.633783932448837e-06, + "loss": 0.7558, + "step": 6688 + }, + { + "epoch": 0.3437660602322952, + "grad_norm": 1.134192943572998, + "learning_rate": 7.633076466650213e-06, + "loss": 0.725, + "step": 6689 + }, + { + "epoch": 0.3438174529756398, + "grad_norm": 1.0616955757141113, + "learning_rate": 7.632368927899909e-06, + "loss": 0.7827, + "step": 6690 + }, + { + "epoch": 0.34386884571898446, + "grad_norm": 1.05637526512146, + "learning_rate": 7.631661316217523e-06, + "loss": 0.7579, + "step": 6691 + }, + { + "epoch": 0.3439202384623291, + "grad_norm": 1.069056749343872, + "learning_rate": 7.630953631622662e-06, + "loss": 0.7994, + "step": 6692 + }, + { + "epoch": 0.34397163120567376, + "grad_norm": 1.095033049583435, + "learning_rate": 7.630245874134934e-06, + "loss": 0.7668, + "step": 6693 + }, + { + "epoch": 0.3440230239490184, + "grad_norm": 1.039291501045227, + "learning_rate": 7.629538043773945e-06, + "loss": 0.7995, + "step": 6694 + }, + { + "epoch": 0.34407441669236305, + "grad_norm": 0.8052263855934143, + "learning_rate": 7.628830140559311e-06, + "loss": 0.6517, + "step": 6695 + }, + { + "epoch": 0.3441258094357077, + "grad_norm": 1.031767725944519, + "learning_rate": 7.628122164510641e-06, + "loss": 0.7341, + "step": 6696 + }, + { + "epoch": 0.34417720217905234, + "grad_norm": 0.7558072209358215, + "learning_rate": 7.627414115647554e-06, + "loss": 0.7498, + "step": 6697 + }, + { + "epoch": 0.34422859492239694, + "grad_norm": 1.075257658958435, + "learning_rate": 7.626705993989662e-06, + "loss": 0.7377, + "step": 6698 + }, + { + "epoch": 0.3442799876657416, + "grad_norm": 1.0834792852401733, + "learning_rate": 7.625997799556586e-06, + "loss": 0.849, + "step": 6699 + }, + { + "epoch": 0.34433138040908623, + "grad_norm": 0.8612902164459229, + "learning_rate": 7.625289532367948e-06, + "loss": 0.6877, + "step": 6700 + }, + { + "epoch": 0.3443827731524309, + "grad_norm": 0.7838582396507263, + "learning_rate": 7.624581192443372e-06, + "loss": 0.697, + "step": 6701 + }, + { + "epoch": 0.3444341658957755, + "grad_norm": 1.0737199783325195, + "learning_rate": 7.623872779802483e-06, + "loss": 0.74, + "step": 6702 + }, + { + "epoch": 0.34448555863912017, + "grad_norm": 1.087113380432129, + "learning_rate": 7.623164294464906e-06, + "loss": 0.7156, + "step": 6703 + }, + { + "epoch": 0.3445369513824648, + "grad_norm": 1.1013505458831787, + "learning_rate": 7.6224557364502715e-06, + "loss": 0.7895, + "step": 6704 + }, + { + "epoch": 0.3445883441258094, + "grad_norm": 0.8384442329406738, + "learning_rate": 7.621747105778212e-06, + "loss": 0.6927, + "step": 6705 + }, + { + "epoch": 0.34463973686915406, + "grad_norm": 1.1046620607376099, + "learning_rate": 7.621038402468359e-06, + "loss": 0.797, + "step": 6706 + }, + { + "epoch": 0.3446911296124987, + "grad_norm": 1.0412647724151611, + "learning_rate": 7.620329626540348e-06, + "loss": 0.7453, + "step": 6707 + }, + { + "epoch": 0.34474252235584335, + "grad_norm": 1.1194000244140625, + "learning_rate": 7.6196207780138164e-06, + "loss": 0.7848, + "step": 6708 + }, + { + "epoch": 0.344793915099188, + "grad_norm": 1.0361796617507935, + "learning_rate": 7.6189118569084045e-06, + "loss": 0.7678, + "step": 6709 + }, + { + "epoch": 0.34484530784253264, + "grad_norm": 1.1156220436096191, + "learning_rate": 7.618202863243751e-06, + "loss": 0.7893, + "step": 6710 + }, + { + "epoch": 0.3448967005858773, + "grad_norm": 1.0447503328323364, + "learning_rate": 7.617493797039501e-06, + "loss": 0.7257, + "step": 6711 + }, + { + "epoch": 0.34494809332922194, + "grad_norm": 1.0587265491485596, + "learning_rate": 7.6167846583153e-06, + "loss": 0.7635, + "step": 6712 + }, + { + "epoch": 0.34499948607256653, + "grad_norm": 1.1661818027496338, + "learning_rate": 7.616075447090796e-06, + "loss": 0.7363, + "step": 6713 + }, + { + "epoch": 0.3450508788159112, + "grad_norm": 0.8274634480476379, + "learning_rate": 7.6153661633856365e-06, + "loss": 0.6875, + "step": 6714 + }, + { + "epoch": 0.3451022715592558, + "grad_norm": 1.0906699895858765, + "learning_rate": 7.614656807219474e-06, + "loss": 0.7972, + "step": 6715 + }, + { + "epoch": 0.34515366430260047, + "grad_norm": 1.0328997373580933, + "learning_rate": 7.613947378611961e-06, + "loss": 0.738, + "step": 6716 + }, + { + "epoch": 0.3452050570459451, + "grad_norm": 1.0382572412490845, + "learning_rate": 7.613237877582753e-06, + "loss": 0.7686, + "step": 6717 + }, + { + "epoch": 0.34525644978928977, + "grad_norm": 1.0837785005569458, + "learning_rate": 7.6125283041515085e-06, + "loss": 0.7576, + "step": 6718 + }, + { + "epoch": 0.3453078425326344, + "grad_norm": 1.086661458015442, + "learning_rate": 7.611818658337886e-06, + "loss": 0.7586, + "step": 6719 + }, + { + "epoch": 0.345359235275979, + "grad_norm": 1.0097092390060425, + "learning_rate": 7.611108940161546e-06, + "loss": 0.7825, + "step": 6720 + }, + { + "epoch": 0.34541062801932365, + "grad_norm": 0.71791672706604, + "learning_rate": 7.610399149642151e-06, + "loss": 0.6978, + "step": 6721 + }, + { + "epoch": 0.3454620207626683, + "grad_norm": 1.0498000383377075, + "learning_rate": 7.609689286799371e-06, + "loss": 0.7426, + "step": 6722 + }, + { + "epoch": 0.34551341350601295, + "grad_norm": 0.6963216066360474, + "learning_rate": 7.60897935165287e-06, + "loss": 0.6501, + "step": 6723 + }, + { + "epoch": 0.3455648062493576, + "grad_norm": 1.1035126447677612, + "learning_rate": 7.608269344222317e-06, + "loss": 0.8557, + "step": 6724 + }, + { + "epoch": 0.34561619899270224, + "grad_norm": 1.0338149070739746, + "learning_rate": 7.607559264527384e-06, + "loss": 0.7149, + "step": 6725 + }, + { + "epoch": 0.3456675917360469, + "grad_norm": 1.0522608757019043, + "learning_rate": 7.606849112587744e-06, + "loss": 0.792, + "step": 6726 + }, + { + "epoch": 0.34571898447939153, + "grad_norm": 1.0813616514205933, + "learning_rate": 7.6061388884230755e-06, + "loss": 0.7489, + "step": 6727 + }, + { + "epoch": 0.3457703772227361, + "grad_norm": 1.1051362752914429, + "learning_rate": 7.60542859205305e-06, + "loss": 0.791, + "step": 6728 + }, + { + "epoch": 0.34582176996608077, + "grad_norm": 1.0514551401138306, + "learning_rate": 7.604718223497352e-06, + "loss": 0.7541, + "step": 6729 + }, + { + "epoch": 0.3458731627094254, + "grad_norm": 1.0483429431915283, + "learning_rate": 7.604007782775662e-06, + "loss": 0.7409, + "step": 6730 + }, + { + "epoch": 0.34592455545277007, + "grad_norm": 1.0433050394058228, + "learning_rate": 7.60329726990766e-06, + "loss": 0.7601, + "step": 6731 + }, + { + "epoch": 0.3459759481961147, + "grad_norm": 1.1083686351776123, + "learning_rate": 7.602586684913036e-06, + "loss": 0.773, + "step": 6732 + }, + { + "epoch": 0.34602734093945936, + "grad_norm": 1.089171051979065, + "learning_rate": 7.601876027811475e-06, + "loss": 0.7481, + "step": 6733 + }, + { + "epoch": 0.346078733682804, + "grad_norm": 1.0874563455581665, + "learning_rate": 7.601165298622664e-06, + "loss": 0.734, + "step": 6734 + }, + { + "epoch": 0.34613012642614865, + "grad_norm": 0.6653624773025513, + "learning_rate": 7.600454497366299e-06, + "loss": 0.6827, + "step": 6735 + }, + { + "epoch": 0.34618151916949325, + "grad_norm": 0.6917997598648071, + "learning_rate": 7.5997436240620715e-06, + "loss": 0.68, + "step": 6736 + }, + { + "epoch": 0.3462329119128379, + "grad_norm": 1.1056466102600098, + "learning_rate": 7.599032678729676e-06, + "loss": 0.7292, + "step": 6737 + }, + { + "epoch": 0.34628430465618254, + "grad_norm": 1.0500496625900269, + "learning_rate": 7.598321661388812e-06, + "loss": 0.767, + "step": 6738 + }, + { + "epoch": 0.3463356973995272, + "grad_norm": 1.1421741247177124, + "learning_rate": 7.597610572059175e-06, + "loss": 0.7128, + "step": 6739 + }, + { + "epoch": 0.34638709014287183, + "grad_norm": 1.1197861433029175, + "learning_rate": 7.59689941076047e-06, + "loss": 0.8204, + "step": 6740 + }, + { + "epoch": 0.3464384828862165, + "grad_norm": 1.0676395893096924, + "learning_rate": 7.5961881775124e-06, + "loss": 0.742, + "step": 6741 + }, + { + "epoch": 0.34648987562956113, + "grad_norm": 1.0702288150787354, + "learning_rate": 7.595476872334668e-06, + "loss": 0.7079, + "step": 6742 + }, + { + "epoch": 0.3465412683729057, + "grad_norm": 1.075451135635376, + "learning_rate": 7.594765495246984e-06, + "loss": 0.7928, + "step": 6743 + }, + { + "epoch": 0.34659266111625037, + "grad_norm": 1.091914415359497, + "learning_rate": 7.594054046269055e-06, + "loss": 0.7077, + "step": 6744 + }, + { + "epoch": 0.346644053859595, + "grad_norm": 1.174403190612793, + "learning_rate": 7.593342525420595e-06, + "loss": 0.8204, + "step": 6745 + }, + { + "epoch": 0.34669544660293966, + "grad_norm": 1.0626065731048584, + "learning_rate": 7.5926309327213145e-06, + "loss": 0.7017, + "step": 6746 + }, + { + "epoch": 0.3467468393462843, + "grad_norm": 1.1501697301864624, + "learning_rate": 7.591919268190929e-06, + "loss": 0.7783, + "step": 6747 + }, + { + "epoch": 0.34679823208962895, + "grad_norm": 1.070454716682434, + "learning_rate": 7.591207531849159e-06, + "loss": 0.7179, + "step": 6748 + }, + { + "epoch": 0.3468496248329736, + "grad_norm": 1.0858380794525146, + "learning_rate": 7.59049572371572e-06, + "loss": 0.7692, + "step": 6749 + }, + { + "epoch": 0.34690101757631825, + "grad_norm": 1.0918105840682983, + "learning_rate": 7.589783843810336e-06, + "loss": 0.792, + "step": 6750 + }, + { + "epoch": 0.34695241031966284, + "grad_norm": 1.0945814847946167, + "learning_rate": 7.589071892152728e-06, + "loss": 0.7761, + "step": 6751 + }, + { + "epoch": 0.3470038030630075, + "grad_norm": 1.0814858675003052, + "learning_rate": 7.588359868762622e-06, + "loss": 0.7801, + "step": 6752 + }, + { + "epoch": 0.34705519580635213, + "grad_norm": 1.0508838891983032, + "learning_rate": 7.587647773659745e-06, + "loss": 0.7974, + "step": 6753 + }, + { + "epoch": 0.3471065885496968, + "grad_norm": 1.156484603881836, + "learning_rate": 7.58693560686383e-06, + "loss": 0.7397, + "step": 6754 + }, + { + "epoch": 0.34715798129304143, + "grad_norm": 0.9922685623168945, + "learning_rate": 7.5862233683946025e-06, + "loss": 0.7558, + "step": 6755 + }, + { + "epoch": 0.3472093740363861, + "grad_norm": 1.0413216352462769, + "learning_rate": 7.585511058271799e-06, + "loss": 0.6774, + "step": 6756 + }, + { + "epoch": 0.3472607667797307, + "grad_norm": 1.5578668117523193, + "learning_rate": 7.584798676515154e-06, + "loss": 0.7583, + "step": 6757 + }, + { + "epoch": 0.3473121595230753, + "grad_norm": 1.089167594909668, + "learning_rate": 7.5840862231444035e-06, + "loss": 0.7416, + "step": 6758 + }, + { + "epoch": 0.34736355226641996, + "grad_norm": 0.8310548067092896, + "learning_rate": 7.583373698179288e-06, + "loss": 0.7035, + "step": 6759 + }, + { + "epoch": 0.3474149450097646, + "grad_norm": 1.0672982931137085, + "learning_rate": 7.582661101639548e-06, + "loss": 0.7422, + "step": 6760 + }, + { + "epoch": 0.34746633775310926, + "grad_norm": 1.1020509004592896, + "learning_rate": 7.581948433544928e-06, + "loss": 0.7593, + "step": 6761 + }, + { + "epoch": 0.3475177304964539, + "grad_norm": 1.0925347805023193, + "learning_rate": 7.581235693915172e-06, + "loss": 0.8331, + "step": 6762 + }, + { + "epoch": 0.34756912323979855, + "grad_norm": 0.8080524206161499, + "learning_rate": 7.580522882770025e-06, + "loss": 0.6884, + "step": 6763 + }, + { + "epoch": 0.3476205159831432, + "grad_norm": 0.7478787302970886, + "learning_rate": 7.57981000012924e-06, + "loss": 0.677, + "step": 6764 + }, + { + "epoch": 0.34767190872648784, + "grad_norm": 1.0973118543624878, + "learning_rate": 7.579097046012565e-06, + "loss": 0.7944, + "step": 6765 + }, + { + "epoch": 0.34772330146983244, + "grad_norm": 1.0880799293518066, + "learning_rate": 7.578384020439755e-06, + "loss": 0.7507, + "step": 6766 + }, + { + "epoch": 0.3477746942131771, + "grad_norm": 0.7046155333518982, + "learning_rate": 7.577670923430565e-06, + "loss": 0.6908, + "step": 6767 + }, + { + "epoch": 0.34782608695652173, + "grad_norm": 1.08762526512146, + "learning_rate": 7.57695775500475e-06, + "loss": 0.7944, + "step": 6768 + }, + { + "epoch": 0.3478774796998664, + "grad_norm": 1.0283652544021606, + "learning_rate": 7.576244515182071e-06, + "loss": 0.7143, + "step": 6769 + }, + { + "epoch": 0.347928872443211, + "grad_norm": 1.0499347448349, + "learning_rate": 7.575531203982287e-06, + "loss": 0.791, + "step": 6770 + }, + { + "epoch": 0.34798026518655567, + "grad_norm": 1.0651031732559204, + "learning_rate": 7.574817821425162e-06, + "loss": 0.7428, + "step": 6771 + }, + { + "epoch": 0.3480316579299003, + "grad_norm": 0.7700686454772949, + "learning_rate": 7.574104367530461e-06, + "loss": 0.6662, + "step": 6772 + }, + { + "epoch": 0.34808305067324496, + "grad_norm": 0.790373682975769, + "learning_rate": 7.573390842317949e-06, + "loss": 0.7072, + "step": 6773 + }, + { + "epoch": 0.34813444341658956, + "grad_norm": 1.0704606771469116, + "learning_rate": 7.5726772458073985e-06, + "loss": 0.7894, + "step": 6774 + }, + { + "epoch": 0.3481858361599342, + "grad_norm": 0.762983500957489, + "learning_rate": 7.57196357801858e-06, + "loss": 0.6327, + "step": 6775 + }, + { + "epoch": 0.34823722890327885, + "grad_norm": 1.1258478164672852, + "learning_rate": 7.5712498389712615e-06, + "loss": 0.8141, + "step": 6776 + }, + { + "epoch": 0.3482886216466235, + "grad_norm": 0.7803865075111389, + "learning_rate": 7.570536028685222e-06, + "loss": 0.6919, + "step": 6777 + }, + { + "epoch": 0.34834001438996814, + "grad_norm": 0.7991212606430054, + "learning_rate": 7.569822147180237e-06, + "loss": 0.716, + "step": 6778 + }, + { + "epoch": 0.3483914071333128, + "grad_norm": 1.1707431077957153, + "learning_rate": 7.569108194476086e-06, + "loss": 0.7836, + "step": 6779 + }, + { + "epoch": 0.34844279987665744, + "grad_norm": 0.7451980113983154, + "learning_rate": 7.568394170592548e-06, + "loss": 0.6988, + "step": 6780 + }, + { + "epoch": 0.34849419262000203, + "grad_norm": 1.089350938796997, + "learning_rate": 7.567680075549407e-06, + "loss": 0.7931, + "step": 6781 + }, + { + "epoch": 0.3485455853633467, + "grad_norm": 1.1157748699188232, + "learning_rate": 7.566965909366447e-06, + "loss": 0.82, + "step": 6782 + }, + { + "epoch": 0.3485969781066913, + "grad_norm": 1.0351017713546753, + "learning_rate": 7.566251672063456e-06, + "loss": 0.6785, + "step": 6783 + }, + { + "epoch": 0.34864837085003597, + "grad_norm": 1.056187391281128, + "learning_rate": 7.565537363660221e-06, + "loss": 0.7413, + "step": 6784 + }, + { + "epoch": 0.3486997635933806, + "grad_norm": 1.0081517696380615, + "learning_rate": 7.564822984176532e-06, + "loss": 0.7038, + "step": 6785 + }, + { + "epoch": 0.34875115633672527, + "grad_norm": 1.1150524616241455, + "learning_rate": 7.564108533632184e-06, + "loss": 0.7739, + "step": 6786 + }, + { + "epoch": 0.3488025490800699, + "grad_norm": 1.0703020095825195, + "learning_rate": 7.56339401204697e-06, + "loss": 0.7164, + "step": 6787 + }, + { + "epoch": 0.34885394182341456, + "grad_norm": 1.088294506072998, + "learning_rate": 7.562679419440685e-06, + "loss": 0.7567, + "step": 6788 + }, + { + "epoch": 0.34890533456675915, + "grad_norm": 0.7753811478614807, + "learning_rate": 7.561964755833129e-06, + "loss": 0.7296, + "step": 6789 + }, + { + "epoch": 0.3489567273101038, + "grad_norm": 0.9371304512023926, + "learning_rate": 7.561250021244103e-06, + "loss": 0.7233, + "step": 6790 + }, + { + "epoch": 0.34900812005344845, + "grad_norm": 1.0753734111785889, + "learning_rate": 7.560535215693408e-06, + "loss": 0.7127, + "step": 6791 + }, + { + "epoch": 0.3490595127967931, + "grad_norm": 1.0859993696212769, + "learning_rate": 7.5598203392008495e-06, + "loss": 0.7527, + "step": 6792 + }, + { + "epoch": 0.34911090554013774, + "grad_norm": 1.0808069705963135, + "learning_rate": 7.559105391786232e-06, + "loss": 0.7884, + "step": 6793 + }, + { + "epoch": 0.3491622982834824, + "grad_norm": 1.0388644933700562, + "learning_rate": 7.558390373469366e-06, + "loss": 0.779, + "step": 6794 + }, + { + "epoch": 0.34921369102682703, + "grad_norm": 1.0808041095733643, + "learning_rate": 7.5576752842700606e-06, + "loss": 0.6875, + "step": 6795 + }, + { + "epoch": 0.3492650837701716, + "grad_norm": 1.0876045227050781, + "learning_rate": 7.556960124208128e-06, + "loss": 0.7862, + "step": 6796 + }, + { + "epoch": 0.34931647651351627, + "grad_norm": 1.0943766832351685, + "learning_rate": 7.556244893303382e-06, + "loss": 0.7539, + "step": 6797 + }, + { + "epoch": 0.3493678692568609, + "grad_norm": 1.1543569564819336, + "learning_rate": 7.555529591575639e-06, + "loss": 0.7622, + "step": 6798 + }, + { + "epoch": 0.34941926200020557, + "grad_norm": 1.072258710861206, + "learning_rate": 7.554814219044718e-06, + "loss": 0.7477, + "step": 6799 + }, + { + "epoch": 0.3494706547435502, + "grad_norm": 1.0761510133743286, + "learning_rate": 7.554098775730436e-06, + "loss": 0.7509, + "step": 6800 + }, + { + "epoch": 0.34952204748689486, + "grad_norm": 0.7720167636871338, + "learning_rate": 7.55338326165262e-06, + "loss": 0.6725, + "step": 6801 + }, + { + "epoch": 0.3495734402302395, + "grad_norm": 0.7899565696716309, + "learning_rate": 7.552667676831089e-06, + "loss": 0.7295, + "step": 6802 + }, + { + "epoch": 0.34962483297358415, + "grad_norm": 1.1246100664138794, + "learning_rate": 7.55195202128567e-06, + "loss": 0.7161, + "step": 6803 + }, + { + "epoch": 0.34967622571692875, + "grad_norm": 1.1196826696395874, + "learning_rate": 7.551236295036193e-06, + "loss": 0.7678, + "step": 6804 + }, + { + "epoch": 0.3497276184602734, + "grad_norm": 1.1064887046813965, + "learning_rate": 7.550520498102487e-06, + "loss": 0.8177, + "step": 6805 + }, + { + "epoch": 0.34977901120361804, + "grad_norm": 0.9994227290153503, + "learning_rate": 7.549804630504383e-06, + "loss": 0.721, + "step": 6806 + }, + { + "epoch": 0.3498304039469627, + "grad_norm": 1.091808557510376, + "learning_rate": 7.549088692261716e-06, + "loss": 0.7833, + "step": 6807 + }, + { + "epoch": 0.34988179669030733, + "grad_norm": 0.8548765182495117, + "learning_rate": 7.548372683394318e-06, + "loss": 0.6919, + "step": 6808 + }, + { + "epoch": 0.349933189433652, + "grad_norm": 1.167075276374817, + "learning_rate": 7.5476566039220335e-06, + "loss": 0.7844, + "step": 6809 + }, + { + "epoch": 0.34998458217699663, + "grad_norm": 0.8158947229385376, + "learning_rate": 7.546940453864695e-06, + "loss": 0.693, + "step": 6810 + }, + { + "epoch": 0.3500359749203413, + "grad_norm": 1.0715219974517822, + "learning_rate": 7.546224233242147e-06, + "loss": 0.7108, + "step": 6811 + }, + { + "epoch": 0.35008736766368587, + "grad_norm": 1.1415905952453613, + "learning_rate": 7.545507942074235e-06, + "loss": 0.7078, + "step": 6812 + }, + { + "epoch": 0.3501387604070305, + "grad_norm": 1.0799692869186401, + "learning_rate": 7.544791580380801e-06, + "loss": 0.7155, + "step": 6813 + }, + { + "epoch": 0.35019015315037516, + "grad_norm": 1.131688117980957, + "learning_rate": 7.544075148181696e-06, + "loss": 0.7773, + "step": 6814 + }, + { + "epoch": 0.3502415458937198, + "grad_norm": 1.1263842582702637, + "learning_rate": 7.543358645496766e-06, + "loss": 0.7664, + "step": 6815 + }, + { + "epoch": 0.35029293863706445, + "grad_norm": 1.0170152187347412, + "learning_rate": 7.542642072345864e-06, + "loss": 0.7277, + "step": 6816 + }, + { + "epoch": 0.3503443313804091, + "grad_norm": 1.0750617980957031, + "learning_rate": 7.541925428748843e-06, + "loss": 0.7951, + "step": 6817 + }, + { + "epoch": 0.35039572412375375, + "grad_norm": 1.1086397171020508, + "learning_rate": 7.541208714725558e-06, + "loss": 0.7836, + "step": 6818 + }, + { + "epoch": 0.35044711686709834, + "grad_norm": 1.094606637954712, + "learning_rate": 7.540491930295867e-06, + "loss": 0.7568, + "step": 6819 + }, + { + "epoch": 0.350498509610443, + "grad_norm": 1.059920072555542, + "learning_rate": 7.5397750754796296e-06, + "loss": 0.7445, + "step": 6820 + }, + { + "epoch": 0.35054990235378763, + "grad_norm": 1.0477545261383057, + "learning_rate": 7.539058150296703e-06, + "loss": 0.762, + "step": 6821 + }, + { + "epoch": 0.3506012950971323, + "grad_norm": 1.0866957902908325, + "learning_rate": 7.538341154766955e-06, + "loss": 0.7379, + "step": 6822 + }, + { + "epoch": 0.35065268784047693, + "grad_norm": 1.0484524965286255, + "learning_rate": 7.537624088910249e-06, + "loss": 0.7612, + "step": 6823 + }, + { + "epoch": 0.3507040805838216, + "grad_norm": 0.8495510220527649, + "learning_rate": 7.53690695274645e-06, + "loss": 0.7191, + "step": 6824 + }, + { + "epoch": 0.3507554733271662, + "grad_norm": 1.0750449895858765, + "learning_rate": 7.5361897462954305e-06, + "loss": 0.8086, + "step": 6825 + }, + { + "epoch": 0.35080686607051087, + "grad_norm": 0.8097819089889526, + "learning_rate": 7.535472469577059e-06, + "loss": 0.708, + "step": 6826 + }, + { + "epoch": 0.35085825881385546, + "grad_norm": 1.0629990100860596, + "learning_rate": 7.534755122611208e-06, + "loss": 0.7699, + "step": 6827 + }, + { + "epoch": 0.3509096515572001, + "grad_norm": 0.8553597927093506, + "learning_rate": 7.534037705417754e-06, + "loss": 0.6331, + "step": 6828 + }, + { + "epoch": 0.35096104430054476, + "grad_norm": 1.1731806993484497, + "learning_rate": 7.533320218016571e-06, + "loss": 0.8059, + "step": 6829 + }, + { + "epoch": 0.3510124370438894, + "grad_norm": 1.2728736400604248, + "learning_rate": 7.5326026604275395e-06, + "loss": 0.7608, + "step": 6830 + }, + { + "epoch": 0.35106382978723405, + "grad_norm": 1.064273715019226, + "learning_rate": 7.531885032670541e-06, + "loss": 0.746, + "step": 6831 + }, + { + "epoch": 0.3511152225305787, + "grad_norm": 1.0850670337677002, + "learning_rate": 7.531167334765455e-06, + "loss": 0.7815, + "step": 6832 + }, + { + "epoch": 0.35116661527392334, + "grad_norm": 1.1325316429138184, + "learning_rate": 7.530449566732167e-06, + "loss": 0.7685, + "step": 6833 + }, + { + "epoch": 0.35121800801726794, + "grad_norm": 1.0625728368759155, + "learning_rate": 7.529731728590567e-06, + "loss": 0.7752, + "step": 6834 + }, + { + "epoch": 0.3512694007606126, + "grad_norm": 1.1588932275772095, + "learning_rate": 7.529013820360538e-06, + "loss": 0.7049, + "step": 6835 + }, + { + "epoch": 0.35132079350395723, + "grad_norm": 1.1938048601150513, + "learning_rate": 7.528295842061974e-06, + "loss": 0.8379, + "step": 6836 + }, + { + "epoch": 0.3513721862473019, + "grad_norm": 1.054770827293396, + "learning_rate": 7.5275777937147645e-06, + "loss": 0.7146, + "step": 6837 + }, + { + "epoch": 0.3514235789906465, + "grad_norm": 1.1136995553970337, + "learning_rate": 7.526859675338807e-06, + "loss": 0.7427, + "step": 6838 + }, + { + "epoch": 0.35147497173399117, + "grad_norm": 1.0492607355117798, + "learning_rate": 7.526141486953995e-06, + "loss": 0.7809, + "step": 6839 + }, + { + "epoch": 0.3515263644773358, + "grad_norm": 0.8977307081222534, + "learning_rate": 7.525423228580227e-06, + "loss": 0.6765, + "step": 6840 + }, + { + "epoch": 0.35157775722068046, + "grad_norm": 1.0170862674713135, + "learning_rate": 7.524704900237403e-06, + "loss": 0.6955, + "step": 6841 + }, + { + "epoch": 0.35162914996402506, + "grad_norm": 1.1323602199554443, + "learning_rate": 7.523986501945424e-06, + "loss": 0.748, + "step": 6842 + }, + { + "epoch": 0.3516805427073697, + "grad_norm": 1.1696484088897705, + "learning_rate": 7.523268033724196e-06, + "loss": 0.7598, + "step": 6843 + }, + { + "epoch": 0.35173193545071435, + "grad_norm": 1.1741268634796143, + "learning_rate": 7.522549495593623e-06, + "loss": 0.752, + "step": 6844 + }, + { + "epoch": 0.351783328194059, + "grad_norm": 1.0321629047393799, + "learning_rate": 7.521830887573614e-06, + "loss": 0.7262, + "step": 6845 + }, + { + "epoch": 0.35183472093740364, + "grad_norm": 1.0211818218231201, + "learning_rate": 7.521112209684079e-06, + "loss": 0.7547, + "step": 6846 + }, + { + "epoch": 0.3518861136807483, + "grad_norm": 0.7404752373695374, + "learning_rate": 7.520393461944926e-06, + "loss": 0.6625, + "step": 6847 + }, + { + "epoch": 0.35193750642409294, + "grad_norm": 1.0592786073684692, + "learning_rate": 7.519674644376073e-06, + "loss": 0.762, + "step": 6848 + }, + { + "epoch": 0.3519888991674376, + "grad_norm": 1.0230878591537476, + "learning_rate": 7.518955756997435e-06, + "loss": 0.6874, + "step": 6849 + }, + { + "epoch": 0.3520402919107822, + "grad_norm": 1.0642518997192383, + "learning_rate": 7.518236799828926e-06, + "loss": 0.7295, + "step": 6850 + }, + { + "epoch": 0.3520916846541268, + "grad_norm": 1.1067473888397217, + "learning_rate": 7.517517772890468e-06, + "loss": 0.74, + "step": 6851 + }, + { + "epoch": 0.35214307739747147, + "grad_norm": 1.1237318515777588, + "learning_rate": 7.516798676201981e-06, + "loss": 0.7882, + "step": 6852 + }, + { + "epoch": 0.3521944701408161, + "grad_norm": 1.055605411529541, + "learning_rate": 7.51607950978339e-06, + "loss": 0.7554, + "step": 6853 + }, + { + "epoch": 0.35224586288416077, + "grad_norm": 1.1078089475631714, + "learning_rate": 7.5153602736546195e-06, + "loss": 0.7919, + "step": 6854 + }, + { + "epoch": 0.3522972556275054, + "grad_norm": 1.077776551246643, + "learning_rate": 7.514640967835595e-06, + "loss": 0.7097, + "step": 6855 + }, + { + "epoch": 0.35234864837085006, + "grad_norm": 1.0163429975509644, + "learning_rate": 7.513921592346247e-06, + "loss": 0.7649, + "step": 6856 + }, + { + "epoch": 0.35240004111419465, + "grad_norm": 1.1352893114089966, + "learning_rate": 7.513202147206506e-06, + "loss": 0.7401, + "step": 6857 + }, + { + "epoch": 0.3524514338575393, + "grad_norm": 1.0356788635253906, + "learning_rate": 7.512482632436304e-06, + "loss": 0.7933, + "step": 6858 + }, + { + "epoch": 0.35250282660088395, + "grad_norm": 1.1079896688461304, + "learning_rate": 7.5117630480555785e-06, + "loss": 0.7913, + "step": 6859 + }, + { + "epoch": 0.3525542193442286, + "grad_norm": 1.0858827829360962, + "learning_rate": 7.511043394084263e-06, + "loss": 0.7737, + "step": 6860 + }, + { + "epoch": 0.35260561208757324, + "grad_norm": 1.062917709350586, + "learning_rate": 7.510323670542298e-06, + "loss": 0.7228, + "step": 6861 + }, + { + "epoch": 0.3526570048309179, + "grad_norm": 1.0806519985198975, + "learning_rate": 7.509603877449624e-06, + "loss": 0.7324, + "step": 6862 + }, + { + "epoch": 0.35270839757426253, + "grad_norm": 1.054922103881836, + "learning_rate": 7.508884014826181e-06, + "loss": 0.8115, + "step": 6863 + }, + { + "epoch": 0.3527597903176072, + "grad_norm": 1.006226897239685, + "learning_rate": 7.508164082691918e-06, + "loss": 0.7916, + "step": 6864 + }, + { + "epoch": 0.35281118306095177, + "grad_norm": 1.0074583292007446, + "learning_rate": 7.507444081066777e-06, + "loss": 0.7328, + "step": 6865 + }, + { + "epoch": 0.3528625758042964, + "grad_norm": 1.0810068845748901, + "learning_rate": 7.50672400997071e-06, + "loss": 0.7895, + "step": 6866 + }, + { + "epoch": 0.35291396854764107, + "grad_norm": 1.06901216506958, + "learning_rate": 7.506003869423664e-06, + "loss": 0.763, + "step": 6867 + }, + { + "epoch": 0.3529653612909857, + "grad_norm": 1.0450772047042847, + "learning_rate": 7.505283659445593e-06, + "loss": 0.7704, + "step": 6868 + }, + { + "epoch": 0.35301675403433036, + "grad_norm": 1.1080470085144043, + "learning_rate": 7.5045633800564495e-06, + "loss": 0.7856, + "step": 6869 + }, + { + "epoch": 0.353068146777675, + "grad_norm": 0.8002954125404358, + "learning_rate": 7.503843031276192e-06, + "loss": 0.6677, + "step": 6870 + }, + { + "epoch": 0.35311953952101965, + "grad_norm": 1.0990715026855469, + "learning_rate": 7.5031226131247755e-06, + "loss": 0.7996, + "step": 6871 + }, + { + "epoch": 0.35317093226436425, + "grad_norm": 0.7224442958831787, + "learning_rate": 7.502402125622162e-06, + "loss": 0.69, + "step": 6872 + }, + { + "epoch": 0.3532223250077089, + "grad_norm": 0.7838165760040283, + "learning_rate": 7.501681568788313e-06, + "loss": 0.7186, + "step": 6873 + }, + { + "epoch": 0.35327371775105354, + "grad_norm": 0.6799275279045105, + "learning_rate": 7.500960942643189e-06, + "loss": 0.6654, + "step": 6874 + }, + { + "epoch": 0.3533251104943982, + "grad_norm": 1.0534428358078003, + "learning_rate": 7.5002402472067605e-06, + "loss": 0.7461, + "step": 6875 + }, + { + "epoch": 0.35337650323774283, + "grad_norm": 1.0646260976791382, + "learning_rate": 7.499519482498992e-06, + "loss": 0.7655, + "step": 6876 + }, + { + "epoch": 0.3534278959810875, + "grad_norm": 1.1189757585525513, + "learning_rate": 7.498798648539853e-06, + "loss": 0.8056, + "step": 6877 + }, + { + "epoch": 0.35347928872443213, + "grad_norm": 0.8376971483230591, + "learning_rate": 7.498077745349317e-06, + "loss": 0.6922, + "step": 6878 + }, + { + "epoch": 0.3535306814677768, + "grad_norm": 1.1545957326889038, + "learning_rate": 7.497356772947355e-06, + "loss": 0.7867, + "step": 6879 + }, + { + "epoch": 0.35358207421112137, + "grad_norm": 1.0761505365371704, + "learning_rate": 7.496635731353942e-06, + "loss": 0.806, + "step": 6880 + }, + { + "epoch": 0.353633466954466, + "grad_norm": 0.7250274419784546, + "learning_rate": 7.495914620589056e-06, + "loss": 0.6909, + "step": 6881 + }, + { + "epoch": 0.35368485969781066, + "grad_norm": 1.067765712738037, + "learning_rate": 7.495193440672676e-06, + "loss": 0.767, + "step": 6882 + }, + { + "epoch": 0.3537362524411553, + "grad_norm": 1.0351186990737915, + "learning_rate": 7.494472191624783e-06, + "loss": 0.7499, + "step": 6883 + }, + { + "epoch": 0.35378764518449995, + "grad_norm": 0.761271595954895, + "learning_rate": 7.49375087346536e-06, + "loss": 0.6923, + "step": 6884 + }, + { + "epoch": 0.3538390379278446, + "grad_norm": 1.1362197399139404, + "learning_rate": 7.49302948621439e-06, + "loss": 0.7729, + "step": 6885 + }, + { + "epoch": 0.35389043067118925, + "grad_norm": 1.0826493501663208, + "learning_rate": 7.492308029891863e-06, + "loss": 0.7605, + "step": 6886 + }, + { + "epoch": 0.35394182341453384, + "grad_norm": 1.075717806816101, + "learning_rate": 7.491586504517765e-06, + "loss": 0.7046, + "step": 6887 + }, + { + "epoch": 0.3539932161578785, + "grad_norm": 1.0867955684661865, + "learning_rate": 7.490864910112086e-06, + "loss": 0.7322, + "step": 6888 + }, + { + "epoch": 0.35404460890122313, + "grad_norm": 1.0871459245681763, + "learning_rate": 7.490143246694821e-06, + "loss": 0.7619, + "step": 6889 + }, + { + "epoch": 0.3540960016445678, + "grad_norm": 1.0426596403121948, + "learning_rate": 7.4894215142859614e-06, + "loss": 0.711, + "step": 6890 + }, + { + "epoch": 0.35414739438791243, + "grad_norm": 2.9700584411621094, + "learning_rate": 7.488699712905506e-06, + "loss": 0.7517, + "step": 6891 + }, + { + "epoch": 0.3541987871312571, + "grad_norm": 1.1073030233383179, + "learning_rate": 7.487977842573453e-06, + "loss": 0.769, + "step": 6892 + }, + { + "epoch": 0.3542501798746017, + "grad_norm": 1.1013861894607544, + "learning_rate": 7.487255903309798e-06, + "loss": 0.7637, + "step": 6893 + }, + { + "epoch": 0.35430157261794637, + "grad_norm": 1.091839075088501, + "learning_rate": 7.486533895134549e-06, + "loss": 0.7636, + "step": 6894 + }, + { + "epoch": 0.35435296536129096, + "grad_norm": 0.7659119367599487, + "learning_rate": 7.485811818067705e-06, + "loss": 0.6472, + "step": 6895 + }, + { + "epoch": 0.3544043581046356, + "grad_norm": 1.0987520217895508, + "learning_rate": 7.485089672129275e-06, + "loss": 0.7292, + "step": 6896 + }, + { + "epoch": 0.35445575084798026, + "grad_norm": 1.033129096031189, + "learning_rate": 7.484367457339265e-06, + "loss": 0.7171, + "step": 6897 + }, + { + "epoch": 0.3545071435913249, + "grad_norm": 1.0865167379379272, + "learning_rate": 7.483645173717686e-06, + "loss": 0.6974, + "step": 6898 + }, + { + "epoch": 0.35455853633466955, + "grad_norm": 0.7539694905281067, + "learning_rate": 7.4829228212845485e-06, + "loss": 0.6827, + "step": 6899 + }, + { + "epoch": 0.3546099290780142, + "grad_norm": 0.6504347324371338, + "learning_rate": 7.482200400059867e-06, + "loss": 0.6765, + "step": 6900 + }, + { + "epoch": 0.35466132182135884, + "grad_norm": 1.068043828010559, + "learning_rate": 7.481477910063654e-06, + "loss": 0.7365, + "step": 6901 + }, + { + "epoch": 0.3547127145647035, + "grad_norm": 1.0536192655563354, + "learning_rate": 7.480755351315929e-06, + "loss": 0.7438, + "step": 6902 + }, + { + "epoch": 0.3547641073080481, + "grad_norm": 6.880355358123779, + "learning_rate": 7.4800327238367125e-06, + "loss": 0.7662, + "step": 6903 + }, + { + "epoch": 0.35481550005139273, + "grad_norm": 1.1039276123046875, + "learning_rate": 7.479310027646021e-06, + "loss": 0.7473, + "step": 6904 + }, + { + "epoch": 0.3548668927947374, + "grad_norm": 0.826196014881134, + "learning_rate": 7.47858726276388e-06, + "loss": 0.6588, + "step": 6905 + }, + { + "epoch": 0.354918285538082, + "grad_norm": 1.102452039718628, + "learning_rate": 7.477864429210315e-06, + "loss": 0.8111, + "step": 6906 + }, + { + "epoch": 0.35496967828142667, + "grad_norm": 1.0219717025756836, + "learning_rate": 7.477141527005354e-06, + "loss": 0.769, + "step": 6907 + }, + { + "epoch": 0.3550210710247713, + "grad_norm": 1.101218819618225, + "learning_rate": 7.476418556169023e-06, + "loss": 0.7274, + "step": 6908 + }, + { + "epoch": 0.35507246376811596, + "grad_norm": 1.11151123046875, + "learning_rate": 7.475695516721353e-06, + "loss": 0.7682, + "step": 6909 + }, + { + "epoch": 0.35512385651146056, + "grad_norm": 1.0312201976776123, + "learning_rate": 7.474972408682377e-06, + "loss": 0.7367, + "step": 6910 + }, + { + "epoch": 0.3551752492548052, + "grad_norm": 1.1744149923324585, + "learning_rate": 7.47424923207213e-06, + "loss": 0.7564, + "step": 6911 + }, + { + "epoch": 0.35522664199814985, + "grad_norm": 1.1267353296279907, + "learning_rate": 7.473525986910646e-06, + "loss": 0.7502, + "step": 6912 + }, + { + "epoch": 0.3552780347414945, + "grad_norm": 1.1210782527923584, + "learning_rate": 7.472802673217965e-06, + "loss": 0.7855, + "step": 6913 + }, + { + "epoch": 0.35532942748483914, + "grad_norm": 0.8535274863243103, + "learning_rate": 7.472079291014127e-06, + "loss": 0.6712, + "step": 6914 + }, + { + "epoch": 0.3553808202281838, + "grad_norm": 1.039354681968689, + "learning_rate": 7.471355840319172e-06, + "loss": 0.6666, + "step": 6915 + }, + { + "epoch": 0.35543221297152844, + "grad_norm": 1.2426090240478516, + "learning_rate": 7.470632321153148e-06, + "loss": 0.809, + "step": 6916 + }, + { + "epoch": 0.3554836057148731, + "grad_norm": 1.0780504941940308, + "learning_rate": 7.469908733536095e-06, + "loss": 0.749, + "step": 6917 + }, + { + "epoch": 0.3555349984582177, + "grad_norm": 1.0589290857315063, + "learning_rate": 7.469185077488066e-06, + "loss": 0.7853, + "step": 6918 + }, + { + "epoch": 0.3555863912015623, + "grad_norm": 1.063048243522644, + "learning_rate": 7.468461353029109e-06, + "loss": 0.7845, + "step": 6919 + }, + { + "epoch": 0.35563778394490697, + "grad_norm": 1.0464750528335571, + "learning_rate": 7.4677375601792715e-06, + "loss": 0.7766, + "step": 6920 + }, + { + "epoch": 0.3556891766882516, + "grad_norm": 1.0719619989395142, + "learning_rate": 7.467013698958613e-06, + "loss": 0.7545, + "step": 6921 + }, + { + "epoch": 0.35574056943159627, + "grad_norm": 1.0718047618865967, + "learning_rate": 7.466289769387183e-06, + "loss": 0.7719, + "step": 6922 + }, + { + "epoch": 0.3557919621749409, + "grad_norm": 0.8977873921394348, + "learning_rate": 7.465565771485044e-06, + "loss": 0.6847, + "step": 6923 + }, + { + "epoch": 0.35584335491828556, + "grad_norm": 0.7830643653869629, + "learning_rate": 7.464841705272251e-06, + "loss": 0.6772, + "step": 6924 + }, + { + "epoch": 0.35589474766163015, + "grad_norm": 0.825669527053833, + "learning_rate": 7.464117570768865e-06, + "loss": 0.6989, + "step": 6925 + }, + { + "epoch": 0.3559461404049748, + "grad_norm": 1.020740032196045, + "learning_rate": 7.463393367994951e-06, + "loss": 0.7576, + "step": 6926 + }, + { + "epoch": 0.35599753314831944, + "grad_norm": 0.7267536520957947, + "learning_rate": 7.462669096970573e-06, + "loss": 0.6765, + "step": 6927 + }, + { + "epoch": 0.3560489258916641, + "grad_norm": 1.1210054159164429, + "learning_rate": 7.4619447577157955e-06, + "loss": 0.8293, + "step": 6928 + }, + { + "epoch": 0.35610031863500874, + "grad_norm": 1.0775084495544434, + "learning_rate": 7.4612203502506906e-06, + "loss": 0.7804, + "step": 6929 + }, + { + "epoch": 0.3561517113783534, + "grad_norm": 1.1095895767211914, + "learning_rate": 7.460495874595325e-06, + "loss": 0.7378, + "step": 6930 + }, + { + "epoch": 0.35620310412169803, + "grad_norm": 0.8248242139816284, + "learning_rate": 7.4597713307697735e-06, + "loss": 0.7269, + "step": 6931 + }, + { + "epoch": 0.3562544968650427, + "grad_norm": 1.0864014625549316, + "learning_rate": 7.45904671879411e-06, + "loss": 0.7733, + "step": 6932 + }, + { + "epoch": 0.35630588960838727, + "grad_norm": 1.0493333339691162, + "learning_rate": 7.458322038688408e-06, + "loss": 0.6973, + "step": 6933 + }, + { + "epoch": 0.3563572823517319, + "grad_norm": 1.1172261238098145, + "learning_rate": 7.457597290472749e-06, + "loss": 0.8583, + "step": 6934 + }, + { + "epoch": 0.35640867509507657, + "grad_norm": 0.7226443886756897, + "learning_rate": 7.456872474167211e-06, + "loss": 0.6483, + "step": 6935 + }, + { + "epoch": 0.3564600678384212, + "grad_norm": 1.0405539274215698, + "learning_rate": 7.4561475897918735e-06, + "loss": 0.8193, + "step": 6936 + }, + { + "epoch": 0.35651146058176586, + "grad_norm": 1.048471450805664, + "learning_rate": 7.455422637366823e-06, + "loss": 0.761, + "step": 6937 + }, + { + "epoch": 0.3565628533251105, + "grad_norm": 1.0708593130111694, + "learning_rate": 7.454697616912146e-06, + "loss": 0.7825, + "step": 6938 + }, + { + "epoch": 0.35661424606845515, + "grad_norm": 1.019115686416626, + "learning_rate": 7.453972528447926e-06, + "loss": 0.7776, + "step": 6939 + }, + { + "epoch": 0.3566656388117998, + "grad_norm": 0.804804265499115, + "learning_rate": 7.453247371994256e-06, + "loss": 0.7317, + "step": 6940 + }, + { + "epoch": 0.3567170315551444, + "grad_norm": 1.0177278518676758, + "learning_rate": 7.452522147571224e-06, + "loss": 0.7484, + "step": 6941 + }, + { + "epoch": 0.35676842429848904, + "grad_norm": 1.0316647291183472, + "learning_rate": 7.451796855198925e-06, + "loss": 0.6673, + "step": 6942 + }, + { + "epoch": 0.3568198170418337, + "grad_norm": 1.0306365489959717, + "learning_rate": 7.451071494897452e-06, + "loss": 0.7097, + "step": 6943 + }, + { + "epoch": 0.35687120978517833, + "grad_norm": 0.9948005080223083, + "learning_rate": 7.4503460666869036e-06, + "loss": 0.7002, + "step": 6944 + }, + { + "epoch": 0.356922602528523, + "grad_norm": 1.0587838888168335, + "learning_rate": 7.449620570587377e-06, + "loss": 0.7608, + "step": 6945 + }, + { + "epoch": 0.35697399527186763, + "grad_norm": 0.7872125506401062, + "learning_rate": 7.448895006618973e-06, + "loss": 0.6574, + "step": 6946 + }, + { + "epoch": 0.3570253880152123, + "grad_norm": 1.0763453245162964, + "learning_rate": 7.448169374801796e-06, + "loss": 0.7341, + "step": 6947 + }, + { + "epoch": 0.35707678075855687, + "grad_norm": 1.0607833862304688, + "learning_rate": 7.4474436751559474e-06, + "loss": 0.7192, + "step": 6948 + }, + { + "epoch": 0.3571281735019015, + "grad_norm": 0.8524363040924072, + "learning_rate": 7.446717907701535e-06, + "loss": 0.6967, + "step": 6949 + }, + { + "epoch": 0.35717956624524616, + "grad_norm": 1.1366530656814575, + "learning_rate": 7.445992072458666e-06, + "loss": 0.7357, + "step": 6950 + }, + { + "epoch": 0.3572309589885908, + "grad_norm": 1.0873734951019287, + "learning_rate": 7.445266169447453e-06, + "loss": 0.6983, + "step": 6951 + }, + { + "epoch": 0.35728235173193545, + "grad_norm": 0.8307279944419861, + "learning_rate": 7.444540198688002e-06, + "loss": 0.7042, + "step": 6952 + }, + { + "epoch": 0.3573337444752801, + "grad_norm": 1.1543588638305664, + "learning_rate": 7.443814160200432e-06, + "loss": 0.7474, + "step": 6953 + }, + { + "epoch": 0.35738513721862475, + "grad_norm": 1.0813277959823608, + "learning_rate": 7.443088054004857e-06, + "loss": 0.7472, + "step": 6954 + }, + { + "epoch": 0.3574365299619694, + "grad_norm": 1.0217931270599365, + "learning_rate": 7.442361880121392e-06, + "loss": 0.756, + "step": 6955 + }, + { + "epoch": 0.357487922705314, + "grad_norm": 0.9164536595344543, + "learning_rate": 7.441635638570161e-06, + "loss": 0.6586, + "step": 6956 + }, + { + "epoch": 0.35753931544865863, + "grad_norm": 1.15981924533844, + "learning_rate": 7.44090932937128e-06, + "loss": 0.7406, + "step": 6957 + }, + { + "epoch": 0.3575907081920033, + "grad_norm": 1.0778522491455078, + "learning_rate": 7.440182952544876e-06, + "loss": 0.775, + "step": 6958 + }, + { + "epoch": 0.35764210093534793, + "grad_norm": 0.6957578063011169, + "learning_rate": 7.439456508111072e-06, + "loss": 0.6491, + "step": 6959 + }, + { + "epoch": 0.3576934936786926, + "grad_norm": 0.6759206056594849, + "learning_rate": 7.438729996089995e-06, + "loss": 0.6619, + "step": 6960 + }, + { + "epoch": 0.3577448864220372, + "grad_norm": 0.7313870191574097, + "learning_rate": 7.438003416501774e-06, + "loss": 0.7205, + "step": 6961 + }, + { + "epoch": 0.35779627916538187, + "grad_norm": 1.0625187158584595, + "learning_rate": 7.437276769366539e-06, + "loss": 0.7365, + "step": 6962 + }, + { + "epoch": 0.35784767190872646, + "grad_norm": 1.1405127048492432, + "learning_rate": 7.436550054704424e-06, + "loss": 0.7685, + "step": 6963 + }, + { + "epoch": 0.3578990646520711, + "grad_norm": 1.028132677078247, + "learning_rate": 7.435823272535563e-06, + "loss": 0.7423, + "step": 6964 + }, + { + "epoch": 0.35795045739541576, + "grad_norm": 0.8267134428024292, + "learning_rate": 7.4350964228800885e-06, + "loss": 0.6942, + "step": 6965 + }, + { + "epoch": 0.3580018501387604, + "grad_norm": 1.0466917753219604, + "learning_rate": 7.434369505758141e-06, + "loss": 0.7523, + "step": 6966 + }, + { + "epoch": 0.35805324288210505, + "grad_norm": 1.0389124155044556, + "learning_rate": 7.433642521189863e-06, + "loss": 0.7635, + "step": 6967 + }, + { + "epoch": 0.3581046356254497, + "grad_norm": 1.077820062637329, + "learning_rate": 7.4329154691953916e-06, + "loss": 0.7773, + "step": 6968 + }, + { + "epoch": 0.35815602836879434, + "grad_norm": 1.1435121297836304, + "learning_rate": 7.432188349794874e-06, + "loss": 0.7616, + "step": 6969 + }, + { + "epoch": 0.358207421112139, + "grad_norm": 1.0581074953079224, + "learning_rate": 7.431461163008453e-06, + "loss": 0.7587, + "step": 6970 + }, + { + "epoch": 0.3582588138554836, + "grad_norm": 1.0587475299835205, + "learning_rate": 7.430733908856279e-06, + "loss": 0.7599, + "step": 6971 + }, + { + "epoch": 0.35831020659882823, + "grad_norm": 1.0356876850128174, + "learning_rate": 7.4300065873584985e-06, + "loss": 0.734, + "step": 6972 + }, + { + "epoch": 0.3583615993421729, + "grad_norm": 1.041116714477539, + "learning_rate": 7.429279198535263e-06, + "loss": 0.7672, + "step": 6973 + }, + { + "epoch": 0.3584129920855175, + "grad_norm": 1.041608214378357, + "learning_rate": 7.4285517424067266e-06, + "loss": 0.748, + "step": 6974 + }, + { + "epoch": 0.35846438482886217, + "grad_norm": 1.0443669557571411, + "learning_rate": 7.4278242189930435e-06, + "loss": 0.7727, + "step": 6975 + }, + { + "epoch": 0.3585157775722068, + "grad_norm": 0.8906344175338745, + "learning_rate": 7.42709662831437e-06, + "loss": 0.6738, + "step": 6976 + }, + { + "epoch": 0.35856717031555146, + "grad_norm": 0.8813257217407227, + "learning_rate": 7.426368970390865e-06, + "loss": 0.6431, + "step": 6977 + }, + { + "epoch": 0.3586185630588961, + "grad_norm": 1.0723553895950317, + "learning_rate": 7.425641245242689e-06, + "loss": 0.7724, + "step": 6978 + }, + { + "epoch": 0.3586699558022407, + "grad_norm": 0.9958428144454956, + "learning_rate": 7.424913452890004e-06, + "loss": 0.7787, + "step": 6979 + }, + { + "epoch": 0.35872134854558535, + "grad_norm": 1.1119393110275269, + "learning_rate": 7.424185593352975e-06, + "loss": 0.8162, + "step": 6980 + }, + { + "epoch": 0.35877274128893, + "grad_norm": 1.0245420932769775, + "learning_rate": 7.423457666651765e-06, + "loss": 0.7036, + "step": 6981 + }, + { + "epoch": 0.35882413403227464, + "grad_norm": 0.8802167773246765, + "learning_rate": 7.422729672806547e-06, + "loss": 0.7385, + "step": 6982 + }, + { + "epoch": 0.3588755267756193, + "grad_norm": 0.800028920173645, + "learning_rate": 7.422001611837486e-06, + "loss": 0.6764, + "step": 6983 + }, + { + "epoch": 0.35892691951896394, + "grad_norm": 0.7771615982055664, + "learning_rate": 7.421273483764755e-06, + "loss": 0.6804, + "step": 6984 + }, + { + "epoch": 0.3589783122623086, + "grad_norm": 1.0924482345581055, + "learning_rate": 7.420545288608529e-06, + "loss": 0.748, + "step": 6985 + }, + { + "epoch": 0.3590297050056532, + "grad_norm": 1.154711365699768, + "learning_rate": 7.4198170263889815e-06, + "loss": 0.7546, + "step": 6986 + }, + { + "epoch": 0.3590810977489978, + "grad_norm": 1.0513806343078613, + "learning_rate": 7.41908869712629e-06, + "loss": 0.7679, + "step": 6987 + }, + { + "epoch": 0.35913249049234247, + "grad_norm": 1.0149319171905518, + "learning_rate": 7.418360300840635e-06, + "loss": 0.7259, + "step": 6988 + }, + { + "epoch": 0.3591838832356871, + "grad_norm": 1.0402402877807617, + "learning_rate": 7.417631837552194e-06, + "loss": 0.7308, + "step": 6989 + }, + { + "epoch": 0.35923527597903177, + "grad_norm": 1.1022813320159912, + "learning_rate": 7.416903307281153e-06, + "loss": 0.8066, + "step": 6990 + }, + { + "epoch": 0.3592866687223764, + "grad_norm": 0.9306803345680237, + "learning_rate": 7.416174710047696e-06, + "loss": 0.6944, + "step": 6991 + }, + { + "epoch": 0.35933806146572106, + "grad_norm": 1.0914621353149414, + "learning_rate": 7.415446045872007e-06, + "loss": 0.8124, + "step": 6992 + }, + { + "epoch": 0.3593894542090657, + "grad_norm": 1.050011396408081, + "learning_rate": 7.4147173147742765e-06, + "loss": 0.737, + "step": 6993 + }, + { + "epoch": 0.3594408469524103, + "grad_norm": 0.7137007713317871, + "learning_rate": 7.413988516774695e-06, + "loss": 0.7059, + "step": 6994 + }, + { + "epoch": 0.35949223969575494, + "grad_norm": 1.11740243434906, + "learning_rate": 7.413259651893453e-06, + "loss": 0.7153, + "step": 6995 + }, + { + "epoch": 0.3595436324390996, + "grad_norm": 0.7023674845695496, + "learning_rate": 7.412530720150746e-06, + "loss": 0.6804, + "step": 6996 + }, + { + "epoch": 0.35959502518244424, + "grad_norm": 1.0877118110656738, + "learning_rate": 7.411801721566767e-06, + "loss": 0.7757, + "step": 6997 + }, + { + "epoch": 0.3596464179257889, + "grad_norm": 0.738810658454895, + "learning_rate": 7.4110726561617155e-06, + "loss": 0.6329, + "step": 6998 + }, + { + "epoch": 0.35969781066913353, + "grad_norm": 1.0508731603622437, + "learning_rate": 7.410343523955791e-06, + "loss": 0.7896, + "step": 6999 + }, + { + "epoch": 0.3597492034124782, + "grad_norm": 1.0721107721328735, + "learning_rate": 7.409614324969195e-06, + "loss": 0.7313, + "step": 7000 + }, + { + "epoch": 0.35980059615582277, + "grad_norm": 1.0753889083862305, + "learning_rate": 7.40888505922213e-06, + "loss": 0.7532, + "step": 7001 + }, + { + "epoch": 0.3598519888991674, + "grad_norm": 1.0790959596633911, + "learning_rate": 7.4081557267348e-06, + "loss": 0.757, + "step": 7002 + }, + { + "epoch": 0.35990338164251207, + "grad_norm": 1.073265552520752, + "learning_rate": 7.407426327527413e-06, + "loss": 0.7599, + "step": 7003 + }, + { + "epoch": 0.3599547743858567, + "grad_norm": 1.1742689609527588, + "learning_rate": 7.406696861620177e-06, + "loss": 0.7199, + "step": 7004 + }, + { + "epoch": 0.36000616712920136, + "grad_norm": 1.0453686714172363, + "learning_rate": 7.405967329033303e-06, + "loss": 0.769, + "step": 7005 + }, + { + "epoch": 0.360057559872546, + "grad_norm": 0.7631792426109314, + "learning_rate": 7.4052377297870035e-06, + "loss": 0.6846, + "step": 7006 + }, + { + "epoch": 0.36010895261589065, + "grad_norm": 1.1031794548034668, + "learning_rate": 7.404508063901492e-06, + "loss": 0.7817, + "step": 7007 + }, + { + "epoch": 0.3601603453592353, + "grad_norm": 1.057044506072998, + "learning_rate": 7.4037783313969845e-06, + "loss": 0.7784, + "step": 7008 + }, + { + "epoch": 0.3602117381025799, + "grad_norm": 1.1388499736785889, + "learning_rate": 7.4030485322937e-06, + "loss": 0.7329, + "step": 7009 + }, + { + "epoch": 0.36026313084592454, + "grad_norm": 1.1187939643859863, + "learning_rate": 7.402318666611856e-06, + "loss": 0.8233, + "step": 7010 + }, + { + "epoch": 0.3603145235892692, + "grad_norm": 1.1181526184082031, + "learning_rate": 7.4015887343716785e-06, + "loss": 0.7481, + "step": 7011 + }, + { + "epoch": 0.36036591633261383, + "grad_norm": 1.0606495141983032, + "learning_rate": 7.400858735593386e-06, + "loss": 0.7506, + "step": 7012 + }, + { + "epoch": 0.3604173090759585, + "grad_norm": 1.084176778793335, + "learning_rate": 7.400128670297205e-06, + "loss": 0.7081, + "step": 7013 + }, + { + "epoch": 0.36046870181930313, + "grad_norm": 1.1095635890960693, + "learning_rate": 7.399398538503365e-06, + "loss": 0.8121, + "step": 7014 + }, + { + "epoch": 0.3605200945626478, + "grad_norm": 0.6976984143257141, + "learning_rate": 7.398668340232091e-06, + "loss": 0.661, + "step": 7015 + }, + { + "epoch": 0.36057148730599237, + "grad_norm": 1.0054301023483276, + "learning_rate": 7.397938075503619e-06, + "loss": 0.7356, + "step": 7016 + }, + { + "epoch": 0.360622880049337, + "grad_norm": 1.0758758783340454, + "learning_rate": 7.3972077443381764e-06, + "loss": 0.7667, + "step": 7017 + }, + { + "epoch": 0.36067427279268166, + "grad_norm": 1.063114047050476, + "learning_rate": 7.3964773467560015e-06, + "loss": 0.7385, + "step": 7018 + }, + { + "epoch": 0.3607256655360263, + "grad_norm": 1.0553628206253052, + "learning_rate": 7.395746882777327e-06, + "loss": 0.7564, + "step": 7019 + }, + { + "epoch": 0.36077705827937095, + "grad_norm": 1.050002932548523, + "learning_rate": 7.395016352422395e-06, + "loss": 0.7238, + "step": 7020 + }, + { + "epoch": 0.3608284510227156, + "grad_norm": 1.0819106101989746, + "learning_rate": 7.394285755711441e-06, + "loss": 0.7468, + "step": 7021 + }, + { + "epoch": 0.36087984376606025, + "grad_norm": 0.8149110674858093, + "learning_rate": 7.3935550926647125e-06, + "loss": 0.6886, + "step": 7022 + }, + { + "epoch": 0.3609312365094049, + "grad_norm": 1.0606942176818848, + "learning_rate": 7.392824363302448e-06, + "loss": 0.7653, + "step": 7023 + }, + { + "epoch": 0.3609826292527495, + "grad_norm": 1.1929858922958374, + "learning_rate": 7.392093567644896e-06, + "loss": 0.7099, + "step": 7024 + }, + { + "epoch": 0.36103402199609413, + "grad_norm": 1.099770188331604, + "learning_rate": 7.391362705712302e-06, + "loss": 0.7854, + "step": 7025 + }, + { + "epoch": 0.3610854147394388, + "grad_norm": 1.014477014541626, + "learning_rate": 7.390631777524916e-06, + "loss": 0.7692, + "step": 7026 + }, + { + "epoch": 0.36113680748278343, + "grad_norm": 1.177830696105957, + "learning_rate": 7.38990078310299e-06, + "loss": 0.7757, + "step": 7027 + }, + { + "epoch": 0.3611882002261281, + "grad_norm": 1.0108778476715088, + "learning_rate": 7.389169722466773e-06, + "loss": 0.7667, + "step": 7028 + }, + { + "epoch": 0.3612395929694727, + "grad_norm": 1.0059643983840942, + "learning_rate": 7.388438595636525e-06, + "loss": 0.7173, + "step": 7029 + }, + { + "epoch": 0.36129098571281737, + "grad_norm": 1.0134066343307495, + "learning_rate": 7.3877074026325e-06, + "loss": 0.7131, + "step": 7030 + }, + { + "epoch": 0.361342378456162, + "grad_norm": 0.7488081455230713, + "learning_rate": 7.386976143474955e-06, + "loss": 0.7106, + "step": 7031 + }, + { + "epoch": 0.3613937711995066, + "grad_norm": 1.0309405326843262, + "learning_rate": 7.386244818184154e-06, + "loss": 0.7186, + "step": 7032 + }, + { + "epoch": 0.36144516394285126, + "grad_norm": 1.044551134109497, + "learning_rate": 7.385513426780355e-06, + "loss": 0.7383, + "step": 7033 + }, + { + "epoch": 0.3614965566861959, + "grad_norm": 1.1181553602218628, + "learning_rate": 7.384781969283823e-06, + "loss": 0.7922, + "step": 7034 + }, + { + "epoch": 0.36154794942954055, + "grad_norm": 1.1420637369155884, + "learning_rate": 7.384050445714825e-06, + "loss": 0.7108, + "step": 7035 + }, + { + "epoch": 0.3615993421728852, + "grad_norm": 1.1736031770706177, + "learning_rate": 7.3833188560936275e-06, + "loss": 0.7311, + "step": 7036 + }, + { + "epoch": 0.36165073491622984, + "grad_norm": 1.0539335012435913, + "learning_rate": 7.382587200440498e-06, + "loss": 0.6899, + "step": 7037 + }, + { + "epoch": 0.3617021276595745, + "grad_norm": 1.0954970121383667, + "learning_rate": 7.381855478775711e-06, + "loss": 0.7649, + "step": 7038 + }, + { + "epoch": 0.3617535204029191, + "grad_norm": 1.039907693862915, + "learning_rate": 7.381123691119538e-06, + "loss": 0.7344, + "step": 7039 + }, + { + "epoch": 0.36180491314626373, + "grad_norm": 0.9794743657112122, + "learning_rate": 7.3803918374922545e-06, + "loss": 0.7264, + "step": 7040 + }, + { + "epoch": 0.3618563058896084, + "grad_norm": 1.142135739326477, + "learning_rate": 7.379659917914136e-06, + "loss": 0.7393, + "step": 7041 + }, + { + "epoch": 0.361907698632953, + "grad_norm": 1.2996269464492798, + "learning_rate": 7.378927932405461e-06, + "loss": 0.8249, + "step": 7042 + }, + { + "epoch": 0.36195909137629767, + "grad_norm": 1.0665199756622314, + "learning_rate": 7.378195880986511e-06, + "loss": 0.7995, + "step": 7043 + }, + { + "epoch": 0.3620104841196423, + "grad_norm": 1.058315396308899, + "learning_rate": 7.377463763677567e-06, + "loss": 0.7347, + "step": 7044 + }, + { + "epoch": 0.36206187686298696, + "grad_norm": 0.9880419373512268, + "learning_rate": 7.376731580498912e-06, + "loss": 0.7003, + "step": 7045 + }, + { + "epoch": 0.3621132696063316, + "grad_norm": 1.1635642051696777, + "learning_rate": 7.3759993314708355e-06, + "loss": 0.8007, + "step": 7046 + }, + { + "epoch": 0.3621646623496762, + "grad_norm": 1.004340648651123, + "learning_rate": 7.375267016613621e-06, + "loss": 0.7065, + "step": 7047 + }, + { + "epoch": 0.36221605509302085, + "grad_norm": 1.037726879119873, + "learning_rate": 7.37453463594756e-06, + "loss": 0.7536, + "step": 7048 + }, + { + "epoch": 0.3622674478363655, + "grad_norm": 1.1057242155075073, + "learning_rate": 7.373802189492943e-06, + "loss": 0.7254, + "step": 7049 + }, + { + "epoch": 0.36231884057971014, + "grad_norm": 1.1648019552230835, + "learning_rate": 7.3730696772700635e-06, + "loss": 0.8049, + "step": 7050 + }, + { + "epoch": 0.3623702333230548, + "grad_norm": 1.0979446172714233, + "learning_rate": 7.3723370992992175e-06, + "loss": 0.7784, + "step": 7051 + }, + { + "epoch": 0.36242162606639944, + "grad_norm": 0.8164729475975037, + "learning_rate": 7.3716044556007e-06, + "loss": 0.6803, + "step": 7052 + }, + { + "epoch": 0.3624730188097441, + "grad_norm": 1.1668245792388916, + "learning_rate": 7.37087174619481e-06, + "loss": 0.7875, + "step": 7053 + }, + { + "epoch": 0.3625244115530887, + "grad_norm": 1.1078276634216309, + "learning_rate": 7.370138971101848e-06, + "loss": 0.7505, + "step": 7054 + }, + { + "epoch": 0.3625758042964333, + "grad_norm": 1.0943260192871094, + "learning_rate": 7.369406130342115e-06, + "loss": 0.7735, + "step": 7055 + }, + { + "epoch": 0.36262719703977797, + "grad_norm": 1.1172891855239868, + "learning_rate": 7.3686732239359174e-06, + "loss": 0.8424, + "step": 7056 + }, + { + "epoch": 0.3626785897831226, + "grad_norm": 0.7857499718666077, + "learning_rate": 7.3679402519035595e-06, + "loss": 0.6845, + "step": 7057 + }, + { + "epoch": 0.36272998252646727, + "grad_norm": 1.0472089052200317, + "learning_rate": 7.367207214265348e-06, + "loss": 0.6895, + "step": 7058 + }, + { + "epoch": 0.3627813752698119, + "grad_norm": 1.0975582599639893, + "learning_rate": 7.366474111041594e-06, + "loss": 0.795, + "step": 7059 + }, + { + "epoch": 0.36283276801315656, + "grad_norm": 0.9648140072822571, + "learning_rate": 7.365740942252609e-06, + "loss": 0.738, + "step": 7060 + }, + { + "epoch": 0.3628841607565012, + "grad_norm": 1.0345007181167603, + "learning_rate": 7.3650077079187045e-06, + "loss": 0.7508, + "step": 7061 + }, + { + "epoch": 0.3629355534998458, + "grad_norm": 1.1473277807235718, + "learning_rate": 7.364274408060197e-06, + "loss": 0.7974, + "step": 7062 + }, + { + "epoch": 0.36298694624319044, + "grad_norm": 1.4830121994018555, + "learning_rate": 7.3635410426974015e-06, + "loss": 0.7307, + "step": 7063 + }, + { + "epoch": 0.3630383389865351, + "grad_norm": 1.0932552814483643, + "learning_rate": 7.3628076118506386e-06, + "loss": 0.7265, + "step": 7064 + }, + { + "epoch": 0.36308973172987974, + "grad_norm": 1.0681736469268799, + "learning_rate": 7.362074115540228e-06, + "loss": 0.7573, + "step": 7065 + }, + { + "epoch": 0.3631411244732244, + "grad_norm": 1.1017327308654785, + "learning_rate": 7.361340553786489e-06, + "loss": 0.7306, + "step": 7066 + }, + { + "epoch": 0.36319251721656903, + "grad_norm": 1.062074065208435, + "learning_rate": 7.3606069266097504e-06, + "loss": 0.7319, + "step": 7067 + }, + { + "epoch": 0.3632439099599137, + "grad_norm": 1.1176702976226807, + "learning_rate": 7.359873234030334e-06, + "loss": 0.7683, + "step": 7068 + }, + { + "epoch": 0.3632953027032583, + "grad_norm": 1.0860753059387207, + "learning_rate": 7.359139476068568e-06, + "loss": 0.8024, + "step": 7069 + }, + { + "epoch": 0.3633466954466029, + "grad_norm": 1.0659818649291992, + "learning_rate": 7.3584056527447845e-06, + "loss": 0.7496, + "step": 7070 + }, + { + "epoch": 0.36339808818994757, + "grad_norm": 1.4316399097442627, + "learning_rate": 7.3576717640793114e-06, + "loss": 0.7651, + "step": 7071 + }, + { + "epoch": 0.3634494809332922, + "grad_norm": 0.9972586035728455, + "learning_rate": 7.356937810092486e-06, + "loss": 0.7189, + "step": 7072 + }, + { + "epoch": 0.36350087367663686, + "grad_norm": 0.7550626397132874, + "learning_rate": 7.356203790804638e-06, + "loss": 0.6586, + "step": 7073 + }, + { + "epoch": 0.3635522664199815, + "grad_norm": 1.0677191019058228, + "learning_rate": 7.355469706236107e-06, + "loss": 0.7326, + "step": 7074 + }, + { + "epoch": 0.36360365916332615, + "grad_norm": 0.7250852584838867, + "learning_rate": 7.354735556407231e-06, + "loss": 0.6581, + "step": 7075 + }, + { + "epoch": 0.3636550519066708, + "grad_norm": 1.0729914903640747, + "learning_rate": 7.354001341338351e-06, + "loss": 0.7754, + "step": 7076 + }, + { + "epoch": 0.3637064446500154, + "grad_norm": 1.0173065662384033, + "learning_rate": 7.353267061049807e-06, + "loss": 0.7992, + "step": 7077 + }, + { + "epoch": 0.36375783739336004, + "grad_norm": 1.1127136945724487, + "learning_rate": 7.352532715561944e-06, + "loss": 0.7962, + "step": 7078 + }, + { + "epoch": 0.3638092301367047, + "grad_norm": 1.062265157699585, + "learning_rate": 7.351798304895108e-06, + "loss": 0.7429, + "step": 7079 + }, + { + "epoch": 0.36386062288004933, + "grad_norm": 1.0323481559753418, + "learning_rate": 7.351063829069647e-06, + "loss": 0.7017, + "step": 7080 + }, + { + "epoch": 0.363912015623394, + "grad_norm": 1.0366523265838623, + "learning_rate": 7.350329288105909e-06, + "loss": 0.7428, + "step": 7081 + }, + { + "epoch": 0.3639634083667386, + "grad_norm": 1.0216281414031982, + "learning_rate": 7.3495946820242445e-06, + "loss": 0.7434, + "step": 7082 + }, + { + "epoch": 0.3640148011100833, + "grad_norm": 0.732904314994812, + "learning_rate": 7.348860010845009e-06, + "loss": 0.7228, + "step": 7083 + }, + { + "epoch": 0.3640661938534279, + "grad_norm": 1.1881695985794067, + "learning_rate": 7.348125274588557e-06, + "loss": 0.7996, + "step": 7084 + }, + { + "epoch": 0.3641175865967725, + "grad_norm": 1.0608257055282593, + "learning_rate": 7.347390473275242e-06, + "loss": 0.759, + "step": 7085 + }, + { + "epoch": 0.36416897934011716, + "grad_norm": 1.0205358266830444, + "learning_rate": 7.346655606925425e-06, + "loss": 0.7811, + "step": 7086 + }, + { + "epoch": 0.3642203720834618, + "grad_norm": 1.0110136270523071, + "learning_rate": 7.3459206755594635e-06, + "loss": 0.7187, + "step": 7087 + }, + { + "epoch": 0.36427176482680645, + "grad_norm": 1.0529857873916626, + "learning_rate": 7.345185679197723e-06, + "loss": 0.7307, + "step": 7088 + }, + { + "epoch": 0.3643231575701511, + "grad_norm": 1.040067195892334, + "learning_rate": 7.344450617860563e-06, + "loss": 0.752, + "step": 7089 + }, + { + "epoch": 0.36437455031349575, + "grad_norm": 0.7865608334541321, + "learning_rate": 7.343715491568353e-06, + "loss": 0.6868, + "step": 7090 + }, + { + "epoch": 0.3644259430568404, + "grad_norm": 1.2349356412887573, + "learning_rate": 7.34298030034146e-06, + "loss": 0.7716, + "step": 7091 + }, + { + "epoch": 0.364477335800185, + "grad_norm": 1.035965085029602, + "learning_rate": 7.3422450442002506e-06, + "loss": 0.7697, + "step": 7092 + }, + { + "epoch": 0.36452872854352963, + "grad_norm": 1.0702332258224487, + "learning_rate": 7.341509723165096e-06, + "loss": 0.7343, + "step": 7093 + }, + { + "epoch": 0.3645801212868743, + "grad_norm": 1.0724430084228516, + "learning_rate": 7.340774337256371e-06, + "loss": 0.8056, + "step": 7094 + }, + { + "epoch": 0.36463151403021893, + "grad_norm": 1.1434651613235474, + "learning_rate": 7.340038886494447e-06, + "loss": 0.8159, + "step": 7095 + }, + { + "epoch": 0.3646829067735636, + "grad_norm": 1.179262399673462, + "learning_rate": 7.339303370899705e-06, + "loss": 0.837, + "step": 7096 + }, + { + "epoch": 0.3647342995169082, + "grad_norm": 7.245774745941162, + "learning_rate": 7.338567790492519e-06, + "loss": 0.8884, + "step": 7097 + }, + { + "epoch": 0.36478569226025287, + "grad_norm": 1.1961015462875366, + "learning_rate": 7.33783214529327e-06, + "loss": 0.7691, + "step": 7098 + }, + { + "epoch": 0.3648370850035975, + "grad_norm": 1.198743462562561, + "learning_rate": 7.33709643532234e-06, + "loss": 0.7944, + "step": 7099 + }, + { + "epoch": 0.3648884777469421, + "grad_norm": 1.1719127893447876, + "learning_rate": 7.3363606606001125e-06, + "loss": 0.7577, + "step": 7100 + }, + { + "epoch": 0.36493987049028676, + "grad_norm": 0.7972054481506348, + "learning_rate": 7.335624821146973e-06, + "loss": 0.6822, + "step": 7101 + }, + { + "epoch": 0.3649912632336314, + "grad_norm": 0.8277395963668823, + "learning_rate": 7.3348889169833086e-06, + "loss": 0.7072, + "step": 7102 + }, + { + "epoch": 0.36504265597697605, + "grad_norm": 1.1211965084075928, + "learning_rate": 7.334152948129506e-06, + "loss": 0.7973, + "step": 7103 + }, + { + "epoch": 0.3650940487203207, + "grad_norm": 1.1106305122375488, + "learning_rate": 7.33341691460596e-06, + "loss": 0.7658, + "step": 7104 + }, + { + "epoch": 0.36514544146366534, + "grad_norm": 1.1830538511276245, + "learning_rate": 7.33268081643306e-06, + "loss": 0.7867, + "step": 7105 + }, + { + "epoch": 0.36519683420701, + "grad_norm": 1.0430490970611572, + "learning_rate": 7.3319446536312e-06, + "loss": 0.7782, + "step": 7106 + }, + { + "epoch": 0.36524822695035464, + "grad_norm": 1.0280683040618896, + "learning_rate": 7.3312084262207775e-06, + "loss": 0.786, + "step": 7107 + }, + { + "epoch": 0.36529961969369923, + "grad_norm": 1.128024697303772, + "learning_rate": 7.330472134222191e-06, + "loss": 0.7507, + "step": 7108 + }, + { + "epoch": 0.3653510124370439, + "grad_norm": 0.8541820645332336, + "learning_rate": 7.329735777655837e-06, + "loss": 0.6279, + "step": 7109 + }, + { + "epoch": 0.3654024051803885, + "grad_norm": 1.0252243280410767, + "learning_rate": 7.328999356542119e-06, + "loss": 0.7712, + "step": 7110 + }, + { + "epoch": 0.36545379792373317, + "grad_norm": 1.1370036602020264, + "learning_rate": 7.328262870901441e-06, + "loss": 0.7506, + "step": 7111 + }, + { + "epoch": 0.3655051906670778, + "grad_norm": 1.098652958869934, + "learning_rate": 7.327526320754207e-06, + "loss": 0.7954, + "step": 7112 + }, + { + "epoch": 0.36555658341042246, + "grad_norm": 1.1582125425338745, + "learning_rate": 7.326789706120824e-06, + "loss": 0.7442, + "step": 7113 + }, + { + "epoch": 0.3656079761537671, + "grad_norm": 1.023566722869873, + "learning_rate": 7.3260530270217e-06, + "loss": 0.7589, + "step": 7114 + }, + { + "epoch": 0.3656593688971117, + "grad_norm": 1.0819870233535767, + "learning_rate": 7.325316283477246e-06, + "loss": 0.7947, + "step": 7115 + }, + { + "epoch": 0.36571076164045635, + "grad_norm": 0.8672119975090027, + "learning_rate": 7.324579475507873e-06, + "loss": 0.708, + "step": 7116 + }, + { + "epoch": 0.365762154383801, + "grad_norm": 1.1197950839996338, + "learning_rate": 7.323842603133996e-06, + "loss": 0.7675, + "step": 7117 + }, + { + "epoch": 0.36581354712714564, + "grad_norm": 1.104225516319275, + "learning_rate": 7.323105666376031e-06, + "loss": 0.7948, + "step": 7118 + }, + { + "epoch": 0.3658649398704903, + "grad_norm": 1.0510808229446411, + "learning_rate": 7.322368665254394e-06, + "loss": 0.698, + "step": 7119 + }, + { + "epoch": 0.36591633261383494, + "grad_norm": 1.1101880073547363, + "learning_rate": 7.321631599789506e-06, + "loss": 0.7041, + "step": 7120 + }, + { + "epoch": 0.3659677253571796, + "grad_norm": 0.7290747761726379, + "learning_rate": 7.320894470001787e-06, + "loss": 0.7158, + "step": 7121 + }, + { + "epoch": 0.36601911810052423, + "grad_norm": 1.1536113023757935, + "learning_rate": 7.320157275911659e-06, + "loss": 0.7722, + "step": 7122 + }, + { + "epoch": 0.3660705108438688, + "grad_norm": 1.1032837629318237, + "learning_rate": 7.319420017539549e-06, + "loss": 0.6992, + "step": 7123 + }, + { + "epoch": 0.36612190358721347, + "grad_norm": 1.1017006635665894, + "learning_rate": 7.318682694905881e-06, + "loss": 0.794, + "step": 7124 + }, + { + "epoch": 0.3661732963305581, + "grad_norm": 1.120020866394043, + "learning_rate": 7.317945308031085e-06, + "loss": 0.7736, + "step": 7125 + }, + { + "epoch": 0.36622468907390276, + "grad_norm": 1.0796051025390625, + "learning_rate": 7.31720785693559e-06, + "loss": 0.7875, + "step": 7126 + }, + { + "epoch": 0.3662760818172474, + "grad_norm": 1.0570719242095947, + "learning_rate": 7.316470341639827e-06, + "loss": 0.762, + "step": 7127 + }, + { + "epoch": 0.36632747456059206, + "grad_norm": 1.3974111080169678, + "learning_rate": 7.3157327621642305e-06, + "loss": 0.7371, + "step": 7128 + }, + { + "epoch": 0.3663788673039367, + "grad_norm": 1.145666480064392, + "learning_rate": 7.314995118529237e-06, + "loss": 0.8007, + "step": 7129 + }, + { + "epoch": 0.3664302600472813, + "grad_norm": 0.6940429210662842, + "learning_rate": 7.31425741075528e-06, + "loss": 0.6541, + "step": 7130 + }, + { + "epoch": 0.36648165279062594, + "grad_norm": 0.7124250531196594, + "learning_rate": 7.313519638862802e-06, + "loss": 0.6804, + "step": 7131 + }, + { + "epoch": 0.3665330455339706, + "grad_norm": 1.100724458694458, + "learning_rate": 7.312781802872241e-06, + "loss": 0.7615, + "step": 7132 + }, + { + "epoch": 0.36658443827731524, + "grad_norm": 1.1048794984817505, + "learning_rate": 7.312043902804042e-06, + "loss": 0.7817, + "step": 7133 + }, + { + "epoch": 0.3666358310206599, + "grad_norm": 1.1387883424758911, + "learning_rate": 7.311305938678647e-06, + "loss": 0.7934, + "step": 7134 + }, + { + "epoch": 0.36668722376400453, + "grad_norm": 1.0954293012619019, + "learning_rate": 7.310567910516502e-06, + "loss": 0.7611, + "step": 7135 + }, + { + "epoch": 0.3667386165073492, + "grad_norm": 1.072045087814331, + "learning_rate": 7.309829818338057e-06, + "loss": 0.7247, + "step": 7136 + }, + { + "epoch": 0.3667900092506938, + "grad_norm": 1.1318070888519287, + "learning_rate": 7.309091662163759e-06, + "loss": 0.7754, + "step": 7137 + }, + { + "epoch": 0.3668414019940384, + "grad_norm": 1.1188750267028809, + "learning_rate": 7.308353442014059e-06, + "loss": 0.7501, + "step": 7138 + }, + { + "epoch": 0.36689279473738307, + "grad_norm": 1.0617001056671143, + "learning_rate": 7.307615157909413e-06, + "loss": 0.7524, + "step": 7139 + }, + { + "epoch": 0.3669441874807277, + "grad_norm": 0.7577106356620789, + "learning_rate": 7.306876809870272e-06, + "loss": 0.7164, + "step": 7140 + }, + { + "epoch": 0.36699558022407236, + "grad_norm": 1.0569356679916382, + "learning_rate": 7.306138397917095e-06, + "loss": 0.7607, + "step": 7141 + }, + { + "epoch": 0.367046972967417, + "grad_norm": 1.190929651260376, + "learning_rate": 7.305399922070341e-06, + "loss": 0.7766, + "step": 7142 + }, + { + "epoch": 0.36709836571076165, + "grad_norm": 1.1272035837173462, + "learning_rate": 7.304661382350467e-06, + "loss": 0.7446, + "step": 7143 + }, + { + "epoch": 0.3671497584541063, + "grad_norm": 1.1266297101974487, + "learning_rate": 7.303922778777939e-06, + "loss": 0.7303, + "step": 7144 + }, + { + "epoch": 0.3672011511974509, + "grad_norm": 1.0627626180648804, + "learning_rate": 7.303184111373218e-06, + "loss": 0.7722, + "step": 7145 + }, + { + "epoch": 0.36725254394079554, + "grad_norm": 1.1550190448760986, + "learning_rate": 7.302445380156769e-06, + "loss": 0.7688, + "step": 7146 + }, + { + "epoch": 0.3673039366841402, + "grad_norm": 0.8579680323600769, + "learning_rate": 7.301706585149062e-06, + "loss": 0.6771, + "step": 7147 + }, + { + "epoch": 0.36735532942748483, + "grad_norm": 1.0349979400634766, + "learning_rate": 7.300967726370563e-06, + "loss": 0.7824, + "step": 7148 + }, + { + "epoch": 0.3674067221708295, + "grad_norm": 1.0926611423492432, + "learning_rate": 7.300228803841744e-06, + "loss": 0.7426, + "step": 7149 + }, + { + "epoch": 0.3674581149141741, + "grad_norm": 1.0555914640426636, + "learning_rate": 7.299489817583077e-06, + "loss": 0.7364, + "step": 7150 + }, + { + "epoch": 0.3675095076575188, + "grad_norm": 1.1215845346450806, + "learning_rate": 7.298750767615037e-06, + "loss": 0.7797, + "step": 7151 + }, + { + "epoch": 0.3675609004008634, + "grad_norm": 1.173999547958374, + "learning_rate": 7.298011653958099e-06, + "loss": 0.7846, + "step": 7152 + }, + { + "epoch": 0.367612293144208, + "grad_norm": 1.2063031196594238, + "learning_rate": 7.297272476632742e-06, + "loss": 0.7314, + "step": 7153 + }, + { + "epoch": 0.36766368588755266, + "grad_norm": 1.0700774192810059, + "learning_rate": 7.296533235659444e-06, + "loss": 0.7645, + "step": 7154 + }, + { + "epoch": 0.3677150786308973, + "grad_norm": 1.0352728366851807, + "learning_rate": 7.295793931058688e-06, + "loss": 0.75, + "step": 7155 + }, + { + "epoch": 0.36776647137424195, + "grad_norm": 1.1127796173095703, + "learning_rate": 7.295054562850956e-06, + "loss": 0.7731, + "step": 7156 + }, + { + "epoch": 0.3678178641175866, + "grad_norm": 1.106644868850708, + "learning_rate": 7.294315131056734e-06, + "loss": 0.7386, + "step": 7157 + }, + { + "epoch": 0.36786925686093125, + "grad_norm": 1.0459643602371216, + "learning_rate": 7.293575635696508e-06, + "loss": 0.7528, + "step": 7158 + }, + { + "epoch": 0.3679206496042759, + "grad_norm": 1.0873206853866577, + "learning_rate": 7.292836076790764e-06, + "loss": 0.7635, + "step": 7159 + }, + { + "epoch": 0.36797204234762054, + "grad_norm": 5.0787882804870605, + "learning_rate": 7.292096454359997e-06, + "loss": 0.838, + "step": 7160 + }, + { + "epoch": 0.36802343509096513, + "grad_norm": 1.0541611909866333, + "learning_rate": 7.291356768424695e-06, + "loss": 0.7368, + "step": 7161 + }, + { + "epoch": 0.3680748278343098, + "grad_norm": 1.167487621307373, + "learning_rate": 7.290617019005352e-06, + "loss": 0.8085, + "step": 7162 + }, + { + "epoch": 0.36812622057765443, + "grad_norm": 1.095430850982666, + "learning_rate": 7.289877206122466e-06, + "loss": 0.7467, + "step": 7163 + }, + { + "epoch": 0.3681776133209991, + "grad_norm": 1.168697476387024, + "learning_rate": 7.28913732979653e-06, + "loss": 0.782, + "step": 7164 + }, + { + "epoch": 0.3682290060643437, + "grad_norm": 0.7951579093933105, + "learning_rate": 7.2883973900480475e-06, + "loss": 0.6669, + "step": 7165 + }, + { + "epoch": 0.36828039880768837, + "grad_norm": 1.0883334875106812, + "learning_rate": 7.287657386897517e-06, + "loss": 0.7838, + "step": 7166 + }, + { + "epoch": 0.368331791551033, + "grad_norm": 1.1320598125457764, + "learning_rate": 7.286917320365439e-06, + "loss": 0.721, + "step": 7167 + }, + { + "epoch": 0.3683831842943776, + "grad_norm": 0.7666705846786499, + "learning_rate": 7.286177190472321e-06, + "loss": 0.6821, + "step": 7168 + }, + { + "epoch": 0.36843457703772226, + "grad_norm": 1.1280375719070435, + "learning_rate": 7.285436997238668e-06, + "loss": 0.7841, + "step": 7169 + }, + { + "epoch": 0.3684859697810669, + "grad_norm": 1.066644549369812, + "learning_rate": 7.284696740684986e-06, + "loss": 0.725, + "step": 7170 + }, + { + "epoch": 0.36853736252441155, + "grad_norm": 1.026785969734192, + "learning_rate": 7.283956420831786e-06, + "loss": 0.7342, + "step": 7171 + }, + { + "epoch": 0.3685887552677562, + "grad_norm": 1.0876283645629883, + "learning_rate": 7.2832160376995785e-06, + "loss": 0.7544, + "step": 7172 + }, + { + "epoch": 0.36864014801110084, + "grad_norm": 1.0831308364868164, + "learning_rate": 7.2824755913088776e-06, + "loss": 0.7518, + "step": 7173 + }, + { + "epoch": 0.3686915407544455, + "grad_norm": 1.1821656227111816, + "learning_rate": 7.281735081680197e-06, + "loss": 0.7279, + "step": 7174 + }, + { + "epoch": 0.36874293349779014, + "grad_norm": 1.056152105331421, + "learning_rate": 7.280994508834054e-06, + "loss": 0.7531, + "step": 7175 + }, + { + "epoch": 0.36879432624113473, + "grad_norm": 0.9553729295730591, + "learning_rate": 7.280253872790965e-06, + "loss": 0.6825, + "step": 7176 + }, + { + "epoch": 0.3688457189844794, + "grad_norm": 1.0687272548675537, + "learning_rate": 7.279513173571453e-06, + "loss": 0.7781, + "step": 7177 + }, + { + "epoch": 0.368897111727824, + "grad_norm": 1.0544956922531128, + "learning_rate": 7.278772411196037e-06, + "loss": 0.7328, + "step": 7178 + }, + { + "epoch": 0.36894850447116867, + "grad_norm": 0.7564080953598022, + "learning_rate": 7.278031585685241e-06, + "loss": 0.7531, + "step": 7179 + }, + { + "epoch": 0.3689998972145133, + "grad_norm": 1.0805671215057373, + "learning_rate": 7.2772906970595915e-06, + "loss": 0.7741, + "step": 7180 + }, + { + "epoch": 0.36905128995785796, + "grad_norm": 1.0677911043167114, + "learning_rate": 7.276549745339614e-06, + "loss": 0.7, + "step": 7181 + }, + { + "epoch": 0.3691026827012026, + "grad_norm": 1.0912599563598633, + "learning_rate": 7.275808730545838e-06, + "loss": 0.7368, + "step": 7182 + }, + { + "epoch": 0.3691540754445472, + "grad_norm": 0.7646737098693848, + "learning_rate": 7.275067652698793e-06, + "loss": 0.6439, + "step": 7183 + }, + { + "epoch": 0.36920546818789185, + "grad_norm": 1.1100205183029175, + "learning_rate": 7.274326511819014e-06, + "loss": 0.7758, + "step": 7184 + }, + { + "epoch": 0.3692568609312365, + "grad_norm": 1.1088718175888062, + "learning_rate": 7.273585307927031e-06, + "loss": 0.7446, + "step": 7185 + }, + { + "epoch": 0.36930825367458114, + "grad_norm": 1.0760447978973389, + "learning_rate": 7.272844041043383e-06, + "loss": 0.7232, + "step": 7186 + }, + { + "epoch": 0.3693596464179258, + "grad_norm": 0.724325954914093, + "learning_rate": 7.272102711188606e-06, + "loss": 0.7281, + "step": 7187 + }, + { + "epoch": 0.36941103916127044, + "grad_norm": 1.071779727935791, + "learning_rate": 7.2713613183832375e-06, + "loss": 0.793, + "step": 7188 + }, + { + "epoch": 0.3694624319046151, + "grad_norm": 1.5506794452667236, + "learning_rate": 7.270619862647823e-06, + "loss": 0.7342, + "step": 7189 + }, + { + "epoch": 0.36951382464795973, + "grad_norm": 1.1077038049697876, + "learning_rate": 7.269878344002901e-06, + "loss": 0.7508, + "step": 7190 + }, + { + "epoch": 0.3695652173913043, + "grad_norm": 0.760924220085144, + "learning_rate": 7.269136762469017e-06, + "loss": 0.6567, + "step": 7191 + }, + { + "epoch": 0.36961661013464897, + "grad_norm": 1.122255563735962, + "learning_rate": 7.2683951180667166e-06, + "loss": 0.7491, + "step": 7192 + }, + { + "epoch": 0.3696680028779936, + "grad_norm": 1.021269679069519, + "learning_rate": 7.267653410816551e-06, + "loss": 0.7505, + "step": 7193 + }, + { + "epoch": 0.36971939562133826, + "grad_norm": 1.0304324626922607, + "learning_rate": 7.266911640739065e-06, + "loss": 0.7588, + "step": 7194 + }, + { + "epoch": 0.3697707883646829, + "grad_norm": 0.7316375970840454, + "learning_rate": 7.266169807854815e-06, + "loss": 0.6824, + "step": 7195 + }, + { + "epoch": 0.36982218110802756, + "grad_norm": 1.3811365365982056, + "learning_rate": 7.265427912184349e-06, + "loss": 0.7709, + "step": 7196 + }, + { + "epoch": 0.3698735738513722, + "grad_norm": 0.8304383158683777, + "learning_rate": 7.264685953748226e-06, + "loss": 0.6729, + "step": 7197 + }, + { + "epoch": 0.36992496659471685, + "grad_norm": 1.0946966409683228, + "learning_rate": 7.263943932567001e-06, + "loss": 0.7446, + "step": 7198 + }, + { + "epoch": 0.36997635933806144, + "grad_norm": 1.1057438850402832, + "learning_rate": 7.263201848661231e-06, + "loss": 0.7434, + "step": 7199 + }, + { + "epoch": 0.3700277520814061, + "grad_norm": 1.0597110986709595, + "learning_rate": 7.2624597020514785e-06, + "loss": 0.7846, + "step": 7200 + }, + { + "epoch": 0.37007914482475074, + "grad_norm": 0.7607244253158569, + "learning_rate": 7.261717492758306e-06, + "loss": 0.6808, + "step": 7201 + }, + { + "epoch": 0.3701305375680954, + "grad_norm": 1.0400351285934448, + "learning_rate": 7.260975220802272e-06, + "loss": 0.7057, + "step": 7202 + }, + { + "epoch": 0.37018193031144003, + "grad_norm": 1.0781724452972412, + "learning_rate": 7.2602328862039465e-06, + "loss": 0.7741, + "step": 7203 + }, + { + "epoch": 0.3702333230547847, + "grad_norm": 1.0425996780395508, + "learning_rate": 7.259490488983895e-06, + "loss": 0.7363, + "step": 7204 + }, + { + "epoch": 0.3702847157981293, + "grad_norm": 1.0915305614471436, + "learning_rate": 7.258748029162688e-06, + "loss": 0.7144, + "step": 7205 + }, + { + "epoch": 0.3703361085414739, + "grad_norm": 1.0756149291992188, + "learning_rate": 7.258005506760892e-06, + "loss": 0.7342, + "step": 7206 + }, + { + "epoch": 0.37038750128481857, + "grad_norm": 1.1805963516235352, + "learning_rate": 7.257262921799083e-06, + "loss": 0.7976, + "step": 7207 + }, + { + "epoch": 0.3704388940281632, + "grad_norm": 1.1794517040252686, + "learning_rate": 7.256520274297833e-06, + "loss": 0.7267, + "step": 7208 + }, + { + "epoch": 0.37049028677150786, + "grad_norm": 1.0186569690704346, + "learning_rate": 7.25577756427772e-06, + "loss": 0.7168, + "step": 7209 + }, + { + "epoch": 0.3705416795148525, + "grad_norm": 1.283857822418213, + "learning_rate": 7.255034791759318e-06, + "loss": 0.7475, + "step": 7210 + }, + { + "epoch": 0.37059307225819715, + "grad_norm": 0.827991783618927, + "learning_rate": 7.254291956763208e-06, + "loss": 0.6881, + "step": 7211 + }, + { + "epoch": 0.3706444650015418, + "grad_norm": 1.1486775875091553, + "learning_rate": 7.253549059309971e-06, + "loss": 0.7644, + "step": 7212 + }, + { + "epoch": 0.37069585774488645, + "grad_norm": 1.1158725023269653, + "learning_rate": 7.25280609942019e-06, + "loss": 0.8371, + "step": 7213 + }, + { + "epoch": 0.37074725048823104, + "grad_norm": 1.2006750106811523, + "learning_rate": 7.252063077114449e-06, + "loss": 0.8024, + "step": 7214 + }, + { + "epoch": 0.3707986432315757, + "grad_norm": 1.051665186882019, + "learning_rate": 7.251319992413333e-06, + "loss": 0.7583, + "step": 7215 + }, + { + "epoch": 0.37085003597492033, + "grad_norm": 1.0720508098602295, + "learning_rate": 7.25057684533743e-06, + "loss": 0.7633, + "step": 7216 + }, + { + "epoch": 0.370901428718265, + "grad_norm": 1.020507574081421, + "learning_rate": 7.249833635907332e-06, + "loss": 0.7524, + "step": 7217 + }, + { + "epoch": 0.3709528214616096, + "grad_norm": 1.0020560026168823, + "learning_rate": 7.249090364143627e-06, + "loss": 0.745, + "step": 7218 + }, + { + "epoch": 0.3710042142049543, + "grad_norm": 1.20321524143219, + "learning_rate": 7.24834703006691e-06, + "loss": 0.8087, + "step": 7219 + }, + { + "epoch": 0.3710556069482989, + "grad_norm": 1.0610431432724, + "learning_rate": 7.247603633697775e-06, + "loss": 0.7839, + "step": 7220 + }, + { + "epoch": 0.3711069996916435, + "grad_norm": 1.0345391035079956, + "learning_rate": 7.246860175056819e-06, + "loss": 0.7443, + "step": 7221 + }, + { + "epoch": 0.37115839243498816, + "grad_norm": 1.1314092874526978, + "learning_rate": 7.246116654164639e-06, + "loss": 0.7738, + "step": 7222 + }, + { + "epoch": 0.3712097851783328, + "grad_norm": 1.0557901859283447, + "learning_rate": 7.245373071041837e-06, + "loss": 0.7226, + "step": 7223 + }, + { + "epoch": 0.37126117792167745, + "grad_norm": 1.0262720584869385, + "learning_rate": 7.244629425709012e-06, + "loss": 0.7213, + "step": 7224 + }, + { + "epoch": 0.3713125706650221, + "grad_norm": 1.1466014385223389, + "learning_rate": 7.2438857181867685e-06, + "loss": 0.7467, + "step": 7225 + }, + { + "epoch": 0.37136396340836675, + "grad_norm": 0.8458223938941956, + "learning_rate": 7.243141948495714e-06, + "loss": 0.6657, + "step": 7226 + }, + { + "epoch": 0.3714153561517114, + "grad_norm": 1.1147453784942627, + "learning_rate": 7.24239811665645e-06, + "loss": 0.7902, + "step": 7227 + }, + { + "epoch": 0.37146674889505604, + "grad_norm": 1.1038767099380493, + "learning_rate": 7.241654222689589e-06, + "loss": 0.7678, + "step": 7228 + }, + { + "epoch": 0.37151814163840063, + "grad_norm": 1.1124335527420044, + "learning_rate": 7.2409102666157415e-06, + "loss": 0.7297, + "step": 7229 + }, + { + "epoch": 0.3715695343817453, + "grad_norm": 1.0584475994110107, + "learning_rate": 7.240166248455518e-06, + "loss": 0.6697, + "step": 7230 + }, + { + "epoch": 0.37162092712508993, + "grad_norm": 1.0489606857299805, + "learning_rate": 7.239422168229532e-06, + "loss": 0.7699, + "step": 7231 + }, + { + "epoch": 0.3716723198684346, + "grad_norm": 1.1438466310501099, + "learning_rate": 7.238678025958399e-06, + "loss": 0.7685, + "step": 7232 + }, + { + "epoch": 0.3717237126117792, + "grad_norm": 1.0332139730453491, + "learning_rate": 7.237933821662736e-06, + "loss": 0.8229, + "step": 7233 + }, + { + "epoch": 0.37177510535512387, + "grad_norm": 0.80059814453125, + "learning_rate": 7.237189555363162e-06, + "loss": 0.6626, + "step": 7234 + }, + { + "epoch": 0.3718264980984685, + "grad_norm": 1.0390715599060059, + "learning_rate": 7.2364452270802984e-06, + "loss": 0.7166, + "step": 7235 + }, + { + "epoch": 0.37187789084181316, + "grad_norm": 1.0490527153015137, + "learning_rate": 7.235700836834767e-06, + "loss": 0.7519, + "step": 7236 + }, + { + "epoch": 0.37192928358515775, + "grad_norm": 1.1012601852416992, + "learning_rate": 7.234956384647191e-06, + "loss": 0.7765, + "step": 7237 + }, + { + "epoch": 0.3719806763285024, + "grad_norm": 0.6621500849723816, + "learning_rate": 7.234211870538198e-06, + "loss": 0.6866, + "step": 7238 + }, + { + "epoch": 0.37203206907184705, + "grad_norm": 1.015376091003418, + "learning_rate": 7.2334672945284135e-06, + "loss": 0.7642, + "step": 7239 + }, + { + "epoch": 0.3720834618151917, + "grad_norm": 1.1455764770507812, + "learning_rate": 7.232722656638467e-06, + "loss": 0.7583, + "step": 7240 + }, + { + "epoch": 0.37213485455853634, + "grad_norm": 0.9944524168968201, + "learning_rate": 7.2319779568889895e-06, + "loss": 0.7014, + "step": 7241 + }, + { + "epoch": 0.372186247301881, + "grad_norm": 1.0475542545318604, + "learning_rate": 7.231233195300613e-06, + "loss": 0.7113, + "step": 7242 + }, + { + "epoch": 0.37223764004522564, + "grad_norm": 0.7069808840751648, + "learning_rate": 7.230488371893974e-06, + "loss": 0.663, + "step": 7243 + }, + { + "epoch": 0.37228903278857023, + "grad_norm": 1.0262353420257568, + "learning_rate": 7.229743486689705e-06, + "loss": 0.7079, + "step": 7244 + }, + { + "epoch": 0.3723404255319149, + "grad_norm": 0.7837396860122681, + "learning_rate": 7.228998539708447e-06, + "loss": 0.7176, + "step": 7245 + }, + { + "epoch": 0.3723918182752595, + "grad_norm": 1.1227233409881592, + "learning_rate": 7.228253530970838e-06, + "loss": 0.821, + "step": 7246 + }, + { + "epoch": 0.37244321101860417, + "grad_norm": 1.050466537475586, + "learning_rate": 7.227508460497519e-06, + "loss": 0.7381, + "step": 7247 + }, + { + "epoch": 0.3724946037619488, + "grad_norm": 1.0220190286636353, + "learning_rate": 7.226763328309132e-06, + "loss": 0.7602, + "step": 7248 + }, + { + "epoch": 0.37254599650529346, + "grad_norm": 1.0997636318206787, + "learning_rate": 7.2260181344263226e-06, + "loss": 0.7702, + "step": 7249 + }, + { + "epoch": 0.3725973892486381, + "grad_norm": 1.13875150680542, + "learning_rate": 7.225272878869738e-06, + "loss": 0.7097, + "step": 7250 + }, + { + "epoch": 0.37264878199198276, + "grad_norm": 0.7041965126991272, + "learning_rate": 7.224527561660025e-06, + "loss": 0.7071, + "step": 7251 + }, + { + "epoch": 0.37270017473532735, + "grad_norm": 1.0214099884033203, + "learning_rate": 7.2237821828178325e-06, + "loss": 0.7281, + "step": 7252 + }, + { + "epoch": 0.372751567478672, + "grad_norm": 1.0446522235870361, + "learning_rate": 7.223036742363814e-06, + "loss": 0.7275, + "step": 7253 + }, + { + "epoch": 0.37280296022201664, + "grad_norm": 1.0758243799209595, + "learning_rate": 7.22229124031862e-06, + "loss": 0.7751, + "step": 7254 + }, + { + "epoch": 0.3728543529653613, + "grad_norm": 1.4733484983444214, + "learning_rate": 7.2215456767029055e-06, + "loss": 0.7556, + "step": 7255 + }, + { + "epoch": 0.37290574570870594, + "grad_norm": 0.8676440119743347, + "learning_rate": 7.220800051537331e-06, + "loss": 0.6917, + "step": 7256 + }, + { + "epoch": 0.3729571384520506, + "grad_norm": 1.1262989044189453, + "learning_rate": 7.220054364842549e-06, + "loss": 0.7351, + "step": 7257 + }, + { + "epoch": 0.37300853119539523, + "grad_norm": 1.1365312337875366, + "learning_rate": 7.219308616639225e-06, + "loss": 0.725, + "step": 7258 + }, + { + "epoch": 0.3730599239387398, + "grad_norm": 1.041709303855896, + "learning_rate": 7.218562806948016e-06, + "loss": 0.7391, + "step": 7259 + }, + { + "epoch": 0.37311131668208447, + "grad_norm": 0.7140436172485352, + "learning_rate": 7.217816935789588e-06, + "loss": 0.7031, + "step": 7260 + }, + { + "epoch": 0.3731627094254291, + "grad_norm": 1.033147931098938, + "learning_rate": 7.217071003184604e-06, + "loss": 0.7527, + "step": 7261 + }, + { + "epoch": 0.37321410216877376, + "grad_norm": 0.7554978728294373, + "learning_rate": 7.2163250091537325e-06, + "loss": 0.6995, + "step": 7262 + }, + { + "epoch": 0.3732654949121184, + "grad_norm": 1.1233800649642944, + "learning_rate": 7.215578953717641e-06, + "loss": 0.7994, + "step": 7263 + }, + { + "epoch": 0.37331688765546306, + "grad_norm": 1.1571245193481445, + "learning_rate": 7.214832836896999e-06, + "loss": 0.7814, + "step": 7264 + }, + { + "epoch": 0.3733682803988077, + "grad_norm": 1.062779188156128, + "learning_rate": 7.21408665871248e-06, + "loss": 0.7486, + "step": 7265 + }, + { + "epoch": 0.37341967314215235, + "grad_norm": 0.774655818939209, + "learning_rate": 7.213340419184758e-06, + "loss": 0.705, + "step": 7266 + }, + { + "epoch": 0.37347106588549694, + "grad_norm": 2.3855013847351074, + "learning_rate": 7.212594118334506e-06, + "loss": 0.7836, + "step": 7267 + }, + { + "epoch": 0.3735224586288416, + "grad_norm": 0.779570460319519, + "learning_rate": 7.211847756182401e-06, + "loss": 0.7014, + "step": 7268 + }, + { + "epoch": 0.37357385137218624, + "grad_norm": 1.1229965686798096, + "learning_rate": 7.211101332749123e-06, + "loss": 0.7532, + "step": 7269 + }, + { + "epoch": 0.3736252441155309, + "grad_norm": 1.0663634538650513, + "learning_rate": 7.210354848055352e-06, + "loss": 0.7517, + "step": 7270 + }, + { + "epoch": 0.37367663685887553, + "grad_norm": 1.044818639755249, + "learning_rate": 7.209608302121769e-06, + "loss": 0.737, + "step": 7271 + }, + { + "epoch": 0.3737280296022202, + "grad_norm": 1.050997018814087, + "learning_rate": 7.208861694969059e-06, + "loss": 0.7129, + "step": 7272 + }, + { + "epoch": 0.3737794223455648, + "grad_norm": 1.0292834043502808, + "learning_rate": 7.208115026617906e-06, + "loss": 0.7591, + "step": 7273 + }, + { + "epoch": 0.3738308150889094, + "grad_norm": 1.0508451461791992, + "learning_rate": 7.207368297088999e-06, + "loss": 0.7011, + "step": 7274 + }, + { + "epoch": 0.37388220783225407, + "grad_norm": 1.173186182975769, + "learning_rate": 7.2066215064030255e-06, + "loss": 0.7027, + "step": 7275 + }, + { + "epoch": 0.3739336005755987, + "grad_norm": 1.0943999290466309, + "learning_rate": 7.2058746545806755e-06, + "loss": 0.7297, + "step": 7276 + }, + { + "epoch": 0.37398499331894336, + "grad_norm": 1.2071597576141357, + "learning_rate": 7.2051277416426435e-06, + "loss": 0.8096, + "step": 7277 + }, + { + "epoch": 0.374036386062288, + "grad_norm": 0.8069635629653931, + "learning_rate": 7.204380767609621e-06, + "loss": 0.6631, + "step": 7278 + }, + { + "epoch": 0.37408777880563265, + "grad_norm": 1.0536696910858154, + "learning_rate": 7.203633732502304e-06, + "loss": 0.7808, + "step": 7279 + }, + { + "epoch": 0.3741391715489773, + "grad_norm": 1.1158949136734009, + "learning_rate": 7.202886636341391e-06, + "loss": 0.745, + "step": 7280 + }, + { + "epoch": 0.37419056429232195, + "grad_norm": 1.2475515604019165, + "learning_rate": 7.202139479147578e-06, + "loss": 0.7383, + "step": 7281 + }, + { + "epoch": 0.37424195703566654, + "grad_norm": 1.0595248937606812, + "learning_rate": 7.201392260941571e-06, + "loss": 0.7964, + "step": 7282 + }, + { + "epoch": 0.3742933497790112, + "grad_norm": 0.9792985320091248, + "learning_rate": 7.200644981744067e-06, + "loss": 0.7101, + "step": 7283 + }, + { + "epoch": 0.37434474252235583, + "grad_norm": 1.112979531288147, + "learning_rate": 7.199897641575773e-06, + "loss": 0.7711, + "step": 7284 + }, + { + "epoch": 0.3743961352657005, + "grad_norm": 1.0467731952667236, + "learning_rate": 7.199150240457392e-06, + "loss": 0.7324, + "step": 7285 + }, + { + "epoch": 0.3744475280090451, + "grad_norm": 0.7209861278533936, + "learning_rate": 7.1984027784096346e-06, + "loss": 0.7212, + "step": 7286 + }, + { + "epoch": 0.3744989207523898, + "grad_norm": 1.0659093856811523, + "learning_rate": 7.197655255453207e-06, + "loss": 0.7478, + "step": 7287 + }, + { + "epoch": 0.3745503134957344, + "grad_norm": 1.1402009725570679, + "learning_rate": 7.196907671608824e-06, + "loss": 0.7991, + "step": 7288 + }, + { + "epoch": 0.37460170623907907, + "grad_norm": 0.8056334257125854, + "learning_rate": 7.1961600268971945e-06, + "loss": 0.6804, + "step": 7289 + }, + { + "epoch": 0.37465309898242366, + "grad_norm": 1.1002293825149536, + "learning_rate": 7.1954123213390324e-06, + "loss": 0.6984, + "step": 7290 + }, + { + "epoch": 0.3747044917257683, + "grad_norm": 1.0335334539413452, + "learning_rate": 7.1946645549550575e-06, + "loss": 0.7574, + "step": 7291 + }, + { + "epoch": 0.37475588446911295, + "grad_norm": 1.0619194507598877, + "learning_rate": 7.193916727765981e-06, + "loss": 0.7646, + "step": 7292 + }, + { + "epoch": 0.3748072772124576, + "grad_norm": 1.1324498653411865, + "learning_rate": 7.193168839792529e-06, + "loss": 0.7571, + "step": 7293 + }, + { + "epoch": 0.37485866995580225, + "grad_norm": 1.1125948429107666, + "learning_rate": 7.192420891055418e-06, + "loss": 0.7616, + "step": 7294 + }, + { + "epoch": 0.3749100626991469, + "grad_norm": 1.1340450048446655, + "learning_rate": 7.191672881575371e-06, + "loss": 0.7423, + "step": 7295 + }, + { + "epoch": 0.37496145544249154, + "grad_norm": 1.1012948751449585, + "learning_rate": 7.190924811373113e-06, + "loss": 0.799, + "step": 7296 + }, + { + "epoch": 0.37501284818583613, + "grad_norm": 1.117787480354309, + "learning_rate": 7.190176680469369e-06, + "loss": 0.8074, + "step": 7297 + }, + { + "epoch": 0.3750642409291808, + "grad_norm": 1.1589043140411377, + "learning_rate": 7.189428488884871e-06, + "loss": 0.7294, + "step": 7298 + }, + { + "epoch": 0.37511563367252543, + "grad_norm": 1.0923062562942505, + "learning_rate": 7.188680236640343e-06, + "loss": 0.7172, + "step": 7299 + }, + { + "epoch": 0.3751670264158701, + "grad_norm": 1.0963752269744873, + "learning_rate": 7.187931923756517e-06, + "loss": 0.7283, + "step": 7300 + }, + { + "epoch": 0.3752184191592147, + "grad_norm": 1.082520604133606, + "learning_rate": 7.187183550254126e-06, + "loss": 0.7337, + "step": 7301 + }, + { + "epoch": 0.37526981190255937, + "grad_norm": 1.0780723094940186, + "learning_rate": 7.186435116153907e-06, + "loss": 0.7499, + "step": 7302 + }, + { + "epoch": 0.375321204645904, + "grad_norm": 0.8717724084854126, + "learning_rate": 7.185686621476592e-06, + "loss": 0.6621, + "step": 7303 + }, + { + "epoch": 0.37537259738924866, + "grad_norm": 0.7696239948272705, + "learning_rate": 7.184938066242922e-06, + "loss": 0.6616, + "step": 7304 + }, + { + "epoch": 0.37542399013259325, + "grad_norm": 1.0677084922790527, + "learning_rate": 7.184189450473633e-06, + "loss": 0.7607, + "step": 7305 + }, + { + "epoch": 0.3754753828759379, + "grad_norm": 1.009419322013855, + "learning_rate": 7.183440774189468e-06, + "loss": 0.7308, + "step": 7306 + }, + { + "epoch": 0.37552677561928255, + "grad_norm": 0.7687630653381348, + "learning_rate": 7.182692037411172e-06, + "loss": 0.6118, + "step": 7307 + }, + { + "epoch": 0.3755781683626272, + "grad_norm": 1.1391465663909912, + "learning_rate": 7.181943240159484e-06, + "loss": 0.7638, + "step": 7308 + }, + { + "epoch": 0.37562956110597184, + "grad_norm": 1.0756276845932007, + "learning_rate": 7.181194382455156e-06, + "loss": 0.7757, + "step": 7309 + }, + { + "epoch": 0.3756809538493165, + "grad_norm": 0.70989590883255, + "learning_rate": 7.180445464318932e-06, + "loss": 0.6735, + "step": 7310 + }, + { + "epoch": 0.37573234659266114, + "grad_norm": 1.1215742826461792, + "learning_rate": 7.179696485771561e-06, + "loss": 0.8149, + "step": 7311 + }, + { + "epoch": 0.37578373933600573, + "grad_norm": 1.0532371997833252, + "learning_rate": 7.178947446833798e-06, + "loss": 0.768, + "step": 7312 + }, + { + "epoch": 0.3758351320793504, + "grad_norm": 1.0300507545471191, + "learning_rate": 7.1781983475263905e-06, + "loss": 0.7628, + "step": 7313 + }, + { + "epoch": 0.375886524822695, + "grad_norm": 0.938471257686615, + "learning_rate": 7.177449187870098e-06, + "loss": 0.6749, + "step": 7314 + }, + { + "epoch": 0.37593791756603967, + "grad_norm": 1.066704273223877, + "learning_rate": 7.176699967885672e-06, + "loss": 0.7305, + "step": 7315 + }, + { + "epoch": 0.3759893103093843, + "grad_norm": 0.8025457859039307, + "learning_rate": 7.175950687593873e-06, + "loss": 0.6846, + "step": 7316 + }, + { + "epoch": 0.37604070305272896, + "grad_norm": 1.107663631439209, + "learning_rate": 7.175201347015463e-06, + "loss": 0.7519, + "step": 7317 + }, + { + "epoch": 0.3760920957960736, + "grad_norm": 1.068961501121521, + "learning_rate": 7.174451946171198e-06, + "loss": 0.744, + "step": 7318 + }, + { + "epoch": 0.37614348853941826, + "grad_norm": 1.0240728855133057, + "learning_rate": 7.173702485081843e-06, + "loss": 0.7047, + "step": 7319 + }, + { + "epoch": 0.37619488128276285, + "grad_norm": 1.0331439971923828, + "learning_rate": 7.172952963768163e-06, + "loss": 0.7721, + "step": 7320 + }, + { + "epoch": 0.3762462740261075, + "grad_norm": 1.0934516191482544, + "learning_rate": 7.172203382250922e-06, + "loss": 0.7207, + "step": 7321 + }, + { + "epoch": 0.37629766676945214, + "grad_norm": 1.0872308015823364, + "learning_rate": 7.171453740550891e-06, + "loss": 0.7285, + "step": 7322 + }, + { + "epoch": 0.3763490595127968, + "grad_norm": 1.00347101688385, + "learning_rate": 7.170704038688838e-06, + "loss": 0.77, + "step": 7323 + }, + { + "epoch": 0.37640045225614144, + "grad_norm": 1.1341794729232788, + "learning_rate": 7.169954276685533e-06, + "loss": 0.727, + "step": 7324 + }, + { + "epoch": 0.3764518449994861, + "grad_norm": 0.8027995824813843, + "learning_rate": 7.169204454561752e-06, + "loss": 0.6679, + "step": 7325 + }, + { + "epoch": 0.37650323774283073, + "grad_norm": 1.0181519985198975, + "learning_rate": 7.168454572338265e-06, + "loss": 0.7293, + "step": 7326 + }, + { + "epoch": 0.3765546304861754, + "grad_norm": 1.0490118265151978, + "learning_rate": 7.16770463003585e-06, + "loss": 0.7489, + "step": 7327 + }, + { + "epoch": 0.37660602322951997, + "grad_norm": 1.0263705253601074, + "learning_rate": 7.166954627675288e-06, + "loss": 0.7423, + "step": 7328 + }, + { + "epoch": 0.3766574159728646, + "grad_norm": 0.9749269485473633, + "learning_rate": 7.166204565277353e-06, + "loss": 0.7123, + "step": 7329 + }, + { + "epoch": 0.37670880871620926, + "grad_norm": 0.7837164402008057, + "learning_rate": 7.165454442862832e-06, + "loss": 0.6576, + "step": 7330 + }, + { + "epoch": 0.3767602014595539, + "grad_norm": 0.7601831555366516, + "learning_rate": 7.164704260452502e-06, + "loss": 0.6958, + "step": 7331 + }, + { + "epoch": 0.37681159420289856, + "grad_norm": 1.1043617725372314, + "learning_rate": 7.1639540180671504e-06, + "loss": 0.7359, + "step": 7332 + }, + { + "epoch": 0.3768629869462432, + "grad_norm": 1.0691887140274048, + "learning_rate": 7.163203715727564e-06, + "loss": 0.8021, + "step": 7333 + }, + { + "epoch": 0.37691437968958785, + "grad_norm": 1.1172916889190674, + "learning_rate": 7.162453353454531e-06, + "loss": 0.8136, + "step": 7334 + }, + { + "epoch": 0.37696577243293244, + "grad_norm": 0.8592469096183777, + "learning_rate": 7.1617029312688365e-06, + "loss": 0.7172, + "step": 7335 + }, + { + "epoch": 0.3770171651762771, + "grad_norm": 1.0296452045440674, + "learning_rate": 7.1609524491912765e-06, + "loss": 0.8158, + "step": 7336 + }, + { + "epoch": 0.37706855791962174, + "grad_norm": 1.1944377422332764, + "learning_rate": 7.160201907242641e-06, + "loss": 0.7349, + "step": 7337 + }, + { + "epoch": 0.3771199506629664, + "grad_norm": 1.046364426612854, + "learning_rate": 7.1594513054437255e-06, + "loss": 0.736, + "step": 7338 + }, + { + "epoch": 0.37717134340631103, + "grad_norm": 1.1314833164215088, + "learning_rate": 7.1587006438153265e-06, + "loss": 0.7764, + "step": 7339 + }, + { + "epoch": 0.3772227361496557, + "grad_norm": 1.0466359853744507, + "learning_rate": 7.15794992237824e-06, + "loss": 0.7549, + "step": 7340 + }, + { + "epoch": 0.3772741288930003, + "grad_norm": 1.0583604574203491, + "learning_rate": 7.157199141153268e-06, + "loss": 0.7875, + "step": 7341 + }, + { + "epoch": 0.377325521636345, + "grad_norm": 0.770566463470459, + "learning_rate": 7.156448300161208e-06, + "loss": 0.6973, + "step": 7342 + }, + { + "epoch": 0.37737691437968957, + "grad_norm": 1.0318974256515503, + "learning_rate": 7.155697399422866e-06, + "loss": 0.7308, + "step": 7343 + }, + { + "epoch": 0.3774283071230342, + "grad_norm": 1.068851113319397, + "learning_rate": 7.154946438959046e-06, + "loss": 0.7863, + "step": 7344 + }, + { + "epoch": 0.37747969986637886, + "grad_norm": 1.0377678871154785, + "learning_rate": 7.154195418790551e-06, + "loss": 0.679, + "step": 7345 + }, + { + "epoch": 0.3775310926097235, + "grad_norm": 1.0351192951202393, + "learning_rate": 7.153444338938191e-06, + "loss": 0.7331, + "step": 7346 + }, + { + "epoch": 0.37758248535306815, + "grad_norm": 0.8633785247802734, + "learning_rate": 7.152693199422778e-06, + "loss": 0.6943, + "step": 7347 + }, + { + "epoch": 0.3776338780964128, + "grad_norm": 0.7407357096672058, + "learning_rate": 7.151942000265117e-06, + "loss": 0.6631, + "step": 7348 + }, + { + "epoch": 0.37768527083975745, + "grad_norm": 0.8120464086532593, + "learning_rate": 7.1511907414860245e-06, + "loss": 0.7125, + "step": 7349 + }, + { + "epoch": 0.37773666358310204, + "grad_norm": 1.0121363401412964, + "learning_rate": 7.150439423106314e-06, + "loss": 0.7513, + "step": 7350 + }, + { + "epoch": 0.3777880563264467, + "grad_norm": 0.8445212841033936, + "learning_rate": 7.149688045146802e-06, + "loss": 0.6854, + "step": 7351 + }, + { + "epoch": 0.37783944906979133, + "grad_norm": 1.1821459531784058, + "learning_rate": 7.148936607628306e-06, + "loss": 0.798, + "step": 7352 + }, + { + "epoch": 0.377890841813136, + "grad_norm": 1.0836069583892822, + "learning_rate": 7.148185110571644e-06, + "loss": 0.7694, + "step": 7353 + }, + { + "epoch": 0.3779422345564806, + "grad_norm": 1.0769526958465576, + "learning_rate": 7.147433553997638e-06, + "loss": 0.7606, + "step": 7354 + }, + { + "epoch": 0.3779936272998253, + "grad_norm": 1.072932243347168, + "learning_rate": 7.1466819379271095e-06, + "loss": 0.7795, + "step": 7355 + }, + { + "epoch": 0.3780450200431699, + "grad_norm": 1.130855679512024, + "learning_rate": 7.145930262380883e-06, + "loss": 0.8146, + "step": 7356 + }, + { + "epoch": 0.37809641278651457, + "grad_norm": 1.0468519926071167, + "learning_rate": 7.1451785273797845e-06, + "loss": 0.7836, + "step": 7357 + }, + { + "epoch": 0.37814780552985916, + "grad_norm": 1.1851191520690918, + "learning_rate": 7.144426732944642e-06, + "loss": 0.7745, + "step": 7358 + }, + { + "epoch": 0.3781991982732038, + "grad_norm": 1.0813720226287842, + "learning_rate": 7.143674879096285e-06, + "loss": 0.7202, + "step": 7359 + }, + { + "epoch": 0.37825059101654845, + "grad_norm": 1.0562858581542969, + "learning_rate": 7.142922965855543e-06, + "loss": 0.7442, + "step": 7360 + }, + { + "epoch": 0.3783019837598931, + "grad_norm": 1.1034413576126099, + "learning_rate": 7.142170993243249e-06, + "loss": 0.7873, + "step": 7361 + }, + { + "epoch": 0.37835337650323775, + "grad_norm": 1.109531283378601, + "learning_rate": 7.141418961280238e-06, + "loss": 0.7666, + "step": 7362 + }, + { + "epoch": 0.3784047692465824, + "grad_norm": 1.1119484901428223, + "learning_rate": 7.140666869987344e-06, + "loss": 0.7103, + "step": 7363 + }, + { + "epoch": 0.37845616198992704, + "grad_norm": 0.8298102021217346, + "learning_rate": 7.139914719385405e-06, + "loss": 0.7316, + "step": 7364 + }, + { + "epoch": 0.3785075547332717, + "grad_norm": 1.1190065145492554, + "learning_rate": 7.13916250949526e-06, + "loss": 0.7654, + "step": 7365 + }, + { + "epoch": 0.3785589474766163, + "grad_norm": 1.0564727783203125, + "learning_rate": 7.1384102403377485e-06, + "loss": 0.7184, + "step": 7366 + }, + { + "epoch": 0.37861034021996093, + "grad_norm": 1.0378198623657227, + "learning_rate": 7.137657911933714e-06, + "loss": 0.7255, + "step": 7367 + }, + { + "epoch": 0.3786617329633056, + "grad_norm": 0.8552929759025574, + "learning_rate": 7.136905524304002e-06, + "loss": 0.674, + "step": 7368 + }, + { + "epoch": 0.3787131257066502, + "grad_norm": 0.7065010666847229, + "learning_rate": 7.136153077469456e-06, + "loss": 0.6896, + "step": 7369 + }, + { + "epoch": 0.37876451844999487, + "grad_norm": 1.2000168561935425, + "learning_rate": 7.135400571450923e-06, + "loss": 0.7731, + "step": 7370 + }, + { + "epoch": 0.3788159111933395, + "grad_norm": 1.1487095355987549, + "learning_rate": 7.134648006269253e-06, + "loss": 0.7471, + "step": 7371 + }, + { + "epoch": 0.37886730393668416, + "grad_norm": 1.0376038551330566, + "learning_rate": 7.133895381945295e-06, + "loss": 0.7488, + "step": 7372 + }, + { + "epoch": 0.37891869668002875, + "grad_norm": 0.8188409805297852, + "learning_rate": 7.1331426984999045e-06, + "loss": 0.6456, + "step": 7373 + }, + { + "epoch": 0.3789700894233734, + "grad_norm": 1.1227713823318481, + "learning_rate": 7.132389955953929e-06, + "loss": 0.7836, + "step": 7374 + }, + { + "epoch": 0.37902148216671805, + "grad_norm": 0.7337707877159119, + "learning_rate": 7.131637154328231e-06, + "loss": 0.66, + "step": 7375 + }, + { + "epoch": 0.3790728749100627, + "grad_norm": 3.011390209197998, + "learning_rate": 7.130884293643663e-06, + "loss": 0.6954, + "step": 7376 + }, + { + "epoch": 0.37912426765340734, + "grad_norm": 1.0165685415267944, + "learning_rate": 7.130131373921084e-06, + "loss": 0.6994, + "step": 7377 + }, + { + "epoch": 0.379175660396752, + "grad_norm": 1.0319876670837402, + "learning_rate": 7.129378395181355e-06, + "loss": 0.7551, + "step": 7378 + }, + { + "epoch": 0.37922705314009664, + "grad_norm": 1.0393660068511963, + "learning_rate": 7.1286253574453405e-06, + "loss": 0.7657, + "step": 7379 + }, + { + "epoch": 0.3792784458834413, + "grad_norm": 1.1224186420440674, + "learning_rate": 7.127872260733899e-06, + "loss": 0.7529, + "step": 7380 + }, + { + "epoch": 0.3793298386267859, + "grad_norm": 0.6923304200172424, + "learning_rate": 7.1271191050679e-06, + "loss": 0.6582, + "step": 7381 + }, + { + "epoch": 0.3793812313701305, + "grad_norm": 1.0675216913223267, + "learning_rate": 7.126365890468207e-06, + "loss": 0.7523, + "step": 7382 + }, + { + "epoch": 0.37943262411347517, + "grad_norm": 1.0050772428512573, + "learning_rate": 7.125612616955693e-06, + "loss": 0.688, + "step": 7383 + }, + { + "epoch": 0.3794840168568198, + "grad_norm": 1.1310421228408813, + "learning_rate": 7.124859284551223e-06, + "loss": 0.7754, + "step": 7384 + }, + { + "epoch": 0.37953540960016446, + "grad_norm": 1.026774525642395, + "learning_rate": 7.124105893275671e-06, + "loss": 0.7354, + "step": 7385 + }, + { + "epoch": 0.3795868023435091, + "grad_norm": 1.0665091276168823, + "learning_rate": 7.123352443149913e-06, + "loss": 0.7643, + "step": 7386 + }, + { + "epoch": 0.37963819508685376, + "grad_norm": 0.7611773610115051, + "learning_rate": 7.1225989341948175e-06, + "loss": 0.706, + "step": 7387 + }, + { + "epoch": 0.37968958783019835, + "grad_norm": 1.1037728786468506, + "learning_rate": 7.121845366431267e-06, + "loss": 0.8075, + "step": 7388 + }, + { + "epoch": 0.379740980573543, + "grad_norm": 1.048171877861023, + "learning_rate": 7.121091739880137e-06, + "loss": 0.7157, + "step": 7389 + }, + { + "epoch": 0.37979237331688764, + "grad_norm": 0.7966815233230591, + "learning_rate": 7.120338054562309e-06, + "loss": 0.6845, + "step": 7390 + }, + { + "epoch": 0.3798437660602323, + "grad_norm": 1.0229524374008179, + "learning_rate": 7.119584310498664e-06, + "loss": 0.75, + "step": 7391 + }, + { + "epoch": 0.37989515880357694, + "grad_norm": 1.0557529926300049, + "learning_rate": 7.118830507710084e-06, + "loss": 0.7695, + "step": 7392 + }, + { + "epoch": 0.3799465515469216, + "grad_norm": 1.0853312015533447, + "learning_rate": 7.118076646217454e-06, + "loss": 0.7969, + "step": 7393 + }, + { + "epoch": 0.37999794429026623, + "grad_norm": 0.7926809787750244, + "learning_rate": 7.117322726041662e-06, + "loss": 0.6688, + "step": 7394 + }, + { + "epoch": 0.3800493370336109, + "grad_norm": 1.1121256351470947, + "learning_rate": 7.116568747203596e-06, + "loss": 0.7077, + "step": 7395 + }, + { + "epoch": 0.38010072977695547, + "grad_norm": 1.1015864610671997, + "learning_rate": 7.1158147097241425e-06, + "loss": 0.7351, + "step": 7396 + }, + { + "epoch": 0.3801521225203001, + "grad_norm": 1.099389910697937, + "learning_rate": 7.115060613624194e-06, + "loss": 0.7038, + "step": 7397 + }, + { + "epoch": 0.38020351526364476, + "grad_norm": 0.7381818890571594, + "learning_rate": 7.114306458924647e-06, + "loss": 0.6953, + "step": 7398 + }, + { + "epoch": 0.3802549080069894, + "grad_norm": 1.0690655708312988, + "learning_rate": 7.1135522456463914e-06, + "loss": 0.8036, + "step": 7399 + }, + { + "epoch": 0.38030630075033406, + "grad_norm": 1.0561307668685913, + "learning_rate": 7.112797973810326e-06, + "loss": 0.75, + "step": 7400 + }, + { + "epoch": 0.3803576934936787, + "grad_norm": 1.1480324268341064, + "learning_rate": 7.112043643437347e-06, + "loss": 0.7692, + "step": 7401 + }, + { + "epoch": 0.38040908623702335, + "grad_norm": 0.9994832277297974, + "learning_rate": 7.111289254548355e-06, + "loss": 0.712, + "step": 7402 + }, + { + "epoch": 0.38046047898036794, + "grad_norm": 1.1053189039230347, + "learning_rate": 7.110534807164251e-06, + "loss": 0.7286, + "step": 7403 + }, + { + "epoch": 0.3805118717237126, + "grad_norm": 0.7683562636375427, + "learning_rate": 7.109780301305935e-06, + "loss": 0.6631, + "step": 7404 + }, + { + "epoch": 0.38056326446705724, + "grad_norm": 1.1451411247253418, + "learning_rate": 7.109025736994316e-06, + "loss": 0.7953, + "step": 7405 + }, + { + "epoch": 0.3806146572104019, + "grad_norm": 1.0412806272506714, + "learning_rate": 7.108271114250296e-06, + "loss": 0.7247, + "step": 7406 + }, + { + "epoch": 0.38066604995374653, + "grad_norm": 0.706887423992157, + "learning_rate": 7.107516433094785e-06, + "loss": 0.6699, + "step": 7407 + }, + { + "epoch": 0.3807174426970912, + "grad_norm": 1.0059337615966797, + "learning_rate": 7.1067616935486895e-06, + "loss": 0.7278, + "step": 7408 + }, + { + "epoch": 0.3807688354404358, + "grad_norm": 1.1390876770019531, + "learning_rate": 7.106006895632921e-06, + "loss": 0.7509, + "step": 7409 + }, + { + "epoch": 0.3808202281837805, + "grad_norm": 1.060619592666626, + "learning_rate": 7.105252039368393e-06, + "loss": 0.7573, + "step": 7410 + }, + { + "epoch": 0.38087162092712507, + "grad_norm": 1.036307454109192, + "learning_rate": 7.10449712477602e-06, + "loss": 0.7141, + "step": 7411 + }, + { + "epoch": 0.3809230136704697, + "grad_norm": 1.157056450843811, + "learning_rate": 7.103742151876716e-06, + "loss": 0.7156, + "step": 7412 + }, + { + "epoch": 0.38097440641381436, + "grad_norm": 1.0527716875076294, + "learning_rate": 7.102987120691398e-06, + "loss": 0.7212, + "step": 7413 + }, + { + "epoch": 0.381025799157159, + "grad_norm": 1.0051586627960205, + "learning_rate": 7.102232031240985e-06, + "loss": 0.7077, + "step": 7414 + }, + { + "epoch": 0.38107719190050365, + "grad_norm": 0.8157718777656555, + "learning_rate": 7.101476883546399e-06, + "loss": 0.6481, + "step": 7415 + }, + { + "epoch": 0.3811285846438483, + "grad_norm": 1.1330392360687256, + "learning_rate": 7.100721677628561e-06, + "loss": 0.7749, + "step": 7416 + }, + { + "epoch": 0.38117997738719295, + "grad_norm": 1.1204426288604736, + "learning_rate": 7.099966413508393e-06, + "loss": 0.8013, + "step": 7417 + }, + { + "epoch": 0.3812313701305376, + "grad_norm": 1.0978925228118896, + "learning_rate": 7.099211091206822e-06, + "loss": 0.6828, + "step": 7418 + }, + { + "epoch": 0.3812827628738822, + "grad_norm": 1.1226383447647095, + "learning_rate": 7.098455710744776e-06, + "loss": 0.7879, + "step": 7419 + }, + { + "epoch": 0.38133415561722683, + "grad_norm": 0.7480544447898865, + "learning_rate": 7.09770027214318e-06, + "loss": 0.6927, + "step": 7420 + }, + { + "epoch": 0.3813855483605715, + "grad_norm": 1.2020505666732788, + "learning_rate": 7.0969447754229685e-06, + "loss": 0.7694, + "step": 7421 + }, + { + "epoch": 0.3814369411039161, + "grad_norm": 1.1181451082229614, + "learning_rate": 7.096189220605069e-06, + "loss": 0.7111, + "step": 7422 + }, + { + "epoch": 0.3814883338472608, + "grad_norm": 0.9593185782432556, + "learning_rate": 7.095433607710418e-06, + "loss": 0.662, + "step": 7423 + }, + { + "epoch": 0.3815397265906054, + "grad_norm": 1.0330407619476318, + "learning_rate": 7.094677936759948e-06, + "loss": 0.6899, + "step": 7424 + }, + { + "epoch": 0.38159111933395007, + "grad_norm": 1.0459941625595093, + "learning_rate": 7.093922207774597e-06, + "loss": 0.7674, + "step": 7425 + }, + { + "epoch": 0.38164251207729466, + "grad_norm": 0.9885093569755554, + "learning_rate": 7.0931664207753035e-06, + "loss": 0.6997, + "step": 7426 + }, + { + "epoch": 0.3816939048206393, + "grad_norm": 1.044066309928894, + "learning_rate": 7.0924105757830066e-06, + "loss": 0.639, + "step": 7427 + }, + { + "epoch": 0.38174529756398395, + "grad_norm": 0.8201054334640503, + "learning_rate": 7.091654672818646e-06, + "loss": 0.6981, + "step": 7428 + }, + { + "epoch": 0.3817966903073286, + "grad_norm": 0.992118239402771, + "learning_rate": 7.090898711903166e-06, + "loss": 0.6869, + "step": 7429 + }, + { + "epoch": 0.38184808305067325, + "grad_norm": 1.0499683618545532, + "learning_rate": 7.090142693057511e-06, + "loss": 0.7147, + "step": 7430 + }, + { + "epoch": 0.3818994757940179, + "grad_norm": 1.0943446159362793, + "learning_rate": 7.08938661630263e-06, + "loss": 0.7706, + "step": 7431 + }, + { + "epoch": 0.38195086853736254, + "grad_norm": 1.1973525285720825, + "learning_rate": 7.088630481659466e-06, + "loss": 0.8135, + "step": 7432 + }, + { + "epoch": 0.3820022612807072, + "grad_norm": 1.0697773694992065, + "learning_rate": 7.087874289148972e-06, + "loss": 0.72, + "step": 7433 + }, + { + "epoch": 0.3820536540240518, + "grad_norm": 1.1684834957122803, + "learning_rate": 7.087118038792097e-06, + "loss": 0.821, + "step": 7434 + }, + { + "epoch": 0.38210504676739643, + "grad_norm": 1.0874080657958984, + "learning_rate": 7.086361730609795e-06, + "loss": 0.7813, + "step": 7435 + }, + { + "epoch": 0.3821564395107411, + "grad_norm": 1.0783307552337646, + "learning_rate": 7.085605364623018e-06, + "loss": 0.7079, + "step": 7436 + }, + { + "epoch": 0.3822078322540857, + "grad_norm": 1.127070665359497, + "learning_rate": 7.0848489408527246e-06, + "loss": 0.7824, + "step": 7437 + }, + { + "epoch": 0.38225922499743037, + "grad_norm": 0.9674443602561951, + "learning_rate": 7.084092459319869e-06, + "loss": 0.6562, + "step": 7438 + }, + { + "epoch": 0.382310617740775, + "grad_norm": 1.038142204284668, + "learning_rate": 7.083335920045411e-06, + "loss": 0.7548, + "step": 7439 + }, + { + "epoch": 0.38236201048411966, + "grad_norm": 0.7793687582015991, + "learning_rate": 7.082579323050315e-06, + "loss": 0.6965, + "step": 7440 + }, + { + "epoch": 0.38241340322746425, + "grad_norm": 0.7511942982673645, + "learning_rate": 7.081822668355538e-06, + "loss": 0.6844, + "step": 7441 + }, + { + "epoch": 0.3824647959708089, + "grad_norm": 1.076296329498291, + "learning_rate": 7.0810659559820484e-06, + "loss": 0.801, + "step": 7442 + }, + { + "epoch": 0.38251618871415355, + "grad_norm": 1.1076669692993164, + "learning_rate": 7.080309185950808e-06, + "loss": 0.808, + "step": 7443 + }, + { + "epoch": 0.3825675814574982, + "grad_norm": 1.051639437675476, + "learning_rate": 7.079552358282784e-06, + "loss": 0.7372, + "step": 7444 + }, + { + "epoch": 0.38261897420084284, + "grad_norm": 1.0340163707733154, + "learning_rate": 7.078795472998947e-06, + "loss": 0.7475, + "step": 7445 + }, + { + "epoch": 0.3826703669441875, + "grad_norm": 1.1695044040679932, + "learning_rate": 7.078038530120266e-06, + "loss": 0.7881, + "step": 7446 + }, + { + "epoch": 0.38272175968753214, + "grad_norm": 1.4706473350524902, + "learning_rate": 7.077281529667713e-06, + "loss": 0.7352, + "step": 7447 + }, + { + "epoch": 0.3827731524308768, + "grad_norm": 0.9759430885314941, + "learning_rate": 7.076524471662261e-06, + "loss": 0.7177, + "step": 7448 + }, + { + "epoch": 0.3828245451742214, + "grad_norm": 1.0149638652801514, + "learning_rate": 7.0757673561248855e-06, + "loss": 0.7474, + "step": 7449 + }, + { + "epoch": 0.382875937917566, + "grad_norm": 1.0085992813110352, + "learning_rate": 7.075010183076563e-06, + "loss": 0.6806, + "step": 7450 + }, + { + "epoch": 0.38292733066091067, + "grad_norm": 1.0078824758529663, + "learning_rate": 7.0742529525382706e-06, + "loss": 0.7324, + "step": 7451 + }, + { + "epoch": 0.3829787234042553, + "grad_norm": 1.0417908430099487, + "learning_rate": 7.073495664530989e-06, + "loss": 0.7989, + "step": 7452 + }, + { + "epoch": 0.38303011614759996, + "grad_norm": 1.040493130683899, + "learning_rate": 7.0727383190757e-06, + "loss": 0.7376, + "step": 7453 + }, + { + "epoch": 0.3830815088909446, + "grad_norm": 1.0949937105178833, + "learning_rate": 7.071980916193384e-06, + "loss": 0.8, + "step": 7454 + }, + { + "epoch": 0.38313290163428926, + "grad_norm": 1.0469510555267334, + "learning_rate": 7.0712234559050295e-06, + "loss": 0.7888, + "step": 7455 + }, + { + "epoch": 0.3831842943776339, + "grad_norm": 1.0587806701660156, + "learning_rate": 7.070465938231621e-06, + "loss": 0.7709, + "step": 7456 + }, + { + "epoch": 0.3832356871209785, + "grad_norm": 0.854256808757782, + "learning_rate": 7.0697083631941436e-06, + "loss": 0.7104, + "step": 7457 + }, + { + "epoch": 0.38328707986432314, + "grad_norm": 1.1093567609786987, + "learning_rate": 7.06895073081359e-06, + "loss": 0.7997, + "step": 7458 + }, + { + "epoch": 0.3833384726076678, + "grad_norm": 1.0292497873306274, + "learning_rate": 7.068193041110948e-06, + "loss": 0.7054, + "step": 7459 + }, + { + "epoch": 0.38338986535101244, + "grad_norm": 1.2124760150909424, + "learning_rate": 7.067435294107212e-06, + "loss": 0.7894, + "step": 7460 + }, + { + "epoch": 0.3834412580943571, + "grad_norm": 1.109876036643982, + "learning_rate": 7.066677489823378e-06, + "loss": 0.7056, + "step": 7461 + }, + { + "epoch": 0.38349265083770173, + "grad_norm": 1.056516408920288, + "learning_rate": 7.065919628280437e-06, + "loss": 0.6755, + "step": 7462 + }, + { + "epoch": 0.3835440435810464, + "grad_norm": 1.118664026260376, + "learning_rate": 7.065161709499391e-06, + "loss": 0.7443, + "step": 7463 + }, + { + "epoch": 0.38359543632439097, + "grad_norm": 1.0236186981201172, + "learning_rate": 7.064403733501235e-06, + "loss": 0.7476, + "step": 7464 + }, + { + "epoch": 0.3836468290677356, + "grad_norm": 1.1197069883346558, + "learning_rate": 7.063645700306971e-06, + "loss": 0.7836, + "step": 7465 + }, + { + "epoch": 0.38369822181108026, + "grad_norm": 0.8992980122566223, + "learning_rate": 7.062887609937601e-06, + "loss": 0.6816, + "step": 7466 + }, + { + "epoch": 0.3837496145544249, + "grad_norm": 1.0554096698760986, + "learning_rate": 7.062129462414128e-06, + "loss": 0.7044, + "step": 7467 + }, + { + "epoch": 0.38380100729776956, + "grad_norm": 0.7279976606369019, + "learning_rate": 7.0613712577575576e-06, + "loss": 0.6884, + "step": 7468 + }, + { + "epoch": 0.3838524000411142, + "grad_norm": 0.7279179692268372, + "learning_rate": 7.060612995988896e-06, + "loss": 0.6688, + "step": 7469 + }, + { + "epoch": 0.38390379278445885, + "grad_norm": 1.1429977416992188, + "learning_rate": 7.059854677129153e-06, + "loss": 0.7366, + "step": 7470 + }, + { + "epoch": 0.3839551855278035, + "grad_norm": 1.150233507156372, + "learning_rate": 7.059096301199336e-06, + "loss": 0.8176, + "step": 7471 + }, + { + "epoch": 0.3840065782711481, + "grad_norm": 0.7654420137405396, + "learning_rate": 7.058337868220459e-06, + "loss": 0.685, + "step": 7472 + }, + { + "epoch": 0.38405797101449274, + "grad_norm": 1.0729304552078247, + "learning_rate": 7.057579378213534e-06, + "loss": 0.7771, + "step": 7473 + }, + { + "epoch": 0.3841093637578374, + "grad_norm": 0.7671759128570557, + "learning_rate": 7.056820831199576e-06, + "loss": 0.6778, + "step": 7474 + }, + { + "epoch": 0.38416075650118203, + "grad_norm": 1.0311747789382935, + "learning_rate": 7.0560622271995995e-06, + "loss": 0.7304, + "step": 7475 + }, + { + "epoch": 0.3842121492445267, + "grad_norm": 1.1003468036651611, + "learning_rate": 7.055303566234624e-06, + "loss": 0.7643, + "step": 7476 + }, + { + "epoch": 0.3842635419878713, + "grad_norm": 0.9820614457130432, + "learning_rate": 7.05454484832567e-06, + "loss": 0.6997, + "step": 7477 + }, + { + "epoch": 0.384314934731216, + "grad_norm": 0.9953899383544922, + "learning_rate": 7.053786073493753e-06, + "loss": 0.6681, + "step": 7478 + }, + { + "epoch": 0.38436632747456057, + "grad_norm": 1.0100626945495605, + "learning_rate": 7.053027241759903e-06, + "loss": 0.7153, + "step": 7479 + }, + { + "epoch": 0.3844177202179052, + "grad_norm": 1.0939350128173828, + "learning_rate": 7.05226835314514e-06, + "loss": 0.7764, + "step": 7480 + }, + { + "epoch": 0.38446911296124986, + "grad_norm": 0.8679288625717163, + "learning_rate": 7.0515094076704885e-06, + "loss": 0.6613, + "step": 7481 + }, + { + "epoch": 0.3845205057045945, + "grad_norm": 1.0305333137512207, + "learning_rate": 7.0507504053569785e-06, + "loss": 0.7695, + "step": 7482 + }, + { + "epoch": 0.38457189844793915, + "grad_norm": 1.0743308067321777, + "learning_rate": 7.049991346225636e-06, + "loss": 0.7629, + "step": 7483 + }, + { + "epoch": 0.3846232911912838, + "grad_norm": 1.085890293121338, + "learning_rate": 7.049232230297496e-06, + "loss": 0.7914, + "step": 7484 + }, + { + "epoch": 0.38467468393462845, + "grad_norm": 1.103814721107483, + "learning_rate": 7.048473057593586e-06, + "loss": 0.771, + "step": 7485 + }, + { + "epoch": 0.3847260766779731, + "grad_norm": 1.1282713413238525, + "learning_rate": 7.047713828134941e-06, + "loss": 0.8059, + "step": 7486 + }, + { + "epoch": 0.3847774694213177, + "grad_norm": 1.077272891998291, + "learning_rate": 7.046954541942597e-06, + "loss": 0.7709, + "step": 7487 + }, + { + "epoch": 0.38482886216466233, + "grad_norm": 1.0906522274017334, + "learning_rate": 7.04619519903759e-06, + "loss": 0.7568, + "step": 7488 + }, + { + "epoch": 0.384880254908007, + "grad_norm": 1.1227941513061523, + "learning_rate": 7.045435799440957e-06, + "loss": 0.8007, + "step": 7489 + }, + { + "epoch": 0.3849316476513516, + "grad_norm": 1.031508207321167, + "learning_rate": 7.044676343173738e-06, + "loss": 0.7166, + "step": 7490 + }, + { + "epoch": 0.3849830403946963, + "grad_norm": 1.109743356704712, + "learning_rate": 7.043916830256977e-06, + "loss": 0.8055, + "step": 7491 + }, + { + "epoch": 0.3850344331380409, + "grad_norm": 1.0598150491714478, + "learning_rate": 7.043157260711716e-06, + "loss": 0.7053, + "step": 7492 + }, + { + "epoch": 0.38508582588138557, + "grad_norm": 0.9949901103973389, + "learning_rate": 7.042397634559e-06, + "loss": 0.7339, + "step": 7493 + }, + { + "epoch": 0.3851372186247302, + "grad_norm": 0.7405714988708496, + "learning_rate": 7.041637951819871e-06, + "loss": 0.6678, + "step": 7494 + }, + { + "epoch": 0.3851886113680748, + "grad_norm": 1.0699007511138916, + "learning_rate": 7.040878212515382e-06, + "loss": 0.7673, + "step": 7495 + }, + { + "epoch": 0.38524000411141945, + "grad_norm": 0.8301787972450256, + "learning_rate": 7.040118416666579e-06, + "loss": 0.6526, + "step": 7496 + }, + { + "epoch": 0.3852913968547641, + "grad_norm": 0.9950885772705078, + "learning_rate": 7.039358564294514e-06, + "loss": 0.7471, + "step": 7497 + }, + { + "epoch": 0.38534278959810875, + "grad_norm": 0.9953995943069458, + "learning_rate": 7.038598655420239e-06, + "loss": 0.7515, + "step": 7498 + }, + { + "epoch": 0.3853941823414534, + "grad_norm": 1.0471075773239136, + "learning_rate": 7.037838690064807e-06, + "loss": 0.7626, + "step": 7499 + }, + { + "epoch": 0.38544557508479804, + "grad_norm": 1.0842597484588623, + "learning_rate": 7.037078668249275e-06, + "loss": 0.7422, + "step": 7500 + }, + { + "epoch": 0.3854969678281427, + "grad_norm": 1.0505601167678833, + "learning_rate": 7.036318589994701e-06, + "loss": 0.7454, + "step": 7501 + }, + { + "epoch": 0.3855483605714873, + "grad_norm": 1.1080377101898193, + "learning_rate": 7.03555845532214e-06, + "loss": 0.8134, + "step": 7502 + }, + { + "epoch": 0.38559975331483193, + "grad_norm": 1.049881100654602, + "learning_rate": 7.034798264252657e-06, + "loss": 0.7332, + "step": 7503 + }, + { + "epoch": 0.3856511460581766, + "grad_norm": 1.0777125358581543, + "learning_rate": 7.034038016807309e-06, + "loss": 0.6827, + "step": 7504 + }, + { + "epoch": 0.3857025388015212, + "grad_norm": 1.0869483947753906, + "learning_rate": 7.0332777130071625e-06, + "loss": 0.8075, + "step": 7505 + }, + { + "epoch": 0.38575393154486587, + "grad_norm": 1.065034031867981, + "learning_rate": 7.032517352873283e-06, + "loss": 0.7443, + "step": 7506 + }, + { + "epoch": 0.3858053242882105, + "grad_norm": 1.0039747953414917, + "learning_rate": 7.031756936426733e-06, + "loss": 0.7571, + "step": 7507 + }, + { + "epoch": 0.38585671703155516, + "grad_norm": 1.1473783254623413, + "learning_rate": 7.030996463688584e-06, + "loss": 0.7946, + "step": 7508 + }, + { + "epoch": 0.3859081097748998, + "grad_norm": 0.8165815472602844, + "learning_rate": 7.030235934679905e-06, + "loss": 0.6869, + "step": 7509 + }, + { + "epoch": 0.3859595025182444, + "grad_norm": 0.8059452176094055, + "learning_rate": 7.029475349421765e-06, + "loss": 0.6842, + "step": 7510 + }, + { + "epoch": 0.38601089526158905, + "grad_norm": 1.0866498947143555, + "learning_rate": 7.028714707935238e-06, + "loss": 0.7538, + "step": 7511 + }, + { + "epoch": 0.3860622880049337, + "grad_norm": 1.1299792528152466, + "learning_rate": 7.0279540102414e-06, + "loss": 0.7527, + "step": 7512 + }, + { + "epoch": 0.38611368074827834, + "grad_norm": 1.0811710357666016, + "learning_rate": 7.027193256361324e-06, + "loss": 0.6644, + "step": 7513 + }, + { + "epoch": 0.386165073491623, + "grad_norm": 1.2123768329620361, + "learning_rate": 7.026432446316089e-06, + "loss": 0.7903, + "step": 7514 + }, + { + "epoch": 0.38621646623496764, + "grad_norm": 1.1964912414550781, + "learning_rate": 7.025671580126774e-06, + "loss": 0.7088, + "step": 7515 + }, + { + "epoch": 0.3862678589783123, + "grad_norm": 0.8216059803962708, + "learning_rate": 7.024910657814459e-06, + "loss": 0.686, + "step": 7516 + }, + { + "epoch": 0.3863192517216569, + "grad_norm": 1.0295400619506836, + "learning_rate": 7.024149679400226e-06, + "loss": 0.7249, + "step": 7517 + }, + { + "epoch": 0.3863706444650015, + "grad_norm": 1.0168280601501465, + "learning_rate": 7.023388644905158e-06, + "loss": 0.764, + "step": 7518 + }, + { + "epoch": 0.38642203720834617, + "grad_norm": 1.2338663339614868, + "learning_rate": 7.0226275543503406e-06, + "loss": 0.7941, + "step": 7519 + }, + { + "epoch": 0.3864734299516908, + "grad_norm": 1.1368939876556396, + "learning_rate": 7.021866407756861e-06, + "loss": 0.7777, + "step": 7520 + }, + { + "epoch": 0.38652482269503546, + "grad_norm": 1.0670526027679443, + "learning_rate": 7.021105205145806e-06, + "loss": 0.7379, + "step": 7521 + }, + { + "epoch": 0.3865762154383801, + "grad_norm": 0.813489556312561, + "learning_rate": 7.020343946538269e-06, + "loss": 0.7026, + "step": 7522 + }, + { + "epoch": 0.38662760818172476, + "grad_norm": 1.0601547956466675, + "learning_rate": 7.019582631955335e-06, + "loss": 0.6988, + "step": 7523 + }, + { + "epoch": 0.3866790009250694, + "grad_norm": 1.1179819107055664, + "learning_rate": 7.018821261418104e-06, + "loss": 0.6947, + "step": 7524 + }, + { + "epoch": 0.386730393668414, + "grad_norm": 1.155249834060669, + "learning_rate": 7.018059834947666e-06, + "loss": 0.7566, + "step": 7525 + }, + { + "epoch": 0.38678178641175864, + "grad_norm": 1.030011534690857, + "learning_rate": 7.017298352565116e-06, + "loss": 0.7497, + "step": 7526 + }, + { + "epoch": 0.3868331791551033, + "grad_norm": 1.040596604347229, + "learning_rate": 7.016536814291557e-06, + "loss": 0.7651, + "step": 7527 + }, + { + "epoch": 0.38688457189844794, + "grad_norm": 1.2649948596954346, + "learning_rate": 7.015775220148082e-06, + "loss": 0.792, + "step": 7528 + }, + { + "epoch": 0.3869359646417926, + "grad_norm": 0.7738057374954224, + "learning_rate": 7.015013570155795e-06, + "loss": 0.6876, + "step": 7529 + }, + { + "epoch": 0.38698735738513723, + "grad_norm": 1.05807626247406, + "learning_rate": 7.014251864335798e-06, + "loss": 0.7962, + "step": 7530 + }, + { + "epoch": 0.3870387501284819, + "grad_norm": 1.0294122695922852, + "learning_rate": 7.0134901027091925e-06, + "loss": 0.7393, + "step": 7531 + }, + { + "epoch": 0.3870901428718265, + "grad_norm": 1.131093144416809, + "learning_rate": 7.012728285297086e-06, + "loss": 0.8163, + "step": 7532 + }, + { + "epoch": 0.3871415356151711, + "grad_norm": 1.7459410429000854, + "learning_rate": 7.011966412120586e-06, + "loss": 0.7297, + "step": 7533 + }, + { + "epoch": 0.38719292835851576, + "grad_norm": 1.1630157232284546, + "learning_rate": 7.011204483200799e-06, + "loss": 0.7281, + "step": 7534 + }, + { + "epoch": 0.3872443211018604, + "grad_norm": 1.1783239841461182, + "learning_rate": 7.010442498558836e-06, + "loss": 0.8154, + "step": 7535 + }, + { + "epoch": 0.38729571384520506, + "grad_norm": 0.7386965751647949, + "learning_rate": 7.009680458215807e-06, + "loss": 0.6724, + "step": 7536 + }, + { + "epoch": 0.3873471065885497, + "grad_norm": 1.050032138824463, + "learning_rate": 7.008918362192826e-06, + "loss": 0.7518, + "step": 7537 + }, + { + "epoch": 0.38739849933189435, + "grad_norm": 1.0325521230697632, + "learning_rate": 7.008156210511008e-06, + "loss": 0.7705, + "step": 7538 + }, + { + "epoch": 0.387449892075239, + "grad_norm": 1.1409049034118652, + "learning_rate": 7.007394003191468e-06, + "loss": 0.7998, + "step": 7539 + }, + { + "epoch": 0.3875012848185836, + "grad_norm": 1.1737031936645508, + "learning_rate": 7.006631740255325e-06, + "loss": 0.7652, + "step": 7540 + }, + { + "epoch": 0.38755267756192824, + "grad_norm": 0.8028273582458496, + "learning_rate": 7.0058694217236965e-06, + "loss": 0.6816, + "step": 7541 + }, + { + "epoch": 0.3876040703052729, + "grad_norm": 1.0774755477905273, + "learning_rate": 7.0051070476177055e-06, + "loss": 0.7136, + "step": 7542 + }, + { + "epoch": 0.38765546304861753, + "grad_norm": 1.0718046426773071, + "learning_rate": 7.0043446179584715e-06, + "loss": 0.7396, + "step": 7543 + }, + { + "epoch": 0.3877068557919622, + "grad_norm": 1.0895307064056396, + "learning_rate": 7.003582132767121e-06, + "loss": 0.717, + "step": 7544 + }, + { + "epoch": 0.3877582485353068, + "grad_norm": 1.0787065029144287, + "learning_rate": 7.002819592064776e-06, + "loss": 0.7757, + "step": 7545 + }, + { + "epoch": 0.3878096412786515, + "grad_norm": 1.1404898166656494, + "learning_rate": 7.002056995872566e-06, + "loss": 0.7504, + "step": 7546 + }, + { + "epoch": 0.3878610340219961, + "grad_norm": 1.0683035850524902, + "learning_rate": 7.0012943442116185e-06, + "loss": 0.7052, + "step": 7547 + }, + { + "epoch": 0.3879124267653407, + "grad_norm": 1.1014246940612793, + "learning_rate": 7.000531637103064e-06, + "loss": 0.7426, + "step": 7548 + }, + { + "epoch": 0.38796381950868536, + "grad_norm": 1.0781656503677368, + "learning_rate": 6.999768874568034e-06, + "loss": 0.7176, + "step": 7549 + }, + { + "epoch": 0.38801521225203, + "grad_norm": 1.1062637567520142, + "learning_rate": 6.999006056627659e-06, + "loss": 0.7572, + "step": 7550 + }, + { + "epoch": 0.38806660499537465, + "grad_norm": 1.097590684890747, + "learning_rate": 6.998243183303078e-06, + "loss": 0.7598, + "step": 7551 + }, + { + "epoch": 0.3881179977387193, + "grad_norm": 1.1087310314178467, + "learning_rate": 6.997480254615423e-06, + "loss": 0.6769, + "step": 7552 + }, + { + "epoch": 0.38816939048206395, + "grad_norm": 1.0431005954742432, + "learning_rate": 6.996717270585833e-06, + "loss": 0.719, + "step": 7553 + }, + { + "epoch": 0.3882207832254086, + "grad_norm": 0.7828953862190247, + "learning_rate": 6.99595423123545e-06, + "loss": 0.7011, + "step": 7554 + }, + { + "epoch": 0.3882721759687532, + "grad_norm": 0.9954776763916016, + "learning_rate": 6.9951911365854094e-06, + "loss": 0.7286, + "step": 7555 + }, + { + "epoch": 0.38832356871209783, + "grad_norm": 1.1003565788269043, + "learning_rate": 6.994427986656858e-06, + "loss": 0.781, + "step": 7556 + }, + { + "epoch": 0.3883749614554425, + "grad_norm": 1.1294127702713013, + "learning_rate": 6.9936647814709364e-06, + "loss": 0.7875, + "step": 7557 + }, + { + "epoch": 0.3884263541987871, + "grad_norm": 0.761332094669342, + "learning_rate": 6.992901521048792e-06, + "loss": 0.6734, + "step": 7558 + }, + { + "epoch": 0.3884777469421318, + "grad_norm": 1.077763557434082, + "learning_rate": 6.992138205411569e-06, + "loss": 0.7434, + "step": 7559 + }, + { + "epoch": 0.3885291396854764, + "grad_norm": 1.0792584419250488, + "learning_rate": 6.99137483458042e-06, + "loss": 0.781, + "step": 7560 + }, + { + "epoch": 0.38858053242882107, + "grad_norm": 0.6737803220748901, + "learning_rate": 6.99061140857649e-06, + "loss": 0.708, + "step": 7561 + }, + { + "epoch": 0.3886319251721657, + "grad_norm": 1.057395577430725, + "learning_rate": 6.989847927420932e-06, + "loss": 0.7369, + "step": 7562 + }, + { + "epoch": 0.3886833179155103, + "grad_norm": 1.1328836679458618, + "learning_rate": 6.9890843911349005e-06, + "loss": 0.755, + "step": 7563 + }, + { + "epoch": 0.38873471065885495, + "grad_norm": 0.6859034299850464, + "learning_rate": 6.98832079973955e-06, + "loss": 0.6742, + "step": 7564 + }, + { + "epoch": 0.3887861034021996, + "grad_norm": 1.0645487308502197, + "learning_rate": 6.987557153256035e-06, + "loss": 0.7565, + "step": 7565 + }, + { + "epoch": 0.38883749614554425, + "grad_norm": 1.1425814628601074, + "learning_rate": 6.986793451705512e-06, + "loss": 0.7193, + "step": 7566 + }, + { + "epoch": 0.3888888888888889, + "grad_norm": 1.0597894191741943, + "learning_rate": 6.986029695109145e-06, + "loss": 0.693, + "step": 7567 + }, + { + "epoch": 0.38894028163223354, + "grad_norm": 1.1472972631454468, + "learning_rate": 6.985265883488088e-06, + "loss": 0.7453, + "step": 7568 + }, + { + "epoch": 0.3889916743755782, + "grad_norm": 1.0479751825332642, + "learning_rate": 6.9845020168635055e-06, + "loss": 0.7793, + "step": 7569 + }, + { + "epoch": 0.3890430671189228, + "grad_norm": 1.183117151260376, + "learning_rate": 6.983738095256565e-06, + "loss": 0.7869, + "step": 7570 + }, + { + "epoch": 0.3890944598622674, + "grad_norm": 1.0616142749786377, + "learning_rate": 6.9829741186884246e-06, + "loss": 0.7289, + "step": 7571 + }, + { + "epoch": 0.3891458526056121, + "grad_norm": 1.0492910146713257, + "learning_rate": 6.982210087180257e-06, + "loss": 0.7374, + "step": 7572 + }, + { + "epoch": 0.3891972453489567, + "grad_norm": 1.0634167194366455, + "learning_rate": 6.981446000753227e-06, + "loss": 0.7418, + "step": 7573 + }, + { + "epoch": 0.38924863809230137, + "grad_norm": 1.0403907299041748, + "learning_rate": 6.980681859428504e-06, + "loss": 0.7377, + "step": 7574 + }, + { + "epoch": 0.389300030835646, + "grad_norm": 1.0965745449066162, + "learning_rate": 6.979917663227262e-06, + "loss": 0.789, + "step": 7575 + }, + { + "epoch": 0.38935142357899066, + "grad_norm": 1.0559697151184082, + "learning_rate": 6.979153412170673e-06, + "loss": 0.7025, + "step": 7576 + }, + { + "epoch": 0.3894028163223353, + "grad_norm": 1.099478006362915, + "learning_rate": 6.9783891062799106e-06, + "loss": 0.7328, + "step": 7577 + }, + { + "epoch": 0.3894542090656799, + "grad_norm": 1.1058732271194458, + "learning_rate": 6.9776247455761495e-06, + "loss": 0.7617, + "step": 7578 + }, + { + "epoch": 0.38950560180902455, + "grad_norm": 1.0469423532485962, + "learning_rate": 6.976860330080567e-06, + "loss": 0.7848, + "step": 7579 + }, + { + "epoch": 0.3895569945523692, + "grad_norm": 1.1475212574005127, + "learning_rate": 6.976095859814344e-06, + "loss": 0.7434, + "step": 7580 + }, + { + "epoch": 0.38960838729571384, + "grad_norm": 1.081917643547058, + "learning_rate": 6.975331334798659e-06, + "loss": 0.7943, + "step": 7581 + }, + { + "epoch": 0.3896597800390585, + "grad_norm": 0.9631706476211548, + "learning_rate": 6.9745667550546945e-06, + "loss": 0.7106, + "step": 7582 + }, + { + "epoch": 0.38971117278240314, + "grad_norm": 1.1239150762557983, + "learning_rate": 6.973802120603633e-06, + "loss": 0.7662, + "step": 7583 + }, + { + "epoch": 0.3897625655257478, + "grad_norm": 1.059920072555542, + "learning_rate": 6.973037431466661e-06, + "loss": 0.7551, + "step": 7584 + }, + { + "epoch": 0.38981395826909243, + "grad_norm": 1.0924482345581055, + "learning_rate": 6.972272687664964e-06, + "loss": 0.7737, + "step": 7585 + }, + { + "epoch": 0.389865351012437, + "grad_norm": 1.040684461593628, + "learning_rate": 6.971507889219731e-06, + "loss": 0.7763, + "step": 7586 + }, + { + "epoch": 0.38991674375578167, + "grad_norm": 1.0347356796264648, + "learning_rate": 6.970743036152148e-06, + "loss": 0.687, + "step": 7587 + }, + { + "epoch": 0.3899681364991263, + "grad_norm": 1.0466582775115967, + "learning_rate": 6.96997812848341e-06, + "loss": 0.7234, + "step": 7588 + }, + { + "epoch": 0.39001952924247096, + "grad_norm": 1.0861093997955322, + "learning_rate": 6.9692131662347076e-06, + "loss": 0.7849, + "step": 7589 + }, + { + "epoch": 0.3900709219858156, + "grad_norm": 0.6770915389060974, + "learning_rate": 6.968448149427233e-06, + "loss": 0.6508, + "step": 7590 + }, + { + "epoch": 0.39012231472916026, + "grad_norm": 1.0380191802978516, + "learning_rate": 6.967683078082186e-06, + "loss": 0.6917, + "step": 7591 + }, + { + "epoch": 0.3901737074725049, + "grad_norm": 1.0087499618530273, + "learning_rate": 6.96691795222076e-06, + "loss": 0.7507, + "step": 7592 + }, + { + "epoch": 0.3902251002158495, + "grad_norm": 1.125178575515747, + "learning_rate": 6.966152771864153e-06, + "loss": 0.7583, + "step": 7593 + }, + { + "epoch": 0.39027649295919414, + "grad_norm": 1.0458521842956543, + "learning_rate": 6.965387537033569e-06, + "loss": 0.7864, + "step": 7594 + }, + { + "epoch": 0.3903278857025388, + "grad_norm": 1.1327494382858276, + "learning_rate": 6.964622247750205e-06, + "loss": 0.7676, + "step": 7595 + }, + { + "epoch": 0.39037927844588344, + "grad_norm": 0.8194754123687744, + "learning_rate": 6.963856904035268e-06, + "loss": 0.6339, + "step": 7596 + }, + { + "epoch": 0.3904306711892281, + "grad_norm": 1.1856040954589844, + "learning_rate": 6.96309150590996e-06, + "loss": 0.8211, + "step": 7597 + }, + { + "epoch": 0.39048206393257273, + "grad_norm": 1.0627042055130005, + "learning_rate": 6.962326053395486e-06, + "loss": 0.715, + "step": 7598 + }, + { + "epoch": 0.3905334566759174, + "grad_norm": 1.0718475580215454, + "learning_rate": 6.961560546513057e-06, + "loss": 0.7503, + "step": 7599 + }, + { + "epoch": 0.390584849419262, + "grad_norm": 0.7488054633140564, + "learning_rate": 6.960794985283878e-06, + "loss": 0.6311, + "step": 7600 + }, + { + "epoch": 0.3906362421626066, + "grad_norm": 1.1386374235153198, + "learning_rate": 6.9600293697291645e-06, + "loss": 0.7533, + "step": 7601 + }, + { + "epoch": 0.39068763490595126, + "grad_norm": 0.8333669900894165, + "learning_rate": 6.959263699870125e-06, + "loss": 0.7075, + "step": 7602 + }, + { + "epoch": 0.3907390276492959, + "grad_norm": 1.0365575551986694, + "learning_rate": 6.958497975727973e-06, + "loss": 0.715, + "step": 7603 + }, + { + "epoch": 0.39079042039264056, + "grad_norm": 0.7197472453117371, + "learning_rate": 6.957732197323923e-06, + "loss": 0.7103, + "step": 7604 + }, + { + "epoch": 0.3908418131359852, + "grad_norm": 1.1052440404891968, + "learning_rate": 6.956966364679195e-06, + "loss": 0.7589, + "step": 7605 + }, + { + "epoch": 0.39089320587932985, + "grad_norm": 0.7330883145332336, + "learning_rate": 6.956200477815004e-06, + "loss": 0.6794, + "step": 7606 + }, + { + "epoch": 0.3909445986226745, + "grad_norm": 1.1211355924606323, + "learning_rate": 6.955434536752571e-06, + "loss": 0.8298, + "step": 7607 + }, + { + "epoch": 0.3909959913660191, + "grad_norm": 1.0111035108566284, + "learning_rate": 6.954668541513115e-06, + "loss": 0.6908, + "step": 7608 + }, + { + "epoch": 0.39104738410936374, + "grad_norm": 1.1451387405395508, + "learning_rate": 6.9539024921178635e-06, + "loss": 0.7756, + "step": 7609 + }, + { + "epoch": 0.3910987768527084, + "grad_norm": 1.0160584449768066, + "learning_rate": 6.9531363885880355e-06, + "loss": 0.7218, + "step": 7610 + }, + { + "epoch": 0.39115016959605303, + "grad_norm": 1.0737526416778564, + "learning_rate": 6.952370230944857e-06, + "loss": 0.7762, + "step": 7611 + }, + { + "epoch": 0.3912015623393977, + "grad_norm": 1.0658982992172241, + "learning_rate": 6.951604019209558e-06, + "loss": 0.7784, + "step": 7612 + }, + { + "epoch": 0.3912529550827423, + "grad_norm": 1.0661336183547974, + "learning_rate": 6.950837753403364e-06, + "loss": 0.7816, + "step": 7613 + }, + { + "epoch": 0.391304347826087, + "grad_norm": 1.098449468612671, + "learning_rate": 6.9500714335475075e-06, + "loss": 0.7588, + "step": 7614 + }, + { + "epoch": 0.3913557405694316, + "grad_norm": 1.124150037765503, + "learning_rate": 6.949305059663219e-06, + "loss": 0.7906, + "step": 7615 + }, + { + "epoch": 0.3914071333127762, + "grad_norm": 1.110224962234497, + "learning_rate": 6.9485386317717306e-06, + "loss": 0.8329, + "step": 7616 + }, + { + "epoch": 0.39145852605612086, + "grad_norm": 0.7523649334907532, + "learning_rate": 6.947772149894278e-06, + "loss": 0.6988, + "step": 7617 + }, + { + "epoch": 0.3915099187994655, + "grad_norm": 1.074568271636963, + "learning_rate": 6.947005614052099e-06, + "loss": 0.7068, + "step": 7618 + }, + { + "epoch": 0.39156131154281015, + "grad_norm": 1.1233516931533813, + "learning_rate": 6.946239024266428e-06, + "loss": 0.7351, + "step": 7619 + }, + { + "epoch": 0.3916127042861548, + "grad_norm": 1.0954830646514893, + "learning_rate": 6.945472380558506e-06, + "loss": 0.7486, + "step": 7620 + }, + { + "epoch": 0.39166409702949945, + "grad_norm": 1.0953837633132935, + "learning_rate": 6.944705682949573e-06, + "loss": 0.7028, + "step": 7621 + }, + { + "epoch": 0.3917154897728441, + "grad_norm": 1.115058183670044, + "learning_rate": 6.943938931460869e-06, + "loss": 0.7717, + "step": 7622 + }, + { + "epoch": 0.39176688251618874, + "grad_norm": 1.0459257364273071, + "learning_rate": 6.943172126113639e-06, + "loss": 0.7146, + "step": 7623 + }, + { + "epoch": 0.39181827525953333, + "grad_norm": 1.108222484588623, + "learning_rate": 6.942405266929131e-06, + "loss": 0.7465, + "step": 7624 + }, + { + "epoch": 0.391869668002878, + "grad_norm": 1.0019032955169678, + "learning_rate": 6.941638353928587e-06, + "loss": 0.7182, + "step": 7625 + }, + { + "epoch": 0.3919210607462226, + "grad_norm": 1.0332640409469604, + "learning_rate": 6.940871387133259e-06, + "loss": 0.7265, + "step": 7626 + }, + { + "epoch": 0.3919724534895673, + "grad_norm": 1.0989500284194946, + "learning_rate": 6.940104366564392e-06, + "loss": 0.7557, + "step": 7627 + }, + { + "epoch": 0.3920238462329119, + "grad_norm": 1.0905793905258179, + "learning_rate": 6.93933729224324e-06, + "loss": 0.7729, + "step": 7628 + }, + { + "epoch": 0.39207523897625657, + "grad_norm": 1.03756844997406, + "learning_rate": 6.9385701641910565e-06, + "loss": 0.7939, + "step": 7629 + }, + { + "epoch": 0.3921266317196012, + "grad_norm": 1.0292121171951294, + "learning_rate": 6.9378029824290915e-06, + "loss": 0.7701, + "step": 7630 + }, + { + "epoch": 0.3921780244629458, + "grad_norm": 1.1175249814987183, + "learning_rate": 6.937035746978604e-06, + "loss": 0.7136, + "step": 7631 + }, + { + "epoch": 0.39222941720629045, + "grad_norm": 1.1299725770950317, + "learning_rate": 6.936268457860848e-06, + "loss": 0.7764, + "step": 7632 + }, + { + "epoch": 0.3922808099496351, + "grad_norm": 1.0661554336547852, + "learning_rate": 6.935501115097085e-06, + "loss": 0.7235, + "step": 7633 + }, + { + "epoch": 0.39233220269297975, + "grad_norm": 1.0558375120162964, + "learning_rate": 6.934733718708573e-06, + "loss": 0.7734, + "step": 7634 + }, + { + "epoch": 0.3923835954363244, + "grad_norm": 1.0801435708999634, + "learning_rate": 6.933966268716575e-06, + "loss": 0.8107, + "step": 7635 + }, + { + "epoch": 0.39243498817966904, + "grad_norm": 0.7441138029098511, + "learning_rate": 6.93319876514235e-06, + "loss": 0.6853, + "step": 7636 + }, + { + "epoch": 0.3924863809230137, + "grad_norm": 1.0828814506530762, + "learning_rate": 6.932431208007168e-06, + "loss": 0.7694, + "step": 7637 + }, + { + "epoch": 0.39253777366635834, + "grad_norm": 1.090512752532959, + "learning_rate": 6.93166359733229e-06, + "loss": 0.7703, + "step": 7638 + }, + { + "epoch": 0.3925891664097029, + "grad_norm": 1.0441005229949951, + "learning_rate": 6.930895933138986e-06, + "loss": 0.7815, + "step": 7639 + }, + { + "epoch": 0.3926405591530476, + "grad_norm": 1.1414127349853516, + "learning_rate": 6.930128215448524e-06, + "loss": 0.7484, + "step": 7640 + }, + { + "epoch": 0.3926919518963922, + "grad_norm": 0.8270021080970764, + "learning_rate": 6.929360444282174e-06, + "loss": 0.6734, + "step": 7641 + }, + { + "epoch": 0.39274334463973687, + "grad_norm": 1.1540815830230713, + "learning_rate": 6.92859261966121e-06, + "loss": 0.8229, + "step": 7642 + }, + { + "epoch": 0.3927947373830815, + "grad_norm": 1.0113199949264526, + "learning_rate": 6.9278247416069e-06, + "loss": 0.754, + "step": 7643 + }, + { + "epoch": 0.39284613012642616, + "grad_norm": 0.7301357388496399, + "learning_rate": 6.9270568101405236e-06, + "loss": 0.6849, + "step": 7644 + }, + { + "epoch": 0.3928975228697708, + "grad_norm": 1.0406712293624878, + "learning_rate": 6.926288825283356e-06, + "loss": 0.7165, + "step": 7645 + }, + { + "epoch": 0.3929489156131154, + "grad_norm": 0.794864296913147, + "learning_rate": 6.925520787056673e-06, + "loss": 0.6918, + "step": 7646 + }, + { + "epoch": 0.39300030835646005, + "grad_norm": 1.0652917623519897, + "learning_rate": 6.924752695481757e-06, + "loss": 0.819, + "step": 7647 + }, + { + "epoch": 0.3930517010998047, + "grad_norm": 0.7337948083877563, + "learning_rate": 6.923984550579885e-06, + "loss": 0.6858, + "step": 7648 + }, + { + "epoch": 0.39310309384314934, + "grad_norm": 1.056088924407959, + "learning_rate": 6.923216352372343e-06, + "loss": 0.77, + "step": 7649 + }, + { + "epoch": 0.393154486586494, + "grad_norm": 1.0483630895614624, + "learning_rate": 6.9224481008804115e-06, + "loss": 0.756, + "step": 7650 + }, + { + "epoch": 0.39320587932983864, + "grad_norm": 1.0716615915298462, + "learning_rate": 6.921679796125375e-06, + "loss": 0.7688, + "step": 7651 + }, + { + "epoch": 0.3932572720731833, + "grad_norm": 1.0684136152267456, + "learning_rate": 6.920911438128524e-06, + "loss": 0.7307, + "step": 7652 + }, + { + "epoch": 0.39330866481652793, + "grad_norm": 1.0338706970214844, + "learning_rate": 6.920143026911143e-06, + "loss": 0.734, + "step": 7653 + }, + { + "epoch": 0.3933600575598725, + "grad_norm": 1.034462809562683, + "learning_rate": 6.919374562494523e-06, + "loss": 0.7517, + "step": 7654 + }, + { + "epoch": 0.39341145030321717, + "grad_norm": 1.1041597127914429, + "learning_rate": 6.918606044899955e-06, + "loss": 0.698, + "step": 7655 + }, + { + "epoch": 0.3934628430465618, + "grad_norm": 1.0727167129516602, + "learning_rate": 6.91783747414873e-06, + "loss": 0.7886, + "step": 7656 + }, + { + "epoch": 0.39351423578990646, + "grad_norm": 0.7407634258270264, + "learning_rate": 6.9170688502621445e-06, + "loss": 0.6663, + "step": 7657 + }, + { + "epoch": 0.3935656285332511, + "grad_norm": 1.0173885822296143, + "learning_rate": 6.9163001732614925e-06, + "loss": 0.7207, + "step": 7658 + }, + { + "epoch": 0.39361702127659576, + "grad_norm": 1.2487030029296875, + "learning_rate": 6.915531443168069e-06, + "loss": 0.7171, + "step": 7659 + }, + { + "epoch": 0.3936684140199404, + "grad_norm": 1.0532139539718628, + "learning_rate": 6.9147626600031754e-06, + "loss": 0.807, + "step": 7660 + }, + { + "epoch": 0.39371980676328505, + "grad_norm": 1.1360340118408203, + "learning_rate": 6.91399382378811e-06, + "loss": 0.8117, + "step": 7661 + }, + { + "epoch": 0.39377119950662964, + "grad_norm": 1.0676496028900146, + "learning_rate": 6.913224934544174e-06, + "loss": 0.7427, + "step": 7662 + }, + { + "epoch": 0.3938225922499743, + "grad_norm": 1.0968384742736816, + "learning_rate": 6.912455992292672e-06, + "loss": 0.8227, + "step": 7663 + }, + { + "epoch": 0.39387398499331894, + "grad_norm": 1.1550003290176392, + "learning_rate": 6.911686997054905e-06, + "loss": 0.6706, + "step": 7664 + }, + { + "epoch": 0.3939253777366636, + "grad_norm": 1.024246096611023, + "learning_rate": 6.910917948852181e-06, + "loss": 0.7762, + "step": 7665 + }, + { + "epoch": 0.39397677048000823, + "grad_norm": 1.1120320558547974, + "learning_rate": 6.910148847705805e-06, + "loss": 0.7869, + "step": 7666 + }, + { + "epoch": 0.3940281632233529, + "grad_norm": 0.6909576058387756, + "learning_rate": 6.909379693637088e-06, + "loss": 0.6489, + "step": 7667 + }, + { + "epoch": 0.3940795559666975, + "grad_norm": 1.1059415340423584, + "learning_rate": 6.908610486667341e-06, + "loss": 0.7642, + "step": 7668 + }, + { + "epoch": 0.3941309487100421, + "grad_norm": 1.0094114542007446, + "learning_rate": 6.9078412268178726e-06, + "loss": 0.7487, + "step": 7669 + }, + { + "epoch": 0.39418234145338676, + "grad_norm": 1.045431137084961, + "learning_rate": 6.907071914109997e-06, + "loss": 0.7167, + "step": 7670 + }, + { + "epoch": 0.3942337341967314, + "grad_norm": 1.041668176651001, + "learning_rate": 6.906302548565029e-06, + "loss": 0.7292, + "step": 7671 + }, + { + "epoch": 0.39428512694007606, + "grad_norm": 1.0912792682647705, + "learning_rate": 6.905533130204284e-06, + "loss": 0.7102, + "step": 7672 + }, + { + "epoch": 0.3943365196834207, + "grad_norm": 1.0472631454467773, + "learning_rate": 6.9047636590490796e-06, + "loss": 0.7358, + "step": 7673 + }, + { + "epoch": 0.39438791242676535, + "grad_norm": 1.0955181121826172, + "learning_rate": 6.903994135120734e-06, + "loss": 0.7637, + "step": 7674 + }, + { + "epoch": 0.39443930517011, + "grad_norm": 1.124322533607483, + "learning_rate": 6.903224558440569e-06, + "loss": 0.7679, + "step": 7675 + }, + { + "epoch": 0.39449069791345465, + "grad_norm": 0.7169439792633057, + "learning_rate": 6.902454929029904e-06, + "loss": 0.6724, + "step": 7676 + }, + { + "epoch": 0.39454209065679924, + "grad_norm": 1.1768615245819092, + "learning_rate": 6.901685246910067e-06, + "loss": 0.7041, + "step": 7677 + }, + { + "epoch": 0.3945934834001439, + "grad_norm": 1.0974135398864746, + "learning_rate": 6.900915512102377e-06, + "loss": 0.7171, + "step": 7678 + }, + { + "epoch": 0.39464487614348853, + "grad_norm": 0.7104974985122681, + "learning_rate": 6.900145724628164e-06, + "loss": 0.6841, + "step": 7679 + }, + { + "epoch": 0.3946962688868332, + "grad_norm": 1.089743733406067, + "learning_rate": 6.899375884508755e-06, + "loss": 0.7396, + "step": 7680 + }, + { + "epoch": 0.3947476616301778, + "grad_norm": 1.0988154411315918, + "learning_rate": 6.898605991765478e-06, + "loss": 0.7127, + "step": 7681 + }, + { + "epoch": 0.3947990543735225, + "grad_norm": 1.0543286800384521, + "learning_rate": 6.897836046419663e-06, + "loss": 0.7126, + "step": 7682 + }, + { + "epoch": 0.3948504471168671, + "grad_norm": 1.016357660293579, + "learning_rate": 6.897066048492645e-06, + "loss": 0.7788, + "step": 7683 + }, + { + "epoch": 0.3949018398602117, + "grad_norm": 1.095605492591858, + "learning_rate": 6.8962959980057535e-06, + "loss": 0.7163, + "step": 7684 + }, + { + "epoch": 0.39495323260355636, + "grad_norm": 1.0979453325271606, + "learning_rate": 6.895525894980326e-06, + "loss": 0.7476, + "step": 7685 + }, + { + "epoch": 0.395004625346901, + "grad_norm": 0.7965176105499268, + "learning_rate": 6.894755739437698e-06, + "loss": 0.7273, + "step": 7686 + }, + { + "epoch": 0.39505601809024565, + "grad_norm": 1.0486069917678833, + "learning_rate": 6.893985531399209e-06, + "loss": 0.7813, + "step": 7687 + }, + { + "epoch": 0.3951074108335903, + "grad_norm": 1.0257296562194824, + "learning_rate": 6.8932152708861956e-06, + "loss": 0.7636, + "step": 7688 + }, + { + "epoch": 0.39515880357693495, + "grad_norm": 1.0557836294174194, + "learning_rate": 6.89244495792e-06, + "loss": 0.7587, + "step": 7689 + }, + { + "epoch": 0.3952101963202796, + "grad_norm": 1.0282378196716309, + "learning_rate": 6.891674592521966e-06, + "loss": 0.7963, + "step": 7690 + }, + { + "epoch": 0.39526158906362424, + "grad_norm": 1.1251766681671143, + "learning_rate": 6.8909041747134335e-06, + "loss": 0.8144, + "step": 7691 + }, + { + "epoch": 0.39531298180696883, + "grad_norm": 1.0695043802261353, + "learning_rate": 6.890133704515751e-06, + "loss": 0.8112, + "step": 7692 + }, + { + "epoch": 0.3953643745503135, + "grad_norm": 0.7831131815910339, + "learning_rate": 6.889363181950262e-06, + "loss": 0.6809, + "step": 7693 + }, + { + "epoch": 0.3954157672936581, + "grad_norm": 1.0838066339492798, + "learning_rate": 6.888592607038318e-06, + "loss": 0.764, + "step": 7694 + }, + { + "epoch": 0.3954671600370028, + "grad_norm": 1.046748399734497, + "learning_rate": 6.887821979801266e-06, + "loss": 0.7771, + "step": 7695 + }, + { + "epoch": 0.3955185527803474, + "grad_norm": 0.9859500527381897, + "learning_rate": 6.887051300260456e-06, + "loss": 0.7089, + "step": 7696 + }, + { + "epoch": 0.39556994552369207, + "grad_norm": 0.7578884363174438, + "learning_rate": 6.886280568437244e-06, + "loss": 0.6525, + "step": 7697 + }, + { + "epoch": 0.3956213382670367, + "grad_norm": 1.0375925302505493, + "learning_rate": 6.885509784352982e-06, + "loss": 0.7356, + "step": 7698 + }, + { + "epoch": 0.3956727310103813, + "grad_norm": 1.0571956634521484, + "learning_rate": 6.8847389480290225e-06, + "loss": 0.751, + "step": 7699 + }, + { + "epoch": 0.39572412375372595, + "grad_norm": 1.036126971244812, + "learning_rate": 6.883968059486726e-06, + "loss": 0.7356, + "step": 7700 + }, + { + "epoch": 0.3957755164970706, + "grad_norm": 0.7724809050559998, + "learning_rate": 6.8831971187474485e-06, + "loss": 0.6437, + "step": 7701 + }, + { + "epoch": 0.39582690924041525, + "grad_norm": 1.061787724494934, + "learning_rate": 6.882426125832552e-06, + "loss": 0.7447, + "step": 7702 + }, + { + "epoch": 0.3958783019837599, + "grad_norm": 0.9916962385177612, + "learning_rate": 6.8816550807633954e-06, + "loss": 0.7157, + "step": 7703 + }, + { + "epoch": 0.39592969472710454, + "grad_norm": 1.0794934034347534, + "learning_rate": 6.88088398356134e-06, + "loss": 0.7412, + "step": 7704 + }, + { + "epoch": 0.3959810874704492, + "grad_norm": 1.0943633317947388, + "learning_rate": 6.880112834247754e-06, + "loss": 0.7258, + "step": 7705 + }, + { + "epoch": 0.39603248021379384, + "grad_norm": 1.0183488130569458, + "learning_rate": 6.879341632843998e-06, + "loss": 0.7245, + "step": 7706 + }, + { + "epoch": 0.3960838729571384, + "grad_norm": 0.7264434695243835, + "learning_rate": 6.878570379371441e-06, + "loss": 0.68, + "step": 7707 + }, + { + "epoch": 0.3961352657004831, + "grad_norm": 1.1244704723358154, + "learning_rate": 6.877799073851452e-06, + "loss": 0.8166, + "step": 7708 + }, + { + "epoch": 0.3961866584438277, + "grad_norm": 0.737281084060669, + "learning_rate": 6.877027716305399e-06, + "loss": 0.7017, + "step": 7709 + }, + { + "epoch": 0.39623805118717237, + "grad_norm": 1.0160399675369263, + "learning_rate": 6.876256306754655e-06, + "loss": 0.7876, + "step": 7710 + }, + { + "epoch": 0.396289443930517, + "grad_norm": 1.013521432876587, + "learning_rate": 6.875484845220592e-06, + "loss": 0.7181, + "step": 7711 + }, + { + "epoch": 0.39634083667386166, + "grad_norm": 0.7500552535057068, + "learning_rate": 6.874713331724581e-06, + "loss": 0.6982, + "step": 7712 + }, + { + "epoch": 0.3963922294172063, + "grad_norm": 1.0660873651504517, + "learning_rate": 6.8739417662880035e-06, + "loss": 0.7494, + "step": 7713 + }, + { + "epoch": 0.39644362216055096, + "grad_norm": 0.8136256337165833, + "learning_rate": 6.873170148932231e-06, + "loss": 0.6971, + "step": 7714 + }, + { + "epoch": 0.39649501490389555, + "grad_norm": 0.8580878376960754, + "learning_rate": 6.872398479678645e-06, + "loss": 0.7295, + "step": 7715 + }, + { + "epoch": 0.3965464076472402, + "grad_norm": 1.1223771572113037, + "learning_rate": 6.871626758548622e-06, + "loss": 0.7252, + "step": 7716 + }, + { + "epoch": 0.39659780039058484, + "grad_norm": 1.0317038297653198, + "learning_rate": 6.8708549855635465e-06, + "loss": 0.7128, + "step": 7717 + }, + { + "epoch": 0.3966491931339295, + "grad_norm": 1.0364794731140137, + "learning_rate": 6.870083160744801e-06, + "loss": 0.7446, + "step": 7718 + }, + { + "epoch": 0.39670058587727414, + "grad_norm": 1.0909066200256348, + "learning_rate": 6.869311284113767e-06, + "loss": 0.7308, + "step": 7719 + }, + { + "epoch": 0.3967519786206188, + "grad_norm": 1.0916565656661987, + "learning_rate": 6.868539355691831e-06, + "loss": 0.7594, + "step": 7720 + }, + { + "epoch": 0.39680337136396343, + "grad_norm": 1.0309057235717773, + "learning_rate": 6.867767375500382e-06, + "loss": 0.8361, + "step": 7721 + }, + { + "epoch": 0.396854764107308, + "grad_norm": 0.7288246750831604, + "learning_rate": 6.866995343560807e-06, + "loss": 0.6119, + "step": 7722 + }, + { + "epoch": 0.39690615685065267, + "grad_norm": 0.7469972372055054, + "learning_rate": 6.866223259894493e-06, + "loss": 0.6512, + "step": 7723 + }, + { + "epoch": 0.3969575495939973, + "grad_norm": 1.290073037147522, + "learning_rate": 6.865451124522837e-06, + "loss": 0.7332, + "step": 7724 + }, + { + "epoch": 0.39700894233734196, + "grad_norm": 1.102309226989746, + "learning_rate": 6.8646789374672274e-06, + "loss": 0.7573, + "step": 7725 + }, + { + "epoch": 0.3970603350806866, + "grad_norm": 1.0716608762741089, + "learning_rate": 6.86390669874906e-06, + "loss": 0.7479, + "step": 7726 + }, + { + "epoch": 0.39711172782403126, + "grad_norm": 1.1664611101150513, + "learning_rate": 6.863134408389729e-06, + "loss": 0.7289, + "step": 7727 + }, + { + "epoch": 0.3971631205673759, + "grad_norm": 1.1512418985366821, + "learning_rate": 6.862362066410631e-06, + "loss": 0.7256, + "step": 7728 + }, + { + "epoch": 0.39721451331072055, + "grad_norm": 0.7525127530097961, + "learning_rate": 6.861589672833169e-06, + "loss": 0.7066, + "step": 7729 + }, + { + "epoch": 0.39726590605406514, + "grad_norm": 1.0336024761199951, + "learning_rate": 6.860817227678738e-06, + "loss": 0.7325, + "step": 7730 + }, + { + "epoch": 0.3973172987974098, + "grad_norm": 1.0607967376708984, + "learning_rate": 6.860044730968739e-06, + "loss": 0.7272, + "step": 7731 + }, + { + "epoch": 0.39736869154075444, + "grad_norm": 0.7145070433616638, + "learning_rate": 6.859272182724579e-06, + "loss": 0.6964, + "step": 7732 + }, + { + "epoch": 0.3974200842840991, + "grad_norm": 0.6816957592964172, + "learning_rate": 6.858499582967658e-06, + "loss": 0.6834, + "step": 7733 + }, + { + "epoch": 0.39747147702744373, + "grad_norm": 1.0652270317077637, + "learning_rate": 6.857726931719384e-06, + "loss": 0.6982, + "step": 7734 + }, + { + "epoch": 0.3975228697707884, + "grad_norm": 1.0576021671295166, + "learning_rate": 6.8569542290011634e-06, + "loss": 0.7456, + "step": 7735 + }, + { + "epoch": 0.397574262514133, + "grad_norm": 1.0707768201828003, + "learning_rate": 6.856181474834402e-06, + "loss": 0.7817, + "step": 7736 + }, + { + "epoch": 0.3976256552574776, + "grad_norm": 1.0332192182540894, + "learning_rate": 6.855408669240514e-06, + "loss": 0.7451, + "step": 7737 + }, + { + "epoch": 0.39767704800082226, + "grad_norm": 1.0896917581558228, + "learning_rate": 6.854635812240908e-06, + "loss": 0.7264, + "step": 7738 + }, + { + "epoch": 0.3977284407441669, + "grad_norm": 1.10227632522583, + "learning_rate": 6.853862903856998e-06, + "loss": 0.7768, + "step": 7739 + }, + { + "epoch": 0.39777983348751156, + "grad_norm": 1.1464961767196655, + "learning_rate": 6.853089944110197e-06, + "loss": 0.7836, + "step": 7740 + }, + { + "epoch": 0.3978312262308562, + "grad_norm": 1.1983598470687866, + "learning_rate": 6.852316933021921e-06, + "loss": 0.6845, + "step": 7741 + }, + { + "epoch": 0.39788261897420085, + "grad_norm": 1.0834624767303467, + "learning_rate": 6.851543870613586e-06, + "loss": 0.7332, + "step": 7742 + }, + { + "epoch": 0.3979340117175455, + "grad_norm": 0.7786639928817749, + "learning_rate": 6.850770756906613e-06, + "loss": 0.7053, + "step": 7743 + }, + { + "epoch": 0.39798540446089015, + "grad_norm": 0.7227832078933716, + "learning_rate": 6.849997591922418e-06, + "loss": 0.7294, + "step": 7744 + }, + { + "epoch": 0.39803679720423474, + "grad_norm": 1.0818644762039185, + "learning_rate": 6.849224375682426e-06, + "loss": 0.714, + "step": 7745 + }, + { + "epoch": 0.3980881899475794, + "grad_norm": 0.94719398021698, + "learning_rate": 6.8484511082080565e-06, + "loss": 0.6937, + "step": 7746 + }, + { + "epoch": 0.39813958269092403, + "grad_norm": 1.0978195667266846, + "learning_rate": 6.847677789520735e-06, + "loss": 0.7772, + "step": 7747 + }, + { + "epoch": 0.3981909754342687, + "grad_norm": 1.032971739768982, + "learning_rate": 6.846904419641886e-06, + "loss": 0.8105, + "step": 7748 + }, + { + "epoch": 0.3982423681776133, + "grad_norm": 1.0822910070419312, + "learning_rate": 6.8461309985929396e-06, + "loss": 0.7737, + "step": 7749 + }, + { + "epoch": 0.398293760920958, + "grad_norm": 1.0729904174804688, + "learning_rate": 6.845357526395321e-06, + "loss": 0.7449, + "step": 7750 + }, + { + "epoch": 0.3983451536643026, + "grad_norm": 1.0648449659347534, + "learning_rate": 6.844584003070461e-06, + "loss": 0.7979, + "step": 7751 + }, + { + "epoch": 0.39839654640764727, + "grad_norm": 0.8209916353225708, + "learning_rate": 6.843810428639789e-06, + "loss": 0.6533, + "step": 7752 + }, + { + "epoch": 0.39844793915099186, + "grad_norm": 1.030351996421814, + "learning_rate": 6.843036803124739e-06, + "loss": 0.7656, + "step": 7753 + }, + { + "epoch": 0.3984993318943365, + "grad_norm": 0.8125653862953186, + "learning_rate": 6.842263126546746e-06, + "loss": 0.6533, + "step": 7754 + }, + { + "epoch": 0.39855072463768115, + "grad_norm": 1.1078312397003174, + "learning_rate": 6.841489398927245e-06, + "loss": 0.8019, + "step": 7755 + }, + { + "epoch": 0.3986021173810258, + "grad_norm": 1.1109648942947388, + "learning_rate": 6.840715620287671e-06, + "loss": 0.6381, + "step": 7756 + }, + { + "epoch": 0.39865351012437045, + "grad_norm": 1.1202822923660278, + "learning_rate": 6.839941790649462e-06, + "loss": 0.8132, + "step": 7757 + }, + { + "epoch": 0.3987049028677151, + "grad_norm": 1.110429286956787, + "learning_rate": 6.839167910034061e-06, + "loss": 0.7751, + "step": 7758 + }, + { + "epoch": 0.39875629561105974, + "grad_norm": 0.9082592725753784, + "learning_rate": 6.838393978462907e-06, + "loss": 0.6657, + "step": 7759 + }, + { + "epoch": 0.39880768835440433, + "grad_norm": 1.1499602794647217, + "learning_rate": 6.8376199959574405e-06, + "loss": 0.7739, + "step": 7760 + }, + { + "epoch": 0.398859081097749, + "grad_norm": 1.0758951902389526, + "learning_rate": 6.836845962539109e-06, + "loss": 0.7114, + "step": 7761 + }, + { + "epoch": 0.3989104738410936, + "grad_norm": 1.0509676933288574, + "learning_rate": 6.8360718782293565e-06, + "loss": 0.7221, + "step": 7762 + }, + { + "epoch": 0.3989618665844383, + "grad_norm": 1.0686817169189453, + "learning_rate": 6.835297743049628e-06, + "loss": 0.7502, + "step": 7763 + }, + { + "epoch": 0.3990132593277829, + "grad_norm": 0.763863742351532, + "learning_rate": 6.834523557021374e-06, + "loss": 0.7015, + "step": 7764 + }, + { + "epoch": 0.39906465207112757, + "grad_norm": 1.0583761930465698, + "learning_rate": 6.833749320166042e-06, + "loss": 0.7603, + "step": 7765 + }, + { + "epoch": 0.3991160448144722, + "grad_norm": 1.079943060874939, + "learning_rate": 6.832975032505085e-06, + "loss": 0.7326, + "step": 7766 + }, + { + "epoch": 0.39916743755781686, + "grad_norm": 1.066389560699463, + "learning_rate": 6.832200694059953e-06, + "loss": 0.7635, + "step": 7767 + }, + { + "epoch": 0.39921883030116145, + "grad_norm": 1.0397090911865234, + "learning_rate": 6.8314263048521e-06, + "loss": 0.7527, + "step": 7768 + }, + { + "epoch": 0.3992702230445061, + "grad_norm": 0.812747061252594, + "learning_rate": 6.830651864902984e-06, + "loss": 0.6601, + "step": 7769 + }, + { + "epoch": 0.39932161578785075, + "grad_norm": 1.8816694021224976, + "learning_rate": 6.829877374234059e-06, + "loss": 0.6956, + "step": 7770 + }, + { + "epoch": 0.3993730085311954, + "grad_norm": 1.0488512516021729, + "learning_rate": 6.829102832866782e-06, + "loss": 0.7548, + "step": 7771 + }, + { + "epoch": 0.39942440127454004, + "grad_norm": 1.0703542232513428, + "learning_rate": 6.828328240822615e-06, + "loss": 0.7687, + "step": 7772 + }, + { + "epoch": 0.3994757940178847, + "grad_norm": 1.0814608335494995, + "learning_rate": 6.8275535981230155e-06, + "loss": 0.7274, + "step": 7773 + }, + { + "epoch": 0.39952718676122934, + "grad_norm": 1.0757063627243042, + "learning_rate": 6.826778904789451e-06, + "loss": 0.7349, + "step": 7774 + }, + { + "epoch": 0.3995785795045739, + "grad_norm": 0.7019311785697937, + "learning_rate": 6.82600416084338e-06, + "loss": 0.6561, + "step": 7775 + }, + { + "epoch": 0.3996299722479186, + "grad_norm": 1.0670479536056519, + "learning_rate": 6.825229366306269e-06, + "loss": 0.7322, + "step": 7776 + }, + { + "epoch": 0.3996813649912632, + "grad_norm": 1.0538848638534546, + "learning_rate": 6.824454521199585e-06, + "loss": 0.7808, + "step": 7777 + }, + { + "epoch": 0.39973275773460787, + "grad_norm": 0.6868626475334167, + "learning_rate": 6.823679625544794e-06, + "loss": 0.7096, + "step": 7778 + }, + { + "epoch": 0.3997841504779525, + "grad_norm": 0.9946128129959106, + "learning_rate": 6.822904679363367e-06, + "loss": 0.6895, + "step": 7779 + }, + { + "epoch": 0.39983554322129716, + "grad_norm": 1.0078725814819336, + "learning_rate": 6.822129682676774e-06, + "loss": 0.7259, + "step": 7780 + }, + { + "epoch": 0.3998869359646418, + "grad_norm": 1.1129213571548462, + "learning_rate": 6.821354635506489e-06, + "loss": 0.6952, + "step": 7781 + }, + { + "epoch": 0.39993832870798646, + "grad_norm": 0.9731841683387756, + "learning_rate": 6.8205795378739815e-06, + "loss": 0.6606, + "step": 7782 + }, + { + "epoch": 0.39998972145133105, + "grad_norm": 1.08409583568573, + "learning_rate": 6.819804389800728e-06, + "loss": 0.8091, + "step": 7783 + }, + { + "epoch": 0.4000411141946757, + "grad_norm": 0.7332271933555603, + "learning_rate": 6.819029191308205e-06, + "loss": 0.6755, + "step": 7784 + }, + { + "epoch": 0.40009250693802034, + "grad_norm": 1.128983974456787, + "learning_rate": 6.818253942417891e-06, + "loss": 0.7832, + "step": 7785 + }, + { + "epoch": 0.400143899681365, + "grad_norm": 1.0079869031906128, + "learning_rate": 6.817478643151263e-06, + "loss": 0.7207, + "step": 7786 + }, + { + "epoch": 0.40019529242470964, + "grad_norm": 1.014378309249878, + "learning_rate": 6.816703293529802e-06, + "loss": 0.7118, + "step": 7787 + }, + { + "epoch": 0.4002466851680543, + "grad_norm": 0.826568067073822, + "learning_rate": 6.8159278935749885e-06, + "loss": 0.7164, + "step": 7788 + }, + { + "epoch": 0.40029807791139893, + "grad_norm": 1.0655832290649414, + "learning_rate": 6.815152443308307e-06, + "loss": 0.7382, + "step": 7789 + }, + { + "epoch": 0.4003494706547436, + "grad_norm": 1.2548801898956299, + "learning_rate": 6.8143769427512445e-06, + "loss": 0.6768, + "step": 7790 + }, + { + "epoch": 0.40040086339808817, + "grad_norm": 0.6576259732246399, + "learning_rate": 6.813601391925283e-06, + "loss": 0.6719, + "step": 7791 + }, + { + "epoch": 0.4004522561414328, + "grad_norm": 1.334196925163269, + "learning_rate": 6.81282579085191e-06, + "loss": 0.7797, + "step": 7792 + }, + { + "epoch": 0.40050364888477746, + "grad_norm": 0.7099539041519165, + "learning_rate": 6.812050139552617e-06, + "loss": 0.653, + "step": 7793 + }, + { + "epoch": 0.4005550416281221, + "grad_norm": 1.0754698514938354, + "learning_rate": 6.8112744380488934e-06, + "loss": 0.7745, + "step": 7794 + }, + { + "epoch": 0.40060643437146676, + "grad_norm": 1.087551474571228, + "learning_rate": 6.810498686362228e-06, + "loss": 0.7486, + "step": 7795 + }, + { + "epoch": 0.4006578271148114, + "grad_norm": 1.0259875059127808, + "learning_rate": 6.809722884514116e-06, + "loss": 0.7185, + "step": 7796 + }, + { + "epoch": 0.40070921985815605, + "grad_norm": 1.09176504611969, + "learning_rate": 6.808947032526051e-06, + "loss": 0.7327, + "step": 7797 + }, + { + "epoch": 0.40076061260150064, + "grad_norm": 1.198707103729248, + "learning_rate": 6.808171130419528e-06, + "loss": 0.7655, + "step": 7798 + }, + { + "epoch": 0.4008120053448453, + "grad_norm": 1.1064229011535645, + "learning_rate": 6.807395178216047e-06, + "loss": 0.7418, + "step": 7799 + }, + { + "epoch": 0.40086339808818994, + "grad_norm": 1.0397785902023315, + "learning_rate": 6.806619175937102e-06, + "loss": 0.7306, + "step": 7800 + }, + { + "epoch": 0.4009147908315346, + "grad_norm": 1.1523398160934448, + "learning_rate": 6.805843123604197e-06, + "loss": 0.7562, + "step": 7801 + }, + { + "epoch": 0.40096618357487923, + "grad_norm": 0.7127928733825684, + "learning_rate": 6.805067021238831e-06, + "loss": 0.6401, + "step": 7802 + }, + { + "epoch": 0.4010175763182239, + "grad_norm": 1.0998114347457886, + "learning_rate": 6.804290868862506e-06, + "loss": 0.8263, + "step": 7803 + }, + { + "epoch": 0.4010689690615685, + "grad_norm": 1.1519544124603271, + "learning_rate": 6.803514666496728e-06, + "loss": 0.7285, + "step": 7804 + }, + { + "epoch": 0.4011203618049132, + "grad_norm": 1.0409435033798218, + "learning_rate": 6.802738414163e-06, + "loss": 0.7433, + "step": 7805 + }, + { + "epoch": 0.40117175454825776, + "grad_norm": 1.0748502016067505, + "learning_rate": 6.801962111882832e-06, + "loss": 0.7432, + "step": 7806 + }, + { + "epoch": 0.4012231472916024, + "grad_norm": 1.0552645921707153, + "learning_rate": 6.801185759677729e-06, + "loss": 0.7202, + "step": 7807 + }, + { + "epoch": 0.40127454003494706, + "grad_norm": 1.0588408708572388, + "learning_rate": 6.800409357569201e-06, + "loss": 0.7903, + "step": 7808 + }, + { + "epoch": 0.4013259327782917, + "grad_norm": 1.0377616882324219, + "learning_rate": 6.799632905578759e-06, + "loss": 0.6909, + "step": 7809 + }, + { + "epoch": 0.40137732552163635, + "grad_norm": 0.7881459593772888, + "learning_rate": 6.798856403727916e-06, + "loss": 0.669, + "step": 7810 + }, + { + "epoch": 0.401428718264981, + "grad_norm": 0.7208056449890137, + "learning_rate": 6.798079852038187e-06, + "loss": 0.6921, + "step": 7811 + }, + { + "epoch": 0.40148011100832565, + "grad_norm": 1.0429795980453491, + "learning_rate": 6.797303250531085e-06, + "loss": 0.7296, + "step": 7812 + }, + { + "epoch": 0.40153150375167024, + "grad_norm": 0.796619713306427, + "learning_rate": 6.796526599228127e-06, + "loss": 0.6649, + "step": 7813 + }, + { + "epoch": 0.4015828964950149, + "grad_norm": 1.190920114517212, + "learning_rate": 6.795749898150831e-06, + "loss": 0.6997, + "step": 7814 + }, + { + "epoch": 0.40163428923835953, + "grad_norm": 1.0986249446868896, + "learning_rate": 6.794973147320716e-06, + "loss": 0.8788, + "step": 7815 + }, + { + "epoch": 0.4016856819817042, + "grad_norm": 0.7709076404571533, + "learning_rate": 6.794196346759301e-06, + "loss": 0.7282, + "step": 7816 + }, + { + "epoch": 0.4017370747250488, + "grad_norm": 1.185579776763916, + "learning_rate": 6.793419496488112e-06, + "loss": 0.824, + "step": 7817 + }, + { + "epoch": 0.4017884674683935, + "grad_norm": 0.8647105097770691, + "learning_rate": 6.792642596528667e-06, + "loss": 0.6958, + "step": 7818 + }, + { + "epoch": 0.4018398602117381, + "grad_norm": 1.033717155456543, + "learning_rate": 6.7918656469024935e-06, + "loss": 0.7173, + "step": 7819 + }, + { + "epoch": 0.40189125295508277, + "grad_norm": 0.972399115562439, + "learning_rate": 6.79108864763112e-06, + "loss": 0.6992, + "step": 7820 + }, + { + "epoch": 0.40194264569842736, + "grad_norm": 1.12163245677948, + "learning_rate": 6.79031159873607e-06, + "loss": 0.7386, + "step": 7821 + }, + { + "epoch": 0.401994038441772, + "grad_norm": 1.0556813478469849, + "learning_rate": 6.789534500238874e-06, + "loss": 0.7916, + "step": 7822 + }, + { + "epoch": 0.40204543118511665, + "grad_norm": 1.0941044092178345, + "learning_rate": 6.788757352161062e-06, + "loss": 0.7231, + "step": 7823 + }, + { + "epoch": 0.4020968239284613, + "grad_norm": 1.0354557037353516, + "learning_rate": 6.787980154524165e-06, + "loss": 0.7298, + "step": 7824 + }, + { + "epoch": 0.40214821667180595, + "grad_norm": 0.8134052753448486, + "learning_rate": 6.787202907349717e-06, + "loss": 0.7304, + "step": 7825 + }, + { + "epoch": 0.4021996094151506, + "grad_norm": 1.0776816606521606, + "learning_rate": 6.786425610659252e-06, + "loss": 0.7704, + "step": 7826 + }, + { + "epoch": 0.40225100215849524, + "grad_norm": 1.0329506397247314, + "learning_rate": 6.785648264474305e-06, + "loss": 0.7437, + "step": 7827 + }, + { + "epoch": 0.40230239490183983, + "grad_norm": 1.117995023727417, + "learning_rate": 6.784870868816414e-06, + "loss": 0.7991, + "step": 7828 + }, + { + "epoch": 0.4023537876451845, + "grad_norm": 1.0235388278961182, + "learning_rate": 6.784093423707116e-06, + "loss": 0.7857, + "step": 7829 + }, + { + "epoch": 0.4024051803885291, + "grad_norm": 0.7928805351257324, + "learning_rate": 6.783315929167953e-06, + "loss": 0.6502, + "step": 7830 + }, + { + "epoch": 0.4024565731318738, + "grad_norm": 0.7468971014022827, + "learning_rate": 6.782538385220465e-06, + "loss": 0.7484, + "step": 7831 + }, + { + "epoch": 0.4025079658752184, + "grad_norm": 1.019614815711975, + "learning_rate": 6.781760791886193e-06, + "loss": 0.6843, + "step": 7832 + }, + { + "epoch": 0.40255935861856307, + "grad_norm": 1.0580393075942993, + "learning_rate": 6.780983149186684e-06, + "loss": 0.7099, + "step": 7833 + }, + { + "epoch": 0.4026107513619077, + "grad_norm": 1.1143561601638794, + "learning_rate": 6.7802054571434794e-06, + "loss": 0.8028, + "step": 7834 + }, + { + "epoch": 0.40266214410525236, + "grad_norm": 1.0954301357269287, + "learning_rate": 6.77942771577813e-06, + "loss": 0.7409, + "step": 7835 + }, + { + "epoch": 0.40271353684859695, + "grad_norm": 1.0712257623672485, + "learning_rate": 6.778649925112181e-06, + "loss": 0.7268, + "step": 7836 + }, + { + "epoch": 0.4027649295919416, + "grad_norm": 0.7284889221191406, + "learning_rate": 6.777872085167183e-06, + "loss": 0.6794, + "step": 7837 + }, + { + "epoch": 0.40281632233528625, + "grad_norm": 1.035780906677246, + "learning_rate": 6.777094195964686e-06, + "loss": 0.688, + "step": 7838 + }, + { + "epoch": 0.4028677150786309, + "grad_norm": 1.0690861940383911, + "learning_rate": 6.776316257526243e-06, + "loss": 0.7201, + "step": 7839 + }, + { + "epoch": 0.40291910782197554, + "grad_norm": 1.0391000509262085, + "learning_rate": 6.7755382698734066e-06, + "loss": 0.8167, + "step": 7840 + }, + { + "epoch": 0.4029705005653202, + "grad_norm": 1.0274579524993896, + "learning_rate": 6.774760233027732e-06, + "loss": 0.6713, + "step": 7841 + }, + { + "epoch": 0.40302189330866484, + "grad_norm": 1.1139503717422485, + "learning_rate": 6.773982147010775e-06, + "loss": 0.8049, + "step": 7842 + }, + { + "epoch": 0.4030732860520095, + "grad_norm": 1.0674599409103394, + "learning_rate": 6.773204011844095e-06, + "loss": 0.7447, + "step": 7843 + }, + { + "epoch": 0.4031246787953541, + "grad_norm": 1.0830377340316772, + "learning_rate": 6.772425827549248e-06, + "loss": 0.7372, + "step": 7844 + }, + { + "epoch": 0.4031760715386987, + "grad_norm": 1.0315676927566528, + "learning_rate": 6.7716475941477956e-06, + "loss": 0.7464, + "step": 7845 + }, + { + "epoch": 0.40322746428204337, + "grad_norm": 1.0504807233810425, + "learning_rate": 6.7708693116613e-06, + "loss": 0.7497, + "step": 7846 + }, + { + "epoch": 0.403278857025388, + "grad_norm": 1.0584125518798828, + "learning_rate": 6.770090980111324e-06, + "loss": 0.7428, + "step": 7847 + }, + { + "epoch": 0.40333024976873266, + "grad_norm": 1.099342703819275, + "learning_rate": 6.7693125995194305e-06, + "loss": 0.7731, + "step": 7848 + }, + { + "epoch": 0.4033816425120773, + "grad_norm": 1.0795953273773193, + "learning_rate": 6.768534169907187e-06, + "loss": 0.75, + "step": 7849 + }, + { + "epoch": 0.40343303525542196, + "grad_norm": 0.9759268760681152, + "learning_rate": 6.767755691296161e-06, + "loss": 0.6626, + "step": 7850 + }, + { + "epoch": 0.40348442799876655, + "grad_norm": 1.0457463264465332, + "learning_rate": 6.766977163707919e-06, + "loss": 0.7719, + "step": 7851 + }, + { + "epoch": 0.4035358207421112, + "grad_norm": 0.7540583610534668, + "learning_rate": 6.766198587164031e-06, + "loss": 0.6957, + "step": 7852 + }, + { + "epoch": 0.40358721348545584, + "grad_norm": 0.6900243759155273, + "learning_rate": 6.765419961686069e-06, + "loss": 0.6657, + "step": 7853 + }, + { + "epoch": 0.4036386062288005, + "grad_norm": 1.0358715057373047, + "learning_rate": 6.764641287295607e-06, + "loss": 0.7145, + "step": 7854 + }, + { + "epoch": 0.40368999897214514, + "grad_norm": 1.078387975692749, + "learning_rate": 6.7638625640142165e-06, + "loss": 0.7251, + "step": 7855 + }, + { + "epoch": 0.4037413917154898, + "grad_norm": 1.0359172821044922, + "learning_rate": 6.763083791863472e-06, + "loss": 0.775, + "step": 7856 + }, + { + "epoch": 0.40379278445883443, + "grad_norm": 1.1707258224487305, + "learning_rate": 6.762304970864952e-06, + "loss": 0.7427, + "step": 7857 + }, + { + "epoch": 0.4038441772021791, + "grad_norm": 1.0416220426559448, + "learning_rate": 6.761526101040234e-06, + "loss": 0.7636, + "step": 7858 + }, + { + "epoch": 0.40389556994552367, + "grad_norm": 1.062119722366333, + "learning_rate": 6.760747182410898e-06, + "loss": 0.7583, + "step": 7859 + }, + { + "epoch": 0.4039469626888683, + "grad_norm": 1.2153178453445435, + "learning_rate": 6.759968214998522e-06, + "loss": 0.7639, + "step": 7860 + }, + { + "epoch": 0.40399835543221296, + "grad_norm": 1.0584911108016968, + "learning_rate": 6.759189198824691e-06, + "loss": 0.8317, + "step": 7861 + }, + { + "epoch": 0.4040497481755576, + "grad_norm": 0.7581250667572021, + "learning_rate": 6.758410133910988e-06, + "loss": 0.671, + "step": 7862 + }, + { + "epoch": 0.40410114091890226, + "grad_norm": 1.1762150526046753, + "learning_rate": 6.7576310202789955e-06, + "loss": 0.7506, + "step": 7863 + }, + { + "epoch": 0.4041525336622469, + "grad_norm": 0.7374364137649536, + "learning_rate": 6.7568518579503e-06, + "loss": 0.7005, + "step": 7864 + }, + { + "epoch": 0.40420392640559155, + "grad_norm": 1.0703513622283936, + "learning_rate": 6.756072646946491e-06, + "loss": 0.8093, + "step": 7865 + }, + { + "epoch": 0.40425531914893614, + "grad_norm": 1.0418522357940674, + "learning_rate": 6.755293387289154e-06, + "loss": 0.7566, + "step": 7866 + }, + { + "epoch": 0.4043067118922808, + "grad_norm": 0.7447736263275146, + "learning_rate": 6.754514078999883e-06, + "loss": 0.6305, + "step": 7867 + }, + { + "epoch": 0.40435810463562544, + "grad_norm": 1.0050560235977173, + "learning_rate": 6.753734722100267e-06, + "loss": 0.7893, + "step": 7868 + }, + { + "epoch": 0.4044094973789701, + "grad_norm": 0.7225925326347351, + "learning_rate": 6.752955316611898e-06, + "loss": 0.6707, + "step": 7869 + }, + { + "epoch": 0.40446089012231473, + "grad_norm": 1.0461152791976929, + "learning_rate": 6.752175862556371e-06, + "loss": 0.7757, + "step": 7870 + }, + { + "epoch": 0.4045122828656594, + "grad_norm": 1.0345289707183838, + "learning_rate": 6.751396359955283e-06, + "loss": 0.7599, + "step": 7871 + }, + { + "epoch": 0.404563675609004, + "grad_norm": 1.5227004289627075, + "learning_rate": 6.750616808830228e-06, + "loss": 0.7001, + "step": 7872 + }, + { + "epoch": 0.4046150683523487, + "grad_norm": 1.1362090110778809, + "learning_rate": 6.7498372092028076e-06, + "loss": 0.8339, + "step": 7873 + }, + { + "epoch": 0.40466646109569326, + "grad_norm": 1.0582871437072754, + "learning_rate": 6.749057561094618e-06, + "loss": 0.7729, + "step": 7874 + }, + { + "epoch": 0.4047178538390379, + "grad_norm": 0.7246353030204773, + "learning_rate": 6.7482778645272615e-06, + "loss": 0.6814, + "step": 7875 + }, + { + "epoch": 0.40476924658238256, + "grad_norm": 0.7528852820396423, + "learning_rate": 6.747498119522341e-06, + "loss": 0.6821, + "step": 7876 + }, + { + "epoch": 0.4048206393257272, + "grad_norm": 1.0142005681991577, + "learning_rate": 6.7467183261014575e-06, + "loss": 0.7247, + "step": 7877 + }, + { + "epoch": 0.40487203206907185, + "grad_norm": 1.0177489519119263, + "learning_rate": 6.745938484286219e-06, + "loss": 0.7452, + "step": 7878 + }, + { + "epoch": 0.4049234248124165, + "grad_norm": 1.0192375183105469, + "learning_rate": 6.745158594098229e-06, + "loss": 0.7221, + "step": 7879 + }, + { + "epoch": 0.40497481755576115, + "grad_norm": 1.0155107975006104, + "learning_rate": 6.744378655559096e-06, + "loss": 0.7573, + "step": 7880 + }, + { + "epoch": 0.4050262102991058, + "grad_norm": 1.0010485649108887, + "learning_rate": 6.74359866869043e-06, + "loss": 0.7352, + "step": 7881 + }, + { + "epoch": 0.4050776030424504, + "grad_norm": 0.7212871313095093, + "learning_rate": 6.742818633513839e-06, + "loss": 0.6972, + "step": 7882 + }, + { + "epoch": 0.40512899578579503, + "grad_norm": 1.0373481512069702, + "learning_rate": 6.742038550050937e-06, + "loss": 0.7539, + "step": 7883 + }, + { + "epoch": 0.4051803885291397, + "grad_norm": 1.0220096111297607, + "learning_rate": 6.741258418323335e-06, + "loss": 0.706, + "step": 7884 + }, + { + "epoch": 0.4052317812724843, + "grad_norm": 1.0339752435684204, + "learning_rate": 6.740478238352649e-06, + "loss": 0.7506, + "step": 7885 + }, + { + "epoch": 0.405283174015829, + "grad_norm": 1.0616086721420288, + "learning_rate": 6.7396980101604935e-06, + "loss": 0.7902, + "step": 7886 + }, + { + "epoch": 0.4053345667591736, + "grad_norm": 1.0507900714874268, + "learning_rate": 6.738917733768485e-06, + "loss": 0.7691, + "step": 7887 + }, + { + "epoch": 0.40538595950251827, + "grad_norm": 1.1763027906417847, + "learning_rate": 6.738137409198242e-06, + "loss": 0.7282, + "step": 7888 + }, + { + "epoch": 0.40543735224586286, + "grad_norm": 1.0808122158050537, + "learning_rate": 6.7373570364713845e-06, + "loss": 0.7612, + "step": 7889 + }, + { + "epoch": 0.4054887449892075, + "grad_norm": 0.799289882183075, + "learning_rate": 6.736576615609532e-06, + "loss": 0.6452, + "step": 7890 + }, + { + "epoch": 0.40554013773255215, + "grad_norm": 1.4809321165084839, + "learning_rate": 6.735796146634309e-06, + "loss": 0.7288, + "step": 7891 + }, + { + "epoch": 0.4055915304758968, + "grad_norm": 1.0577455759048462, + "learning_rate": 6.735015629567338e-06, + "loss": 0.718, + "step": 7892 + }, + { + "epoch": 0.40564292321924145, + "grad_norm": 1.0974152088165283, + "learning_rate": 6.734235064430244e-06, + "loss": 0.8018, + "step": 7893 + }, + { + "epoch": 0.4056943159625861, + "grad_norm": 1.1344894170761108, + "learning_rate": 6.733454451244653e-06, + "loss": 0.7499, + "step": 7894 + }, + { + "epoch": 0.40574570870593074, + "grad_norm": 1.103716254234314, + "learning_rate": 6.732673790032194e-06, + "loss": 0.7605, + "step": 7895 + }, + { + "epoch": 0.4057971014492754, + "grad_norm": 1.0246015787124634, + "learning_rate": 6.731893080814492e-06, + "loss": 0.7532, + "step": 7896 + }, + { + "epoch": 0.40584849419262, + "grad_norm": 0.7563291788101196, + "learning_rate": 6.731112323613181e-06, + "loss": 0.6639, + "step": 7897 + }, + { + "epoch": 0.4058998869359646, + "grad_norm": 1.1162755489349365, + "learning_rate": 6.730331518449892e-06, + "loss": 0.7367, + "step": 7898 + }, + { + "epoch": 0.4059512796793093, + "grad_norm": 1.0836974382400513, + "learning_rate": 6.729550665346257e-06, + "loss": 0.6916, + "step": 7899 + }, + { + "epoch": 0.4060026724226539, + "grad_norm": 1.1318376064300537, + "learning_rate": 6.72876976432391e-06, + "loss": 0.7257, + "step": 7900 + }, + { + "epoch": 0.40605406516599857, + "grad_norm": 0.7126916646957397, + "learning_rate": 6.727988815404487e-06, + "loss": 0.7027, + "step": 7901 + }, + { + "epoch": 0.4061054579093432, + "grad_norm": 1.0463669300079346, + "learning_rate": 6.727207818609625e-06, + "loss": 0.7372, + "step": 7902 + }, + { + "epoch": 0.40615685065268786, + "grad_norm": 0.7074301242828369, + "learning_rate": 6.726426773960963e-06, + "loss": 0.6659, + "step": 7903 + }, + { + "epoch": 0.40620824339603245, + "grad_norm": 1.2183748483657837, + "learning_rate": 6.7256456814801385e-06, + "loss": 0.7869, + "step": 7904 + }, + { + "epoch": 0.4062596361393771, + "grad_norm": 1.0458272695541382, + "learning_rate": 6.724864541188795e-06, + "loss": 0.7126, + "step": 7905 + }, + { + "epoch": 0.40631102888272175, + "grad_norm": 0.7194162011146545, + "learning_rate": 6.724083353108572e-06, + "loss": 0.6864, + "step": 7906 + }, + { + "epoch": 0.4063624216260664, + "grad_norm": 0.7353252172470093, + "learning_rate": 6.723302117261114e-06, + "loss": 0.6489, + "step": 7907 + }, + { + "epoch": 0.40641381436941104, + "grad_norm": 0.995393693447113, + "learning_rate": 6.722520833668067e-06, + "loss": 0.7541, + "step": 7908 + }, + { + "epoch": 0.4064652071127557, + "grad_norm": 1.039131760597229, + "learning_rate": 6.721739502351075e-06, + "loss": 0.74, + "step": 7909 + }, + { + "epoch": 0.40651659985610034, + "grad_norm": 1.0343166589736938, + "learning_rate": 6.7209581233317865e-06, + "loss": 0.7065, + "step": 7910 + }, + { + "epoch": 0.406567992599445, + "grad_norm": 1.125809907913208, + "learning_rate": 6.720176696631851e-06, + "loss": 0.7563, + "step": 7911 + }, + { + "epoch": 0.4066193853427896, + "grad_norm": 1.133452296257019, + "learning_rate": 6.719395222272918e-06, + "loss": 0.7539, + "step": 7912 + }, + { + "epoch": 0.4066707780861342, + "grad_norm": 1.1749025583267212, + "learning_rate": 6.718613700276638e-06, + "loss": 0.7497, + "step": 7913 + }, + { + "epoch": 0.40672217082947887, + "grad_norm": 1.0754362344741821, + "learning_rate": 6.717832130664666e-06, + "loss": 0.7167, + "step": 7914 + }, + { + "epoch": 0.4067735635728235, + "grad_norm": 1.0977122783660889, + "learning_rate": 6.717050513458654e-06, + "loss": 0.7633, + "step": 7915 + }, + { + "epoch": 0.40682495631616816, + "grad_norm": 1.1105183362960815, + "learning_rate": 6.716268848680258e-06, + "loss": 0.7648, + "step": 7916 + }, + { + "epoch": 0.4068763490595128, + "grad_norm": 1.2383711338043213, + "learning_rate": 6.715487136351135e-06, + "loss": 0.737, + "step": 7917 + }, + { + "epoch": 0.40692774180285746, + "grad_norm": 1.0289186239242554, + "learning_rate": 6.714705376492944e-06, + "loss": 0.726, + "step": 7918 + }, + { + "epoch": 0.4069791345462021, + "grad_norm": 1.1408699750900269, + "learning_rate": 6.713923569127342e-06, + "loss": 0.7217, + "step": 7919 + }, + { + "epoch": 0.4070305272895467, + "grad_norm": 0.7385805249214172, + "learning_rate": 6.71314171427599e-06, + "loss": 0.6442, + "step": 7920 + }, + { + "epoch": 0.40708192003289134, + "grad_norm": 1.022669792175293, + "learning_rate": 6.712359811960552e-06, + "loss": 0.669, + "step": 7921 + }, + { + "epoch": 0.407133312776236, + "grad_norm": 1.0556048154830933, + "learning_rate": 6.71157786220269e-06, + "loss": 0.7361, + "step": 7922 + }, + { + "epoch": 0.40718470551958064, + "grad_norm": 1.0673633813858032, + "learning_rate": 6.710795865024069e-06, + "loss": 0.7113, + "step": 7923 + }, + { + "epoch": 0.4072360982629253, + "grad_norm": 0.9981977343559265, + "learning_rate": 6.710013820446354e-06, + "loss": 0.7581, + "step": 7924 + }, + { + "epoch": 0.40728749100626993, + "grad_norm": 1.059861421585083, + "learning_rate": 6.709231728491212e-06, + "loss": 0.7779, + "step": 7925 + }, + { + "epoch": 0.4073388837496146, + "grad_norm": 1.1826664209365845, + "learning_rate": 6.708449589180315e-06, + "loss": 0.7793, + "step": 7926 + }, + { + "epoch": 0.40739027649295917, + "grad_norm": 0.7404249906539917, + "learning_rate": 6.707667402535327e-06, + "loss": 0.7032, + "step": 7927 + }, + { + "epoch": 0.4074416692363038, + "grad_norm": 1.1206835508346558, + "learning_rate": 6.706885168577926e-06, + "loss": 0.727, + "step": 7928 + }, + { + "epoch": 0.40749306197964846, + "grad_norm": 1.0846922397613525, + "learning_rate": 6.706102887329779e-06, + "loss": 0.7493, + "step": 7929 + }, + { + "epoch": 0.4075444547229931, + "grad_norm": 0.9938129782676697, + "learning_rate": 6.705320558812561e-06, + "loss": 0.727, + "step": 7930 + }, + { + "epoch": 0.40759584746633776, + "grad_norm": 1.0975090265274048, + "learning_rate": 6.704538183047948e-06, + "loss": 0.7832, + "step": 7931 + }, + { + "epoch": 0.4076472402096824, + "grad_norm": 1.2475781440734863, + "learning_rate": 6.7037557600576166e-06, + "loss": 0.7706, + "step": 7932 + }, + { + "epoch": 0.40769863295302705, + "grad_norm": 1.024477481842041, + "learning_rate": 6.7029732898632435e-06, + "loss": 0.7765, + "step": 7933 + }, + { + "epoch": 0.4077500256963717, + "grad_norm": 3.735666036605835, + "learning_rate": 6.70219077248651e-06, + "loss": 0.7832, + "step": 7934 + }, + { + "epoch": 0.4078014184397163, + "grad_norm": 1.024791955947876, + "learning_rate": 6.7014082079490925e-06, + "loss": 0.7787, + "step": 7935 + }, + { + "epoch": 0.40785281118306094, + "grad_norm": 1.0390478372573853, + "learning_rate": 6.700625596272676e-06, + "loss": 0.7742, + "step": 7936 + }, + { + "epoch": 0.4079042039264056, + "grad_norm": 0.9915521740913391, + "learning_rate": 6.699842937478943e-06, + "loss": 0.7746, + "step": 7937 + }, + { + "epoch": 0.40795559666975023, + "grad_norm": 1.098596453666687, + "learning_rate": 6.699060231589576e-06, + "loss": 0.7376, + "step": 7938 + }, + { + "epoch": 0.4080069894130949, + "grad_norm": 1.0754375457763672, + "learning_rate": 6.698277478626262e-06, + "loss": 0.7301, + "step": 7939 + }, + { + "epoch": 0.4080583821564395, + "grad_norm": 0.6903818845748901, + "learning_rate": 6.697494678610687e-06, + "loss": 0.6705, + "step": 7940 + }, + { + "epoch": 0.4081097748997842, + "grad_norm": 1.0542471408843994, + "learning_rate": 6.69671183156454e-06, + "loss": 0.6802, + "step": 7941 + }, + { + "epoch": 0.40816116764312876, + "grad_norm": 0.7270194292068481, + "learning_rate": 6.69592893750951e-06, + "loss": 0.6984, + "step": 7942 + }, + { + "epoch": 0.4082125603864734, + "grad_norm": 1.0911885499954224, + "learning_rate": 6.695145996467287e-06, + "loss": 0.7764, + "step": 7943 + }, + { + "epoch": 0.40826395312981806, + "grad_norm": 1.1180353164672852, + "learning_rate": 6.694363008459565e-06, + "loss": 0.7209, + "step": 7944 + }, + { + "epoch": 0.4083153458731627, + "grad_norm": 1.1125692129135132, + "learning_rate": 6.693579973508037e-06, + "loss": 0.72, + "step": 7945 + }, + { + "epoch": 0.40836673861650735, + "grad_norm": 1.1563745737075806, + "learning_rate": 6.692796891634394e-06, + "loss": 0.7786, + "step": 7946 + }, + { + "epoch": 0.408418131359852, + "grad_norm": 0.7223285436630249, + "learning_rate": 6.6920137628603375e-06, + "loss": 0.6522, + "step": 7947 + }, + { + "epoch": 0.40846952410319665, + "grad_norm": 0.7379501461982727, + "learning_rate": 6.6912305872075625e-06, + "loss": 0.689, + "step": 7948 + }, + { + "epoch": 0.4085209168465413, + "grad_norm": 1.043828010559082, + "learning_rate": 6.690447364697765e-06, + "loss": 0.6923, + "step": 7949 + }, + { + "epoch": 0.4085723095898859, + "grad_norm": 0.6911939382553101, + "learning_rate": 6.689664095352649e-06, + "loss": 0.6945, + "step": 7950 + }, + { + "epoch": 0.40862370233323053, + "grad_norm": 1.0108751058578491, + "learning_rate": 6.688880779193912e-06, + "loss": 0.6998, + "step": 7951 + }, + { + "epoch": 0.4086750950765752, + "grad_norm": 1.1145551204681396, + "learning_rate": 6.68809741624326e-06, + "loss": 0.7428, + "step": 7952 + }, + { + "epoch": 0.4087264878199198, + "grad_norm": 1.1391953229904175, + "learning_rate": 6.687314006522394e-06, + "loss": 0.7186, + "step": 7953 + }, + { + "epoch": 0.4087778805632645, + "grad_norm": 1.129879117012024, + "learning_rate": 6.686530550053019e-06, + "loss": 0.7243, + "step": 7954 + }, + { + "epoch": 0.4088292733066091, + "grad_norm": 1.0704381465911865, + "learning_rate": 6.685747046856844e-06, + "loss": 0.7173, + "step": 7955 + }, + { + "epoch": 0.40888066604995377, + "grad_norm": 1.0790979862213135, + "learning_rate": 6.684963496955575e-06, + "loss": 0.7887, + "step": 7956 + }, + { + "epoch": 0.40893205879329836, + "grad_norm": 1.1580049991607666, + "learning_rate": 6.68417990037092e-06, + "loss": 0.7341, + "step": 7957 + }, + { + "epoch": 0.408983451536643, + "grad_norm": 1.129349708557129, + "learning_rate": 6.6833962571245915e-06, + "loss": 0.6943, + "step": 7958 + }, + { + "epoch": 0.40903484427998765, + "grad_norm": 1.1082468032836914, + "learning_rate": 6.682612567238298e-06, + "loss": 0.7637, + "step": 7959 + }, + { + "epoch": 0.4090862370233323, + "grad_norm": 1.122299075126648, + "learning_rate": 6.681828830733756e-06, + "loss": 0.7117, + "step": 7960 + }, + { + "epoch": 0.40913762976667695, + "grad_norm": 1.1041449308395386, + "learning_rate": 6.681045047632676e-06, + "loss": 0.7527, + "step": 7961 + }, + { + "epoch": 0.4091890225100216, + "grad_norm": 1.1245919466018677, + "learning_rate": 6.680261217956775e-06, + "loss": 0.7363, + "step": 7962 + }, + { + "epoch": 0.40924041525336624, + "grad_norm": 1.2942348718643188, + "learning_rate": 6.679477341727769e-06, + "loss": 0.7553, + "step": 7963 + }, + { + "epoch": 0.4092918079967109, + "grad_norm": 1.0296908617019653, + "learning_rate": 6.678693418967379e-06, + "loss": 0.7209, + "step": 7964 + }, + { + "epoch": 0.4093432007400555, + "grad_norm": 0.9074142575263977, + "learning_rate": 6.67790944969732e-06, + "loss": 0.7098, + "step": 7965 + }, + { + "epoch": 0.4093945934834001, + "grad_norm": 1.0555102825164795, + "learning_rate": 6.677125433939316e-06, + "loss": 0.7526, + "step": 7966 + }, + { + "epoch": 0.4094459862267448, + "grad_norm": 1.0784554481506348, + "learning_rate": 6.676341371715085e-06, + "loss": 0.7562, + "step": 7967 + }, + { + "epoch": 0.4094973789700894, + "grad_norm": 0.8925933837890625, + "learning_rate": 6.675557263046355e-06, + "loss": 0.6786, + "step": 7968 + }, + { + "epoch": 0.40954877171343407, + "grad_norm": 1.1209834814071655, + "learning_rate": 6.674773107954846e-06, + "loss": 0.7722, + "step": 7969 + }, + { + "epoch": 0.4096001644567787, + "grad_norm": 1.1376556158065796, + "learning_rate": 6.673988906462286e-06, + "loss": 0.7475, + "step": 7970 + }, + { + "epoch": 0.40965155720012336, + "grad_norm": 0.8658275604248047, + "learning_rate": 6.673204658590402e-06, + "loss": 0.678, + "step": 7971 + }, + { + "epoch": 0.409702949943468, + "grad_norm": 1.1058467626571655, + "learning_rate": 6.672420364360922e-06, + "loss": 0.7153, + "step": 7972 + }, + { + "epoch": 0.4097543426868126, + "grad_norm": 1.3219773769378662, + "learning_rate": 6.671636023795574e-06, + "loss": 0.6954, + "step": 7973 + }, + { + "epoch": 0.40980573543015725, + "grad_norm": 1.0119574069976807, + "learning_rate": 6.670851636916092e-06, + "loss": 0.7351, + "step": 7974 + }, + { + "epoch": 0.4098571281735019, + "grad_norm": 0.7516946196556091, + "learning_rate": 6.670067203744205e-06, + "loss": 0.7331, + "step": 7975 + }, + { + "epoch": 0.40990852091684654, + "grad_norm": 1.092466950416565, + "learning_rate": 6.669282724301649e-06, + "loss": 0.7284, + "step": 7976 + }, + { + "epoch": 0.4099599136601912, + "grad_norm": 1.09486722946167, + "learning_rate": 6.668498198610157e-06, + "loss": 0.765, + "step": 7977 + }, + { + "epoch": 0.41001130640353584, + "grad_norm": 0.8249090313911438, + "learning_rate": 6.667713626691464e-06, + "loss": 0.645, + "step": 7978 + }, + { + "epoch": 0.4100626991468805, + "grad_norm": 1.1555579900741577, + "learning_rate": 6.666929008567311e-06, + "loss": 0.7941, + "step": 7979 + }, + { + "epoch": 0.4101140918902251, + "grad_norm": 1.028634786605835, + "learning_rate": 6.666144344259433e-06, + "loss": 0.7189, + "step": 7980 + }, + { + "epoch": 0.4101654846335697, + "grad_norm": 1.0098729133605957, + "learning_rate": 6.665359633789571e-06, + "loss": 0.8234, + "step": 7981 + }, + { + "epoch": 0.41021687737691437, + "grad_norm": 1.10939359664917, + "learning_rate": 6.664574877179466e-06, + "loss": 0.719, + "step": 7982 + }, + { + "epoch": 0.410268270120259, + "grad_norm": 1.1669492721557617, + "learning_rate": 6.66379007445086e-06, + "loss": 0.7629, + "step": 7983 + }, + { + "epoch": 0.41031966286360366, + "grad_norm": 0.6827564835548401, + "learning_rate": 6.663005225625498e-06, + "loss": 0.626, + "step": 7984 + }, + { + "epoch": 0.4103710556069483, + "grad_norm": 1.048548698425293, + "learning_rate": 6.662220330725123e-06, + "loss": 0.7849, + "step": 7985 + }, + { + "epoch": 0.41042244835029296, + "grad_norm": 0.7019035816192627, + "learning_rate": 6.6614353897714825e-06, + "loss": 0.7116, + "step": 7986 + }, + { + "epoch": 0.4104738410936376, + "grad_norm": 1.0581175088882446, + "learning_rate": 6.660650402786324e-06, + "loss": 0.7314, + "step": 7987 + }, + { + "epoch": 0.4105252338369822, + "grad_norm": 1.0912425518035889, + "learning_rate": 6.659865369791398e-06, + "loss": 0.7665, + "step": 7988 + }, + { + "epoch": 0.41057662658032684, + "grad_norm": 0.8129478693008423, + "learning_rate": 6.659080290808449e-06, + "loss": 0.6657, + "step": 7989 + }, + { + "epoch": 0.4106280193236715, + "grad_norm": 1.0536584854125977, + "learning_rate": 6.658295165859233e-06, + "loss": 0.7547, + "step": 7990 + }, + { + "epoch": 0.41067941206701614, + "grad_norm": 1.038142204284668, + "learning_rate": 6.6575099949655005e-06, + "loss": 0.7629, + "step": 7991 + }, + { + "epoch": 0.4107308048103608, + "grad_norm": 0.695204496383667, + "learning_rate": 6.656724778149008e-06, + "loss": 0.6487, + "step": 7992 + }, + { + "epoch": 0.41078219755370543, + "grad_norm": 1.0843878984451294, + "learning_rate": 6.655939515431508e-06, + "loss": 0.7468, + "step": 7993 + }, + { + "epoch": 0.4108335902970501, + "grad_norm": 1.1089707612991333, + "learning_rate": 6.655154206834757e-06, + "loss": 0.7389, + "step": 7994 + }, + { + "epoch": 0.41088498304039467, + "grad_norm": 1.0390061140060425, + "learning_rate": 6.654368852380515e-06, + "loss": 0.7245, + "step": 7995 + }, + { + "epoch": 0.4109363757837393, + "grad_norm": 0.9975454807281494, + "learning_rate": 6.653583452090538e-06, + "loss": 0.7596, + "step": 7996 + }, + { + "epoch": 0.41098776852708396, + "grad_norm": 1.2355459928512573, + "learning_rate": 6.652798005986587e-06, + "loss": 0.7753, + "step": 7997 + }, + { + "epoch": 0.4110391612704286, + "grad_norm": 1.093235731124878, + "learning_rate": 6.652012514090425e-06, + "loss": 0.78, + "step": 7998 + }, + { + "epoch": 0.41109055401377326, + "grad_norm": 0.8892166018486023, + "learning_rate": 6.651226976423813e-06, + "loss": 0.6328, + "step": 7999 + }, + { + "epoch": 0.4111419467571179, + "grad_norm": 1.1210532188415527, + "learning_rate": 6.650441393008517e-06, + "loss": 0.775, + "step": 8000 + }, + { + "epoch": 0.41119333950046255, + "grad_norm": 1.0821943283081055, + "learning_rate": 6.649655763866301e-06, + "loss": 0.7294, + "step": 8001 + }, + { + "epoch": 0.4112447322438072, + "grad_norm": 0.7084898948669434, + "learning_rate": 6.64887008901893e-06, + "loss": 0.638, + "step": 8002 + }, + { + "epoch": 0.4112961249871518, + "grad_norm": 1.068906545639038, + "learning_rate": 6.648084368488174e-06, + "loss": 0.7367, + "step": 8003 + }, + { + "epoch": 0.41134751773049644, + "grad_norm": 1.0813502073287964, + "learning_rate": 6.6472986022958005e-06, + "loss": 0.7753, + "step": 8004 + }, + { + "epoch": 0.4113989104738411, + "grad_norm": 1.1435678005218506, + "learning_rate": 6.646512790463582e-06, + "loss": 0.7937, + "step": 8005 + }, + { + "epoch": 0.41145030321718573, + "grad_norm": 1.0334559679031372, + "learning_rate": 6.645726933013288e-06, + "loss": 0.7164, + "step": 8006 + }, + { + "epoch": 0.4115016959605304, + "grad_norm": 1.0630053281784058, + "learning_rate": 6.644941029966692e-06, + "loss": 0.7616, + "step": 8007 + }, + { + "epoch": 0.411553088703875, + "grad_norm": 1.265312910079956, + "learning_rate": 6.644155081345569e-06, + "loss": 0.7655, + "step": 8008 + }, + { + "epoch": 0.41160448144721967, + "grad_norm": 1.0740901231765747, + "learning_rate": 6.643369087171695e-06, + "loss": 0.6962, + "step": 8009 + }, + { + "epoch": 0.4116558741905643, + "grad_norm": 1.0570896863937378, + "learning_rate": 6.642583047466843e-06, + "loss": 0.7164, + "step": 8010 + }, + { + "epoch": 0.4117072669339089, + "grad_norm": 1.0792806148529053, + "learning_rate": 6.641796962252796e-06, + "loss": 0.7496, + "step": 8011 + }, + { + "epoch": 0.41175865967725356, + "grad_norm": 1.0608534812927246, + "learning_rate": 6.641010831551329e-06, + "loss": 0.708, + "step": 8012 + }, + { + "epoch": 0.4118100524205982, + "grad_norm": 0.9808920621871948, + "learning_rate": 6.640224655384222e-06, + "loss": 0.7496, + "step": 8013 + }, + { + "epoch": 0.41186144516394285, + "grad_norm": 1.0432575941085815, + "learning_rate": 6.63943843377326e-06, + "loss": 0.7447, + "step": 8014 + }, + { + "epoch": 0.4119128379072875, + "grad_norm": 0.9936188459396362, + "learning_rate": 6.638652166740225e-06, + "loss": 0.7049, + "step": 8015 + }, + { + "epoch": 0.41196423065063215, + "grad_norm": 0.763602077960968, + "learning_rate": 6.637865854306901e-06, + "loss": 0.6892, + "step": 8016 + }, + { + "epoch": 0.4120156233939768, + "grad_norm": 1.1282601356506348, + "learning_rate": 6.637079496495073e-06, + "loss": 0.749, + "step": 8017 + }, + { + "epoch": 0.4120670161373214, + "grad_norm": 1.0999804735183716, + "learning_rate": 6.636293093326527e-06, + "loss": 0.7258, + "step": 8018 + }, + { + "epoch": 0.41211840888066603, + "grad_norm": 0.7895588874816895, + "learning_rate": 6.635506644823053e-06, + "loss": 0.6964, + "step": 8019 + }, + { + "epoch": 0.4121698016240107, + "grad_norm": 0.6827719807624817, + "learning_rate": 6.634720151006439e-06, + "loss": 0.6429, + "step": 8020 + }, + { + "epoch": 0.4122211943673553, + "grad_norm": 0.6972300410270691, + "learning_rate": 6.633933611898475e-06, + "loss": 0.6914, + "step": 8021 + }, + { + "epoch": 0.4122725871107, + "grad_norm": 1.0962932109832764, + "learning_rate": 6.633147027520955e-06, + "loss": 0.7809, + "step": 8022 + }, + { + "epoch": 0.4123239798540446, + "grad_norm": 0.8886080384254456, + "learning_rate": 6.63236039789567e-06, + "loss": 0.6941, + "step": 8023 + }, + { + "epoch": 0.41237537259738927, + "grad_norm": 1.0961463451385498, + "learning_rate": 6.631573723044413e-06, + "loss": 0.7342, + "step": 8024 + }, + { + "epoch": 0.4124267653407339, + "grad_norm": 1.1105092763900757, + "learning_rate": 6.630787002988983e-06, + "loss": 0.7911, + "step": 8025 + }, + { + "epoch": 0.4124781580840785, + "grad_norm": 1.0195692777633667, + "learning_rate": 6.630000237751175e-06, + "loss": 0.6795, + "step": 8026 + }, + { + "epoch": 0.41252955082742315, + "grad_norm": 0.8244153261184692, + "learning_rate": 6.629213427352787e-06, + "loss": 0.6356, + "step": 8027 + }, + { + "epoch": 0.4125809435707678, + "grad_norm": 1.0605871677398682, + "learning_rate": 6.628426571815618e-06, + "loss": 0.7748, + "step": 8028 + }, + { + "epoch": 0.41263233631411245, + "grad_norm": 1.1088330745697021, + "learning_rate": 6.6276396711614685e-06, + "loss": 0.7195, + "step": 8029 + }, + { + "epoch": 0.4126837290574571, + "grad_norm": 1.0487561225891113, + "learning_rate": 6.626852725412142e-06, + "loss": 0.7044, + "step": 8030 + }, + { + "epoch": 0.41273512180080174, + "grad_norm": 1.0625576972961426, + "learning_rate": 6.626065734589437e-06, + "loss": 0.7644, + "step": 8031 + }, + { + "epoch": 0.4127865145441464, + "grad_norm": 1.0724247694015503, + "learning_rate": 6.625278698715164e-06, + "loss": 0.753, + "step": 8032 + }, + { + "epoch": 0.412837907287491, + "grad_norm": 1.0660923719406128, + "learning_rate": 6.624491617811125e-06, + "loss": 0.7268, + "step": 8033 + }, + { + "epoch": 0.4128893000308356, + "grad_norm": 1.0670807361602783, + "learning_rate": 6.6237044918991256e-06, + "loss": 0.7494, + "step": 8034 + }, + { + "epoch": 0.4129406927741803, + "grad_norm": 0.9825435280799866, + "learning_rate": 6.622917321000976e-06, + "loss": 0.7101, + "step": 8035 + }, + { + "epoch": 0.4129920855175249, + "grad_norm": 1.0288969278335571, + "learning_rate": 6.622130105138485e-06, + "loss": 0.71, + "step": 8036 + }, + { + "epoch": 0.41304347826086957, + "grad_norm": 1.1274778842926025, + "learning_rate": 6.621342844333463e-06, + "loss": 0.7525, + "step": 8037 + }, + { + "epoch": 0.4130948710042142, + "grad_norm": 1.1139103174209595, + "learning_rate": 6.620555538607722e-06, + "loss": 0.7971, + "step": 8038 + }, + { + "epoch": 0.41314626374755886, + "grad_norm": 1.0781826972961426, + "learning_rate": 6.619768187983074e-06, + "loss": 0.7599, + "step": 8039 + }, + { + "epoch": 0.4131976564909035, + "grad_norm": 1.1014935970306396, + "learning_rate": 6.6189807924813335e-06, + "loss": 0.8046, + "step": 8040 + }, + { + "epoch": 0.4132490492342481, + "grad_norm": 1.1662489175796509, + "learning_rate": 6.618193352124318e-06, + "loss": 0.7164, + "step": 8041 + }, + { + "epoch": 0.41330044197759275, + "grad_norm": 1.1016510725021362, + "learning_rate": 6.6174058669338405e-06, + "loss": 0.744, + "step": 8042 + }, + { + "epoch": 0.4133518347209374, + "grad_norm": 1.1341471672058105, + "learning_rate": 6.616618336931723e-06, + "loss": 0.7417, + "step": 8043 + }, + { + "epoch": 0.41340322746428204, + "grad_norm": 0.7457482814788818, + "learning_rate": 6.6158307621397814e-06, + "loss": 0.6385, + "step": 8044 + }, + { + "epoch": 0.4134546202076267, + "grad_norm": 0.7525314092636108, + "learning_rate": 6.6150431425798375e-06, + "loss": 0.7421, + "step": 8045 + }, + { + "epoch": 0.41350601295097134, + "grad_norm": 0.7390742897987366, + "learning_rate": 6.6142554782737145e-06, + "loss": 0.6755, + "step": 8046 + }, + { + "epoch": 0.413557405694316, + "grad_norm": 1.1073259115219116, + "learning_rate": 6.613467769243231e-06, + "loss": 0.7609, + "step": 8047 + }, + { + "epoch": 0.41360879843766063, + "grad_norm": 1.105319619178772, + "learning_rate": 6.612680015510218e-06, + "loss": 0.7142, + "step": 8048 + }, + { + "epoch": 0.4136601911810052, + "grad_norm": 1.0521056652069092, + "learning_rate": 6.611892217096494e-06, + "loss": 0.769, + "step": 8049 + }, + { + "epoch": 0.41371158392434987, + "grad_norm": 1.0649930238723755, + "learning_rate": 6.611104374023889e-06, + "loss": 0.7285, + "step": 8050 + }, + { + "epoch": 0.4137629766676945, + "grad_norm": 1.0694961547851562, + "learning_rate": 6.610316486314231e-06, + "loss": 0.7497, + "step": 8051 + }, + { + "epoch": 0.41381436941103916, + "grad_norm": 1.0268933773040771, + "learning_rate": 6.609528553989349e-06, + "loss": 0.7127, + "step": 8052 + }, + { + "epoch": 0.4138657621543838, + "grad_norm": 1.0627905130386353, + "learning_rate": 6.608740577071072e-06, + "loss": 0.7366, + "step": 8053 + }, + { + "epoch": 0.41391715489772846, + "grad_norm": 1.1004197597503662, + "learning_rate": 6.6079525555812315e-06, + "loss": 0.7154, + "step": 8054 + }, + { + "epoch": 0.4139685476410731, + "grad_norm": 1.1193249225616455, + "learning_rate": 6.607164489541664e-06, + "loss": 0.7857, + "step": 8055 + }, + { + "epoch": 0.4140199403844177, + "grad_norm": 1.1357054710388184, + "learning_rate": 6.6063763789741985e-06, + "loss": 0.7747, + "step": 8056 + }, + { + "epoch": 0.41407133312776234, + "grad_norm": 1.0460679531097412, + "learning_rate": 6.605588223900674e-06, + "loss": 0.7287, + "step": 8057 + }, + { + "epoch": 0.414122725871107, + "grad_norm": 1.0804314613342285, + "learning_rate": 6.604800024342925e-06, + "loss": 0.7721, + "step": 8058 + }, + { + "epoch": 0.41417411861445164, + "grad_norm": 1.0311671495437622, + "learning_rate": 6.60401178032279e-06, + "loss": 0.7684, + "step": 8059 + }, + { + "epoch": 0.4142255113577963, + "grad_norm": 1.1368834972381592, + "learning_rate": 6.603223491862107e-06, + "loss": 0.8338, + "step": 8060 + }, + { + "epoch": 0.41427690410114093, + "grad_norm": 8.082850456237793, + "learning_rate": 6.6024351589827195e-06, + "loss": 0.7723, + "step": 8061 + }, + { + "epoch": 0.4143282968444856, + "grad_norm": 1.0659213066101074, + "learning_rate": 6.601646781706466e-06, + "loss": 0.7203, + "step": 8062 + }, + { + "epoch": 0.4143796895878302, + "grad_norm": 1.0736312866210938, + "learning_rate": 6.600858360055189e-06, + "loss": 0.7195, + "step": 8063 + }, + { + "epoch": 0.4144310823311748, + "grad_norm": 1.2954188585281372, + "learning_rate": 6.600069894050734e-06, + "loss": 0.7187, + "step": 8064 + }, + { + "epoch": 0.41448247507451946, + "grad_norm": 1.1182818412780762, + "learning_rate": 6.599281383714944e-06, + "loss": 0.7746, + "step": 8065 + }, + { + "epoch": 0.4145338678178641, + "grad_norm": 0.7987220287322998, + "learning_rate": 6.598492829069667e-06, + "loss": 0.6286, + "step": 8066 + }, + { + "epoch": 0.41458526056120876, + "grad_norm": 1.0978220701217651, + "learning_rate": 6.597704230136751e-06, + "loss": 0.7208, + "step": 8067 + }, + { + "epoch": 0.4146366533045534, + "grad_norm": 1.081256628036499, + "learning_rate": 6.596915586938043e-06, + "loss": 0.7671, + "step": 8068 + }, + { + "epoch": 0.41468804604789805, + "grad_norm": 1.067455530166626, + "learning_rate": 6.596126899495395e-06, + "loss": 0.6885, + "step": 8069 + }, + { + "epoch": 0.4147394387912427, + "grad_norm": 1.0716016292572021, + "learning_rate": 6.595338167830658e-06, + "loss": 0.7478, + "step": 8070 + }, + { + "epoch": 0.4147908315345873, + "grad_norm": 1.0021570920944214, + "learning_rate": 6.594549391965683e-06, + "loss": 0.7817, + "step": 8071 + }, + { + "epoch": 0.41484222427793194, + "grad_norm": 1.0885429382324219, + "learning_rate": 6.593760571922326e-06, + "loss": 0.6927, + "step": 8072 + }, + { + "epoch": 0.4148936170212766, + "grad_norm": 1.0912247896194458, + "learning_rate": 6.59297170772244e-06, + "loss": 0.7879, + "step": 8073 + }, + { + "epoch": 0.41494500976462123, + "grad_norm": 1.1450971364974976, + "learning_rate": 6.59218279938788e-06, + "loss": 0.7893, + "step": 8074 + }, + { + "epoch": 0.4149964025079659, + "grad_norm": 1.067766547203064, + "learning_rate": 6.591393846940507e-06, + "loss": 0.734, + "step": 8075 + }, + { + "epoch": 0.4150477952513105, + "grad_norm": 1.0950958728790283, + "learning_rate": 6.590604850402178e-06, + "loss": 0.7708, + "step": 8076 + }, + { + "epoch": 0.41509918799465517, + "grad_norm": 1.088057518005371, + "learning_rate": 6.589815809794752e-06, + "loss": 0.7505, + "step": 8077 + }, + { + "epoch": 0.4151505807379998, + "grad_norm": 1.0441371202468872, + "learning_rate": 6.589026725140091e-06, + "loss": 0.7165, + "step": 8078 + }, + { + "epoch": 0.4152019734813444, + "grad_norm": 0.8173975944519043, + "learning_rate": 6.588237596460056e-06, + "loss": 0.5868, + "step": 8079 + }, + { + "epoch": 0.41525336622468906, + "grad_norm": 1.0947449207305908, + "learning_rate": 6.587448423776512e-06, + "loss": 0.781, + "step": 8080 + }, + { + "epoch": 0.4153047589680337, + "grad_norm": 1.1305118799209595, + "learning_rate": 6.586659207111325e-06, + "loss": 0.7693, + "step": 8081 + }, + { + "epoch": 0.41535615171137835, + "grad_norm": 1.2380605936050415, + "learning_rate": 6.585869946486356e-06, + "loss": 0.7608, + "step": 8082 + }, + { + "epoch": 0.415407544454723, + "grad_norm": 1.0711721181869507, + "learning_rate": 6.585080641923478e-06, + "loss": 0.7393, + "step": 8083 + }, + { + "epoch": 0.41545893719806765, + "grad_norm": 1.151130199432373, + "learning_rate": 6.5842912934445545e-06, + "loss": 0.7926, + "step": 8084 + }, + { + "epoch": 0.4155103299414123, + "grad_norm": 0.7503238320350647, + "learning_rate": 6.583501901071459e-06, + "loss": 0.6844, + "step": 8085 + }, + { + "epoch": 0.4155617226847569, + "grad_norm": 1.0263811349868774, + "learning_rate": 6.582712464826059e-06, + "loss": 0.7172, + "step": 8086 + }, + { + "epoch": 0.41561311542810153, + "grad_norm": 1.135429859161377, + "learning_rate": 6.581922984730228e-06, + "loss": 0.7376, + "step": 8087 + }, + { + "epoch": 0.4156645081714462, + "grad_norm": 1.0504059791564941, + "learning_rate": 6.581133460805841e-06, + "loss": 0.7107, + "step": 8088 + }, + { + "epoch": 0.4157159009147908, + "grad_norm": 1.010265588760376, + "learning_rate": 6.5803438930747695e-06, + "loss": 0.8095, + "step": 8089 + }, + { + "epoch": 0.4157672936581355, + "grad_norm": 0.846817672252655, + "learning_rate": 6.579554281558891e-06, + "loss": 0.6841, + "step": 8090 + }, + { + "epoch": 0.4158186864014801, + "grad_norm": 1.105237364768982, + "learning_rate": 6.578764626280082e-06, + "loss": 0.7898, + "step": 8091 + }, + { + "epoch": 0.41587007914482477, + "grad_norm": 1.076355218887329, + "learning_rate": 6.577974927260219e-06, + "loss": 0.7413, + "step": 8092 + }, + { + "epoch": 0.4159214718881694, + "grad_norm": 1.055192470550537, + "learning_rate": 6.577185184521184e-06, + "loss": 0.7145, + "step": 8093 + }, + { + "epoch": 0.415972864631514, + "grad_norm": 1.261038899421692, + "learning_rate": 6.576395398084856e-06, + "loss": 0.7645, + "step": 8094 + }, + { + "epoch": 0.41602425737485865, + "grad_norm": 1.0586450099945068, + "learning_rate": 6.575605567973117e-06, + "loss": 0.7114, + "step": 8095 + }, + { + "epoch": 0.4160756501182033, + "grad_norm": 1.0073610544204712, + "learning_rate": 6.574815694207849e-06, + "loss": 0.6922, + "step": 8096 + }, + { + "epoch": 0.41612704286154795, + "grad_norm": 1.1039044857025146, + "learning_rate": 6.574025776810939e-06, + "loss": 0.7373, + "step": 8097 + }, + { + "epoch": 0.4161784356048926, + "grad_norm": 0.76905357837677, + "learning_rate": 6.573235815804267e-06, + "loss": 0.7025, + "step": 8098 + }, + { + "epoch": 0.41622982834823724, + "grad_norm": 1.0936994552612305, + "learning_rate": 6.572445811209726e-06, + "loss": 0.7652, + "step": 8099 + }, + { + "epoch": 0.4162812210915819, + "grad_norm": 1.1234445571899414, + "learning_rate": 6.571655763049198e-06, + "loss": 0.7624, + "step": 8100 + }, + { + "epoch": 0.41633261383492653, + "grad_norm": 1.1500602960586548, + "learning_rate": 6.570865671344577e-06, + "loss": 0.7512, + "step": 8101 + }, + { + "epoch": 0.4163840065782711, + "grad_norm": 1.0710879564285278, + "learning_rate": 6.5700755361177505e-06, + "loss": 0.7082, + "step": 8102 + }, + { + "epoch": 0.4164353993216158, + "grad_norm": 0.7484697103500366, + "learning_rate": 6.569285357390609e-06, + "loss": 0.6856, + "step": 8103 + }, + { + "epoch": 0.4164867920649604, + "grad_norm": 1.1131904125213623, + "learning_rate": 6.568495135185048e-06, + "loss": 0.7162, + "step": 8104 + }, + { + "epoch": 0.41653818480830507, + "grad_norm": 1.0708578824996948, + "learning_rate": 6.5677048695229586e-06, + "loss": 0.686, + "step": 8105 + }, + { + "epoch": 0.4165895775516497, + "grad_norm": 1.2042489051818848, + "learning_rate": 6.566914560426236e-06, + "loss": 0.6986, + "step": 8106 + }, + { + "epoch": 0.41664097029499436, + "grad_norm": 0.8376294374465942, + "learning_rate": 6.566124207916778e-06, + "loss": 0.6528, + "step": 8107 + }, + { + "epoch": 0.416692363038339, + "grad_norm": 1.0975004434585571, + "learning_rate": 6.5653338120164815e-06, + "loss": 0.7798, + "step": 8108 + }, + { + "epoch": 0.4167437557816836, + "grad_norm": 0.7418920993804932, + "learning_rate": 6.564543372747244e-06, + "loss": 0.6513, + "step": 8109 + }, + { + "epoch": 0.41679514852502825, + "grad_norm": 0.8498019576072693, + "learning_rate": 6.563752890130968e-06, + "loss": 0.6927, + "step": 8110 + }, + { + "epoch": 0.4168465412683729, + "grad_norm": 0.9223999381065369, + "learning_rate": 6.56296236418955e-06, + "loss": 0.6627, + "step": 8111 + }, + { + "epoch": 0.41689793401171754, + "grad_norm": 1.099507451057434, + "learning_rate": 6.562171794944897e-06, + "loss": 0.7436, + "step": 8112 + }, + { + "epoch": 0.4169493267550622, + "grad_norm": 1.1419322490692139, + "learning_rate": 6.5613811824189096e-06, + "loss": 0.7847, + "step": 8113 + }, + { + "epoch": 0.41700071949840684, + "grad_norm": 0.7048693299293518, + "learning_rate": 6.560590526633493e-06, + "loss": 0.6705, + "step": 8114 + }, + { + "epoch": 0.4170521122417515, + "grad_norm": 1.1191236972808838, + "learning_rate": 6.559799827610554e-06, + "loss": 0.743, + "step": 8115 + }, + { + "epoch": 0.41710350498509613, + "grad_norm": 1.1514685153961182, + "learning_rate": 6.559009085371997e-06, + "loss": 0.7673, + "step": 8116 + }, + { + "epoch": 0.4171548977284407, + "grad_norm": 0.7785151600837708, + "learning_rate": 6.558218299939731e-06, + "loss": 0.7033, + "step": 8117 + }, + { + "epoch": 0.41720629047178537, + "grad_norm": 1.200346827507019, + "learning_rate": 6.557427471335669e-06, + "loss": 0.7584, + "step": 8118 + }, + { + "epoch": 0.41725768321513, + "grad_norm": 1.0426331758499146, + "learning_rate": 6.556636599581717e-06, + "loss": 0.7791, + "step": 8119 + }, + { + "epoch": 0.41730907595847466, + "grad_norm": 1.114567756652832, + "learning_rate": 6.55584568469979e-06, + "loss": 0.8171, + "step": 8120 + }, + { + "epoch": 0.4173604687018193, + "grad_norm": 1.1332224607467651, + "learning_rate": 6.5550547267117995e-06, + "loss": 0.7365, + "step": 8121 + }, + { + "epoch": 0.41741186144516396, + "grad_norm": 0.8459557294845581, + "learning_rate": 6.5542637256396595e-06, + "loss": 0.6767, + "step": 8122 + }, + { + "epoch": 0.4174632541885086, + "grad_norm": 1.145411729812622, + "learning_rate": 6.553472681505286e-06, + "loss": 0.7782, + "step": 8123 + }, + { + "epoch": 0.4175146469318532, + "grad_norm": 0.7274598479270935, + "learning_rate": 6.552681594330594e-06, + "loss": 0.6582, + "step": 8124 + }, + { + "epoch": 0.41756603967519784, + "grad_norm": 1.0792773962020874, + "learning_rate": 6.551890464137505e-06, + "loss": 0.8218, + "step": 8125 + }, + { + "epoch": 0.4176174324185425, + "grad_norm": 1.070538878440857, + "learning_rate": 6.551099290947933e-06, + "loss": 0.7688, + "step": 8126 + }, + { + "epoch": 0.41766882516188714, + "grad_norm": 0.6803285479545593, + "learning_rate": 6.550308074783802e-06, + "loss": 0.7056, + "step": 8127 + }, + { + "epoch": 0.4177202179052318, + "grad_norm": 1.1391576528549194, + "learning_rate": 6.549516815667031e-06, + "loss": 0.747, + "step": 8128 + }, + { + "epoch": 0.41777161064857643, + "grad_norm": 1.0530043840408325, + "learning_rate": 6.548725513619545e-06, + "loss": 0.7213, + "step": 8129 + }, + { + "epoch": 0.4178230033919211, + "grad_norm": 1.0601760149002075, + "learning_rate": 6.547934168663265e-06, + "loss": 0.7486, + "step": 8130 + }, + { + "epoch": 0.4178743961352657, + "grad_norm": 1.0474406480789185, + "learning_rate": 6.547142780820119e-06, + "loss": 0.7432, + "step": 8131 + }, + { + "epoch": 0.4179257888786103, + "grad_norm": 0.7322412133216858, + "learning_rate": 6.546351350112028e-06, + "loss": 0.6714, + "step": 8132 + }, + { + "epoch": 0.41797718162195496, + "grad_norm": 0.9110136032104492, + "learning_rate": 6.545559876560925e-06, + "loss": 0.6782, + "step": 8133 + }, + { + "epoch": 0.4180285743652996, + "grad_norm": 1.0378695726394653, + "learning_rate": 6.544768360188736e-06, + "loss": 0.6847, + "step": 8134 + }, + { + "epoch": 0.41807996710864426, + "grad_norm": 1.090872883796692, + "learning_rate": 6.543976801017389e-06, + "loss": 0.7547, + "step": 8135 + }, + { + "epoch": 0.4181313598519889, + "grad_norm": 1.1600438356399536, + "learning_rate": 6.543185199068818e-06, + "loss": 0.7552, + "step": 8136 + }, + { + "epoch": 0.41818275259533355, + "grad_norm": 1.0688138008117676, + "learning_rate": 6.542393554364952e-06, + "loss": 0.795, + "step": 8137 + }, + { + "epoch": 0.4182341453386782, + "grad_norm": 1.0418423414230347, + "learning_rate": 6.541601866927727e-06, + "loss": 0.7949, + "step": 8138 + }, + { + "epoch": 0.41828553808202285, + "grad_norm": 1.0694576501846313, + "learning_rate": 6.540810136779075e-06, + "loss": 0.7317, + "step": 8139 + }, + { + "epoch": 0.41833693082536744, + "grad_norm": 1.0427879095077515, + "learning_rate": 6.540018363940933e-06, + "loss": 0.7773, + "step": 8140 + }, + { + "epoch": 0.4183883235687121, + "grad_norm": 0.7623458504676819, + "learning_rate": 6.539226548435238e-06, + "loss": 0.6889, + "step": 8141 + }, + { + "epoch": 0.41843971631205673, + "grad_norm": 0.7354753017425537, + "learning_rate": 6.538434690283928e-06, + "loss": 0.6813, + "step": 8142 + }, + { + "epoch": 0.4184911090554014, + "grad_norm": 0.9941310286521912, + "learning_rate": 6.53764278950894e-06, + "loss": 0.7138, + "step": 8143 + }, + { + "epoch": 0.418542501798746, + "grad_norm": 1.0244524478912354, + "learning_rate": 6.536850846132217e-06, + "loss": 0.7346, + "step": 8144 + }, + { + "epoch": 0.41859389454209067, + "grad_norm": 1.2205531597137451, + "learning_rate": 6.5360588601756994e-06, + "loss": 0.7523, + "step": 8145 + }, + { + "epoch": 0.4186452872854353, + "grad_norm": 1.0979371070861816, + "learning_rate": 6.535266831661327e-06, + "loss": 0.698, + "step": 8146 + }, + { + "epoch": 0.4186966800287799, + "grad_norm": 0.9743461012840271, + "learning_rate": 6.534474760611049e-06, + "loss": 0.7209, + "step": 8147 + }, + { + "epoch": 0.41874807277212456, + "grad_norm": 1.0523778200149536, + "learning_rate": 6.533682647046806e-06, + "loss": 0.6897, + "step": 8148 + }, + { + "epoch": 0.4187994655154692, + "grad_norm": 1.0779714584350586, + "learning_rate": 6.532890490990548e-06, + "loss": 0.7488, + "step": 8149 + }, + { + "epoch": 0.41885085825881385, + "grad_norm": 1.1054168939590454, + "learning_rate": 6.53209829246422e-06, + "loss": 0.7674, + "step": 8150 + }, + { + "epoch": 0.4189022510021585, + "grad_norm": 1.0905927419662476, + "learning_rate": 6.531306051489769e-06, + "loss": 0.7606, + "step": 8151 + }, + { + "epoch": 0.41895364374550315, + "grad_norm": 1.1185486316680908, + "learning_rate": 6.530513768089148e-06, + "loss": 0.7367, + "step": 8152 + }, + { + "epoch": 0.4190050364888478, + "grad_norm": 0.8157680630683899, + "learning_rate": 6.529721442284308e-06, + "loss": 0.7016, + "step": 8153 + }, + { + "epoch": 0.41905642923219244, + "grad_norm": 1.058084487915039, + "learning_rate": 6.5289290740971975e-06, + "loss": 0.7433, + "step": 8154 + }, + { + "epoch": 0.41910782197553703, + "grad_norm": 1.0676947832107544, + "learning_rate": 6.5281366635497734e-06, + "loss": 0.7135, + "step": 8155 + }, + { + "epoch": 0.4191592147188817, + "grad_norm": 1.0593070983886719, + "learning_rate": 6.527344210663986e-06, + "loss": 0.7141, + "step": 8156 + }, + { + "epoch": 0.4192106074622263, + "grad_norm": 0.774882435798645, + "learning_rate": 6.526551715461796e-06, + "loss": 0.683, + "step": 8157 + }, + { + "epoch": 0.419262000205571, + "grad_norm": 1.029242753982544, + "learning_rate": 6.525759177965157e-06, + "loss": 0.7385, + "step": 8158 + }, + { + "epoch": 0.4193133929489156, + "grad_norm": 1.0384578704833984, + "learning_rate": 6.5249665981960275e-06, + "loss": 0.7523, + "step": 8159 + }, + { + "epoch": 0.41936478569226027, + "grad_norm": 0.8123130798339844, + "learning_rate": 6.5241739761763664e-06, + "loss": 0.7297, + "step": 8160 + }, + { + "epoch": 0.4194161784356049, + "grad_norm": 0.7716857194900513, + "learning_rate": 6.523381311928134e-06, + "loss": 0.7107, + "step": 8161 + }, + { + "epoch": 0.4194675711789495, + "grad_norm": 0.8681047558784485, + "learning_rate": 6.522588605473293e-06, + "loss": 0.6663, + "step": 8162 + }, + { + "epoch": 0.41951896392229415, + "grad_norm": 1.02434504032135, + "learning_rate": 6.521795856833806e-06, + "loss": 0.7231, + "step": 8163 + }, + { + "epoch": 0.4195703566656388, + "grad_norm": 1.1238293647766113, + "learning_rate": 6.521003066031634e-06, + "loss": 0.7505, + "step": 8164 + }, + { + "epoch": 0.41962174940898345, + "grad_norm": 0.7427038550376892, + "learning_rate": 6.520210233088745e-06, + "loss": 0.674, + "step": 8165 + }, + { + "epoch": 0.4196731421523281, + "grad_norm": 1.0484944581985474, + "learning_rate": 6.519417358027104e-06, + "loss": 0.7615, + "step": 8166 + }, + { + "epoch": 0.41972453489567274, + "grad_norm": 1.1346802711486816, + "learning_rate": 6.5186244408686775e-06, + "loss": 0.7546, + "step": 8167 + }, + { + "epoch": 0.4197759276390174, + "grad_norm": 0.9560454487800598, + "learning_rate": 6.517831481635435e-06, + "loss": 0.6599, + "step": 8168 + }, + { + "epoch": 0.41982732038236203, + "grad_norm": 1.0495399236679077, + "learning_rate": 6.517038480349345e-06, + "loss": 0.8055, + "step": 8169 + }, + { + "epoch": 0.4198787131257066, + "grad_norm": 0.9989027976989746, + "learning_rate": 6.51624543703238e-06, + "loss": 0.7269, + "step": 8170 + }, + { + "epoch": 0.4199301058690513, + "grad_norm": 1.1640007495880127, + "learning_rate": 6.515452351706511e-06, + "loss": 0.7729, + "step": 8171 + }, + { + "epoch": 0.4199814986123959, + "grad_norm": 1.127619981765747, + "learning_rate": 6.514659224393711e-06, + "loss": 0.785, + "step": 8172 + }, + { + "epoch": 0.42003289135574057, + "grad_norm": 0.7142515778541565, + "learning_rate": 6.513866055115956e-06, + "loss": 0.6658, + "step": 8173 + }, + { + "epoch": 0.4200842840990852, + "grad_norm": 1.0726038217544556, + "learning_rate": 6.513072843895219e-06, + "loss": 0.716, + "step": 8174 + }, + { + "epoch": 0.42013567684242986, + "grad_norm": 1.1132415533065796, + "learning_rate": 6.5122795907534775e-06, + "loss": 0.7281, + "step": 8175 + }, + { + "epoch": 0.4201870695857745, + "grad_norm": 1.0760494470596313, + "learning_rate": 6.51148629571271e-06, + "loss": 0.7501, + "step": 8176 + }, + { + "epoch": 0.42023846232911916, + "grad_norm": 1.1639372110366821, + "learning_rate": 6.510692958794893e-06, + "loss": 0.7707, + "step": 8177 + }, + { + "epoch": 0.42028985507246375, + "grad_norm": 1.1178914308547974, + "learning_rate": 6.50989958002201e-06, + "loss": 0.7497, + "step": 8178 + }, + { + "epoch": 0.4203412478158084, + "grad_norm": 0.7563962340354919, + "learning_rate": 6.50910615941604e-06, + "loss": 0.6238, + "step": 8179 + }, + { + "epoch": 0.42039264055915304, + "grad_norm": 1.0762879848480225, + "learning_rate": 6.5083126969989656e-06, + "loss": 0.7632, + "step": 8180 + }, + { + "epoch": 0.4204440333024977, + "grad_norm": 0.7737055420875549, + "learning_rate": 6.507519192792771e-06, + "loss": 0.6472, + "step": 8181 + }, + { + "epoch": 0.42049542604584234, + "grad_norm": 1.0744820833206177, + "learning_rate": 6.5067256468194425e-06, + "loss": 0.6937, + "step": 8182 + }, + { + "epoch": 0.420546818789187, + "grad_norm": 1.1416133642196655, + "learning_rate": 6.505932059100962e-06, + "loss": 0.7487, + "step": 8183 + }, + { + "epoch": 0.42059821153253163, + "grad_norm": 1.0580724477767944, + "learning_rate": 6.5051384296593204e-06, + "loss": 0.7186, + "step": 8184 + }, + { + "epoch": 0.4206496042758762, + "grad_norm": 1.0841445922851562, + "learning_rate": 6.504344758516503e-06, + "loss": 0.7685, + "step": 8185 + }, + { + "epoch": 0.42070099701922087, + "grad_norm": 1.056885004043579, + "learning_rate": 6.5035510456945e-06, + "loss": 0.7118, + "step": 8186 + }, + { + "epoch": 0.4207523897625655, + "grad_norm": 0.7877774238586426, + "learning_rate": 6.502757291215305e-06, + "loss": 0.6776, + "step": 8187 + }, + { + "epoch": 0.42080378250591016, + "grad_norm": 1.0735968351364136, + "learning_rate": 6.5019634951009045e-06, + "loss": 0.783, + "step": 8188 + }, + { + "epoch": 0.4208551752492548, + "grad_norm": 0.78546142578125, + "learning_rate": 6.5011696573732926e-06, + "loss": 0.6954, + "step": 8189 + }, + { + "epoch": 0.42090656799259946, + "grad_norm": 1.0860130786895752, + "learning_rate": 6.500375778054467e-06, + "loss": 0.7494, + "step": 8190 + }, + { + "epoch": 0.4209579607359441, + "grad_norm": 1.1014548540115356, + "learning_rate": 6.499581857166419e-06, + "loss": 0.6635, + "step": 8191 + }, + { + "epoch": 0.42100935347928875, + "grad_norm": 1.1342618465423584, + "learning_rate": 6.498787894731148e-06, + "loss": 0.8156, + "step": 8192 + }, + { + "epoch": 0.42106074622263334, + "grad_norm": 0.961471438407898, + "learning_rate": 6.4979938907706475e-06, + "loss": 0.7226, + "step": 8193 + }, + { + "epoch": 0.421112138965978, + "grad_norm": 1.146230697631836, + "learning_rate": 6.497199845306919e-06, + "loss": 0.7963, + "step": 8194 + }, + { + "epoch": 0.42116353170932264, + "grad_norm": 0.7355208396911621, + "learning_rate": 6.496405758361962e-06, + "loss": 0.6771, + "step": 8195 + }, + { + "epoch": 0.4212149244526673, + "grad_norm": 1.1335139274597168, + "learning_rate": 6.4956116299577756e-06, + "loss": 0.7597, + "step": 8196 + }, + { + "epoch": 0.42126631719601193, + "grad_norm": 1.089436411857605, + "learning_rate": 6.494817460116364e-06, + "loss": 0.743, + "step": 8197 + }, + { + "epoch": 0.4213177099393566, + "grad_norm": 1.0815538167953491, + "learning_rate": 6.494023248859728e-06, + "loss": 0.7621, + "step": 8198 + }, + { + "epoch": 0.4213691026827012, + "grad_norm": 0.7486788630485535, + "learning_rate": 6.493228996209872e-06, + "loss": 0.659, + "step": 8199 + }, + { + "epoch": 0.4214204954260458, + "grad_norm": 0.7722164392471313, + "learning_rate": 6.492434702188806e-06, + "loss": 0.6477, + "step": 8200 + }, + { + "epoch": 0.42147188816939046, + "grad_norm": 1.069804072380066, + "learning_rate": 6.491640366818531e-06, + "loss": 0.7261, + "step": 8201 + }, + { + "epoch": 0.4215232809127351, + "grad_norm": 1.0602997541427612, + "learning_rate": 6.4908459901210596e-06, + "loss": 0.704, + "step": 8202 + }, + { + "epoch": 0.42157467365607976, + "grad_norm": 1.0329790115356445, + "learning_rate": 6.4900515721183966e-06, + "loss": 0.7347, + "step": 8203 + }, + { + "epoch": 0.4216260663994244, + "grad_norm": 1.058082938194275, + "learning_rate": 6.489257112832553e-06, + "loss": 0.7159, + "step": 8204 + }, + { + "epoch": 0.42167745914276905, + "grad_norm": 1.0252909660339355, + "learning_rate": 6.488462612285542e-06, + "loss": 0.7619, + "step": 8205 + }, + { + "epoch": 0.4217288518861137, + "grad_norm": 1.0015450716018677, + "learning_rate": 6.487668070499374e-06, + "loss": 0.7076, + "step": 8206 + }, + { + "epoch": 0.42178024462945835, + "grad_norm": 0.8854926228523254, + "learning_rate": 6.486873487496063e-06, + "loss": 0.7281, + "step": 8207 + }, + { + "epoch": 0.42183163737280294, + "grad_norm": 0.7147998213768005, + "learning_rate": 6.486078863297623e-06, + "loss": 0.708, + "step": 8208 + }, + { + "epoch": 0.4218830301161476, + "grad_norm": 0.698016345500946, + "learning_rate": 6.48528419792607e-06, + "loss": 0.6967, + "step": 8209 + }, + { + "epoch": 0.42193442285949223, + "grad_norm": 1.0992891788482666, + "learning_rate": 6.484489491403422e-06, + "loss": 0.7381, + "step": 8210 + }, + { + "epoch": 0.4219858156028369, + "grad_norm": 1.1006677150726318, + "learning_rate": 6.483694743751696e-06, + "loss": 0.7324, + "step": 8211 + }, + { + "epoch": 0.4220372083461815, + "grad_norm": 1.140717625617981, + "learning_rate": 6.482899954992911e-06, + "loss": 0.7719, + "step": 8212 + }, + { + "epoch": 0.42208860108952617, + "grad_norm": 1.074892282485962, + "learning_rate": 6.48210512514909e-06, + "loss": 0.7955, + "step": 8213 + }, + { + "epoch": 0.4221399938328708, + "grad_norm": 1.069056749343872, + "learning_rate": 6.481310254242252e-06, + "loss": 0.7716, + "step": 8214 + }, + { + "epoch": 0.42219138657621547, + "grad_norm": 1.1065367460250854, + "learning_rate": 6.480515342294418e-06, + "loss": 0.774, + "step": 8215 + }, + { + "epoch": 0.42224277931956006, + "grad_norm": 1.0123153924942017, + "learning_rate": 6.479720389327615e-06, + "loss": 0.7241, + "step": 8216 + }, + { + "epoch": 0.4222941720629047, + "grad_norm": 1.10072922706604, + "learning_rate": 6.478925395363866e-06, + "loss": 0.7444, + "step": 8217 + }, + { + "epoch": 0.42234556480624935, + "grad_norm": 1.048885464668274, + "learning_rate": 6.478130360425197e-06, + "loss": 0.6838, + "step": 8218 + }, + { + "epoch": 0.422396957549594, + "grad_norm": 1.0726813077926636, + "learning_rate": 6.4773352845336345e-06, + "loss": 0.7387, + "step": 8219 + }, + { + "epoch": 0.42244835029293865, + "grad_norm": 0.7953245043754578, + "learning_rate": 6.47654016771121e-06, + "loss": 0.6689, + "step": 8220 + }, + { + "epoch": 0.4224997430362833, + "grad_norm": 1.119653582572937, + "learning_rate": 6.47574500997995e-06, + "loss": 0.7108, + "step": 8221 + }, + { + "epoch": 0.42255113577962794, + "grad_norm": 1.112272024154663, + "learning_rate": 6.4749498113618855e-06, + "loss": 0.7978, + "step": 8222 + }, + { + "epoch": 0.42260252852297253, + "grad_norm": 1.0212616920471191, + "learning_rate": 6.4741545718790485e-06, + "loss": 0.702, + "step": 8223 + }, + { + "epoch": 0.4226539212663172, + "grad_norm": 1.0341118574142456, + "learning_rate": 6.473359291553474e-06, + "loss": 0.7637, + "step": 8224 + }, + { + "epoch": 0.4227053140096618, + "grad_norm": 0.8750165104866028, + "learning_rate": 6.472563970407191e-06, + "loss": 0.7004, + "step": 8225 + }, + { + "epoch": 0.4227567067530065, + "grad_norm": 0.7670082449913025, + "learning_rate": 6.471768608462239e-06, + "loss": 0.6819, + "step": 8226 + }, + { + "epoch": 0.4228080994963511, + "grad_norm": 1.1221908330917358, + "learning_rate": 6.470973205740651e-06, + "loss": 0.7734, + "step": 8227 + }, + { + "epoch": 0.42285949223969577, + "grad_norm": 1.0767858028411865, + "learning_rate": 6.4701777622644665e-06, + "loss": 0.702, + "step": 8228 + }, + { + "epoch": 0.4229108849830404, + "grad_norm": 1.0595110654830933, + "learning_rate": 6.469382278055723e-06, + "loss": 0.765, + "step": 8229 + }, + { + "epoch": 0.42296227772638506, + "grad_norm": 1.0989242792129517, + "learning_rate": 6.468586753136461e-06, + "loss": 0.7695, + "step": 8230 + }, + { + "epoch": 0.42301367046972965, + "grad_norm": 1.024972677230835, + "learning_rate": 6.467791187528719e-06, + "loss": 0.7237, + "step": 8231 + }, + { + "epoch": 0.4230650632130743, + "grad_norm": 0.9777402281761169, + "learning_rate": 6.466995581254543e-06, + "loss": 0.7566, + "step": 8232 + }, + { + "epoch": 0.42311645595641895, + "grad_norm": 1.055586338043213, + "learning_rate": 6.4661999343359705e-06, + "loss": 0.71, + "step": 8233 + }, + { + "epoch": 0.4231678486997636, + "grad_norm": 1.0453256368637085, + "learning_rate": 6.465404246795051e-06, + "loss": 0.6905, + "step": 8234 + }, + { + "epoch": 0.42321924144310824, + "grad_norm": 1.0758497714996338, + "learning_rate": 6.464608518653827e-06, + "loss": 0.6696, + "step": 8235 + }, + { + "epoch": 0.4232706341864529, + "grad_norm": 1.1033661365509033, + "learning_rate": 6.463812749934343e-06, + "loss": 0.7654, + "step": 8236 + }, + { + "epoch": 0.42332202692979753, + "grad_norm": 1.008488416671753, + "learning_rate": 6.463016940658649e-06, + "loss": 0.703, + "step": 8237 + }, + { + "epoch": 0.4233734196731421, + "grad_norm": 0.7929936051368713, + "learning_rate": 6.4622210908487946e-06, + "loss": 0.6784, + "step": 8238 + }, + { + "epoch": 0.4234248124164868, + "grad_norm": 1.001913070678711, + "learning_rate": 6.461425200526828e-06, + "loss": 0.7131, + "step": 8239 + }, + { + "epoch": 0.4234762051598314, + "grad_norm": 0.8796018362045288, + "learning_rate": 6.460629269714797e-06, + "loss": 0.6782, + "step": 8240 + }, + { + "epoch": 0.42352759790317607, + "grad_norm": 1.1097356081008911, + "learning_rate": 6.459833298434759e-06, + "loss": 0.774, + "step": 8241 + }, + { + "epoch": 0.4235789906465207, + "grad_norm": 1.0712469816207886, + "learning_rate": 6.459037286708764e-06, + "loss": 0.7674, + "step": 8242 + }, + { + "epoch": 0.42363038338986536, + "grad_norm": 1.0817714929580688, + "learning_rate": 6.458241234558869e-06, + "loss": 0.7353, + "step": 8243 + }, + { + "epoch": 0.42368177613321, + "grad_norm": 1.0979995727539062, + "learning_rate": 6.457445142007125e-06, + "loss": 0.742, + "step": 8244 + }, + { + "epoch": 0.42373316887655466, + "grad_norm": 1.081201195716858, + "learning_rate": 6.4566490090755904e-06, + "loss": 0.7751, + "step": 8245 + }, + { + "epoch": 0.42378456161989925, + "grad_norm": 1.105096459388733, + "learning_rate": 6.455852835786325e-06, + "loss": 0.7362, + "step": 8246 + }, + { + "epoch": 0.4238359543632439, + "grad_norm": 0.9971508383750916, + "learning_rate": 6.455056622161384e-06, + "loss": 0.701, + "step": 8247 + }, + { + "epoch": 0.42388734710658854, + "grad_norm": 0.9921983480453491, + "learning_rate": 6.4542603682228285e-06, + "loss": 0.7474, + "step": 8248 + }, + { + "epoch": 0.4239387398499332, + "grad_norm": 1.0649241209030151, + "learning_rate": 6.45346407399272e-06, + "loss": 0.7212, + "step": 8249 + }, + { + "epoch": 0.42399013259327784, + "grad_norm": 1.233341097831726, + "learning_rate": 6.452667739493119e-06, + "loss": 0.7854, + "step": 8250 + }, + { + "epoch": 0.4240415253366225, + "grad_norm": 1.1549886465072632, + "learning_rate": 6.451871364746092e-06, + "loss": 0.7424, + "step": 8251 + }, + { + "epoch": 0.42409291807996713, + "grad_norm": 1.1375632286071777, + "learning_rate": 6.451074949773699e-06, + "loss": 0.7203, + "step": 8252 + }, + { + "epoch": 0.4241443108233117, + "grad_norm": 1.0938986539840698, + "learning_rate": 6.450278494598009e-06, + "loss": 0.7527, + "step": 8253 + }, + { + "epoch": 0.42419570356665637, + "grad_norm": 1.110064148902893, + "learning_rate": 6.449481999241086e-06, + "loss": 0.8271, + "step": 8254 + }, + { + "epoch": 0.424247096310001, + "grad_norm": 1.0966453552246094, + "learning_rate": 6.4486854637249985e-06, + "loss": 0.7106, + "step": 8255 + }, + { + "epoch": 0.42429848905334566, + "grad_norm": 1.0951966047286987, + "learning_rate": 6.447888888071816e-06, + "loss": 0.7327, + "step": 8256 + }, + { + "epoch": 0.4243498817966903, + "grad_norm": 1.0976653099060059, + "learning_rate": 6.447092272303606e-06, + "loss": 0.7998, + "step": 8257 + }, + { + "epoch": 0.42440127454003496, + "grad_norm": 1.0412391424179077, + "learning_rate": 6.4462956164424415e-06, + "loss": 0.7088, + "step": 8258 + }, + { + "epoch": 0.4244526672833796, + "grad_norm": 1.0415542125701904, + "learning_rate": 6.445498920510395e-06, + "loss": 0.7646, + "step": 8259 + }, + { + "epoch": 0.42450406002672425, + "grad_norm": 1.1661365032196045, + "learning_rate": 6.444702184529537e-06, + "loss": 0.7496, + "step": 8260 + }, + { + "epoch": 0.42455545277006884, + "grad_norm": 1.090607762336731, + "learning_rate": 6.443905408521943e-06, + "loss": 0.7133, + "step": 8261 + }, + { + "epoch": 0.4246068455134135, + "grad_norm": 1.1093566417694092, + "learning_rate": 6.44310859250969e-06, + "loss": 0.7079, + "step": 8262 + }, + { + "epoch": 0.42465823825675814, + "grad_norm": 1.0579400062561035, + "learning_rate": 6.442311736514853e-06, + "loss": 0.6838, + "step": 8263 + }, + { + "epoch": 0.4247096310001028, + "grad_norm": 4.69133996963501, + "learning_rate": 6.441514840559511e-06, + "loss": 0.8511, + "step": 8264 + }, + { + "epoch": 0.42476102374344743, + "grad_norm": 1.1832209825515747, + "learning_rate": 6.44071790466574e-06, + "loss": 0.7482, + "step": 8265 + }, + { + "epoch": 0.4248124164867921, + "grad_norm": 1.0471915006637573, + "learning_rate": 6.439920928855623e-06, + "loss": 0.7638, + "step": 8266 + }, + { + "epoch": 0.4248638092301367, + "grad_norm": 1.0906472206115723, + "learning_rate": 6.439123913151238e-06, + "loss": 0.6992, + "step": 8267 + }, + { + "epoch": 0.42491520197348137, + "grad_norm": 1.0624840259552002, + "learning_rate": 6.438326857574668e-06, + "loss": 0.7534, + "step": 8268 + }, + { + "epoch": 0.42496659471682596, + "grad_norm": 0.8258217573165894, + "learning_rate": 6.437529762147997e-06, + "loss": 0.6908, + "step": 8269 + }, + { + "epoch": 0.4250179874601706, + "grad_norm": 1.0538914203643799, + "learning_rate": 6.436732626893308e-06, + "loss": 0.7921, + "step": 8270 + }, + { + "epoch": 0.42506938020351526, + "grad_norm": 1.1080495119094849, + "learning_rate": 6.435935451832686e-06, + "loss": 0.7462, + "step": 8271 + }, + { + "epoch": 0.4251207729468599, + "grad_norm": 1.071796178817749, + "learning_rate": 6.435138236988221e-06, + "loss": 0.7626, + "step": 8272 + }, + { + "epoch": 0.42517216569020455, + "grad_norm": 1.04335355758667, + "learning_rate": 6.434340982381997e-06, + "loss": 0.7178, + "step": 8273 + }, + { + "epoch": 0.4252235584335492, + "grad_norm": 1.0774140357971191, + "learning_rate": 6.4335436880361035e-06, + "loss": 0.7633, + "step": 8274 + }, + { + "epoch": 0.42527495117689385, + "grad_norm": 1.23483407497406, + "learning_rate": 6.4327463539726295e-06, + "loss": 0.8217, + "step": 8275 + }, + { + "epoch": 0.42532634392023844, + "grad_norm": 1.067987322807312, + "learning_rate": 6.431948980213667e-06, + "loss": 0.6737, + "step": 8276 + }, + { + "epoch": 0.4253777366635831, + "grad_norm": 1.1560516357421875, + "learning_rate": 6.4311515667813085e-06, + "loss": 0.7805, + "step": 8277 + }, + { + "epoch": 0.42542912940692773, + "grad_norm": 1.0915915966033936, + "learning_rate": 6.430354113697645e-06, + "loss": 0.7641, + "step": 8278 + }, + { + "epoch": 0.4254805221502724, + "grad_norm": 0.8349462151527405, + "learning_rate": 6.429556620984772e-06, + "loss": 0.6726, + "step": 8279 + }, + { + "epoch": 0.425531914893617, + "grad_norm": 1.0294008255004883, + "learning_rate": 6.4287590886647845e-06, + "loss": 0.7656, + "step": 8280 + }, + { + "epoch": 0.42558330763696167, + "grad_norm": 0.8001941442489624, + "learning_rate": 6.42796151675978e-06, + "loss": 0.6581, + "step": 8281 + }, + { + "epoch": 0.4256347003803063, + "grad_norm": 1.0716136693954468, + "learning_rate": 6.4271639052918536e-06, + "loss": 0.7209, + "step": 8282 + }, + { + "epoch": 0.42568609312365097, + "grad_norm": 1.0247186422348022, + "learning_rate": 6.426366254283105e-06, + "loss": 0.7553, + "step": 8283 + }, + { + "epoch": 0.42573748586699556, + "grad_norm": 0.7531420588493347, + "learning_rate": 6.425568563755633e-06, + "loss": 0.6475, + "step": 8284 + }, + { + "epoch": 0.4257888786103402, + "grad_norm": 1.0480122566223145, + "learning_rate": 6.4247708337315394e-06, + "loss": 0.7917, + "step": 8285 + }, + { + "epoch": 0.42584027135368485, + "grad_norm": 1.0335288047790527, + "learning_rate": 6.423973064232926e-06, + "loss": 0.732, + "step": 8286 + }, + { + "epoch": 0.4258916640970295, + "grad_norm": 0.7679970264434814, + "learning_rate": 6.423175255281896e-06, + "loss": 0.684, + "step": 8287 + }, + { + "epoch": 0.42594305684037415, + "grad_norm": 1.181100606918335, + "learning_rate": 6.422377406900553e-06, + "loss": 0.795, + "step": 8288 + }, + { + "epoch": 0.4259944495837188, + "grad_norm": 1.07860267162323, + "learning_rate": 6.421579519111e-06, + "loss": 0.7791, + "step": 8289 + }, + { + "epoch": 0.42604584232706344, + "grad_norm": 1.1637686491012573, + "learning_rate": 6.4207815919353465e-06, + "loss": 0.7725, + "step": 8290 + }, + { + "epoch": 0.42609723507040803, + "grad_norm": 1.0649746656417847, + "learning_rate": 6.419983625395697e-06, + "loss": 0.7547, + "step": 8291 + }, + { + "epoch": 0.4261486278137527, + "grad_norm": 1.1197888851165771, + "learning_rate": 6.4191856195141625e-06, + "loss": 0.7846, + "step": 8292 + }, + { + "epoch": 0.4262000205570973, + "grad_norm": 1.0475739240646362, + "learning_rate": 6.418387574312851e-06, + "loss": 0.7388, + "step": 8293 + }, + { + "epoch": 0.426251413300442, + "grad_norm": 1.0338460206985474, + "learning_rate": 6.417589489813871e-06, + "loss": 0.7257, + "step": 8294 + }, + { + "epoch": 0.4263028060437866, + "grad_norm": 1.2435382604599, + "learning_rate": 6.416791366039339e-06, + "loss": 0.7656, + "step": 8295 + }, + { + "epoch": 0.42635419878713127, + "grad_norm": 1.0318820476531982, + "learning_rate": 6.415993203011365e-06, + "loss": 0.7093, + "step": 8296 + }, + { + "epoch": 0.4264055915304759, + "grad_norm": 0.7912799715995789, + "learning_rate": 6.4151950007520616e-06, + "loss": 0.6494, + "step": 8297 + }, + { + "epoch": 0.42645698427382056, + "grad_norm": 0.6895196437835693, + "learning_rate": 6.414396759283546e-06, + "loss": 0.686, + "step": 8298 + }, + { + "epoch": 0.42650837701716515, + "grad_norm": 1.133817195892334, + "learning_rate": 6.413598478627932e-06, + "loss": 0.7652, + "step": 8299 + }, + { + "epoch": 0.4265597697605098, + "grad_norm": 0.8014428615570068, + "learning_rate": 6.412800158807337e-06, + "loss": 0.6668, + "step": 8300 + }, + { + "epoch": 0.42661116250385445, + "grad_norm": 1.1080423593521118, + "learning_rate": 6.412001799843881e-06, + "loss": 0.813, + "step": 8301 + }, + { + "epoch": 0.4266625552471991, + "grad_norm": 1.0469075441360474, + "learning_rate": 6.411203401759682e-06, + "loss": 0.7301, + "step": 8302 + }, + { + "epoch": 0.42671394799054374, + "grad_norm": 3.1170568466186523, + "learning_rate": 6.410404964576861e-06, + "loss": 0.708, + "step": 8303 + }, + { + "epoch": 0.4267653407338884, + "grad_norm": 1.0842232704162598, + "learning_rate": 6.409606488317538e-06, + "loss": 0.7044, + "step": 8304 + }, + { + "epoch": 0.42681673347723303, + "grad_norm": 1.0441584587097168, + "learning_rate": 6.408807973003837e-06, + "loss": 0.6987, + "step": 8305 + }, + { + "epoch": 0.4268681262205777, + "grad_norm": 1.0478596687316895, + "learning_rate": 6.40800941865788e-06, + "loss": 0.7312, + "step": 8306 + }, + { + "epoch": 0.4269195189639223, + "grad_norm": 1.0818253755569458, + "learning_rate": 6.407210825301794e-06, + "loss": 0.715, + "step": 8307 + }, + { + "epoch": 0.4269709117072669, + "grad_norm": 1.0268062353134155, + "learning_rate": 6.4064121929577016e-06, + "loss": 0.7237, + "step": 8308 + }, + { + "epoch": 0.42702230445061157, + "grad_norm": 1.077306866645813, + "learning_rate": 6.405613521647732e-06, + "loss": 0.7896, + "step": 8309 + }, + { + "epoch": 0.4270736971939562, + "grad_norm": 1.078835129737854, + "learning_rate": 6.4048148113940115e-06, + "loss": 0.7085, + "step": 8310 + }, + { + "epoch": 0.42712508993730086, + "grad_norm": 1.4233893156051636, + "learning_rate": 6.404016062218672e-06, + "loss": 0.693, + "step": 8311 + }, + { + "epoch": 0.4271764826806455, + "grad_norm": 0.7691375017166138, + "learning_rate": 6.4032172741438395e-06, + "loss": 0.6566, + "step": 8312 + }, + { + "epoch": 0.42722787542399016, + "grad_norm": 1.108933687210083, + "learning_rate": 6.402418447191646e-06, + "loss": 0.7449, + "step": 8313 + }, + { + "epoch": 0.42727926816733475, + "grad_norm": 1.1395201683044434, + "learning_rate": 6.401619581384227e-06, + "loss": 0.7496, + "step": 8314 + }, + { + "epoch": 0.4273306609106794, + "grad_norm": 1.030959963798523, + "learning_rate": 6.400820676743714e-06, + "loss": 0.7094, + "step": 8315 + }, + { + "epoch": 0.42738205365402404, + "grad_norm": 1.120534062385559, + "learning_rate": 6.400021733292239e-06, + "loss": 0.8133, + "step": 8316 + }, + { + "epoch": 0.4274334463973687, + "grad_norm": 0.7026606798171997, + "learning_rate": 6.3992227510519415e-06, + "loss": 0.6219, + "step": 8317 + }, + { + "epoch": 0.42748483914071334, + "grad_norm": 1.1032322645187378, + "learning_rate": 6.398423730044954e-06, + "loss": 0.7206, + "step": 8318 + }, + { + "epoch": 0.427536231884058, + "grad_norm": 1.0905557870864868, + "learning_rate": 6.397624670293417e-06, + "loss": 0.7605, + "step": 8319 + }, + { + "epoch": 0.42758762462740263, + "grad_norm": 1.079391360282898, + "learning_rate": 6.396825571819467e-06, + "loss": 0.7473, + "step": 8320 + }, + { + "epoch": 0.4276390173707473, + "grad_norm": 1.072669267654419, + "learning_rate": 6.396026434645245e-06, + "loss": 0.7349, + "step": 8321 + }, + { + "epoch": 0.42769041011409187, + "grad_norm": 1.160841703414917, + "learning_rate": 6.395227258792891e-06, + "loss": 0.7994, + "step": 8322 + }, + { + "epoch": 0.4277418028574365, + "grad_norm": 0.8698166608810425, + "learning_rate": 6.394428044284549e-06, + "loss": 0.7168, + "step": 8323 + }, + { + "epoch": 0.42779319560078116, + "grad_norm": 0.7399750351905823, + "learning_rate": 6.393628791142359e-06, + "loss": 0.6734, + "step": 8324 + }, + { + "epoch": 0.4278445883441258, + "grad_norm": 1.2460758686065674, + "learning_rate": 6.392829499388467e-06, + "loss": 0.6618, + "step": 8325 + }, + { + "epoch": 0.42789598108747046, + "grad_norm": 1.0808895826339722, + "learning_rate": 6.392030169045016e-06, + "loss": 0.6902, + "step": 8326 + }, + { + "epoch": 0.4279473738308151, + "grad_norm": 1.0457595586776733, + "learning_rate": 6.391230800134156e-06, + "loss": 0.7312, + "step": 8327 + }, + { + "epoch": 0.42799876657415975, + "grad_norm": 1.0832843780517578, + "learning_rate": 6.3904313926780314e-06, + "loss": 0.7799, + "step": 8328 + }, + { + "epoch": 0.42805015931750434, + "grad_norm": 1.0643500089645386, + "learning_rate": 6.389631946698789e-06, + "loss": 0.7253, + "step": 8329 + }, + { + "epoch": 0.428101552060849, + "grad_norm": 1.1019455194473267, + "learning_rate": 6.388832462218582e-06, + "loss": 0.714, + "step": 8330 + }, + { + "epoch": 0.42815294480419364, + "grad_norm": 1.0317736864089966, + "learning_rate": 6.388032939259558e-06, + "loss": 0.7292, + "step": 8331 + }, + { + "epoch": 0.4282043375475383, + "grad_norm": 1.0136743783950806, + "learning_rate": 6.387233377843869e-06, + "loss": 0.6877, + "step": 8332 + }, + { + "epoch": 0.42825573029088293, + "grad_norm": 1.0540369749069214, + "learning_rate": 6.386433777993668e-06, + "loss": 0.7252, + "step": 8333 + }, + { + "epoch": 0.4283071230342276, + "grad_norm": 1.1630367040634155, + "learning_rate": 6.385634139731109e-06, + "loss": 0.7385, + "step": 8334 + }, + { + "epoch": 0.4283585157775722, + "grad_norm": 0.8848607540130615, + "learning_rate": 6.384834463078345e-06, + "loss": 0.687, + "step": 8335 + }, + { + "epoch": 0.42840990852091687, + "grad_norm": 1.1070735454559326, + "learning_rate": 6.384034748057535e-06, + "loss": 0.7533, + "step": 8336 + }, + { + "epoch": 0.42846130126426146, + "grad_norm": 1.2160663604736328, + "learning_rate": 6.383234994690832e-06, + "loss": 0.8477, + "step": 8337 + }, + { + "epoch": 0.4285126940076061, + "grad_norm": 1.0909734964370728, + "learning_rate": 6.382435203000396e-06, + "loss": 0.7801, + "step": 8338 + }, + { + "epoch": 0.42856408675095076, + "grad_norm": 1.1377675533294678, + "learning_rate": 6.381635373008387e-06, + "loss": 0.7847, + "step": 8339 + }, + { + "epoch": 0.4286154794942954, + "grad_norm": 1.065069317817688, + "learning_rate": 6.380835504736962e-06, + "loss": 0.697, + "step": 8340 + }, + { + "epoch": 0.42866687223764005, + "grad_norm": 1.0549118518829346, + "learning_rate": 6.3800355982082825e-06, + "loss": 0.7453, + "step": 8341 + }, + { + "epoch": 0.4287182649809847, + "grad_norm": 1.0431360006332397, + "learning_rate": 6.379235653444513e-06, + "loss": 0.7806, + "step": 8342 + }, + { + "epoch": 0.42876965772432934, + "grad_norm": 0.929344654083252, + "learning_rate": 6.378435670467815e-06, + "loss": 0.6732, + "step": 8343 + }, + { + "epoch": 0.428821050467674, + "grad_norm": 1.1320505142211914, + "learning_rate": 6.377635649300353e-06, + "loss": 0.7277, + "step": 8344 + }, + { + "epoch": 0.4288724432110186, + "grad_norm": 1.0407710075378418, + "learning_rate": 6.376835589964292e-06, + "loss": 0.7322, + "step": 8345 + }, + { + "epoch": 0.42892383595436323, + "grad_norm": 1.1120134592056274, + "learning_rate": 6.3760354924818e-06, + "loss": 0.7101, + "step": 8346 + }, + { + "epoch": 0.4289752286977079, + "grad_norm": 1.073689579963684, + "learning_rate": 6.375235356875042e-06, + "loss": 0.7608, + "step": 8347 + }, + { + "epoch": 0.4290266214410525, + "grad_norm": 0.7342345714569092, + "learning_rate": 6.374435183166188e-06, + "loss": 0.6786, + "step": 8348 + }, + { + "epoch": 0.42907801418439717, + "grad_norm": 1.0894666910171509, + "learning_rate": 6.373634971377407e-06, + "loss": 0.7908, + "step": 8349 + }, + { + "epoch": 0.4291294069277418, + "grad_norm": 1.051718831062317, + "learning_rate": 6.372834721530868e-06, + "loss": 0.7205, + "step": 8350 + }, + { + "epoch": 0.42918079967108647, + "grad_norm": 0.7243857383728027, + "learning_rate": 6.372034433648746e-06, + "loss": 0.6512, + "step": 8351 + }, + { + "epoch": 0.42923219241443106, + "grad_norm": 0.9980666637420654, + "learning_rate": 6.371234107753212e-06, + "loss": 0.7141, + "step": 8352 + }, + { + "epoch": 0.4292835851577757, + "grad_norm": 1.0969419479370117, + "learning_rate": 6.370433743866438e-06, + "loss": 0.7574, + "step": 8353 + }, + { + "epoch": 0.42933497790112035, + "grad_norm": 1.1351345777511597, + "learning_rate": 6.3696333420106015e-06, + "loss": 0.7446, + "step": 8354 + }, + { + "epoch": 0.429386370644465, + "grad_norm": 1.0555768013000488, + "learning_rate": 6.368832902207878e-06, + "loss": 0.7227, + "step": 8355 + }, + { + "epoch": 0.42943776338780965, + "grad_norm": 1.1305559873580933, + "learning_rate": 6.368032424480443e-06, + "loss": 0.785, + "step": 8356 + }, + { + "epoch": 0.4294891561311543, + "grad_norm": 0.7501512765884399, + "learning_rate": 6.367231908850475e-06, + "loss": 0.6534, + "step": 8357 + }, + { + "epoch": 0.42954054887449894, + "grad_norm": 1.0587468147277832, + "learning_rate": 6.366431355340152e-06, + "loss": 0.7621, + "step": 8358 + }, + { + "epoch": 0.4295919416178436, + "grad_norm": 1.061094880104065, + "learning_rate": 6.3656307639716565e-06, + "loss": 0.7467, + "step": 8359 + }, + { + "epoch": 0.4296433343611882, + "grad_norm": 0.7430071234703064, + "learning_rate": 6.364830134767168e-06, + "loss": 0.6524, + "step": 8360 + }, + { + "epoch": 0.4296947271045328, + "grad_norm": 1.0598183870315552, + "learning_rate": 6.3640294677488676e-06, + "loss": 0.7758, + "step": 8361 + }, + { + "epoch": 0.4297461198478775, + "grad_norm": 1.2606260776519775, + "learning_rate": 6.363228762938941e-06, + "loss": 0.8014, + "step": 8362 + }, + { + "epoch": 0.4297975125912221, + "grad_norm": 1.1287667751312256, + "learning_rate": 6.36242802035957e-06, + "loss": 0.777, + "step": 8363 + }, + { + "epoch": 0.42984890533456677, + "grad_norm": 0.7967458963394165, + "learning_rate": 6.361627240032942e-06, + "loss": 0.6593, + "step": 8364 + }, + { + "epoch": 0.4299002980779114, + "grad_norm": 1.077114224433899, + "learning_rate": 6.360826421981242e-06, + "loss": 0.7326, + "step": 8365 + }, + { + "epoch": 0.42995169082125606, + "grad_norm": 1.0205336809158325, + "learning_rate": 6.3600255662266566e-06, + "loss": 0.7667, + "step": 8366 + }, + { + "epoch": 0.43000308356460065, + "grad_norm": 0.7071298956871033, + "learning_rate": 6.359224672791377e-06, + "loss": 0.6604, + "step": 8367 + }, + { + "epoch": 0.4300544763079453, + "grad_norm": 1.0826181173324585, + "learning_rate": 6.3584237416975905e-06, + "loss": 0.8129, + "step": 8368 + }, + { + "epoch": 0.43010586905128995, + "grad_norm": 1.1143178939819336, + "learning_rate": 6.357622772967487e-06, + "loss": 0.776, + "step": 8369 + }, + { + "epoch": 0.4301572617946346, + "grad_norm": 0.704164981842041, + "learning_rate": 6.35682176662326e-06, + "loss": 0.6742, + "step": 8370 + }, + { + "epoch": 0.43020865453797924, + "grad_norm": 1.1142781972885132, + "learning_rate": 6.356020722687101e-06, + "loss": 0.7303, + "step": 8371 + }, + { + "epoch": 0.4302600472813239, + "grad_norm": 1.071683406829834, + "learning_rate": 6.3552196411812025e-06, + "loss": 0.685, + "step": 8372 + }, + { + "epoch": 0.43031144002466853, + "grad_norm": 1.091601848602295, + "learning_rate": 6.354418522127761e-06, + "loss": 0.7529, + "step": 8373 + }, + { + "epoch": 0.4303628327680132, + "grad_norm": 0.7305396795272827, + "learning_rate": 6.353617365548972e-06, + "loss": 0.6413, + "step": 8374 + }, + { + "epoch": 0.4304142255113578, + "grad_norm": 1.102630853652954, + "learning_rate": 6.352816171467032e-06, + "loss": 0.7141, + "step": 8375 + }, + { + "epoch": 0.4304656182547024, + "grad_norm": 1.0008684396743774, + "learning_rate": 6.352014939904139e-06, + "loss": 0.7236, + "step": 8376 + }, + { + "epoch": 0.43051701099804707, + "grad_norm": 1.0773789882659912, + "learning_rate": 6.35121367088249e-06, + "loss": 0.7424, + "step": 8377 + }, + { + "epoch": 0.4305684037413917, + "grad_norm": 1.1423330307006836, + "learning_rate": 6.350412364424288e-06, + "loss": 0.7682, + "step": 8378 + }, + { + "epoch": 0.43061979648473636, + "grad_norm": 1.1046618223190308, + "learning_rate": 6.349611020551732e-06, + "loss": 0.7829, + "step": 8379 + }, + { + "epoch": 0.430671189228081, + "grad_norm": 1.1270986795425415, + "learning_rate": 6.348809639287022e-06, + "loss": 0.803, + "step": 8380 + }, + { + "epoch": 0.43072258197142566, + "grad_norm": 1.1230157613754272, + "learning_rate": 6.348008220652366e-06, + "loss": 0.7069, + "step": 8381 + }, + { + "epoch": 0.43077397471477025, + "grad_norm": 1.0743664503097534, + "learning_rate": 6.347206764669963e-06, + "loss": 0.7657, + "step": 8382 + }, + { + "epoch": 0.4308253674581149, + "grad_norm": 0.7270748615264893, + "learning_rate": 6.346405271362022e-06, + "loss": 0.6883, + "step": 8383 + }, + { + "epoch": 0.43087676020145954, + "grad_norm": 1.0805649757385254, + "learning_rate": 6.345603740750745e-06, + "loss": 0.7839, + "step": 8384 + }, + { + "epoch": 0.4309281529448042, + "grad_norm": 0.8860407471656799, + "learning_rate": 6.344802172858342e-06, + "loss": 0.7003, + "step": 8385 + }, + { + "epoch": 0.43097954568814884, + "grad_norm": 0.800311267375946, + "learning_rate": 6.3440005677070216e-06, + "loss": 0.6805, + "step": 8386 + }, + { + "epoch": 0.4310309384314935, + "grad_norm": 1.065180778503418, + "learning_rate": 6.343198925318992e-06, + "loss": 0.7127, + "step": 8387 + }, + { + "epoch": 0.43108233117483813, + "grad_norm": 0.7365872859954834, + "learning_rate": 6.342397245716461e-06, + "loss": 0.6549, + "step": 8388 + }, + { + "epoch": 0.4311337239181828, + "grad_norm": 1.0598411560058594, + "learning_rate": 6.341595528921645e-06, + "loss": 0.6857, + "step": 8389 + }, + { + "epoch": 0.43118511666152737, + "grad_norm": 1.0902824401855469, + "learning_rate": 6.340793774956751e-06, + "loss": 0.7682, + "step": 8390 + }, + { + "epoch": 0.431236509404872, + "grad_norm": 1.0106401443481445, + "learning_rate": 6.339991983843995e-06, + "loss": 0.7361, + "step": 8391 + }, + { + "epoch": 0.43128790214821666, + "grad_norm": 0.9538063406944275, + "learning_rate": 6.339190155605592e-06, + "loss": 0.7278, + "step": 8392 + }, + { + "epoch": 0.4313392948915613, + "grad_norm": 1.0787593126296997, + "learning_rate": 6.338388290263755e-06, + "loss": 0.7894, + "step": 8393 + }, + { + "epoch": 0.43139068763490596, + "grad_norm": 1.147099494934082, + "learning_rate": 6.337586387840702e-06, + "loss": 0.7554, + "step": 8394 + }, + { + "epoch": 0.4314420803782506, + "grad_norm": 1.0683928728103638, + "learning_rate": 6.336784448358649e-06, + "loss": 0.7891, + "step": 8395 + }, + { + "epoch": 0.43149347312159525, + "grad_norm": 1.063873529434204, + "learning_rate": 6.3359824718398175e-06, + "loss": 0.7523, + "step": 8396 + }, + { + "epoch": 0.4315448658649399, + "grad_norm": 1.029515266418457, + "learning_rate": 6.335180458306424e-06, + "loss": 0.7013, + "step": 8397 + }, + { + "epoch": 0.4315962586082845, + "grad_norm": 1.083827018737793, + "learning_rate": 6.334378407780692e-06, + "loss": 0.7636, + "step": 8398 + }, + { + "epoch": 0.43164765135162914, + "grad_norm": 1.037231206893921, + "learning_rate": 6.333576320284839e-06, + "loss": 0.7377, + "step": 8399 + }, + { + "epoch": 0.4316990440949738, + "grad_norm": 1.0276010036468506, + "learning_rate": 6.3327741958410904e-06, + "loss": 0.7665, + "step": 8400 + }, + { + "epoch": 0.43175043683831843, + "grad_norm": 1.1145011186599731, + "learning_rate": 6.3319720344716674e-06, + "loss": 0.7614, + "step": 8401 + }, + { + "epoch": 0.4318018295816631, + "grad_norm": 1.1464964151382446, + "learning_rate": 6.3311698361987985e-06, + "loss": 0.7818, + "step": 8402 + }, + { + "epoch": 0.4318532223250077, + "grad_norm": 1.0981749296188354, + "learning_rate": 6.330367601044705e-06, + "loss": 0.719, + "step": 8403 + }, + { + "epoch": 0.43190461506835237, + "grad_norm": 1.0901585817337036, + "learning_rate": 6.329565329031614e-06, + "loss": 0.7758, + "step": 8404 + }, + { + "epoch": 0.43195600781169696, + "grad_norm": 0.8460590839385986, + "learning_rate": 6.328763020181756e-06, + "loss": 0.6807, + "step": 8405 + }, + { + "epoch": 0.4320074005550416, + "grad_norm": 1.1460169553756714, + "learning_rate": 6.327960674517358e-06, + "loss": 0.7666, + "step": 8406 + }, + { + "epoch": 0.43205879329838626, + "grad_norm": 1.0639140605926514, + "learning_rate": 6.327158292060651e-06, + "loss": 0.7805, + "step": 8407 + }, + { + "epoch": 0.4321101860417309, + "grad_norm": 1.0985881090164185, + "learning_rate": 6.326355872833865e-06, + "loss": 0.6885, + "step": 8408 + }, + { + "epoch": 0.43216157878507555, + "grad_norm": 1.2176406383514404, + "learning_rate": 6.325553416859229e-06, + "loss": 0.7658, + "step": 8409 + }, + { + "epoch": 0.4322129715284202, + "grad_norm": 0.8720978498458862, + "learning_rate": 6.32475092415898e-06, + "loss": 0.7014, + "step": 8410 + }, + { + "epoch": 0.43226436427176484, + "grad_norm": 1.0955424308776855, + "learning_rate": 6.323948394755348e-06, + "loss": 0.761, + "step": 8411 + }, + { + "epoch": 0.4323157570151095, + "grad_norm": 1.0364047288894653, + "learning_rate": 6.323145828670572e-06, + "loss": 0.6833, + "step": 8412 + }, + { + "epoch": 0.4323671497584541, + "grad_norm": 0.9960904121398926, + "learning_rate": 6.3223432259268844e-06, + "loss": 0.7505, + "step": 8413 + }, + { + "epoch": 0.43241854250179873, + "grad_norm": 1.2153621912002563, + "learning_rate": 6.321540586546522e-06, + "loss": 0.7231, + "step": 8414 + }, + { + "epoch": 0.4324699352451434, + "grad_norm": 0.7874817252159119, + "learning_rate": 6.320737910551724e-06, + "loss": 0.701, + "step": 8415 + }, + { + "epoch": 0.432521327988488, + "grad_norm": 1.0423996448516846, + "learning_rate": 6.31993519796473e-06, + "loss": 0.7384, + "step": 8416 + }, + { + "epoch": 0.43257272073183267, + "grad_norm": 0.7048157453536987, + "learning_rate": 6.3191324488077765e-06, + "loss": 0.6672, + "step": 8417 + }, + { + "epoch": 0.4326241134751773, + "grad_norm": 1.163750171661377, + "learning_rate": 6.318329663103109e-06, + "loss": 0.7977, + "step": 8418 + }, + { + "epoch": 0.43267550621852197, + "grad_norm": 1.0547767877578735, + "learning_rate": 6.317526840872965e-06, + "loss": 0.7798, + "step": 8419 + }, + { + "epoch": 0.43272689896186656, + "grad_norm": 1.0751404762268066, + "learning_rate": 6.316723982139591e-06, + "loss": 0.7206, + "step": 8420 + }, + { + "epoch": 0.4327782917052112, + "grad_norm": 1.0892095565795898, + "learning_rate": 6.315921086925229e-06, + "loss": 0.7703, + "step": 8421 + }, + { + "epoch": 0.43282968444855585, + "grad_norm": 0.7128773927688599, + "learning_rate": 6.3151181552521225e-06, + "loss": 0.6697, + "step": 8422 + }, + { + "epoch": 0.4328810771919005, + "grad_norm": 0.850968599319458, + "learning_rate": 6.314315187142521e-06, + "loss": 0.728, + "step": 8423 + }, + { + "epoch": 0.43293246993524515, + "grad_norm": 0.8781245350837708, + "learning_rate": 6.313512182618669e-06, + "loss": 0.696, + "step": 8424 + }, + { + "epoch": 0.4329838626785898, + "grad_norm": 1.0837726593017578, + "learning_rate": 6.312709141702813e-06, + "loss": 0.7451, + "step": 8425 + }, + { + "epoch": 0.43303525542193444, + "grad_norm": 1.0780463218688965, + "learning_rate": 6.311906064417206e-06, + "loss": 0.7086, + "step": 8426 + }, + { + "epoch": 0.4330866481652791, + "grad_norm": 1.0713844299316406, + "learning_rate": 6.311102950784096e-06, + "loss": 0.7407, + "step": 8427 + }, + { + "epoch": 0.4331380409086237, + "grad_norm": 1.1044763326644897, + "learning_rate": 6.310299800825733e-06, + "loss": 0.7806, + "step": 8428 + }, + { + "epoch": 0.4331894336519683, + "grad_norm": 1.1343417167663574, + "learning_rate": 6.309496614564371e-06, + "loss": 0.8192, + "step": 8429 + }, + { + "epoch": 0.433240826395313, + "grad_norm": 0.8975526094436646, + "learning_rate": 6.30869339202226e-06, + "loss": 0.6357, + "step": 8430 + }, + { + "epoch": 0.4332922191386576, + "grad_norm": 1.0665843486785889, + "learning_rate": 6.3078901332216565e-06, + "loss": 0.7494, + "step": 8431 + }, + { + "epoch": 0.43334361188200227, + "grad_norm": 1.038925290107727, + "learning_rate": 6.3070868381848175e-06, + "loss": 0.7479, + "step": 8432 + }, + { + "epoch": 0.4333950046253469, + "grad_norm": 1.123015284538269, + "learning_rate": 6.306283506933992e-06, + "loss": 0.8152, + "step": 8433 + }, + { + "epoch": 0.43344639736869156, + "grad_norm": 1.0991424322128296, + "learning_rate": 6.3054801394914435e-06, + "loss": 0.7572, + "step": 8434 + }, + { + "epoch": 0.4334977901120362, + "grad_norm": 1.0407248735427856, + "learning_rate": 6.304676735879427e-06, + "loss": 0.7459, + "step": 8435 + }, + { + "epoch": 0.4335491828553808, + "grad_norm": 1.0816010236740112, + "learning_rate": 6.303873296120202e-06, + "loss": 0.7547, + "step": 8436 + }, + { + "epoch": 0.43360057559872545, + "grad_norm": 0.818271279335022, + "learning_rate": 6.30306982023603e-06, + "loss": 0.6593, + "step": 8437 + }, + { + "epoch": 0.4336519683420701, + "grad_norm": 1.0635895729064941, + "learning_rate": 6.302266308249168e-06, + "loss": 0.7574, + "step": 8438 + }, + { + "epoch": 0.43370336108541474, + "grad_norm": 1.1095529794692993, + "learning_rate": 6.3014627601818835e-06, + "loss": 0.7463, + "step": 8439 + }, + { + "epoch": 0.4337547538287594, + "grad_norm": 1.038257122039795, + "learning_rate": 6.300659176056436e-06, + "loss": 0.7181, + "step": 8440 + }, + { + "epoch": 0.43380614657210403, + "grad_norm": 0.6907318234443665, + "learning_rate": 6.2998555558950905e-06, + "loss": 0.6548, + "step": 8441 + }, + { + "epoch": 0.4338575393154487, + "grad_norm": 1.0988339185714722, + "learning_rate": 6.2990518997201115e-06, + "loss": 0.7594, + "step": 8442 + }, + { + "epoch": 0.4339089320587933, + "grad_norm": 0.7527409195899963, + "learning_rate": 6.298248207553764e-06, + "loss": 0.7062, + "step": 8443 + }, + { + "epoch": 0.4339603248021379, + "grad_norm": 1.0896986722946167, + "learning_rate": 6.2974444794183175e-06, + "loss": 0.7499, + "step": 8444 + }, + { + "epoch": 0.43401171754548257, + "grad_norm": 1.1018531322479248, + "learning_rate": 6.296640715336039e-06, + "loss": 0.7109, + "step": 8445 + }, + { + "epoch": 0.4340631102888272, + "grad_norm": 0.7952009439468384, + "learning_rate": 6.295836915329195e-06, + "loss": 0.6729, + "step": 8446 + }, + { + "epoch": 0.43411450303217186, + "grad_norm": 0.7238272428512573, + "learning_rate": 6.295033079420061e-06, + "loss": 0.7238, + "step": 8447 + }, + { + "epoch": 0.4341658957755165, + "grad_norm": 1.0481033325195312, + "learning_rate": 6.294229207630903e-06, + "loss": 0.7516, + "step": 8448 + }, + { + "epoch": 0.43421728851886116, + "grad_norm": 1.0630829334259033, + "learning_rate": 6.293425299983994e-06, + "loss": 0.707, + "step": 8449 + }, + { + "epoch": 0.4342686812622058, + "grad_norm": 0.9712737798690796, + "learning_rate": 6.292621356501609e-06, + "loss": 0.725, + "step": 8450 + }, + { + "epoch": 0.4343200740055504, + "grad_norm": 1.0961427688598633, + "learning_rate": 6.291817377206019e-06, + "loss": 0.7857, + "step": 8451 + }, + { + "epoch": 0.43437146674889504, + "grad_norm": 1.1055337190628052, + "learning_rate": 6.2910133621195015e-06, + "loss": 0.7871, + "step": 8452 + }, + { + "epoch": 0.4344228594922397, + "grad_norm": 0.8499512076377869, + "learning_rate": 6.290209311264332e-06, + "loss": 0.6399, + "step": 8453 + }, + { + "epoch": 0.43447425223558434, + "grad_norm": 1.0557571649551392, + "learning_rate": 6.289405224662786e-06, + "loss": 0.7266, + "step": 8454 + }, + { + "epoch": 0.434525644978929, + "grad_norm": 0.7364407777786255, + "learning_rate": 6.2886011023371405e-06, + "loss": 0.7212, + "step": 8455 + }, + { + "epoch": 0.43457703772227363, + "grad_norm": 0.9995099306106567, + "learning_rate": 6.287796944309679e-06, + "loss": 0.6879, + "step": 8456 + }, + { + "epoch": 0.4346284304656183, + "grad_norm": 1.1055337190628052, + "learning_rate": 6.286992750602677e-06, + "loss": 0.7724, + "step": 8457 + }, + { + "epoch": 0.43467982320896287, + "grad_norm": 1.2343580722808838, + "learning_rate": 6.286188521238419e-06, + "loss": 0.8389, + "step": 8458 + }, + { + "epoch": 0.4347312159523075, + "grad_norm": 1.0446220636367798, + "learning_rate": 6.285384256239182e-06, + "loss": 0.682, + "step": 8459 + }, + { + "epoch": 0.43478260869565216, + "grad_norm": 1.0600786209106445, + "learning_rate": 6.2845799556272535e-06, + "loss": 0.7502, + "step": 8460 + }, + { + "epoch": 0.4348340014389968, + "grad_norm": 1.2921860218048096, + "learning_rate": 6.283775619424916e-06, + "loss": 0.7364, + "step": 8461 + }, + { + "epoch": 0.43488539418234146, + "grad_norm": 1.0856540203094482, + "learning_rate": 6.282971247654453e-06, + "loss": 0.6799, + "step": 8462 + }, + { + "epoch": 0.4349367869256861, + "grad_norm": 1.0880122184753418, + "learning_rate": 6.282166840338152e-06, + "loss": 0.7409, + "step": 8463 + }, + { + "epoch": 0.43498817966903075, + "grad_norm": 1.0276278257369995, + "learning_rate": 6.281362397498299e-06, + "loss": 0.7382, + "step": 8464 + }, + { + "epoch": 0.4350395724123754, + "grad_norm": 1.0319180488586426, + "learning_rate": 6.2805579191571805e-06, + "loss": 0.7132, + "step": 8465 + }, + { + "epoch": 0.43509096515572, + "grad_norm": 1.0523196458816528, + "learning_rate": 6.279753405337087e-06, + "loss": 0.7406, + "step": 8466 + }, + { + "epoch": 0.43514235789906464, + "grad_norm": 1.1038187742233276, + "learning_rate": 6.278948856060307e-06, + "loss": 0.7263, + "step": 8467 + }, + { + "epoch": 0.4351937506424093, + "grad_norm": 1.2612719535827637, + "learning_rate": 6.2781442713491345e-06, + "loss": 0.703, + "step": 8468 + }, + { + "epoch": 0.43524514338575393, + "grad_norm": 1.410661220550537, + "learning_rate": 6.277339651225858e-06, + "loss": 0.771, + "step": 8469 + }, + { + "epoch": 0.4352965361290986, + "grad_norm": 1.0300686359405518, + "learning_rate": 6.2765349957127706e-06, + "loss": 0.7591, + "step": 8470 + }, + { + "epoch": 0.4353479288724432, + "grad_norm": 1.0803775787353516, + "learning_rate": 6.275730304832167e-06, + "loss": 0.7209, + "step": 8471 + }, + { + "epoch": 0.43539932161578787, + "grad_norm": 1.103232502937317, + "learning_rate": 6.274925578606341e-06, + "loss": 0.6907, + "step": 8472 + }, + { + "epoch": 0.4354507143591325, + "grad_norm": 1.0692774057388306, + "learning_rate": 6.274120817057588e-06, + "loss": 0.7811, + "step": 8473 + }, + { + "epoch": 0.4355021071024771, + "grad_norm": 1.0819768905639648, + "learning_rate": 6.2733160202082064e-06, + "loss": 0.7249, + "step": 8474 + }, + { + "epoch": 0.43555349984582176, + "grad_norm": 1.159351110458374, + "learning_rate": 6.272511188080491e-06, + "loss": 0.7099, + "step": 8475 + }, + { + "epoch": 0.4356048925891664, + "grad_norm": 0.8502740263938904, + "learning_rate": 6.271706320696742e-06, + "loss": 0.6576, + "step": 8476 + }, + { + "epoch": 0.43565628533251105, + "grad_norm": 0.8586541414260864, + "learning_rate": 6.2709014180792605e-06, + "loss": 0.6566, + "step": 8477 + }, + { + "epoch": 0.4357076780758557, + "grad_norm": 1.1031328439712524, + "learning_rate": 6.2700964802503425e-06, + "loss": 0.7514, + "step": 8478 + }, + { + "epoch": 0.43575907081920034, + "grad_norm": 1.091124415397644, + "learning_rate": 6.2692915072322955e-06, + "loss": 0.7326, + "step": 8479 + }, + { + "epoch": 0.435810463562545, + "grad_norm": 1.1567610502243042, + "learning_rate": 6.268486499047418e-06, + "loss": 0.7443, + "step": 8480 + }, + { + "epoch": 0.4358618563058896, + "grad_norm": 0.7727978825569153, + "learning_rate": 6.267681455718013e-06, + "loss": 0.6679, + "step": 8481 + }, + { + "epoch": 0.43591324904923423, + "grad_norm": 1.1718244552612305, + "learning_rate": 6.266876377266389e-06, + "loss": 0.7665, + "step": 8482 + }, + { + "epoch": 0.4359646417925789, + "grad_norm": 1.080930233001709, + "learning_rate": 6.2660712637148455e-06, + "loss": 0.7282, + "step": 8483 + }, + { + "epoch": 0.4360160345359235, + "grad_norm": 1.0717263221740723, + "learning_rate": 6.265266115085695e-06, + "loss": 0.7224, + "step": 8484 + }, + { + "epoch": 0.43606742727926817, + "grad_norm": 1.0754872560501099, + "learning_rate": 6.264460931401241e-06, + "loss": 0.8059, + "step": 8485 + }, + { + "epoch": 0.4361188200226128, + "grad_norm": 1.1302589178085327, + "learning_rate": 6.263655712683791e-06, + "loss": 0.7911, + "step": 8486 + }, + { + "epoch": 0.43617021276595747, + "grad_norm": 1.0252442359924316, + "learning_rate": 6.262850458955657e-06, + "loss": 0.6952, + "step": 8487 + }, + { + "epoch": 0.4362216055093021, + "grad_norm": 1.1333891153335571, + "learning_rate": 6.262045170239148e-06, + "loss": 0.732, + "step": 8488 + }, + { + "epoch": 0.4362729982526467, + "grad_norm": 1.0965452194213867, + "learning_rate": 6.261239846556576e-06, + "loss": 0.7171, + "step": 8489 + }, + { + "epoch": 0.43632439099599135, + "grad_norm": 1.074366807937622, + "learning_rate": 6.260434487930254e-06, + "loss": 0.7155, + "step": 8490 + }, + { + "epoch": 0.436375783739336, + "grad_norm": 0.8176406621932983, + "learning_rate": 6.259629094382491e-06, + "loss": 0.6701, + "step": 8491 + }, + { + "epoch": 0.43642717648268065, + "grad_norm": 1.1059508323669434, + "learning_rate": 6.258823665935606e-06, + "loss": 0.7515, + "step": 8492 + }, + { + "epoch": 0.4364785692260253, + "grad_norm": 0.7819482684135437, + "learning_rate": 6.258018202611912e-06, + "loss": 0.6725, + "step": 8493 + }, + { + "epoch": 0.43652996196936994, + "grad_norm": 1.0585912466049194, + "learning_rate": 6.2572127044337236e-06, + "loss": 0.7367, + "step": 8494 + }, + { + "epoch": 0.4365813547127146, + "grad_norm": 1.086531400680542, + "learning_rate": 6.256407171423361e-06, + "loss": 0.7328, + "step": 8495 + }, + { + "epoch": 0.4366327474560592, + "grad_norm": 1.0657939910888672, + "learning_rate": 6.25560160360314e-06, + "loss": 0.7229, + "step": 8496 + }, + { + "epoch": 0.4366841401994038, + "grad_norm": 1.072554349899292, + "learning_rate": 6.254796000995379e-06, + "loss": 0.7451, + "step": 8497 + }, + { + "epoch": 0.4367355329427485, + "grad_norm": 1.1120802164077759, + "learning_rate": 6.253990363622401e-06, + "loss": 0.7705, + "step": 8498 + }, + { + "epoch": 0.4367869256860931, + "grad_norm": 0.7722738981246948, + "learning_rate": 6.253184691506523e-06, + "loss": 0.685, + "step": 8499 + }, + { + "epoch": 0.43683831842943777, + "grad_norm": 1.0963382720947266, + "learning_rate": 6.252378984670071e-06, + "loss": 0.7465, + "step": 8500 + }, + { + "epoch": 0.4368897111727824, + "grad_norm": 1.0725950002670288, + "learning_rate": 6.251573243135365e-06, + "loss": 0.7475, + "step": 8501 + }, + { + "epoch": 0.43694110391612706, + "grad_norm": 1.048986554145813, + "learning_rate": 6.250767466924728e-06, + "loss": 0.7421, + "step": 8502 + }, + { + "epoch": 0.4369924966594717, + "grad_norm": 1.0091770887374878, + "learning_rate": 6.249961656060487e-06, + "loss": 0.7638, + "step": 8503 + }, + { + "epoch": 0.4370438894028163, + "grad_norm": 1.0706737041473389, + "learning_rate": 6.249155810564967e-06, + "loss": 0.7207, + "step": 8504 + }, + { + "epoch": 0.43709528214616095, + "grad_norm": 1.0429250001907349, + "learning_rate": 6.248349930460494e-06, + "loss": 0.7979, + "step": 8505 + }, + { + "epoch": 0.4371466748895056, + "grad_norm": 1.0935955047607422, + "learning_rate": 6.247544015769396e-06, + "loss": 0.756, + "step": 8506 + }, + { + "epoch": 0.43719806763285024, + "grad_norm": 1.1420365571975708, + "learning_rate": 6.246738066514002e-06, + "loss": 0.7523, + "step": 8507 + }, + { + "epoch": 0.4372494603761949, + "grad_norm": 0.7566020488739014, + "learning_rate": 6.245932082716641e-06, + "loss": 0.6654, + "step": 8508 + }, + { + "epoch": 0.43730085311953953, + "grad_norm": 0.7629073262214661, + "learning_rate": 6.245126064399644e-06, + "loss": 0.6826, + "step": 8509 + }, + { + "epoch": 0.4373522458628842, + "grad_norm": 1.0363222360610962, + "learning_rate": 6.244320011585341e-06, + "loss": 0.7194, + "step": 8510 + }, + { + "epoch": 0.4374036386062288, + "grad_norm": 1.1170072555541992, + "learning_rate": 6.2435139242960665e-06, + "loss": 0.7848, + "step": 8511 + }, + { + "epoch": 0.4374550313495734, + "grad_norm": 1.1099908351898193, + "learning_rate": 6.242707802554152e-06, + "loss": 0.7766, + "step": 8512 + }, + { + "epoch": 0.43750642409291807, + "grad_norm": 1.034143328666687, + "learning_rate": 6.241901646381934e-06, + "loss": 0.738, + "step": 8513 + }, + { + "epoch": 0.4375578168362627, + "grad_norm": 1.0583261251449585, + "learning_rate": 6.241095455801746e-06, + "loss": 0.7282, + "step": 8514 + }, + { + "epoch": 0.43760920957960736, + "grad_norm": 1.2888649702072144, + "learning_rate": 6.240289230835924e-06, + "loss": 0.7319, + "step": 8515 + }, + { + "epoch": 0.437660602322952, + "grad_norm": 1.0071271657943726, + "learning_rate": 6.239482971506806e-06, + "loss": 0.6963, + "step": 8516 + }, + { + "epoch": 0.43771199506629666, + "grad_norm": 1.088568925857544, + "learning_rate": 6.2386766778367306e-06, + "loss": 0.7491, + "step": 8517 + }, + { + "epoch": 0.4377633878096413, + "grad_norm": 0.7671567797660828, + "learning_rate": 6.237870349848034e-06, + "loss": 0.6794, + "step": 8518 + }, + { + "epoch": 0.4378147805529859, + "grad_norm": 1.0515166521072388, + "learning_rate": 6.23706398756306e-06, + "loss": 0.7555, + "step": 8519 + }, + { + "epoch": 0.43786617329633054, + "grad_norm": 1.0878161191940308, + "learning_rate": 6.236257591004148e-06, + "loss": 0.7214, + "step": 8520 + }, + { + "epoch": 0.4379175660396752, + "grad_norm": 1.0449882745742798, + "learning_rate": 6.23545116019364e-06, + "loss": 0.7222, + "step": 8521 + }, + { + "epoch": 0.43796895878301983, + "grad_norm": 1.11459219455719, + "learning_rate": 6.234644695153878e-06, + "loss": 0.7393, + "step": 8522 + }, + { + "epoch": 0.4380203515263645, + "grad_norm": 1.059685230255127, + "learning_rate": 6.233838195907207e-06, + "loss": 0.7287, + "step": 8523 + }, + { + "epoch": 0.43807174426970913, + "grad_norm": 1.0576393604278564, + "learning_rate": 6.2330316624759725e-06, + "loss": 0.7343, + "step": 8524 + }, + { + "epoch": 0.4381231370130538, + "grad_norm": 1.092231035232544, + "learning_rate": 6.232225094882518e-06, + "loss": 0.685, + "step": 8525 + }, + { + "epoch": 0.4381745297563984, + "grad_norm": 0.7335990071296692, + "learning_rate": 6.231418493149191e-06, + "loss": 0.6551, + "step": 8526 + }, + { + "epoch": 0.438225922499743, + "grad_norm": 1.165383219718933, + "learning_rate": 6.230611857298339e-06, + "loss": 0.7834, + "step": 8527 + }, + { + "epoch": 0.43827731524308766, + "grad_norm": 1.05681312084198, + "learning_rate": 6.229805187352311e-06, + "loss": 0.7278, + "step": 8528 + }, + { + "epoch": 0.4383287079864323, + "grad_norm": 1.0848851203918457, + "learning_rate": 6.228998483333457e-06, + "loss": 0.7211, + "step": 8529 + }, + { + "epoch": 0.43838010072977696, + "grad_norm": 1.1085296869277954, + "learning_rate": 6.228191745264127e-06, + "loss": 0.7808, + "step": 8530 + }, + { + "epoch": 0.4384314934731216, + "grad_norm": 1.0813217163085938, + "learning_rate": 6.227384973166671e-06, + "loss": 0.7861, + "step": 8531 + }, + { + "epoch": 0.43848288621646625, + "grad_norm": 1.0476735830307007, + "learning_rate": 6.226578167063444e-06, + "loss": 0.7587, + "step": 8532 + }, + { + "epoch": 0.4385342789598109, + "grad_norm": 1.4857110977172852, + "learning_rate": 6.225771326976797e-06, + "loss": 0.7059, + "step": 8533 + }, + { + "epoch": 0.4385856717031555, + "grad_norm": 1.1320213079452515, + "learning_rate": 6.224964452929085e-06, + "loss": 0.7273, + "step": 8534 + }, + { + "epoch": 0.43863706444650014, + "grad_norm": 0.9844960570335388, + "learning_rate": 6.224157544942664e-06, + "loss": 0.6839, + "step": 8535 + }, + { + "epoch": 0.4386884571898448, + "grad_norm": 0.6965320706367493, + "learning_rate": 6.2233506030398885e-06, + "loss": 0.6668, + "step": 8536 + }, + { + "epoch": 0.43873984993318943, + "grad_norm": 1.0655978918075562, + "learning_rate": 6.222543627243116e-06, + "loss": 0.7279, + "step": 8537 + }, + { + "epoch": 0.4387912426765341, + "grad_norm": 0.7516601085662842, + "learning_rate": 6.221736617574704e-06, + "loss": 0.6416, + "step": 8538 + }, + { + "epoch": 0.4388426354198787, + "grad_norm": 1.0668766498565674, + "learning_rate": 6.220929574057013e-06, + "loss": 0.7315, + "step": 8539 + }, + { + "epoch": 0.43889402816322337, + "grad_norm": 0.6921804547309875, + "learning_rate": 6.2201224967124015e-06, + "loss": 0.707, + "step": 8540 + }, + { + "epoch": 0.438945420906568, + "grad_norm": 1.1046688556671143, + "learning_rate": 6.219315385563233e-06, + "loss": 0.731, + "step": 8541 + }, + { + "epoch": 0.4389968136499126, + "grad_norm": 1.024468183517456, + "learning_rate": 6.218508240631864e-06, + "loss": 0.76, + "step": 8542 + }, + { + "epoch": 0.43904820639325726, + "grad_norm": 0.8090651631355286, + "learning_rate": 6.2177010619406616e-06, + "loss": 0.6126, + "step": 8543 + }, + { + "epoch": 0.4390995991366019, + "grad_norm": 1.0247395038604736, + "learning_rate": 6.216893849511988e-06, + "loss": 0.6867, + "step": 8544 + }, + { + "epoch": 0.43915099187994655, + "grad_norm": 0.9569130539894104, + "learning_rate": 6.216086603368208e-06, + "loss": 0.7252, + "step": 8545 + }, + { + "epoch": 0.4392023846232912, + "grad_norm": 1.080208659172058, + "learning_rate": 6.215279323531687e-06, + "loss": 0.7341, + "step": 8546 + }, + { + "epoch": 0.43925377736663584, + "grad_norm": 1.2321627140045166, + "learning_rate": 6.214472010024789e-06, + "loss": 0.7489, + "step": 8547 + }, + { + "epoch": 0.4393051701099805, + "grad_norm": 0.9707562923431396, + "learning_rate": 6.213664662869884e-06, + "loss": 0.7326, + "step": 8548 + }, + { + "epoch": 0.4393565628533251, + "grad_norm": 1.0804896354675293, + "learning_rate": 6.212857282089341e-06, + "loss": 0.7011, + "step": 8549 + }, + { + "epoch": 0.43940795559666973, + "grad_norm": 0.6854485273361206, + "learning_rate": 6.2120498677055265e-06, + "loss": 0.6779, + "step": 8550 + }, + { + "epoch": 0.4394593483400144, + "grad_norm": 1.1602165699005127, + "learning_rate": 6.211242419740813e-06, + "loss": 0.7596, + "step": 8551 + }, + { + "epoch": 0.439510741083359, + "grad_norm": 1.1067523956298828, + "learning_rate": 6.21043493821757e-06, + "loss": 0.7446, + "step": 8552 + }, + { + "epoch": 0.43956213382670367, + "grad_norm": 1.0530751943588257, + "learning_rate": 6.20962742315817e-06, + "loss": 0.7996, + "step": 8553 + }, + { + "epoch": 0.4396135265700483, + "grad_norm": 1.1048904657363892, + "learning_rate": 6.208819874584987e-06, + "loss": 0.7747, + "step": 8554 + }, + { + "epoch": 0.43966491931339297, + "grad_norm": 1.120870590209961, + "learning_rate": 6.208012292520392e-06, + "loss": 0.6899, + "step": 8555 + }, + { + "epoch": 0.4397163120567376, + "grad_norm": 0.6727537512779236, + "learning_rate": 6.207204676986763e-06, + "loss": 0.6399, + "step": 8556 + }, + { + "epoch": 0.4397677048000822, + "grad_norm": 1.0299372673034668, + "learning_rate": 6.206397028006473e-06, + "loss": 0.6862, + "step": 8557 + }, + { + "epoch": 0.43981909754342685, + "grad_norm": 1.0700434446334839, + "learning_rate": 6.205589345601903e-06, + "loss": 0.7517, + "step": 8558 + }, + { + "epoch": 0.4398704902867715, + "grad_norm": 1.098470687866211, + "learning_rate": 6.204781629795424e-06, + "loss": 0.7539, + "step": 8559 + }, + { + "epoch": 0.43992188303011615, + "grad_norm": 1.0596468448638916, + "learning_rate": 6.203973880609418e-06, + "loss": 0.7009, + "step": 8560 + }, + { + "epoch": 0.4399732757734608, + "grad_norm": 0.6885043382644653, + "learning_rate": 6.203166098066266e-06, + "loss": 0.6445, + "step": 8561 + }, + { + "epoch": 0.44002466851680544, + "grad_norm": 0.7829486131668091, + "learning_rate": 6.202358282188346e-06, + "loss": 0.6536, + "step": 8562 + }, + { + "epoch": 0.4400760612601501, + "grad_norm": 1.1044014692306519, + "learning_rate": 6.201550432998039e-06, + "loss": 0.7799, + "step": 8563 + }, + { + "epoch": 0.44012745400349473, + "grad_norm": 0.7496541738510132, + "learning_rate": 6.200742550517729e-06, + "loss": 0.6342, + "step": 8564 + }, + { + "epoch": 0.4401788467468393, + "grad_norm": 1.0694981813430786, + "learning_rate": 6.1999346347697986e-06, + "loss": 0.6815, + "step": 8565 + }, + { + "epoch": 0.44023023949018397, + "grad_norm": 1.1206732988357544, + "learning_rate": 6.19912668577663e-06, + "loss": 0.7714, + "step": 8566 + }, + { + "epoch": 0.4402816322335286, + "grad_norm": 1.0986313819885254, + "learning_rate": 6.19831870356061e-06, + "loss": 0.7447, + "step": 8567 + }, + { + "epoch": 0.44033302497687327, + "grad_norm": 0.9972346425056458, + "learning_rate": 6.197510688144124e-06, + "loss": 0.7196, + "step": 8568 + }, + { + "epoch": 0.4403844177202179, + "grad_norm": 1.0339921712875366, + "learning_rate": 6.196702639549557e-06, + "loss": 0.7299, + "step": 8569 + }, + { + "epoch": 0.44043581046356256, + "grad_norm": 1.1181626319885254, + "learning_rate": 6.1958945577993e-06, + "loss": 0.7213, + "step": 8570 + }, + { + "epoch": 0.4404872032069072, + "grad_norm": 1.0512242317199707, + "learning_rate": 6.19508644291574e-06, + "loss": 0.7696, + "step": 8571 + }, + { + "epoch": 0.4405385959502518, + "grad_norm": 1.1621426343917847, + "learning_rate": 6.1942782949212665e-06, + "loss": 0.7568, + "step": 8572 + }, + { + "epoch": 0.44058998869359645, + "grad_norm": 1.0266258716583252, + "learning_rate": 6.19347011383827e-06, + "loss": 0.7343, + "step": 8573 + }, + { + "epoch": 0.4406413814369411, + "grad_norm": 1.091570258140564, + "learning_rate": 6.192661899689141e-06, + "loss": 0.7119, + "step": 8574 + }, + { + "epoch": 0.44069277418028574, + "grad_norm": 1.021521806716919, + "learning_rate": 6.191853652496273e-06, + "loss": 0.7096, + "step": 8575 + }, + { + "epoch": 0.4407441669236304, + "grad_norm": 1.130705714225769, + "learning_rate": 6.19104537228206e-06, + "loss": 0.7954, + "step": 8576 + }, + { + "epoch": 0.44079555966697503, + "grad_norm": 1.0801620483398438, + "learning_rate": 6.190237059068893e-06, + "loss": 0.7062, + "step": 8577 + }, + { + "epoch": 0.4408469524103197, + "grad_norm": 0.8573058247566223, + "learning_rate": 6.18942871287917e-06, + "loss": 0.6666, + "step": 8578 + }, + { + "epoch": 0.44089834515366433, + "grad_norm": 1.0760871171951294, + "learning_rate": 6.188620333735284e-06, + "loss": 0.7075, + "step": 8579 + }, + { + "epoch": 0.4409497378970089, + "grad_norm": 1.0348376035690308, + "learning_rate": 6.187811921659637e-06, + "loss": 0.7557, + "step": 8580 + }, + { + "epoch": 0.44100113064035357, + "grad_norm": 0.7962344288825989, + "learning_rate": 6.187003476674621e-06, + "loss": 0.6813, + "step": 8581 + }, + { + "epoch": 0.4410525233836982, + "grad_norm": 0.7579706907272339, + "learning_rate": 6.186194998802638e-06, + "loss": 0.6793, + "step": 8582 + }, + { + "epoch": 0.44110391612704286, + "grad_norm": 1.0536022186279297, + "learning_rate": 6.185386488066087e-06, + "loss": 0.7242, + "step": 8583 + }, + { + "epoch": 0.4411553088703875, + "grad_norm": 1.0478723049163818, + "learning_rate": 6.184577944487366e-06, + "loss": 0.7541, + "step": 8584 + }, + { + "epoch": 0.44120670161373216, + "grad_norm": 1.0063352584838867, + "learning_rate": 6.183769368088882e-06, + "loss": 0.7507, + "step": 8585 + }, + { + "epoch": 0.4412580943570768, + "grad_norm": 0.9992077350616455, + "learning_rate": 6.182960758893033e-06, + "loss": 0.7332, + "step": 8586 + }, + { + "epoch": 0.4413094871004214, + "grad_norm": 1.0855505466461182, + "learning_rate": 6.182152116922222e-06, + "loss": 0.7671, + "step": 8587 + }, + { + "epoch": 0.44136087984376604, + "grad_norm": 1.0898454189300537, + "learning_rate": 6.181343442198855e-06, + "loss": 0.7697, + "step": 8588 + }, + { + "epoch": 0.4414122725871107, + "grad_norm": 0.7950151562690735, + "learning_rate": 6.180534734745336e-06, + "loss": 0.6433, + "step": 8589 + }, + { + "epoch": 0.44146366533045533, + "grad_norm": 1.030315637588501, + "learning_rate": 6.17972599458407e-06, + "loss": 0.7431, + "step": 8590 + }, + { + "epoch": 0.4415150580738, + "grad_norm": 1.113896369934082, + "learning_rate": 6.1789172217374675e-06, + "loss": 0.7531, + "step": 8591 + }, + { + "epoch": 0.44156645081714463, + "grad_norm": 1.0667732954025269, + "learning_rate": 6.1781084162279326e-06, + "loss": 0.7594, + "step": 8592 + }, + { + "epoch": 0.4416178435604893, + "grad_norm": 1.1164042949676514, + "learning_rate": 6.1772995780778754e-06, + "loss": 0.7555, + "step": 8593 + }, + { + "epoch": 0.4416692363038339, + "grad_norm": 0.8158687949180603, + "learning_rate": 6.176490707309707e-06, + "loss": 0.6583, + "step": 8594 + }, + { + "epoch": 0.4417206290471785, + "grad_norm": 1.1119701862335205, + "learning_rate": 6.175681803945834e-06, + "loss": 0.748, + "step": 8595 + }, + { + "epoch": 0.44177202179052316, + "grad_norm": 1.0504924058914185, + "learning_rate": 6.174872868008671e-06, + "loss": 0.7498, + "step": 8596 + }, + { + "epoch": 0.4418234145338678, + "grad_norm": 0.6794732809066772, + "learning_rate": 6.174063899520629e-06, + "loss": 0.6722, + "step": 8597 + }, + { + "epoch": 0.44187480727721246, + "grad_norm": 1.08985435962677, + "learning_rate": 6.1732548985041205e-06, + "loss": 0.7639, + "step": 8598 + }, + { + "epoch": 0.4419262000205571, + "grad_norm": 1.125518560409546, + "learning_rate": 6.172445864981561e-06, + "loss": 0.7351, + "step": 8599 + }, + { + "epoch": 0.44197759276390175, + "grad_norm": 2.6217942237854004, + "learning_rate": 6.1716367989753646e-06, + "loss": 0.6925, + "step": 8600 + }, + { + "epoch": 0.4420289855072464, + "grad_norm": 1.0023314952850342, + "learning_rate": 6.170827700507948e-06, + "loss": 0.7166, + "step": 8601 + }, + { + "epoch": 0.44208037825059104, + "grad_norm": 1.099668264389038, + "learning_rate": 6.170018569601729e-06, + "loss": 0.7054, + "step": 8602 + }, + { + "epoch": 0.44213177099393564, + "grad_norm": 0.7717718482017517, + "learning_rate": 6.169209406279122e-06, + "loss": 0.6608, + "step": 8603 + }, + { + "epoch": 0.4421831637372803, + "grad_norm": 1.0417449474334717, + "learning_rate": 6.168400210562548e-06, + "loss": 0.6909, + "step": 8604 + }, + { + "epoch": 0.44223455648062493, + "grad_norm": 1.0432336330413818, + "learning_rate": 6.167590982474427e-06, + "loss": 0.7388, + "step": 8605 + }, + { + "epoch": 0.4422859492239696, + "grad_norm": 1.0393587350845337, + "learning_rate": 6.1667817220371775e-06, + "loss": 0.7338, + "step": 8606 + }, + { + "epoch": 0.4423373419673142, + "grad_norm": 1.2178142070770264, + "learning_rate": 6.165972429273221e-06, + "loss": 0.7607, + "step": 8607 + }, + { + "epoch": 0.44238873471065887, + "grad_norm": 1.0836079120635986, + "learning_rate": 6.16516310420498e-06, + "loss": 0.7905, + "step": 8608 + }, + { + "epoch": 0.4424401274540035, + "grad_norm": 1.0543299913406372, + "learning_rate": 6.1643537468548785e-06, + "loss": 0.7322, + "step": 8609 + }, + { + "epoch": 0.4424915201973481, + "grad_norm": 1.0480260848999023, + "learning_rate": 6.163544357245339e-06, + "loss": 0.7329, + "step": 8610 + }, + { + "epoch": 0.44254291294069276, + "grad_norm": 1.122348427772522, + "learning_rate": 6.162734935398786e-06, + "loss": 0.7247, + "step": 8611 + }, + { + "epoch": 0.4425943056840374, + "grad_norm": 1.080051302909851, + "learning_rate": 6.161925481337648e-06, + "loss": 0.7436, + "step": 8612 + }, + { + "epoch": 0.44264569842738205, + "grad_norm": 0.7509061098098755, + "learning_rate": 6.16111599508435e-06, + "loss": 0.6452, + "step": 8613 + }, + { + "epoch": 0.4426970911707267, + "grad_norm": 1.0758047103881836, + "learning_rate": 6.160306476661319e-06, + "loss": 0.7995, + "step": 8614 + }, + { + "epoch": 0.44274848391407134, + "grad_norm": 0.6906732320785522, + "learning_rate": 6.159496926090983e-06, + "loss": 0.6394, + "step": 8615 + }, + { + "epoch": 0.442799876657416, + "grad_norm": 1.1165037155151367, + "learning_rate": 6.158687343395773e-06, + "loss": 0.7238, + "step": 8616 + }, + { + "epoch": 0.44285126940076064, + "grad_norm": 1.105836033821106, + "learning_rate": 6.157877728598118e-06, + "loss": 0.7517, + "step": 8617 + }, + { + "epoch": 0.44290266214410523, + "grad_norm": 0.8277804255485535, + "learning_rate": 6.157068081720449e-06, + "loss": 0.7159, + "step": 8618 + }, + { + "epoch": 0.4429540548874499, + "grad_norm": 1.1816530227661133, + "learning_rate": 6.1562584027852e-06, + "loss": 0.7447, + "step": 8619 + }, + { + "epoch": 0.4430054476307945, + "grad_norm": 1.0948307514190674, + "learning_rate": 6.1554486918148e-06, + "loss": 0.7857, + "step": 8620 + }, + { + "epoch": 0.44305684037413917, + "grad_norm": 1.2470206022262573, + "learning_rate": 6.154638948831687e-06, + "loss": 0.7774, + "step": 8621 + }, + { + "epoch": 0.4431082331174838, + "grad_norm": 1.0419647693634033, + "learning_rate": 6.153829173858293e-06, + "loss": 0.803, + "step": 8622 + }, + { + "epoch": 0.44315962586082847, + "grad_norm": 1.1359453201293945, + "learning_rate": 6.153019366917053e-06, + "loss": 0.7395, + "step": 8623 + }, + { + "epoch": 0.4432110186041731, + "grad_norm": 1.0342309474945068, + "learning_rate": 6.152209528030406e-06, + "loss": 0.7632, + "step": 8624 + }, + { + "epoch": 0.4432624113475177, + "grad_norm": 0.8497276902198792, + "learning_rate": 6.151399657220788e-06, + "loss": 0.6546, + "step": 8625 + }, + { + "epoch": 0.44331380409086235, + "grad_norm": 1.0763641595840454, + "learning_rate": 6.150589754510636e-06, + "loss": 0.7182, + "step": 8626 + }, + { + "epoch": 0.443365196834207, + "grad_norm": 0.9959622621536255, + "learning_rate": 6.14977981992239e-06, + "loss": 0.7639, + "step": 8627 + }, + { + "epoch": 0.44341658957755165, + "grad_norm": 1.0687144994735718, + "learning_rate": 6.148969853478491e-06, + "loss": 0.7245, + "step": 8628 + }, + { + "epoch": 0.4434679823208963, + "grad_norm": 1.105849027633667, + "learning_rate": 6.148159855201379e-06, + "loss": 0.786, + "step": 8629 + }, + { + "epoch": 0.44351937506424094, + "grad_norm": 1.0340808629989624, + "learning_rate": 6.147349825113494e-06, + "loss": 0.7312, + "step": 8630 + }, + { + "epoch": 0.4435707678075856, + "grad_norm": 1.0555766820907593, + "learning_rate": 6.146539763237283e-06, + "loss": 0.7501, + "step": 8631 + }, + { + "epoch": 0.44362216055093023, + "grad_norm": 1.0347938537597656, + "learning_rate": 6.145729669595184e-06, + "loss": 0.7033, + "step": 8632 + }, + { + "epoch": 0.4436735532942748, + "grad_norm": 1.0456756353378296, + "learning_rate": 6.144919544209644e-06, + "loss": 0.7354, + "step": 8633 + }, + { + "epoch": 0.44372494603761947, + "grad_norm": 0.9883608222007751, + "learning_rate": 6.144109387103111e-06, + "loss": 0.6761, + "step": 8634 + }, + { + "epoch": 0.4437763387809641, + "grad_norm": 1.0230767726898193, + "learning_rate": 6.143299198298025e-06, + "loss": 0.7083, + "step": 8635 + }, + { + "epoch": 0.44382773152430877, + "grad_norm": 0.7963536977767944, + "learning_rate": 6.142488977816838e-06, + "loss": 0.6832, + "step": 8636 + }, + { + "epoch": 0.4438791242676534, + "grad_norm": 0.7524346113204956, + "learning_rate": 6.141678725681997e-06, + "loss": 0.6688, + "step": 8637 + }, + { + "epoch": 0.44393051701099806, + "grad_norm": 1.0277385711669922, + "learning_rate": 6.140868441915949e-06, + "loss": 0.7022, + "step": 8638 + }, + { + "epoch": 0.4439819097543427, + "grad_norm": 1.0472514629364014, + "learning_rate": 6.1400581265411454e-06, + "loss": 0.7274, + "step": 8639 + }, + { + "epoch": 0.4440333024976873, + "grad_norm": 0.7473666071891785, + "learning_rate": 6.139247779580034e-06, + "loss": 0.7052, + "step": 8640 + }, + { + "epoch": 0.44408469524103195, + "grad_norm": 1.0470846891403198, + "learning_rate": 6.138437401055068e-06, + "loss": 0.7215, + "step": 8641 + }, + { + "epoch": 0.4441360879843766, + "grad_norm": 0.8383448719978333, + "learning_rate": 6.137626990988701e-06, + "loss": 0.6827, + "step": 8642 + }, + { + "epoch": 0.44418748072772124, + "grad_norm": 1.0874866247177124, + "learning_rate": 6.136816549403385e-06, + "loss": 0.7846, + "step": 8643 + }, + { + "epoch": 0.4442388734710659, + "grad_norm": 1.1708168983459473, + "learning_rate": 6.136006076321573e-06, + "loss": 0.7642, + "step": 8644 + }, + { + "epoch": 0.44429026621441053, + "grad_norm": 0.7425611019134521, + "learning_rate": 6.135195571765721e-06, + "loss": 0.6725, + "step": 8645 + }, + { + "epoch": 0.4443416589577552, + "grad_norm": 1.216227650642395, + "learning_rate": 6.134385035758284e-06, + "loss": 0.7087, + "step": 8646 + }, + { + "epoch": 0.44439305170109983, + "grad_norm": 1.033188819885254, + "learning_rate": 6.13357446832172e-06, + "loss": 0.706, + "step": 8647 + }, + { + "epoch": 0.4444444444444444, + "grad_norm": 1.0633041858673096, + "learning_rate": 6.132763869478484e-06, + "loss": 0.7301, + "step": 8648 + }, + { + "epoch": 0.44449583718778907, + "grad_norm": 1.0413168668746948, + "learning_rate": 6.131953239251037e-06, + "loss": 0.7952, + "step": 8649 + }, + { + "epoch": 0.4445472299311337, + "grad_norm": 1.0456479787826538, + "learning_rate": 6.131142577661836e-06, + "loss": 0.7251, + "step": 8650 + }, + { + "epoch": 0.44459862267447836, + "grad_norm": 1.0407538414001465, + "learning_rate": 6.130331884733343e-06, + "loss": 0.6853, + "step": 8651 + }, + { + "epoch": 0.444650015417823, + "grad_norm": 1.0133893489837646, + "learning_rate": 6.129521160488018e-06, + "loss": 0.742, + "step": 8652 + }, + { + "epoch": 0.44470140816116766, + "grad_norm": 1.121391773223877, + "learning_rate": 6.1287104049483215e-06, + "loss": 0.7524, + "step": 8653 + }, + { + "epoch": 0.4447528009045123, + "grad_norm": 1.0003796815872192, + "learning_rate": 6.12789961813672e-06, + "loss": 0.7512, + "step": 8654 + }, + { + "epoch": 0.44480419364785695, + "grad_norm": 1.0605747699737549, + "learning_rate": 6.127088800075673e-06, + "loss": 0.7475, + "step": 8655 + }, + { + "epoch": 0.44485558639120154, + "grad_norm": 1.07450270652771, + "learning_rate": 6.126277950787647e-06, + "loss": 0.7317, + "step": 8656 + }, + { + "epoch": 0.4449069791345462, + "grad_norm": 1.0545673370361328, + "learning_rate": 6.125467070295108e-06, + "loss": 0.7602, + "step": 8657 + }, + { + "epoch": 0.44495837187789083, + "grad_norm": 1.1869577169418335, + "learning_rate": 6.12465615862052e-06, + "loss": 0.7588, + "step": 8658 + }, + { + "epoch": 0.4450097646212355, + "grad_norm": 1.0719866752624512, + "learning_rate": 6.123845215786351e-06, + "loss": 0.7491, + "step": 8659 + }, + { + "epoch": 0.44506115736458013, + "grad_norm": 1.1084269285202026, + "learning_rate": 6.123034241815069e-06, + "loss": 0.7077, + "step": 8660 + }, + { + "epoch": 0.4451125501079248, + "grad_norm": 1.075262188911438, + "learning_rate": 6.122223236729141e-06, + "loss": 0.745, + "step": 8661 + }, + { + "epoch": 0.4451639428512694, + "grad_norm": 0.7542333602905273, + "learning_rate": 6.12141220055104e-06, + "loss": 0.6585, + "step": 8662 + }, + { + "epoch": 0.445215335594614, + "grad_norm": 1.024070382118225, + "learning_rate": 6.1206011333032345e-06, + "loss": 0.7133, + "step": 8663 + }, + { + "epoch": 0.44526672833795866, + "grad_norm": 1.105056881904602, + "learning_rate": 6.119790035008195e-06, + "loss": 0.7553, + "step": 8664 + }, + { + "epoch": 0.4453181210813033, + "grad_norm": 1.0491613149642944, + "learning_rate": 6.1189789056883975e-06, + "loss": 0.7167, + "step": 8665 + }, + { + "epoch": 0.44536951382464796, + "grad_norm": 1.047937273979187, + "learning_rate": 6.11816774536631e-06, + "loss": 0.7435, + "step": 8666 + }, + { + "epoch": 0.4454209065679926, + "grad_norm": 0.7605351805686951, + "learning_rate": 6.117356554064409e-06, + "loss": 0.699, + "step": 8667 + }, + { + "epoch": 0.44547229931133725, + "grad_norm": 1.1410140991210938, + "learning_rate": 6.116545331805169e-06, + "loss": 0.7408, + "step": 8668 + }, + { + "epoch": 0.4455236920546819, + "grad_norm": 1.062819004058838, + "learning_rate": 6.1157340786110656e-06, + "loss": 0.7621, + "step": 8669 + }, + { + "epoch": 0.44557508479802654, + "grad_norm": 0.980811595916748, + "learning_rate": 6.114922794504577e-06, + "loss": 0.7616, + "step": 8670 + }, + { + "epoch": 0.44562647754137114, + "grad_norm": 0.9879646301269531, + "learning_rate": 6.114111479508176e-06, + "loss": 0.7511, + "step": 8671 + }, + { + "epoch": 0.4456778702847158, + "grad_norm": 1.1515069007873535, + "learning_rate": 6.113300133644343e-06, + "loss": 0.7127, + "step": 8672 + }, + { + "epoch": 0.44572926302806043, + "grad_norm": 0.7640554904937744, + "learning_rate": 6.11248875693556e-06, + "loss": 0.6833, + "step": 8673 + }, + { + "epoch": 0.4457806557714051, + "grad_norm": 1.026353359222412, + "learning_rate": 6.111677349404305e-06, + "loss": 0.7026, + "step": 8674 + }, + { + "epoch": 0.4458320485147497, + "grad_norm": 1.098001480102539, + "learning_rate": 6.1108659110730565e-06, + "loss": 0.7436, + "step": 8675 + }, + { + "epoch": 0.44588344125809437, + "grad_norm": 0.7734015583992004, + "learning_rate": 6.110054441964298e-06, + "loss": 0.7093, + "step": 8676 + }, + { + "epoch": 0.445934834001439, + "grad_norm": 1.0277550220489502, + "learning_rate": 6.109242942100513e-06, + "loss": 0.7275, + "step": 8677 + }, + { + "epoch": 0.4459862267447836, + "grad_norm": 1.0378984212875366, + "learning_rate": 6.108431411504183e-06, + "loss": 0.7506, + "step": 8678 + }, + { + "epoch": 0.44603761948812826, + "grad_norm": 1.070737361907959, + "learning_rate": 6.107619850197794e-06, + "loss": 0.6873, + "step": 8679 + }, + { + "epoch": 0.4460890122314729, + "grad_norm": 1.014614224433899, + "learning_rate": 6.1068082582038264e-06, + "loss": 0.7579, + "step": 8680 + }, + { + "epoch": 0.44614040497481755, + "grad_norm": 1.052404761314392, + "learning_rate": 6.105996635544772e-06, + "loss": 0.7104, + "step": 8681 + }, + { + "epoch": 0.4461917977181622, + "grad_norm": 1.0825116634368896, + "learning_rate": 6.105184982243115e-06, + "loss": 0.7538, + "step": 8682 + }, + { + "epoch": 0.44624319046150684, + "grad_norm": 1.0540229082107544, + "learning_rate": 6.104373298321342e-06, + "loss": 0.7316, + "step": 8683 + }, + { + "epoch": 0.4462945832048515, + "grad_norm": 1.0700398683547974, + "learning_rate": 6.103561583801943e-06, + "loss": 0.7674, + "step": 8684 + }, + { + "epoch": 0.44634597594819614, + "grad_norm": 1.041287899017334, + "learning_rate": 6.102749838707407e-06, + "loss": 0.718, + "step": 8685 + }, + { + "epoch": 0.44639736869154073, + "grad_norm": 1.0677050352096558, + "learning_rate": 6.1019380630602246e-06, + "loss": 0.7298, + "step": 8686 + }, + { + "epoch": 0.4464487614348854, + "grad_norm": 1.1162304878234863, + "learning_rate": 6.101126256882885e-06, + "loss": 0.7197, + "step": 8687 + }, + { + "epoch": 0.44650015417823, + "grad_norm": 1.1728863716125488, + "learning_rate": 6.100314420197879e-06, + "loss": 0.752, + "step": 8688 + }, + { + "epoch": 0.44655154692157467, + "grad_norm": 1.177954912185669, + "learning_rate": 6.099502553027704e-06, + "loss": 0.7137, + "step": 8689 + }, + { + "epoch": 0.4466029396649193, + "grad_norm": 1.1424295902252197, + "learning_rate": 6.09869065539485e-06, + "loss": 0.7815, + "step": 8690 + }, + { + "epoch": 0.44665433240826397, + "grad_norm": 0.7147906422615051, + "learning_rate": 6.097878727321811e-06, + "loss": 0.6552, + "step": 8691 + }, + { + "epoch": 0.4467057251516086, + "grad_norm": 1.055344820022583, + "learning_rate": 6.097066768831083e-06, + "loss": 0.8121, + "step": 8692 + }, + { + "epoch": 0.44675711789495326, + "grad_norm": 1.1017271280288696, + "learning_rate": 6.096254779945161e-06, + "loss": 0.7953, + "step": 8693 + }, + { + "epoch": 0.44680851063829785, + "grad_norm": 1.0747833251953125, + "learning_rate": 6.095442760686545e-06, + "loss": 0.7586, + "step": 8694 + }, + { + "epoch": 0.4468599033816425, + "grad_norm": 1.033912181854248, + "learning_rate": 6.0946307110777316e-06, + "loss": 0.7193, + "step": 8695 + }, + { + "epoch": 0.44691129612498715, + "grad_norm": 1.0123625993728638, + "learning_rate": 6.093818631141218e-06, + "loss": 0.7526, + "step": 8696 + }, + { + "epoch": 0.4469626888683318, + "grad_norm": 1.0873355865478516, + "learning_rate": 6.093006520899502e-06, + "loss": 0.7455, + "step": 8697 + }, + { + "epoch": 0.44701408161167644, + "grad_norm": 1.0881823301315308, + "learning_rate": 6.0921943803750885e-06, + "loss": 0.7591, + "step": 8698 + }, + { + "epoch": 0.4470654743550211, + "grad_norm": 1.1538639068603516, + "learning_rate": 6.091382209590474e-06, + "loss": 0.793, + "step": 8699 + }, + { + "epoch": 0.44711686709836573, + "grad_norm": 0.9964667558670044, + "learning_rate": 6.090570008568164e-06, + "loss": 0.719, + "step": 8700 + }, + { + "epoch": 0.4471682598417103, + "grad_norm": 1.062361478805542, + "learning_rate": 6.089757777330658e-06, + "loss": 0.6719, + "step": 8701 + }, + { + "epoch": 0.44721965258505497, + "grad_norm": 1.140395164489746, + "learning_rate": 6.088945515900461e-06, + "loss": 0.7699, + "step": 8702 + }, + { + "epoch": 0.4472710453283996, + "grad_norm": 0.7888919711112976, + "learning_rate": 6.088133224300079e-06, + "loss": 0.6699, + "step": 8703 + }, + { + "epoch": 0.44732243807174427, + "grad_norm": 1.0718834400177002, + "learning_rate": 6.087320902552014e-06, + "loss": 0.7589, + "step": 8704 + }, + { + "epoch": 0.4473738308150889, + "grad_norm": 1.0600528717041016, + "learning_rate": 6.086508550678776e-06, + "loss": 0.7509, + "step": 8705 + }, + { + "epoch": 0.44742522355843356, + "grad_norm": 0.8602684736251831, + "learning_rate": 6.085696168702869e-06, + "loss": 0.6721, + "step": 8706 + }, + { + "epoch": 0.4474766163017782, + "grad_norm": 1.0821882486343384, + "learning_rate": 6.084883756646801e-06, + "loss": 0.7797, + "step": 8707 + }, + { + "epoch": 0.44752800904512285, + "grad_norm": 1.0473047494888306, + "learning_rate": 6.084071314533082e-06, + "loss": 0.7213, + "step": 8708 + }, + { + "epoch": 0.44757940178846745, + "grad_norm": 1.1313908100128174, + "learning_rate": 6.0832588423842195e-06, + "loss": 0.7358, + "step": 8709 + }, + { + "epoch": 0.4476307945318121, + "grad_norm": 1.0331586599349976, + "learning_rate": 6.082446340222726e-06, + "loss": 0.7557, + "step": 8710 + }, + { + "epoch": 0.44768218727515674, + "grad_norm": 1.075469970703125, + "learning_rate": 6.08163380807111e-06, + "loss": 0.7549, + "step": 8711 + }, + { + "epoch": 0.4477335800185014, + "grad_norm": 1.1163846254348755, + "learning_rate": 6.0808212459518865e-06, + "loss": 0.7633, + "step": 8712 + }, + { + "epoch": 0.44778497276184603, + "grad_norm": 1.113763451576233, + "learning_rate": 6.080008653887566e-06, + "loss": 0.6874, + "step": 8713 + }, + { + "epoch": 0.4478363655051907, + "grad_norm": 0.9843372702598572, + "learning_rate": 6.079196031900663e-06, + "loss": 0.7293, + "step": 8714 + }, + { + "epoch": 0.44788775824853533, + "grad_norm": 1.1348650455474854, + "learning_rate": 6.078383380013689e-06, + "loss": 0.7331, + "step": 8715 + }, + { + "epoch": 0.4479391509918799, + "grad_norm": 1.012511134147644, + "learning_rate": 6.077570698249164e-06, + "loss": 0.7359, + "step": 8716 + }, + { + "epoch": 0.44799054373522457, + "grad_norm": 1.0930002927780151, + "learning_rate": 6.076757986629602e-06, + "loss": 0.7628, + "step": 8717 + }, + { + "epoch": 0.4480419364785692, + "grad_norm": 0.7513021230697632, + "learning_rate": 6.075945245177519e-06, + "loss": 0.6938, + "step": 8718 + }, + { + "epoch": 0.44809332922191386, + "grad_norm": 1.1562069654464722, + "learning_rate": 6.075132473915435e-06, + "loss": 0.7635, + "step": 8719 + }, + { + "epoch": 0.4481447219652585, + "grad_norm": 0.8543563485145569, + "learning_rate": 6.074319672865865e-06, + "loss": 0.6779, + "step": 8720 + }, + { + "epoch": 0.44819611470860315, + "grad_norm": 1.1893200874328613, + "learning_rate": 6.073506842051331e-06, + "loss": 0.7066, + "step": 8721 + }, + { + "epoch": 0.4482475074519478, + "grad_norm": 1.08341383934021, + "learning_rate": 6.072693981494352e-06, + "loss": 0.6879, + "step": 8722 + }, + { + "epoch": 0.44829890019529245, + "grad_norm": 1.070870041847229, + "learning_rate": 6.07188109121745e-06, + "loss": 0.725, + "step": 8723 + }, + { + "epoch": 0.44835029293863704, + "grad_norm": 0.7500124573707581, + "learning_rate": 6.071068171243146e-06, + "loss": 0.6631, + "step": 8724 + }, + { + "epoch": 0.4484016856819817, + "grad_norm": 1.0653276443481445, + "learning_rate": 6.070255221593963e-06, + "loss": 0.7522, + "step": 8725 + }, + { + "epoch": 0.44845307842532633, + "grad_norm": 1.1105051040649414, + "learning_rate": 6.069442242292425e-06, + "loss": 0.718, + "step": 8726 + }, + { + "epoch": 0.448504471168671, + "grad_norm": 1.032767415046692, + "learning_rate": 6.0686292333610565e-06, + "loss": 0.7544, + "step": 8727 + }, + { + "epoch": 0.44855586391201563, + "grad_norm": 1.1087244749069214, + "learning_rate": 6.06781619482238e-06, + "loss": 0.7847, + "step": 8728 + }, + { + "epoch": 0.4486072566553603, + "grad_norm": 1.1503602266311646, + "learning_rate": 6.067003126698925e-06, + "loss": 0.7618, + "step": 8729 + }, + { + "epoch": 0.4486586493987049, + "grad_norm": 1.0570268630981445, + "learning_rate": 6.066190029013217e-06, + "loss": 0.753, + "step": 8730 + }, + { + "epoch": 0.44871004214204957, + "grad_norm": 1.0824902057647705, + "learning_rate": 6.065376901787781e-06, + "loss": 0.7894, + "step": 8731 + }, + { + "epoch": 0.44876143488539416, + "grad_norm": 1.0582715272903442, + "learning_rate": 6.064563745045149e-06, + "loss": 0.7367, + "step": 8732 + }, + { + "epoch": 0.4488128276287388, + "grad_norm": 1.2017083168029785, + "learning_rate": 6.063750558807848e-06, + "loss": 0.7286, + "step": 8733 + }, + { + "epoch": 0.44886422037208346, + "grad_norm": 1.1653245687484741, + "learning_rate": 6.06293734309841e-06, + "loss": 0.8064, + "step": 8734 + }, + { + "epoch": 0.4489156131154281, + "grad_norm": 1.0741320848464966, + "learning_rate": 6.062124097939363e-06, + "loss": 0.708, + "step": 8735 + }, + { + "epoch": 0.44896700585877275, + "grad_norm": 1.0196375846862793, + "learning_rate": 6.061310823353242e-06, + "loss": 0.6681, + "step": 8736 + }, + { + "epoch": 0.4490183986021174, + "grad_norm": 0.7248873114585876, + "learning_rate": 6.060497519362578e-06, + "loss": 0.6599, + "step": 8737 + }, + { + "epoch": 0.44906979134546204, + "grad_norm": 1.0733503103256226, + "learning_rate": 6.059684185989905e-06, + "loss": 0.7734, + "step": 8738 + }, + { + "epoch": 0.44912118408880664, + "grad_norm": 1.1071665287017822, + "learning_rate": 6.058870823257753e-06, + "loss": 0.8375, + "step": 8739 + }, + { + "epoch": 0.4491725768321513, + "grad_norm": 1.0948023796081543, + "learning_rate": 6.058057431188663e-06, + "loss": 0.7285, + "step": 8740 + }, + { + "epoch": 0.44922396957549593, + "grad_norm": 1.1065912246704102, + "learning_rate": 6.057244009805167e-06, + "loss": 0.7171, + "step": 8741 + }, + { + "epoch": 0.4492753623188406, + "grad_norm": 1.050552487373352, + "learning_rate": 6.0564305591298024e-06, + "loss": 0.7294, + "step": 8742 + }, + { + "epoch": 0.4493267550621852, + "grad_norm": 1.0280983448028564, + "learning_rate": 6.055617079185105e-06, + "loss": 0.7197, + "step": 8743 + }, + { + "epoch": 0.44937814780552987, + "grad_norm": 1.1325042247772217, + "learning_rate": 6.054803569993617e-06, + "loss": 0.6921, + "step": 8744 + }, + { + "epoch": 0.4494295405488745, + "grad_norm": 1.0866978168487549, + "learning_rate": 6.053990031577875e-06, + "loss": 0.8101, + "step": 8745 + }, + { + "epoch": 0.44948093329221916, + "grad_norm": 0.7294654846191406, + "learning_rate": 6.053176463960417e-06, + "loss": 0.6333, + "step": 8746 + }, + { + "epoch": 0.44953232603556376, + "grad_norm": 1.0454052686691284, + "learning_rate": 6.0523628671637865e-06, + "loss": 0.7152, + "step": 8747 + }, + { + "epoch": 0.4495837187789084, + "grad_norm": 0.7002802491188049, + "learning_rate": 6.051549241210525e-06, + "loss": 0.6329, + "step": 8748 + }, + { + "epoch": 0.44963511152225305, + "grad_norm": 0.7702453136444092, + "learning_rate": 6.050735586123171e-06, + "loss": 0.6356, + "step": 8749 + }, + { + "epoch": 0.4496865042655977, + "grad_norm": 1.0647058486938477, + "learning_rate": 6.049921901924271e-06, + "loss": 0.6905, + "step": 8750 + }, + { + "epoch": 0.44973789700894234, + "grad_norm": 0.7184454798698425, + "learning_rate": 6.04910818863637e-06, + "loss": 0.641, + "step": 8751 + }, + { + "epoch": 0.449789289752287, + "grad_norm": 1.13466215133667, + "learning_rate": 6.048294446282008e-06, + "loss": 0.7849, + "step": 8752 + }, + { + "epoch": 0.44984068249563164, + "grad_norm": 1.0794284343719482, + "learning_rate": 6.0474806748837325e-06, + "loss": 0.703, + "step": 8753 + }, + { + "epoch": 0.44989207523897623, + "grad_norm": 1.0722686052322388, + "learning_rate": 6.046666874464091e-06, + "loss": 0.7762, + "step": 8754 + }, + { + "epoch": 0.4499434679823209, + "grad_norm": 1.0447022914886475, + "learning_rate": 6.045853045045631e-06, + "loss": 0.8023, + "step": 8755 + }, + { + "epoch": 0.4499948607256655, + "grad_norm": 1.0594232082366943, + "learning_rate": 6.0450391866508984e-06, + "loss": 0.7303, + "step": 8756 + }, + { + "epoch": 0.45004625346901017, + "grad_norm": 1.0883451700210571, + "learning_rate": 6.044225299302442e-06, + "loss": 0.7481, + "step": 8757 + }, + { + "epoch": 0.4500976462123548, + "grad_norm": 0.7400407791137695, + "learning_rate": 6.043411383022812e-06, + "loss": 0.7309, + "step": 8758 + }, + { + "epoch": 0.45014903895569947, + "grad_norm": 1.2627620697021484, + "learning_rate": 6.04259743783456e-06, + "loss": 0.7637, + "step": 8759 + }, + { + "epoch": 0.4502004316990441, + "grad_norm": 1.0771883726119995, + "learning_rate": 6.041783463760233e-06, + "loss": 0.7358, + "step": 8760 + }, + { + "epoch": 0.45025182444238876, + "grad_norm": 0.8023768663406372, + "learning_rate": 6.040969460822387e-06, + "loss": 0.6864, + "step": 8761 + }, + { + "epoch": 0.45030321718573335, + "grad_norm": 0.7750018239021301, + "learning_rate": 6.0401554290435724e-06, + "loss": 0.6934, + "step": 8762 + }, + { + "epoch": 0.450354609929078, + "grad_norm": 1.0368411540985107, + "learning_rate": 6.039341368446344e-06, + "loss": 0.6895, + "step": 8763 + }, + { + "epoch": 0.45040600267242265, + "grad_norm": 1.1251578330993652, + "learning_rate": 6.038527279053255e-06, + "loss": 0.7877, + "step": 8764 + }, + { + "epoch": 0.4504573954157673, + "grad_norm": 1.0740529298782349, + "learning_rate": 6.03771316088686e-06, + "loss": 0.7752, + "step": 8765 + }, + { + "epoch": 0.45050878815911194, + "grad_norm": 1.1583425998687744, + "learning_rate": 6.036899013969717e-06, + "loss": 0.7562, + "step": 8766 + }, + { + "epoch": 0.4505601809024566, + "grad_norm": 0.7255450487136841, + "learning_rate": 6.0360848383243805e-06, + "loss": 0.7217, + "step": 8767 + }, + { + "epoch": 0.45061157364580123, + "grad_norm": 1.0998930931091309, + "learning_rate": 6.035270633973409e-06, + "loss": 0.7522, + "step": 8768 + }, + { + "epoch": 0.4506629663891459, + "grad_norm": 0.8069698810577393, + "learning_rate": 6.034456400939361e-06, + "loss": 0.6706, + "step": 8769 + }, + { + "epoch": 0.45071435913249047, + "grad_norm": 1.0711617469787598, + "learning_rate": 6.033642139244794e-06, + "loss": 0.708, + "step": 8770 + }, + { + "epoch": 0.4507657518758351, + "grad_norm": 1.0387017726898193, + "learning_rate": 6.032827848912271e-06, + "loss": 0.7344, + "step": 8771 + }, + { + "epoch": 0.45081714461917977, + "grad_norm": 1.043518304824829, + "learning_rate": 6.032013529964349e-06, + "loss": 0.7285, + "step": 8772 + }, + { + "epoch": 0.4508685373625244, + "grad_norm": 1.0986995697021484, + "learning_rate": 6.031199182423591e-06, + "loss": 0.7518, + "step": 8773 + }, + { + "epoch": 0.45091993010586906, + "grad_norm": 0.6784643530845642, + "learning_rate": 6.0303848063125594e-06, + "loss": 0.6686, + "step": 8774 + }, + { + "epoch": 0.4509713228492137, + "grad_norm": 1.0071924924850464, + "learning_rate": 6.029570401653817e-06, + "loss": 0.6932, + "step": 8775 + }, + { + "epoch": 0.45102271559255835, + "grad_norm": 1.1687572002410889, + "learning_rate": 6.0287559684699255e-06, + "loss": 0.7702, + "step": 8776 + }, + { + "epoch": 0.45107410833590295, + "grad_norm": 1.1235078573226929, + "learning_rate": 6.0279415067834546e-06, + "loss": 0.8043, + "step": 8777 + }, + { + "epoch": 0.4511255010792476, + "grad_norm": 1.0233426094055176, + "learning_rate": 6.027127016616965e-06, + "loss": 0.755, + "step": 8778 + }, + { + "epoch": 0.45117689382259224, + "grad_norm": 1.067035436630249, + "learning_rate": 6.026312497993025e-06, + "loss": 0.683, + "step": 8779 + }, + { + "epoch": 0.4512282865659369, + "grad_norm": 1.0684348344802856, + "learning_rate": 6.0254979509342025e-06, + "loss": 0.7165, + "step": 8780 + }, + { + "epoch": 0.45127967930928153, + "grad_norm": 0.8247617483139038, + "learning_rate": 6.0246833754630615e-06, + "loss": 0.6698, + "step": 8781 + }, + { + "epoch": 0.4513310720526262, + "grad_norm": 1.0869888067245483, + "learning_rate": 6.023868771602174e-06, + "loss": 0.7054, + "step": 8782 + }, + { + "epoch": 0.45138246479597083, + "grad_norm": 1.0583577156066895, + "learning_rate": 6.023054139374107e-06, + "loss": 0.7294, + "step": 8783 + }, + { + "epoch": 0.4514338575393155, + "grad_norm": 0.729859471321106, + "learning_rate": 6.022239478801433e-06, + "loss": 0.6898, + "step": 8784 + }, + { + "epoch": 0.45148525028266007, + "grad_norm": 1.0447458028793335, + "learning_rate": 6.0214247899067205e-06, + "loss": 0.7493, + "step": 8785 + }, + { + "epoch": 0.4515366430260047, + "grad_norm": 1.1456223726272583, + "learning_rate": 6.020610072712542e-06, + "loss": 0.7456, + "step": 8786 + }, + { + "epoch": 0.45158803576934936, + "grad_norm": 1.0732054710388184, + "learning_rate": 6.019795327241471e-06, + "loss": 0.7066, + "step": 8787 + }, + { + "epoch": 0.451639428512694, + "grad_norm": 0.8310574293136597, + "learning_rate": 6.018980553516081e-06, + "loss": 0.6568, + "step": 8788 + }, + { + "epoch": 0.45169082125603865, + "grad_norm": 1.0504428148269653, + "learning_rate": 6.018165751558943e-06, + "loss": 0.7152, + "step": 8789 + }, + { + "epoch": 0.4517422139993833, + "grad_norm": 0.9774937629699707, + "learning_rate": 6.017350921392635e-06, + "loss": 0.695, + "step": 8790 + }, + { + "epoch": 0.45179360674272795, + "grad_norm": 0.8530800342559814, + "learning_rate": 6.016536063039731e-06, + "loss": 0.6731, + "step": 8791 + }, + { + "epoch": 0.45184499948607254, + "grad_norm": 0.9511625170707703, + "learning_rate": 6.015721176522806e-06, + "loss": 0.7251, + "step": 8792 + }, + { + "epoch": 0.4518963922294172, + "grad_norm": 1.0526080131530762, + "learning_rate": 6.0149062618644415e-06, + "loss": 0.6722, + "step": 8793 + }, + { + "epoch": 0.45194778497276183, + "grad_norm": 0.9751661419868469, + "learning_rate": 6.014091319087211e-06, + "loss": 0.678, + "step": 8794 + }, + { + "epoch": 0.4519991777161065, + "grad_norm": 1.1386967897415161, + "learning_rate": 6.013276348213694e-06, + "loss": 0.7687, + "step": 8795 + }, + { + "epoch": 0.45205057045945113, + "grad_norm": 0.778996467590332, + "learning_rate": 6.012461349266474e-06, + "loss": 0.6821, + "step": 8796 + }, + { + "epoch": 0.4521019632027958, + "grad_norm": 0.8433127999305725, + "learning_rate": 6.011646322268127e-06, + "loss": 0.6709, + "step": 8797 + }, + { + "epoch": 0.4521533559461404, + "grad_norm": 1.1352248191833496, + "learning_rate": 6.010831267241235e-06, + "loss": 0.6916, + "step": 8798 + }, + { + "epoch": 0.45220474868948507, + "grad_norm": 1.0445233583450317, + "learning_rate": 6.010016184208381e-06, + "loss": 0.6579, + "step": 8799 + }, + { + "epoch": 0.45225614143282966, + "grad_norm": 0.7776066064834595, + "learning_rate": 6.0092010731921435e-06, + "loss": 0.6662, + "step": 8800 + }, + { + "epoch": 0.4523075341761743, + "grad_norm": 0.6743345856666565, + "learning_rate": 6.008385934215112e-06, + "loss": 0.6304, + "step": 8801 + }, + { + "epoch": 0.45235892691951896, + "grad_norm": 1.046325922012329, + "learning_rate": 6.007570767299866e-06, + "loss": 0.7266, + "step": 8802 + }, + { + "epoch": 0.4524103196628636, + "grad_norm": 1.0551177263259888, + "learning_rate": 6.006755572468993e-06, + "loss": 0.6636, + "step": 8803 + }, + { + "epoch": 0.45246171240620825, + "grad_norm": 1.0989246368408203, + "learning_rate": 6.005940349745077e-06, + "loss": 0.7148, + "step": 8804 + }, + { + "epoch": 0.4525131051495529, + "grad_norm": 1.0230375528335571, + "learning_rate": 6.005125099150705e-06, + "loss": 0.7244, + "step": 8805 + }, + { + "epoch": 0.45256449789289754, + "grad_norm": 1.1634154319763184, + "learning_rate": 6.004309820708466e-06, + "loss": 0.7639, + "step": 8806 + }, + { + "epoch": 0.45261589063624214, + "grad_norm": 1.0131864547729492, + "learning_rate": 6.003494514440946e-06, + "loss": 0.6997, + "step": 8807 + }, + { + "epoch": 0.4526672833795868, + "grad_norm": 1.046074390411377, + "learning_rate": 6.002679180370733e-06, + "loss": 0.7286, + "step": 8808 + }, + { + "epoch": 0.45271867612293143, + "grad_norm": 1.0035896301269531, + "learning_rate": 6.0018638185204195e-06, + "loss": 0.7155, + "step": 8809 + }, + { + "epoch": 0.4527700688662761, + "grad_norm": 0.8956099152565002, + "learning_rate": 6.001048428912591e-06, + "loss": 0.6778, + "step": 8810 + }, + { + "epoch": 0.4528214616096207, + "grad_norm": 1.0704295635223389, + "learning_rate": 6.000233011569845e-06, + "loss": 0.6985, + "step": 8811 + }, + { + "epoch": 0.45287285435296537, + "grad_norm": 1.0480855703353882, + "learning_rate": 5.999417566514768e-06, + "loss": 0.7651, + "step": 8812 + }, + { + "epoch": 0.45292424709631, + "grad_norm": 1.1389119625091553, + "learning_rate": 5.998602093769955e-06, + "loss": 0.714, + "step": 8813 + }, + { + "epoch": 0.45297563983965466, + "grad_norm": 1.1206594705581665, + "learning_rate": 5.997786593358e-06, + "loss": 0.7262, + "step": 8814 + }, + { + "epoch": 0.45302703258299926, + "grad_norm": 1.0106931924819946, + "learning_rate": 5.996971065301494e-06, + "loss": 0.7184, + "step": 8815 + }, + { + "epoch": 0.4530784253263439, + "grad_norm": 1.01264488697052, + "learning_rate": 5.996155509623034e-06, + "loss": 0.7316, + "step": 8816 + }, + { + "epoch": 0.45312981806968855, + "grad_norm": 1.0353342294692993, + "learning_rate": 5.995339926345219e-06, + "loss": 0.6825, + "step": 8817 + }, + { + "epoch": 0.4531812108130332, + "grad_norm": 1.0812112092971802, + "learning_rate": 5.994524315490639e-06, + "loss": 0.8164, + "step": 8818 + }, + { + "epoch": 0.45323260355637784, + "grad_norm": 1.0570074319839478, + "learning_rate": 5.993708677081895e-06, + "loss": 0.7224, + "step": 8819 + }, + { + "epoch": 0.4532839962997225, + "grad_norm": 1.236261010169983, + "learning_rate": 5.992893011141585e-06, + "loss": 0.7555, + "step": 8820 + }, + { + "epoch": 0.45333538904306714, + "grad_norm": 1.1103880405426025, + "learning_rate": 5.992077317692307e-06, + "loss": 0.8053, + "step": 8821 + }, + { + "epoch": 0.4533867817864118, + "grad_norm": 1.060664415359497, + "learning_rate": 5.991261596756661e-06, + "loss": 0.7893, + "step": 8822 + }, + { + "epoch": 0.4534381745297564, + "grad_norm": 1.1645253896713257, + "learning_rate": 5.990445848357247e-06, + "loss": 0.6486, + "step": 8823 + }, + { + "epoch": 0.453489567273101, + "grad_norm": 1.1562540531158447, + "learning_rate": 5.989630072516665e-06, + "loss": 0.7506, + "step": 8824 + }, + { + "epoch": 0.45354096001644567, + "grad_norm": 1.0875499248504639, + "learning_rate": 5.988814269257517e-06, + "loss": 0.6898, + "step": 8825 + }, + { + "epoch": 0.4535923527597903, + "grad_norm": 1.20587956905365, + "learning_rate": 5.987998438602406e-06, + "loss": 0.7269, + "step": 8826 + }, + { + "epoch": 0.45364374550313497, + "grad_norm": 1.0175936222076416, + "learning_rate": 5.987182580573937e-06, + "loss": 0.7027, + "step": 8827 + }, + { + "epoch": 0.4536951382464796, + "grad_norm": 1.021414875984192, + "learning_rate": 5.986366695194713e-06, + "loss": 0.7174, + "step": 8828 + }, + { + "epoch": 0.45374653098982426, + "grad_norm": 1.1210459470748901, + "learning_rate": 5.985550782487336e-06, + "loss": 0.7505, + "step": 8829 + }, + { + "epoch": 0.45379792373316885, + "grad_norm": 1.2500932216644287, + "learning_rate": 5.984734842474417e-06, + "loss": 0.7233, + "step": 8830 + }, + { + "epoch": 0.4538493164765135, + "grad_norm": 0.8410046696662903, + "learning_rate": 5.9839188751785575e-06, + "loss": 0.6741, + "step": 8831 + }, + { + "epoch": 0.45390070921985815, + "grad_norm": 1.081351399421692, + "learning_rate": 5.983102880622366e-06, + "loss": 0.8385, + "step": 8832 + }, + { + "epoch": 0.4539521019632028, + "grad_norm": 1.076521396636963, + "learning_rate": 5.982286858828452e-06, + "loss": 0.7647, + "step": 8833 + }, + { + "epoch": 0.45400349470654744, + "grad_norm": 1.0220509767532349, + "learning_rate": 5.981470809819421e-06, + "loss": 0.7243, + "step": 8834 + }, + { + "epoch": 0.4540548874498921, + "grad_norm": 1.1124690771102905, + "learning_rate": 5.980654733617885e-06, + "loss": 0.7301, + "step": 8835 + }, + { + "epoch": 0.45410628019323673, + "grad_norm": 1.1178840398788452, + "learning_rate": 5.979838630246454e-06, + "loss": 0.7985, + "step": 8836 + }, + { + "epoch": 0.4541576729365814, + "grad_norm": 1.1785459518432617, + "learning_rate": 5.979022499727737e-06, + "loss": 0.8169, + "step": 8837 + }, + { + "epoch": 0.45420906567992597, + "grad_norm": 1.0918692350387573, + "learning_rate": 5.978206342084347e-06, + "loss": 0.7215, + "step": 8838 + }, + { + "epoch": 0.4542604584232706, + "grad_norm": 0.6651068329811096, + "learning_rate": 5.977390157338897e-06, + "loss": 0.6618, + "step": 8839 + }, + { + "epoch": 0.45431185116661527, + "grad_norm": 1.1593281030654907, + "learning_rate": 5.9765739455139986e-06, + "loss": 0.7322, + "step": 8840 + }, + { + "epoch": 0.4543632439099599, + "grad_norm": 0.9651076793670654, + "learning_rate": 5.975757706632266e-06, + "loss": 0.7421, + "step": 8841 + }, + { + "epoch": 0.45441463665330456, + "grad_norm": 1.121661901473999, + "learning_rate": 5.974941440716314e-06, + "loss": 0.8054, + "step": 8842 + }, + { + "epoch": 0.4544660293966492, + "grad_norm": 1.0248355865478516, + "learning_rate": 5.974125147788759e-06, + "loss": 0.6987, + "step": 8843 + }, + { + "epoch": 0.45451742213999385, + "grad_norm": 1.1309994459152222, + "learning_rate": 5.973308827872216e-06, + "loss": 0.7698, + "step": 8844 + }, + { + "epoch": 0.45456881488333845, + "grad_norm": 1.0594685077667236, + "learning_rate": 5.9724924809893e-06, + "loss": 0.7437, + "step": 8845 + }, + { + "epoch": 0.4546202076266831, + "grad_norm": 1.0823743343353271, + "learning_rate": 5.971676107162632e-06, + "loss": 0.7885, + "step": 8846 + }, + { + "epoch": 0.45467160037002774, + "grad_norm": 1.117125391960144, + "learning_rate": 5.97085970641483e-06, + "loss": 0.7388, + "step": 8847 + }, + { + "epoch": 0.4547229931133724, + "grad_norm": 0.8462509512901306, + "learning_rate": 5.9700432787685105e-06, + "loss": 0.7087, + "step": 8848 + }, + { + "epoch": 0.45477438585671703, + "grad_norm": 0.9970471858978271, + "learning_rate": 5.969226824246295e-06, + "loss": 0.7647, + "step": 8849 + }, + { + "epoch": 0.4548257786000617, + "grad_norm": 1.0255184173583984, + "learning_rate": 5.968410342870804e-06, + "loss": 0.7518, + "step": 8850 + }, + { + "epoch": 0.45487717134340633, + "grad_norm": 1.0873438119888306, + "learning_rate": 5.96759383466466e-06, + "loss": 0.7519, + "step": 8851 + }, + { + "epoch": 0.454928564086751, + "grad_norm": 1.0741733312606812, + "learning_rate": 5.966777299650483e-06, + "loss": 0.8132, + "step": 8852 + }, + { + "epoch": 0.45497995683009557, + "grad_norm": 0.7321688532829285, + "learning_rate": 5.965960737850897e-06, + "loss": 0.6752, + "step": 8853 + }, + { + "epoch": 0.4550313495734402, + "grad_norm": 1.0968360900878906, + "learning_rate": 5.965144149288525e-06, + "loss": 0.724, + "step": 8854 + }, + { + "epoch": 0.45508274231678486, + "grad_norm": 1.0788156986236572, + "learning_rate": 5.964327533985991e-06, + "loss": 0.8018, + "step": 8855 + }, + { + "epoch": 0.4551341350601295, + "grad_norm": 1.0266183614730835, + "learning_rate": 5.96351089196592e-06, + "loss": 0.7472, + "step": 8856 + }, + { + "epoch": 0.45518552780347415, + "grad_norm": 1.1603820323944092, + "learning_rate": 5.96269422325094e-06, + "loss": 0.7888, + "step": 8857 + }, + { + "epoch": 0.4552369205468188, + "grad_norm": 0.708926796913147, + "learning_rate": 5.9618775278636745e-06, + "loss": 0.6654, + "step": 8858 + }, + { + "epoch": 0.45528831329016345, + "grad_norm": 1.1360771656036377, + "learning_rate": 5.961060805826753e-06, + "loss": 0.6886, + "step": 8859 + }, + { + "epoch": 0.4553397060335081, + "grad_norm": 1.0509719848632812, + "learning_rate": 5.9602440571628024e-06, + "loss": 0.7626, + "step": 8860 + }, + { + "epoch": 0.4553910987768527, + "grad_norm": 1.0279475450515747, + "learning_rate": 5.959427281894452e-06, + "loss": 0.7025, + "step": 8861 + }, + { + "epoch": 0.45544249152019733, + "grad_norm": 0.990743100643158, + "learning_rate": 5.958610480044331e-06, + "loss": 0.726, + "step": 8862 + }, + { + "epoch": 0.455493884263542, + "grad_norm": 1.0370988845825195, + "learning_rate": 5.957793651635069e-06, + "loss": 0.704, + "step": 8863 + }, + { + "epoch": 0.45554527700688663, + "grad_norm": 0.713202714920044, + "learning_rate": 5.956976796689298e-06, + "loss": 0.7083, + "step": 8864 + }, + { + "epoch": 0.4555966697502313, + "grad_norm": 1.0848804712295532, + "learning_rate": 5.95615991522965e-06, + "loss": 0.741, + "step": 8865 + }, + { + "epoch": 0.4556480624935759, + "grad_norm": 1.1045877933502197, + "learning_rate": 5.9553430072787545e-06, + "loss": 0.7756, + "step": 8866 + }, + { + "epoch": 0.45569945523692057, + "grad_norm": 1.1364927291870117, + "learning_rate": 5.954526072859248e-06, + "loss": 0.7517, + "step": 8867 + }, + { + "epoch": 0.45575084798026516, + "grad_norm": 1.0900721549987793, + "learning_rate": 5.953709111993763e-06, + "loss": 0.746, + "step": 8868 + }, + { + "epoch": 0.4558022407236098, + "grad_norm": 0.7207632660865784, + "learning_rate": 5.952892124704933e-06, + "loss": 0.6536, + "step": 8869 + }, + { + "epoch": 0.45585363346695446, + "grad_norm": 1.098912239074707, + "learning_rate": 5.952075111015396e-06, + "loss": 0.7371, + "step": 8870 + }, + { + "epoch": 0.4559050262102991, + "grad_norm": 1.1477363109588623, + "learning_rate": 5.9512580709477865e-06, + "loss": 0.8166, + "step": 8871 + }, + { + "epoch": 0.45595641895364375, + "grad_norm": 1.0114022493362427, + "learning_rate": 5.950441004524742e-06, + "loss": 0.7537, + "step": 8872 + }, + { + "epoch": 0.4560078116969884, + "grad_norm": 0.6834369897842407, + "learning_rate": 5.949623911768899e-06, + "loss": 0.6589, + "step": 8873 + }, + { + "epoch": 0.45605920444033304, + "grad_norm": 0.9901047945022583, + "learning_rate": 5.948806792702896e-06, + "loss": 0.733, + "step": 8874 + }, + { + "epoch": 0.4561105971836777, + "grad_norm": 0.8677506446838379, + "learning_rate": 5.947989647349372e-06, + "loss": 0.7416, + "step": 8875 + }, + { + "epoch": 0.4561619899270223, + "grad_norm": 1.0808535814285278, + "learning_rate": 5.947172475730967e-06, + "loss": 0.756, + "step": 8876 + }, + { + "epoch": 0.45621338267036693, + "grad_norm": 1.030073642730713, + "learning_rate": 5.946355277870322e-06, + "loss": 0.7692, + "step": 8877 + }, + { + "epoch": 0.4562647754137116, + "grad_norm": 1.1000250577926636, + "learning_rate": 5.9455380537900776e-06, + "loss": 0.7408, + "step": 8878 + }, + { + "epoch": 0.4563161681570562, + "grad_norm": 1.0278034210205078, + "learning_rate": 5.944720803512874e-06, + "loss": 0.7735, + "step": 8879 + }, + { + "epoch": 0.45636756090040087, + "grad_norm": 1.0684130191802979, + "learning_rate": 5.943903527061359e-06, + "loss": 0.7345, + "step": 8880 + }, + { + "epoch": 0.4564189536437455, + "grad_norm": 0.7972928881645203, + "learning_rate": 5.94308622445817e-06, + "loss": 0.6416, + "step": 8881 + }, + { + "epoch": 0.45647034638709016, + "grad_norm": 1.0249911546707153, + "learning_rate": 5.942268895725955e-06, + "loss": 0.7868, + "step": 8882 + }, + { + "epoch": 0.45652173913043476, + "grad_norm": 1.025810718536377, + "learning_rate": 5.9414515408873565e-06, + "loss": 0.7837, + "step": 8883 + }, + { + "epoch": 0.4565731318737794, + "grad_norm": 0.9920753240585327, + "learning_rate": 5.9406341599650215e-06, + "loss": 0.7756, + "step": 8884 + }, + { + "epoch": 0.45662452461712405, + "grad_norm": 0.7278254628181458, + "learning_rate": 5.939816752981594e-06, + "loss": 0.6557, + "step": 8885 + }, + { + "epoch": 0.4566759173604687, + "grad_norm": 1.0390771627426147, + "learning_rate": 5.938999319959724e-06, + "loss": 0.712, + "step": 8886 + }, + { + "epoch": 0.45672731010381334, + "grad_norm": 1.0876400470733643, + "learning_rate": 5.938181860922059e-06, + "loss": 0.7359, + "step": 8887 + }, + { + "epoch": 0.456778702847158, + "grad_norm": 1.1079777479171753, + "learning_rate": 5.937364375891244e-06, + "loss": 0.6797, + "step": 8888 + }, + { + "epoch": 0.45683009559050264, + "grad_norm": 1.06488835811615, + "learning_rate": 5.936546864889931e-06, + "loss": 0.737, + "step": 8889 + }, + { + "epoch": 0.4568814883338473, + "grad_norm": 1.0920110940933228, + "learning_rate": 5.93572932794077e-06, + "loss": 0.7562, + "step": 8890 + }, + { + "epoch": 0.4569328810771919, + "grad_norm": 1.0269219875335693, + "learning_rate": 5.9349117650664115e-06, + "loss": 0.7464, + "step": 8891 + }, + { + "epoch": 0.4569842738205365, + "grad_norm": 1.1151102781295776, + "learning_rate": 5.934094176289505e-06, + "loss": 0.7241, + "step": 8892 + }, + { + "epoch": 0.45703566656388117, + "grad_norm": 1.0413362979888916, + "learning_rate": 5.9332765616327035e-06, + "loss": 0.7139, + "step": 8893 + }, + { + "epoch": 0.4570870593072258, + "grad_norm": 1.061132788658142, + "learning_rate": 5.932458921118661e-06, + "loss": 0.7046, + "step": 8894 + }, + { + "epoch": 0.45713845205057047, + "grad_norm": 1.0489342212677002, + "learning_rate": 5.931641254770028e-06, + "loss": 0.7577, + "step": 8895 + }, + { + "epoch": 0.4571898447939151, + "grad_norm": 1.037558913230896, + "learning_rate": 5.930823562609464e-06, + "loss": 0.725, + "step": 8896 + }, + { + "epoch": 0.45724123753725976, + "grad_norm": 1.074945330619812, + "learning_rate": 5.930005844659616e-06, + "loss": 0.7044, + "step": 8897 + }, + { + "epoch": 0.4572926302806044, + "grad_norm": 1.0921604633331299, + "learning_rate": 5.929188100943146e-06, + "loss": 0.7408, + "step": 8898 + }, + { + "epoch": 0.457344023023949, + "grad_norm": 1.00763738155365, + "learning_rate": 5.928370331482709e-06, + "loss": 0.7523, + "step": 8899 + }, + { + "epoch": 0.45739541576729364, + "grad_norm": 0.9662325382232666, + "learning_rate": 5.927552536300961e-06, + "loss": 0.772, + "step": 8900 + }, + { + "epoch": 0.4574468085106383, + "grad_norm": 1.0094407796859741, + "learning_rate": 5.926734715420559e-06, + "loss": 0.7609, + "step": 8901 + }, + { + "epoch": 0.45749820125398294, + "grad_norm": 1.112140417098999, + "learning_rate": 5.925916868864163e-06, + "loss": 0.7708, + "step": 8902 + }, + { + "epoch": 0.4575495939973276, + "grad_norm": 1.0625040531158447, + "learning_rate": 5.925098996654432e-06, + "loss": 0.7352, + "step": 8903 + }, + { + "epoch": 0.45760098674067223, + "grad_norm": 1.1343601942062378, + "learning_rate": 5.924281098814025e-06, + "loss": 0.7757, + "step": 8904 + }, + { + "epoch": 0.4576523794840169, + "grad_norm": 1.1333168745040894, + "learning_rate": 5.923463175365603e-06, + "loss": 0.7772, + "step": 8905 + }, + { + "epoch": 0.45770377222736147, + "grad_norm": 1.106407642364502, + "learning_rate": 5.922645226331827e-06, + "loss": 0.7368, + "step": 8906 + }, + { + "epoch": 0.4577551649707061, + "grad_norm": 0.9942788481712341, + "learning_rate": 5.921827251735359e-06, + "loss": 0.6851, + "step": 8907 + }, + { + "epoch": 0.45780655771405077, + "grad_norm": 1.0715062618255615, + "learning_rate": 5.921009251598864e-06, + "loss": 0.75, + "step": 8908 + }, + { + "epoch": 0.4578579504573954, + "grad_norm": 1.036826252937317, + "learning_rate": 5.920191225945001e-06, + "loss": 0.7448, + "step": 8909 + }, + { + "epoch": 0.45790934320074006, + "grad_norm": 1.1309218406677246, + "learning_rate": 5.919373174796438e-06, + "loss": 0.7734, + "step": 8910 + }, + { + "epoch": 0.4579607359440847, + "grad_norm": 1.0349030494689941, + "learning_rate": 5.918555098175838e-06, + "loss": 0.6973, + "step": 8911 + }, + { + "epoch": 0.45801212868742935, + "grad_norm": 1.1430330276489258, + "learning_rate": 5.917736996105867e-06, + "loss": 0.7525, + "step": 8912 + }, + { + "epoch": 0.458063521430774, + "grad_norm": 2.8730721473693848, + "learning_rate": 5.9169188686091915e-06, + "loss": 0.7975, + "step": 8913 + }, + { + "epoch": 0.4581149141741186, + "grad_norm": 1.1182926893234253, + "learning_rate": 5.916100715708477e-06, + "loss": 0.7553, + "step": 8914 + }, + { + "epoch": 0.45816630691746324, + "grad_norm": 0.837189793586731, + "learning_rate": 5.9152825374263934e-06, + "loss": 0.7017, + "step": 8915 + }, + { + "epoch": 0.4582176996608079, + "grad_norm": 3.481139659881592, + "learning_rate": 5.914464333785608e-06, + "loss": 0.7562, + "step": 8916 + }, + { + "epoch": 0.45826909240415253, + "grad_norm": 1.005302906036377, + "learning_rate": 5.913646104808788e-06, + "loss": 0.7192, + "step": 8917 + }, + { + "epoch": 0.4583204851474972, + "grad_norm": 1.0742340087890625, + "learning_rate": 5.912827850518606e-06, + "loss": 0.7548, + "step": 8918 + }, + { + "epoch": 0.45837187789084183, + "grad_norm": 0.7497778534889221, + "learning_rate": 5.912009570937729e-06, + "loss": 0.6611, + "step": 8919 + }, + { + "epoch": 0.4584232706341865, + "grad_norm": 1.0084547996520996, + "learning_rate": 5.911191266088834e-06, + "loss": 0.7422, + "step": 8920 + }, + { + "epoch": 0.45847466337753107, + "grad_norm": 1.0855684280395508, + "learning_rate": 5.910372935994587e-06, + "loss": 0.7346, + "step": 8921 + }, + { + "epoch": 0.4585260561208757, + "grad_norm": 1.1071358919143677, + "learning_rate": 5.909554580677663e-06, + "loss": 0.7494, + "step": 8922 + }, + { + "epoch": 0.45857744886422036, + "grad_norm": 1.0432698726654053, + "learning_rate": 5.908736200160736e-06, + "loss": 0.7171, + "step": 8923 + }, + { + "epoch": 0.458628841607565, + "grad_norm": 1.1482576131820679, + "learning_rate": 5.90791779446648e-06, + "loss": 0.7566, + "step": 8924 + }, + { + "epoch": 0.45868023435090965, + "grad_norm": 1.041137933731079, + "learning_rate": 5.907099363617567e-06, + "loss": 0.7383, + "step": 8925 + }, + { + "epoch": 0.4587316270942543, + "grad_norm": 1.1440412998199463, + "learning_rate": 5.906280907636675e-06, + "loss": 0.7703, + "step": 8926 + }, + { + "epoch": 0.45878301983759895, + "grad_norm": 1.075931429862976, + "learning_rate": 5.905462426546476e-06, + "loss": 0.7866, + "step": 8927 + }, + { + "epoch": 0.4588344125809436, + "grad_norm": 1.1004613637924194, + "learning_rate": 5.904643920369652e-06, + "loss": 0.7265, + "step": 8928 + }, + { + "epoch": 0.4588858053242882, + "grad_norm": 1.0379250049591064, + "learning_rate": 5.903825389128878e-06, + "loss": 0.6747, + "step": 8929 + }, + { + "epoch": 0.45893719806763283, + "grad_norm": 1.0629864931106567, + "learning_rate": 5.903006832846833e-06, + "loss": 0.792, + "step": 8930 + }, + { + "epoch": 0.4589885908109775, + "grad_norm": 1.1615045070648193, + "learning_rate": 5.9021882515461955e-06, + "loss": 0.7542, + "step": 8931 + }, + { + "epoch": 0.45903998355432213, + "grad_norm": 1.0473079681396484, + "learning_rate": 5.901369645249645e-06, + "loss": 0.7288, + "step": 8932 + }, + { + "epoch": 0.4590913762976668, + "grad_norm": 1.0952554941177368, + "learning_rate": 5.9005510139798595e-06, + "loss": 0.6933, + "step": 8933 + }, + { + "epoch": 0.4591427690410114, + "grad_norm": 1.0468007326126099, + "learning_rate": 5.899732357759523e-06, + "loss": 0.7743, + "step": 8934 + }, + { + "epoch": 0.45919416178435607, + "grad_norm": 1.0818570852279663, + "learning_rate": 5.898913676611315e-06, + "loss": 0.7555, + "step": 8935 + }, + { + "epoch": 0.45924555452770066, + "grad_norm": 1.1436657905578613, + "learning_rate": 5.898094970557919e-06, + "loss": 0.726, + "step": 8936 + }, + { + "epoch": 0.4592969472710453, + "grad_norm": 0.7811368703842163, + "learning_rate": 5.897276239622017e-06, + "loss": 0.7249, + "step": 8937 + }, + { + "epoch": 0.45934834001438996, + "grad_norm": 1.0438807010650635, + "learning_rate": 5.8964574838262944e-06, + "loss": 0.7383, + "step": 8938 + }, + { + "epoch": 0.4593997327577346, + "grad_norm": 1.066916823387146, + "learning_rate": 5.895638703193434e-06, + "loss": 0.7363, + "step": 8939 + }, + { + "epoch": 0.45945112550107925, + "grad_norm": 1.0424696207046509, + "learning_rate": 5.894819897746121e-06, + "loss": 0.7623, + "step": 8940 + }, + { + "epoch": 0.4595025182444239, + "grad_norm": 1.1100372076034546, + "learning_rate": 5.894001067507041e-06, + "loss": 0.7456, + "step": 8941 + }, + { + "epoch": 0.45955391098776854, + "grad_norm": 1.1429909467697144, + "learning_rate": 5.893182212498882e-06, + "loss": 0.7111, + "step": 8942 + }, + { + "epoch": 0.4596053037311132, + "grad_norm": 1.0366735458374023, + "learning_rate": 5.892363332744329e-06, + "loss": 0.6266, + "step": 8943 + }, + { + "epoch": 0.4596566964744578, + "grad_norm": 1.0912282466888428, + "learning_rate": 5.891544428266071e-06, + "loss": 0.7641, + "step": 8944 + }, + { + "epoch": 0.45970808921780243, + "grad_norm": 1.0330140590667725, + "learning_rate": 5.890725499086796e-06, + "loss": 0.7701, + "step": 8945 + }, + { + "epoch": 0.4597594819611471, + "grad_norm": 1.0573067665100098, + "learning_rate": 5.889906545229192e-06, + "loss": 0.7292, + "step": 8946 + }, + { + "epoch": 0.4598108747044917, + "grad_norm": 1.0763170719146729, + "learning_rate": 5.889087566715952e-06, + "loss": 0.7293, + "step": 8947 + }, + { + "epoch": 0.45986226744783637, + "grad_norm": 0.6770279407501221, + "learning_rate": 5.888268563569762e-06, + "loss": 0.6425, + "step": 8948 + }, + { + "epoch": 0.459913660191181, + "grad_norm": 0.7366620302200317, + "learning_rate": 5.887449535813318e-06, + "loss": 0.6363, + "step": 8949 + }, + { + "epoch": 0.45996505293452566, + "grad_norm": 1.0756702423095703, + "learning_rate": 5.886630483469309e-06, + "loss": 0.7421, + "step": 8950 + }, + { + "epoch": 0.4600164456778703, + "grad_norm": 0.7815482020378113, + "learning_rate": 5.885811406560428e-06, + "loss": 0.6845, + "step": 8951 + }, + { + "epoch": 0.4600678384212149, + "grad_norm": 1.1058789491653442, + "learning_rate": 5.88499230510937e-06, + "loss": 0.7175, + "step": 8952 + }, + { + "epoch": 0.46011923116455955, + "grad_norm": 1.1916143894195557, + "learning_rate": 5.884173179138826e-06, + "loss": 0.6629, + "step": 8953 + }, + { + "epoch": 0.4601706239079042, + "grad_norm": 1.1176693439483643, + "learning_rate": 5.88335402867149e-06, + "loss": 0.7127, + "step": 8954 + }, + { + "epoch": 0.46022201665124884, + "grad_norm": 1.0726125240325928, + "learning_rate": 5.882534853730062e-06, + "loss": 0.768, + "step": 8955 + }, + { + "epoch": 0.4602734093945935, + "grad_norm": 1.0672916173934937, + "learning_rate": 5.881715654337235e-06, + "loss": 0.729, + "step": 8956 + }, + { + "epoch": 0.46032480213793814, + "grad_norm": 1.0780296325683594, + "learning_rate": 5.880896430515706e-06, + "loss": 0.72, + "step": 8957 + }, + { + "epoch": 0.4603761948812828, + "grad_norm": 1.064030647277832, + "learning_rate": 5.880077182288169e-06, + "loss": 0.7252, + "step": 8958 + }, + { + "epoch": 0.4604275876246274, + "grad_norm": 0.8556851148605347, + "learning_rate": 5.879257909677329e-06, + "loss": 0.7201, + "step": 8959 + }, + { + "epoch": 0.460478980367972, + "grad_norm": 0.7724310159683228, + "learning_rate": 5.878438612705879e-06, + "loss": 0.6328, + "step": 8960 + }, + { + "epoch": 0.46053037311131667, + "grad_norm": 1.0718120336532593, + "learning_rate": 5.877619291396522e-06, + "loss": 0.7448, + "step": 8961 + }, + { + "epoch": 0.4605817658546613, + "grad_norm": 1.0667632818222046, + "learning_rate": 5.876799945771954e-06, + "loss": 0.7409, + "step": 8962 + }, + { + "epoch": 0.46063315859800597, + "grad_norm": 0.7033167481422424, + "learning_rate": 5.875980575854878e-06, + "loss": 0.6096, + "step": 8963 + }, + { + "epoch": 0.4606845513413506, + "grad_norm": 1.046469807624817, + "learning_rate": 5.875161181667996e-06, + "loss": 0.7326, + "step": 8964 + }, + { + "epoch": 0.46073594408469526, + "grad_norm": 1.1148231029510498, + "learning_rate": 5.87434176323401e-06, + "loss": 0.7698, + "step": 8965 + }, + { + "epoch": 0.4607873368280399, + "grad_norm": 1.1007137298583984, + "learning_rate": 5.873522320575621e-06, + "loss": 0.7532, + "step": 8966 + }, + { + "epoch": 0.4608387295713845, + "grad_norm": 1.0778814554214478, + "learning_rate": 5.872702853715532e-06, + "loss": 0.7206, + "step": 8967 + }, + { + "epoch": 0.46089012231472914, + "grad_norm": 0.7628300786018372, + "learning_rate": 5.871883362676451e-06, + "loss": 0.6239, + "step": 8968 + }, + { + "epoch": 0.4609415150580738, + "grad_norm": 1.0552924871444702, + "learning_rate": 5.871063847481078e-06, + "loss": 0.7395, + "step": 8969 + }, + { + "epoch": 0.46099290780141844, + "grad_norm": 1.0969772338867188, + "learning_rate": 5.87024430815212e-06, + "loss": 0.7442, + "step": 8970 + }, + { + "epoch": 0.4610443005447631, + "grad_norm": 1.033203125, + "learning_rate": 5.869424744712285e-06, + "loss": 0.7149, + "step": 8971 + }, + { + "epoch": 0.46109569328810773, + "grad_norm": 1.0113794803619385, + "learning_rate": 5.868605157184279e-06, + "loss": 0.7185, + "step": 8972 + }, + { + "epoch": 0.4611470860314524, + "grad_norm": 1.1189467906951904, + "learning_rate": 5.867785545590806e-06, + "loss": 0.7427, + "step": 8973 + }, + { + "epoch": 0.46119847877479697, + "grad_norm": 1.08404541015625, + "learning_rate": 5.866965909954578e-06, + "loss": 0.7678, + "step": 8974 + }, + { + "epoch": 0.4612498715181416, + "grad_norm": 1.0680245161056519, + "learning_rate": 5.8661462502983024e-06, + "loss": 0.768, + "step": 8975 + }, + { + "epoch": 0.46130126426148627, + "grad_norm": 1.0621349811553955, + "learning_rate": 5.86532656664469e-06, + "loss": 0.6948, + "step": 8976 + }, + { + "epoch": 0.4613526570048309, + "grad_norm": 1.3989737033843994, + "learning_rate": 5.864506859016448e-06, + "loss": 0.7728, + "step": 8977 + }, + { + "epoch": 0.46140404974817556, + "grad_norm": 1.1540238857269287, + "learning_rate": 5.863687127436288e-06, + "loss": 0.8093, + "step": 8978 + }, + { + "epoch": 0.4614554424915202, + "grad_norm": 1.0131595134735107, + "learning_rate": 5.862867371926922e-06, + "loss": 0.7147, + "step": 8979 + }, + { + "epoch": 0.46150683523486485, + "grad_norm": 1.0851927995681763, + "learning_rate": 5.862047592511062e-06, + "loss": 0.7201, + "step": 8980 + }, + { + "epoch": 0.4615582279782095, + "grad_norm": 1.0080941915512085, + "learning_rate": 5.861227789211423e-06, + "loss": 0.7545, + "step": 8981 + }, + { + "epoch": 0.4616096207215541, + "grad_norm": 1.176670789718628, + "learning_rate": 5.860407962050714e-06, + "loss": 0.7277, + "step": 8982 + }, + { + "epoch": 0.46166101346489874, + "grad_norm": 1.1212631464004517, + "learning_rate": 5.859588111051651e-06, + "loss": 0.7134, + "step": 8983 + }, + { + "epoch": 0.4617124062082434, + "grad_norm": 1.1416161060333252, + "learning_rate": 5.858768236236949e-06, + "loss": 0.7674, + "step": 8984 + }, + { + "epoch": 0.46176379895158803, + "grad_norm": 1.0965406894683838, + "learning_rate": 5.857948337629324e-06, + "loss": 0.7633, + "step": 8985 + }, + { + "epoch": 0.4618151916949327, + "grad_norm": 1.0567373037338257, + "learning_rate": 5.85712841525149e-06, + "loss": 0.7685, + "step": 8986 + }, + { + "epoch": 0.46186658443827733, + "grad_norm": 1.1180740594863892, + "learning_rate": 5.856308469126165e-06, + "loss": 0.6777, + "step": 8987 + }, + { + "epoch": 0.461917977181622, + "grad_norm": 1.0868412256240845, + "learning_rate": 5.855488499276067e-06, + "loss": 0.7214, + "step": 8988 + }, + { + "epoch": 0.4619693699249666, + "grad_norm": 0.7879918813705444, + "learning_rate": 5.854668505723911e-06, + "loss": 0.6564, + "step": 8989 + }, + { + "epoch": 0.4620207626683112, + "grad_norm": 1.0919828414916992, + "learning_rate": 5.8538484884924195e-06, + "loss": 0.708, + "step": 8990 + }, + { + "epoch": 0.46207215541165586, + "grad_norm": 1.0887128114700317, + "learning_rate": 5.853028447604309e-06, + "loss": 0.736, + "step": 8991 + }, + { + "epoch": 0.4621235481550005, + "grad_norm": 1.108296275138855, + "learning_rate": 5.8522083830823e-06, + "loss": 0.7599, + "step": 8992 + }, + { + "epoch": 0.46217494089834515, + "grad_norm": 1.0775268077850342, + "learning_rate": 5.851388294949116e-06, + "loss": 0.7562, + "step": 8993 + }, + { + "epoch": 0.4622263336416898, + "grad_norm": 1.0385046005249023, + "learning_rate": 5.850568183227474e-06, + "loss": 0.7003, + "step": 8994 + }, + { + "epoch": 0.46227772638503445, + "grad_norm": 1.0174345970153809, + "learning_rate": 5.849748047940098e-06, + "loss": 0.7155, + "step": 8995 + }, + { + "epoch": 0.4623291191283791, + "grad_norm": 1.1871230602264404, + "learning_rate": 5.84892788910971e-06, + "loss": 0.7598, + "step": 8996 + }, + { + "epoch": 0.4623805118717237, + "grad_norm": 1.0645506381988525, + "learning_rate": 5.8481077067590344e-06, + "loss": 0.7223, + "step": 8997 + }, + { + "epoch": 0.46243190461506833, + "grad_norm": 1.1522107124328613, + "learning_rate": 5.847287500910794e-06, + "loss": 0.7543, + "step": 8998 + }, + { + "epoch": 0.462483297358413, + "grad_norm": 1.0858427286148071, + "learning_rate": 5.846467271587712e-06, + "loss": 0.7073, + "step": 8999 + }, + { + "epoch": 0.46253469010175763, + "grad_norm": 0.9995402097702026, + "learning_rate": 5.845647018812517e-06, + "loss": 0.6965, + "step": 9000 + }, + { + "epoch": 0.4625860828451023, + "grad_norm": 1.1073576211929321, + "learning_rate": 5.844826742607932e-06, + "loss": 0.7093, + "step": 9001 + }, + { + "epoch": 0.4626374755884469, + "grad_norm": 1.0715878009796143, + "learning_rate": 5.844006442996684e-06, + "loss": 0.7633, + "step": 9002 + }, + { + "epoch": 0.46268886833179157, + "grad_norm": 1.1467885971069336, + "learning_rate": 5.843186120001502e-06, + "loss": 0.8419, + "step": 9003 + }, + { + "epoch": 0.4627402610751362, + "grad_norm": 0.6586687564849854, + "learning_rate": 5.84236577364511e-06, + "loss": 0.6796, + "step": 9004 + }, + { + "epoch": 0.4627916538184808, + "grad_norm": 1.0261226892471313, + "learning_rate": 5.841545403950241e-06, + "loss": 0.7336, + "step": 9005 + }, + { + "epoch": 0.46284304656182546, + "grad_norm": 1.125173568725586, + "learning_rate": 5.840725010939621e-06, + "loss": 0.6709, + "step": 9006 + }, + { + "epoch": 0.4628944393051701, + "grad_norm": 1.0425140857696533, + "learning_rate": 5.839904594635979e-06, + "loss": 0.7213, + "step": 9007 + }, + { + "epoch": 0.46294583204851475, + "grad_norm": 1.0032652616500854, + "learning_rate": 5.8390841550620485e-06, + "loss": 0.6975, + "step": 9008 + }, + { + "epoch": 0.4629972247918594, + "grad_norm": 1.2033262252807617, + "learning_rate": 5.838263692240557e-06, + "loss": 0.7951, + "step": 9009 + }, + { + "epoch": 0.46304861753520404, + "grad_norm": 1.072046160697937, + "learning_rate": 5.837443206194239e-06, + "loss": 0.7756, + "step": 9010 + }, + { + "epoch": 0.4631000102785487, + "grad_norm": 1.1966664791107178, + "learning_rate": 5.836622696945825e-06, + "loss": 0.7884, + "step": 9011 + }, + { + "epoch": 0.4631514030218933, + "grad_norm": 0.763951301574707, + "learning_rate": 5.835802164518049e-06, + "loss": 0.661, + "step": 9012 + }, + { + "epoch": 0.46320279576523793, + "grad_norm": 1.0983941555023193, + "learning_rate": 5.834981608933646e-06, + "loss": 0.729, + "step": 9013 + }, + { + "epoch": 0.4632541885085826, + "grad_norm": 1.105281949043274, + "learning_rate": 5.8341610302153465e-06, + "loss": 0.7541, + "step": 9014 + }, + { + "epoch": 0.4633055812519272, + "grad_norm": 1.0642547607421875, + "learning_rate": 5.8333404283858864e-06, + "loss": 0.7125, + "step": 9015 + }, + { + "epoch": 0.46335697399527187, + "grad_norm": 0.7126036882400513, + "learning_rate": 5.832519803468003e-06, + "loss": 0.6668, + "step": 9016 + }, + { + "epoch": 0.4634083667386165, + "grad_norm": 0.7853831052780151, + "learning_rate": 5.8316991554844325e-06, + "loss": 0.7318, + "step": 9017 + }, + { + "epoch": 0.46345975948196116, + "grad_norm": 1.0368157625198364, + "learning_rate": 5.830878484457909e-06, + "loss": 0.7225, + "step": 9018 + }, + { + "epoch": 0.4635111522253058, + "grad_norm": 1.1008564233779907, + "learning_rate": 5.830057790411173e-06, + "loss": 0.7108, + "step": 9019 + }, + { + "epoch": 0.4635625449686504, + "grad_norm": 1.0381215810775757, + "learning_rate": 5.829237073366959e-06, + "loss": 0.7306, + "step": 9020 + }, + { + "epoch": 0.46361393771199505, + "grad_norm": 0.8310261964797974, + "learning_rate": 5.828416333348009e-06, + "loss": 0.6766, + "step": 9021 + }, + { + "epoch": 0.4636653304553397, + "grad_norm": 1.0303864479064941, + "learning_rate": 5.8275955703770615e-06, + "loss": 0.6795, + "step": 9022 + }, + { + "epoch": 0.46371672319868434, + "grad_norm": 1.105635643005371, + "learning_rate": 5.826774784476855e-06, + "loss": 0.7478, + "step": 9023 + }, + { + "epoch": 0.463768115942029, + "grad_norm": 1.052188515663147, + "learning_rate": 5.825953975670132e-06, + "loss": 0.7188, + "step": 9024 + }, + { + "epoch": 0.46381950868537364, + "grad_norm": 1.0696007013320923, + "learning_rate": 5.825133143979633e-06, + "loss": 0.736, + "step": 9025 + }, + { + "epoch": 0.4638709014287183, + "grad_norm": 1.1537314653396606, + "learning_rate": 5.824312289428099e-06, + "loss": 0.7869, + "step": 9026 + }, + { + "epoch": 0.46392229417206293, + "grad_norm": 1.0709136724472046, + "learning_rate": 5.823491412038273e-06, + "loss": 0.6675, + "step": 9027 + }, + { + "epoch": 0.4639736869154075, + "grad_norm": 1.0840314626693726, + "learning_rate": 5.8226705118329e-06, + "loss": 0.7709, + "step": 9028 + }, + { + "epoch": 0.46402507965875217, + "grad_norm": 1.1706689596176147, + "learning_rate": 5.82184958883472e-06, + "loss": 0.7383, + "step": 9029 + }, + { + "epoch": 0.4640764724020968, + "grad_norm": 1.029422640800476, + "learning_rate": 5.82102864306648e-06, + "loss": 0.7412, + "step": 9030 + }, + { + "epoch": 0.46412786514544146, + "grad_norm": 1.0164307355880737, + "learning_rate": 5.820207674550924e-06, + "loss": 0.7304, + "step": 9031 + }, + { + "epoch": 0.4641792578887861, + "grad_norm": 1.068285584449768, + "learning_rate": 5.819386683310801e-06, + "loss": 0.7903, + "step": 9032 + }, + { + "epoch": 0.46423065063213076, + "grad_norm": 0.9525502324104309, + "learning_rate": 5.818565669368852e-06, + "loss": 0.7017, + "step": 9033 + }, + { + "epoch": 0.4642820433754754, + "grad_norm": 1.0377914905548096, + "learning_rate": 5.817744632747826e-06, + "loss": 0.7375, + "step": 9034 + }, + { + "epoch": 0.46433343611882, + "grad_norm": 1.0358093976974487, + "learning_rate": 5.816923573470472e-06, + "loss": 0.7407, + "step": 9035 + }, + { + "epoch": 0.46438482886216464, + "grad_norm": 0.7444321513175964, + "learning_rate": 5.816102491559536e-06, + "loss": 0.6619, + "step": 9036 + }, + { + "epoch": 0.4644362216055093, + "grad_norm": 1.0119374990463257, + "learning_rate": 5.815281387037769e-06, + "loss": 0.6936, + "step": 9037 + }, + { + "epoch": 0.46448761434885394, + "grad_norm": 1.0652567148208618, + "learning_rate": 5.814460259927919e-06, + "loss": 0.7545, + "step": 9038 + }, + { + "epoch": 0.4645390070921986, + "grad_norm": 1.0876790285110474, + "learning_rate": 5.8136391102527355e-06, + "loss": 0.7528, + "step": 9039 + }, + { + "epoch": 0.46459039983554323, + "grad_norm": 1.068233847618103, + "learning_rate": 5.81281793803497e-06, + "loss": 0.7577, + "step": 9040 + }, + { + "epoch": 0.4646417925788879, + "grad_norm": 0.8206691741943359, + "learning_rate": 5.811996743297375e-06, + "loss": 0.656, + "step": 9041 + }, + { + "epoch": 0.4646931853222325, + "grad_norm": 0.8031511306762695, + "learning_rate": 5.811175526062699e-06, + "loss": 0.6547, + "step": 9042 + }, + { + "epoch": 0.4647445780655771, + "grad_norm": 1.1287580728530884, + "learning_rate": 5.810354286353699e-06, + "loss": 0.7211, + "step": 9043 + }, + { + "epoch": 0.46479597080892177, + "grad_norm": 1.0637571811676025, + "learning_rate": 5.809533024193124e-06, + "loss": 0.7162, + "step": 9044 + }, + { + "epoch": 0.4648473635522664, + "grad_norm": 1.0583562850952148, + "learning_rate": 5.808711739603731e-06, + "loss": 0.711, + "step": 9045 + }, + { + "epoch": 0.46489875629561106, + "grad_norm": 1.0513478517532349, + "learning_rate": 5.807890432608272e-06, + "loss": 0.7366, + "step": 9046 + }, + { + "epoch": 0.4649501490389557, + "grad_norm": 1.010457992553711, + "learning_rate": 5.807069103229504e-06, + "loss": 0.6823, + "step": 9047 + }, + { + "epoch": 0.46500154178230035, + "grad_norm": 0.7982476353645325, + "learning_rate": 5.806247751490182e-06, + "loss": 0.6446, + "step": 9048 + }, + { + "epoch": 0.465052934525645, + "grad_norm": 1.1567963361740112, + "learning_rate": 5.805426377413061e-06, + "loss": 0.772, + "step": 9049 + }, + { + "epoch": 0.4651043272689896, + "grad_norm": 1.0834085941314697, + "learning_rate": 5.8046049810208985e-06, + "loss": 0.7238, + "step": 9050 + }, + { + "epoch": 0.46515572001233424, + "grad_norm": 1.0637526512145996, + "learning_rate": 5.803783562336452e-06, + "loss": 0.7572, + "step": 9051 + }, + { + "epoch": 0.4652071127556789, + "grad_norm": 1.0176827907562256, + "learning_rate": 5.8029621213824806e-06, + "loss": 0.7217, + "step": 9052 + }, + { + "epoch": 0.46525850549902353, + "grad_norm": 1.1860554218292236, + "learning_rate": 5.802140658181744e-06, + "loss": 0.7254, + "step": 9053 + }, + { + "epoch": 0.4653098982423682, + "grad_norm": 0.7953958511352539, + "learning_rate": 5.801319172757e-06, + "loss": 0.6777, + "step": 9054 + }, + { + "epoch": 0.4653612909857128, + "grad_norm": 1.100940465927124, + "learning_rate": 5.8004976651310064e-06, + "loss": 0.7889, + "step": 9055 + }, + { + "epoch": 0.4654126837290575, + "grad_norm": 1.034502387046814, + "learning_rate": 5.799676135326527e-06, + "loss": 0.7369, + "step": 9056 + }, + { + "epoch": 0.4654640764724021, + "grad_norm": 1.1026217937469482, + "learning_rate": 5.798854583366322e-06, + "loss": 0.8042, + "step": 9057 + }, + { + "epoch": 0.4655154692157467, + "grad_norm": 1.0479249954223633, + "learning_rate": 5.798033009273154e-06, + "loss": 0.7582, + "step": 9058 + }, + { + "epoch": 0.46556686195909136, + "grad_norm": 1.0674561262130737, + "learning_rate": 5.797211413069784e-06, + "loss": 0.7607, + "step": 9059 + }, + { + "epoch": 0.465618254702436, + "grad_norm": 1.0040827989578247, + "learning_rate": 5.796389794778975e-06, + "loss": 0.7594, + "step": 9060 + }, + { + "epoch": 0.46566964744578065, + "grad_norm": 1.0610641241073608, + "learning_rate": 5.7955681544234906e-06, + "loss": 0.7622, + "step": 9061 + }, + { + "epoch": 0.4657210401891253, + "grad_norm": 1.1866360902786255, + "learning_rate": 5.794746492026097e-06, + "loss": 0.7197, + "step": 9062 + }, + { + "epoch": 0.46577243293246995, + "grad_norm": 1.1726354360580444, + "learning_rate": 5.7939248076095575e-06, + "loss": 0.7333, + "step": 9063 + }, + { + "epoch": 0.4658238256758146, + "grad_norm": 1.1168599128723145, + "learning_rate": 5.7931031011966385e-06, + "loss": 0.7548, + "step": 9064 + }, + { + "epoch": 0.4658752184191592, + "grad_norm": 1.0656801462173462, + "learning_rate": 5.792281372810106e-06, + "loss": 0.7487, + "step": 9065 + }, + { + "epoch": 0.46592661116250383, + "grad_norm": 1.05821692943573, + "learning_rate": 5.791459622472725e-06, + "loss": 0.7262, + "step": 9066 + }, + { + "epoch": 0.4659780039058485, + "grad_norm": 1.0438910722732544, + "learning_rate": 5.790637850207265e-06, + "loss": 0.7261, + "step": 9067 + }, + { + "epoch": 0.46602939664919313, + "grad_norm": 1.0939925909042358, + "learning_rate": 5.789816056036492e-06, + "loss": 0.7369, + "step": 9068 + }, + { + "epoch": 0.4660807893925378, + "grad_norm": 1.0153177976608276, + "learning_rate": 5.788994239983176e-06, + "loss": 0.75, + "step": 9069 + }, + { + "epoch": 0.4661321821358824, + "grad_norm": 1.0503993034362793, + "learning_rate": 5.788172402070086e-06, + "loss": 0.7098, + "step": 9070 + }, + { + "epoch": 0.46618357487922707, + "grad_norm": 1.096700668334961, + "learning_rate": 5.78735054231999e-06, + "loss": 0.7187, + "step": 9071 + }, + { + "epoch": 0.4662349676225717, + "grad_norm": 1.0928943157196045, + "learning_rate": 5.786528660755659e-06, + "loss": 0.8071, + "step": 9072 + }, + { + "epoch": 0.4662863603659163, + "grad_norm": 1.1031432151794434, + "learning_rate": 5.7857067573998675e-06, + "loss": 0.7717, + "step": 9073 + }, + { + "epoch": 0.46633775310926096, + "grad_norm": 1.0703964233398438, + "learning_rate": 5.784884832275382e-06, + "loss": 0.7649, + "step": 9074 + }, + { + "epoch": 0.4663891458526056, + "grad_norm": 1.1120059490203857, + "learning_rate": 5.784062885404978e-06, + "loss": 0.7354, + "step": 9075 + }, + { + "epoch": 0.46644053859595025, + "grad_norm": 1.059704303741455, + "learning_rate": 5.783240916811426e-06, + "loss": 0.6959, + "step": 9076 + }, + { + "epoch": 0.4664919313392949, + "grad_norm": 1.087563157081604, + "learning_rate": 5.782418926517501e-06, + "loss": 0.692, + "step": 9077 + }, + { + "epoch": 0.46654332408263954, + "grad_norm": 0.8931577801704407, + "learning_rate": 5.781596914545977e-06, + "loss": 0.6437, + "step": 9078 + }, + { + "epoch": 0.4665947168259842, + "grad_norm": 1.1860404014587402, + "learning_rate": 5.780774880919626e-06, + "loss": 0.7631, + "step": 9079 + }, + { + "epoch": 0.46664610956932884, + "grad_norm": 1.0655113458633423, + "learning_rate": 5.779952825661228e-06, + "loss": 0.6939, + "step": 9080 + }, + { + "epoch": 0.46669750231267343, + "grad_norm": 1.1033114194869995, + "learning_rate": 5.779130748793553e-06, + "loss": 0.787, + "step": 9081 + }, + { + "epoch": 0.4667488950560181, + "grad_norm": 0.7280256748199463, + "learning_rate": 5.778308650339382e-06, + "loss": 0.6181, + "step": 9082 + }, + { + "epoch": 0.4668002877993627, + "grad_norm": 1.063771367073059, + "learning_rate": 5.77748653032149e-06, + "loss": 0.6855, + "step": 9083 + }, + { + "epoch": 0.46685168054270737, + "grad_norm": 1.1214535236358643, + "learning_rate": 5.776664388762654e-06, + "loss": 0.7276, + "step": 9084 + }, + { + "epoch": 0.466903073286052, + "grad_norm": 1.08709716796875, + "learning_rate": 5.775842225685654e-06, + "loss": 0.7217, + "step": 9085 + }, + { + "epoch": 0.46695446602939666, + "grad_norm": 1.0346719026565552, + "learning_rate": 5.775020041113268e-06, + "loss": 0.7315, + "step": 9086 + }, + { + "epoch": 0.4670058587727413, + "grad_norm": 1.0865516662597656, + "learning_rate": 5.774197835068273e-06, + "loss": 0.7528, + "step": 9087 + }, + { + "epoch": 0.4670572515160859, + "grad_norm": 1.0703585147857666, + "learning_rate": 5.773375607573451e-06, + "loss": 0.7563, + "step": 9088 + }, + { + "epoch": 0.46710864425943055, + "grad_norm": 1.161829948425293, + "learning_rate": 5.7725533586515835e-06, + "loss": 0.7658, + "step": 9089 + }, + { + "epoch": 0.4671600370027752, + "grad_norm": 1.0884894132614136, + "learning_rate": 5.7717310883254515e-06, + "loss": 0.7481, + "step": 9090 + }, + { + "epoch": 0.46721142974611984, + "grad_norm": 1.3401157855987549, + "learning_rate": 5.7709087966178345e-06, + "loss": 0.7518, + "step": 9091 + }, + { + "epoch": 0.4672628224894645, + "grad_norm": 1.1092058420181274, + "learning_rate": 5.770086483551516e-06, + "loss": 0.758, + "step": 9092 + }, + { + "epoch": 0.46731421523280914, + "grad_norm": 1.0136786699295044, + "learning_rate": 5.769264149149279e-06, + "loss": 0.6851, + "step": 9093 + }, + { + "epoch": 0.4673656079761538, + "grad_norm": 1.1079891920089722, + "learning_rate": 5.768441793433908e-06, + "loss": 0.7544, + "step": 9094 + }, + { + "epoch": 0.46741700071949843, + "grad_norm": 1.0423799753189087, + "learning_rate": 5.767619416428185e-06, + "loss": 0.7469, + "step": 9095 + }, + { + "epoch": 0.467468393462843, + "grad_norm": 1.047964096069336, + "learning_rate": 5.766797018154896e-06, + "loss": 0.7103, + "step": 9096 + }, + { + "epoch": 0.46751978620618767, + "grad_norm": 1.020954966545105, + "learning_rate": 5.765974598636826e-06, + "loss": 0.7399, + "step": 9097 + }, + { + "epoch": 0.4675711789495323, + "grad_norm": 1.1197277307510376, + "learning_rate": 5.765152157896762e-06, + "loss": 0.7711, + "step": 9098 + }, + { + "epoch": 0.46762257169287696, + "grad_norm": 0.790625274181366, + "learning_rate": 5.764329695957489e-06, + "loss": 0.6927, + "step": 9099 + }, + { + "epoch": 0.4676739644362216, + "grad_norm": 1.081231951713562, + "learning_rate": 5.763507212841793e-06, + "loss": 0.7646, + "step": 9100 + }, + { + "epoch": 0.46772535717956626, + "grad_norm": 1.1325751543045044, + "learning_rate": 5.762684708572464e-06, + "loss": 0.7005, + "step": 9101 + }, + { + "epoch": 0.4677767499229109, + "grad_norm": 1.1213966608047485, + "learning_rate": 5.761862183172288e-06, + "loss": 0.7804, + "step": 9102 + }, + { + "epoch": 0.4678281426662555, + "grad_norm": 1.4390102624893188, + "learning_rate": 5.761039636664055e-06, + "loss": 0.6872, + "step": 9103 + }, + { + "epoch": 0.46787953540960014, + "grad_norm": 1.0561023950576782, + "learning_rate": 5.760217069070555e-06, + "loss": 0.7217, + "step": 9104 + }, + { + "epoch": 0.4679309281529448, + "grad_norm": 1.0902947187423706, + "learning_rate": 5.7593944804145775e-06, + "loss": 0.7761, + "step": 9105 + }, + { + "epoch": 0.46798232089628944, + "grad_norm": 0.7485927939414978, + "learning_rate": 5.758571870718912e-06, + "loss": 0.6497, + "step": 9106 + }, + { + "epoch": 0.4680337136396341, + "grad_norm": 1.1094627380371094, + "learning_rate": 5.757749240006352e-06, + "loss": 0.7384, + "step": 9107 + }, + { + "epoch": 0.46808510638297873, + "grad_norm": 1.058859944343567, + "learning_rate": 5.756926588299686e-06, + "loss": 0.6883, + "step": 9108 + }, + { + "epoch": 0.4681364991263234, + "grad_norm": 1.0982186794281006, + "learning_rate": 5.756103915621709e-06, + "loss": 0.7394, + "step": 9109 + }, + { + "epoch": 0.468187891869668, + "grad_norm": 1.0921043157577515, + "learning_rate": 5.755281221995212e-06, + "loss": 0.7453, + "step": 9110 + }, + { + "epoch": 0.4682392846130126, + "grad_norm": 1.0325762033462524, + "learning_rate": 5.754458507442989e-06, + "loss": 0.7877, + "step": 9111 + }, + { + "epoch": 0.46829067735635727, + "grad_norm": 1.0677783489227295, + "learning_rate": 5.753635771987834e-06, + "loss": 0.7374, + "step": 9112 + }, + { + "epoch": 0.4683420700997019, + "grad_norm": 0.9708812832832336, + "learning_rate": 5.752813015652543e-06, + "loss": 0.7168, + "step": 9113 + }, + { + "epoch": 0.46839346284304656, + "grad_norm": 1.0660696029663086, + "learning_rate": 5.751990238459909e-06, + "loss": 0.7017, + "step": 9114 + }, + { + "epoch": 0.4684448555863912, + "grad_norm": 0.7915019392967224, + "learning_rate": 5.75116744043273e-06, + "loss": 0.6398, + "step": 9115 + }, + { + "epoch": 0.46849624832973585, + "grad_norm": 1.0071666240692139, + "learning_rate": 5.7503446215937995e-06, + "loss": 0.7369, + "step": 9116 + }, + { + "epoch": 0.4685476410730805, + "grad_norm": 1.0285992622375488, + "learning_rate": 5.749521781965917e-06, + "loss": 0.7048, + "step": 9117 + }, + { + "epoch": 0.46859903381642515, + "grad_norm": 1.071171760559082, + "learning_rate": 5.7486989215718806e-06, + "loss": 0.7778, + "step": 9118 + }, + { + "epoch": 0.46865042655976974, + "grad_norm": 1.0935696363449097, + "learning_rate": 5.747876040434484e-06, + "loss": 0.7563, + "step": 9119 + }, + { + "epoch": 0.4687018193031144, + "grad_norm": 0.831864058971405, + "learning_rate": 5.747053138576529e-06, + "loss": 0.6895, + "step": 9120 + }, + { + "epoch": 0.46875321204645903, + "grad_norm": 0.7572145462036133, + "learning_rate": 5.746230216020815e-06, + "loss": 0.7595, + "step": 9121 + }, + { + "epoch": 0.4688046047898037, + "grad_norm": 1.1116102933883667, + "learning_rate": 5.745407272790142e-06, + "loss": 0.7687, + "step": 9122 + }, + { + "epoch": 0.4688559975331483, + "grad_norm": 1.0225255489349365, + "learning_rate": 5.744584308907308e-06, + "loss": 0.7255, + "step": 9123 + }, + { + "epoch": 0.468907390276493, + "grad_norm": 0.6658361554145813, + "learning_rate": 5.743761324395116e-06, + "loss": 0.6835, + "step": 9124 + }, + { + "epoch": 0.4689587830198376, + "grad_norm": 1.028028964996338, + "learning_rate": 5.742938319276368e-06, + "loss": 0.7458, + "step": 9125 + }, + { + "epoch": 0.4690101757631822, + "grad_norm": 1.0913782119750977, + "learning_rate": 5.742115293573866e-06, + "loss": 0.7938, + "step": 9126 + }, + { + "epoch": 0.46906156850652686, + "grad_norm": 1.0903044939041138, + "learning_rate": 5.7412922473104095e-06, + "loss": 0.6991, + "step": 9127 + }, + { + "epoch": 0.4691129612498715, + "grad_norm": 1.05997633934021, + "learning_rate": 5.740469180508804e-06, + "loss": 0.7848, + "step": 9128 + }, + { + "epoch": 0.46916435399321615, + "grad_norm": 1.095229983329773, + "learning_rate": 5.739646093191854e-06, + "loss": 0.6849, + "step": 9129 + }, + { + "epoch": 0.4692157467365608, + "grad_norm": 1.0129516124725342, + "learning_rate": 5.738822985382364e-06, + "loss": 0.7122, + "step": 9130 + }, + { + "epoch": 0.46926713947990545, + "grad_norm": 1.0196632146835327, + "learning_rate": 5.737999857103137e-06, + "loss": 0.7535, + "step": 9131 + }, + { + "epoch": 0.4693185322232501, + "grad_norm": 1.0907652378082275, + "learning_rate": 5.737176708376979e-06, + "loss": 0.7664, + "step": 9132 + }, + { + "epoch": 0.46936992496659474, + "grad_norm": 1.0542631149291992, + "learning_rate": 5.7363535392266965e-06, + "loss": 0.7274, + "step": 9133 + }, + { + "epoch": 0.46942131770993933, + "grad_norm": 1.0437034368515015, + "learning_rate": 5.735530349675098e-06, + "loss": 0.7446, + "step": 9134 + }, + { + "epoch": 0.469472710453284, + "grad_norm": 1.0687042474746704, + "learning_rate": 5.734707139744988e-06, + "loss": 0.709, + "step": 9135 + }, + { + "epoch": 0.46952410319662863, + "grad_norm": 1.1929371356964111, + "learning_rate": 5.733883909459175e-06, + "loss": 0.7948, + "step": 9136 + }, + { + "epoch": 0.4695754959399733, + "grad_norm": 1.0236198902130127, + "learning_rate": 5.7330606588404655e-06, + "loss": 0.7457, + "step": 9137 + }, + { + "epoch": 0.4696268886833179, + "grad_norm": 1.050380825996399, + "learning_rate": 5.7322373879116736e-06, + "loss": 0.7573, + "step": 9138 + }, + { + "epoch": 0.46967828142666257, + "grad_norm": 1.04297935962677, + "learning_rate": 5.731414096695603e-06, + "loss": 0.6499, + "step": 9139 + }, + { + "epoch": 0.4697296741700072, + "grad_norm": 1.0867043733596802, + "learning_rate": 5.7305907852150665e-06, + "loss": 0.7553, + "step": 9140 + }, + { + "epoch": 0.4697810669133518, + "grad_norm": 0.7333614826202393, + "learning_rate": 5.729767453492876e-06, + "loss": 0.6558, + "step": 9141 + }, + { + "epoch": 0.46983245965669646, + "grad_norm": 1.0604865550994873, + "learning_rate": 5.728944101551841e-06, + "loss": 0.7394, + "step": 9142 + }, + { + "epoch": 0.4698838524000411, + "grad_norm": 1.0322959423065186, + "learning_rate": 5.728120729414771e-06, + "loss": 0.7742, + "step": 9143 + }, + { + "epoch": 0.46993524514338575, + "grad_norm": 1.0673558712005615, + "learning_rate": 5.727297337104481e-06, + "loss": 0.7635, + "step": 9144 + }, + { + "epoch": 0.4699866378867304, + "grad_norm": 1.0791901350021362, + "learning_rate": 5.726473924643783e-06, + "loss": 0.7492, + "step": 9145 + }, + { + "epoch": 0.47003803063007504, + "grad_norm": 1.056095004081726, + "learning_rate": 5.725650492055491e-06, + "loss": 0.7347, + "step": 9146 + }, + { + "epoch": 0.4700894233734197, + "grad_norm": 1.1589689254760742, + "learning_rate": 5.7248270393624185e-06, + "loss": 0.7764, + "step": 9147 + }, + { + "epoch": 0.47014081611676434, + "grad_norm": 1.053005576133728, + "learning_rate": 5.724003566587379e-06, + "loss": 0.7065, + "step": 9148 + }, + { + "epoch": 0.47019220886010893, + "grad_norm": 1.0851510763168335, + "learning_rate": 5.723180073753191e-06, + "loss": 0.7094, + "step": 9149 + }, + { + "epoch": 0.4702436016034536, + "grad_norm": 1.0544852018356323, + "learning_rate": 5.7223565608826655e-06, + "loss": 0.7179, + "step": 9150 + }, + { + "epoch": 0.4702949943467982, + "grad_norm": 1.0030632019042969, + "learning_rate": 5.72153302799862e-06, + "loss": 0.727, + "step": 9151 + }, + { + "epoch": 0.47034638709014287, + "grad_norm": 1.0418330430984497, + "learning_rate": 5.720709475123874e-06, + "loss": 0.7793, + "step": 9152 + }, + { + "epoch": 0.4703977798334875, + "grad_norm": 1.0173323154449463, + "learning_rate": 5.719885902281241e-06, + "loss": 0.7236, + "step": 9153 + }, + { + "epoch": 0.47044917257683216, + "grad_norm": 1.0436437129974365, + "learning_rate": 5.71906230949354e-06, + "loss": 0.6893, + "step": 9154 + }, + { + "epoch": 0.4705005653201768, + "grad_norm": 1.035166621208191, + "learning_rate": 5.718238696783592e-06, + "loss": 0.7542, + "step": 9155 + }, + { + "epoch": 0.47055195806352146, + "grad_norm": 1.0700384378433228, + "learning_rate": 5.717415064174212e-06, + "loss": 0.7123, + "step": 9156 + }, + { + "epoch": 0.47060335080686605, + "grad_norm": 1.031958818435669, + "learning_rate": 5.716591411688222e-06, + "loss": 0.7059, + "step": 9157 + }, + { + "epoch": 0.4706547435502107, + "grad_norm": 1.0683881044387817, + "learning_rate": 5.715767739348441e-06, + "loss": 0.7741, + "step": 9158 + }, + { + "epoch": 0.47070613629355534, + "grad_norm": 1.0181117057800293, + "learning_rate": 5.714944047177689e-06, + "loss": 0.7255, + "step": 9159 + }, + { + "epoch": 0.4707575290369, + "grad_norm": 1.0431150197982788, + "learning_rate": 5.714120335198789e-06, + "loss": 0.7045, + "step": 9160 + }, + { + "epoch": 0.47080892178024464, + "grad_norm": 1.02367103099823, + "learning_rate": 5.713296603434561e-06, + "loss": 0.7359, + "step": 9161 + }, + { + "epoch": 0.4708603145235893, + "grad_norm": 1.123472809791565, + "learning_rate": 5.712472851907828e-06, + "loss": 0.8097, + "step": 9162 + }, + { + "epoch": 0.47091170726693393, + "grad_norm": 1.0098596811294556, + "learning_rate": 5.711649080641411e-06, + "loss": 0.7141, + "step": 9163 + }, + { + "epoch": 0.4709631000102785, + "grad_norm": 1.3075313568115234, + "learning_rate": 5.710825289658137e-06, + "loss": 0.7292, + "step": 9164 + }, + { + "epoch": 0.47101449275362317, + "grad_norm": 0.9928123354911804, + "learning_rate": 5.710001478980825e-06, + "loss": 0.7443, + "step": 9165 + }, + { + "epoch": 0.4710658854969678, + "grad_norm": 1.088578701019287, + "learning_rate": 5.709177648632305e-06, + "loss": 0.7477, + "step": 9166 + }, + { + "epoch": 0.47111727824031246, + "grad_norm": 0.9493699073791504, + "learning_rate": 5.708353798635397e-06, + "loss": 0.6709, + "step": 9167 + }, + { + "epoch": 0.4711686709836571, + "grad_norm": 1.180959939956665, + "learning_rate": 5.7075299290129296e-06, + "loss": 0.7968, + "step": 9168 + }, + { + "epoch": 0.47122006372700176, + "grad_norm": 1.1137547492980957, + "learning_rate": 5.706706039787726e-06, + "loss": 0.6983, + "step": 9169 + }, + { + "epoch": 0.4712714564703464, + "grad_norm": 1.0846682786941528, + "learning_rate": 5.705882130982616e-06, + "loss": 0.7159, + "step": 9170 + }, + { + "epoch": 0.47132284921369105, + "grad_norm": 0.9548245072364807, + "learning_rate": 5.705058202620426e-06, + "loss": 0.7302, + "step": 9171 + }, + { + "epoch": 0.47137424195703564, + "grad_norm": 0.7919398546218872, + "learning_rate": 5.7042342547239806e-06, + "loss": 0.6784, + "step": 9172 + }, + { + "epoch": 0.4714256347003803, + "grad_norm": 0.7552742958068848, + "learning_rate": 5.703410287316111e-06, + "loss": 0.6712, + "step": 9173 + }, + { + "epoch": 0.47147702744372494, + "grad_norm": 1.0433566570281982, + "learning_rate": 5.702586300419645e-06, + "loss": 0.716, + "step": 9174 + }, + { + "epoch": 0.4715284201870696, + "grad_norm": 1.120636224746704, + "learning_rate": 5.701762294057411e-06, + "loss": 0.7375, + "step": 9175 + }, + { + "epoch": 0.47157981293041423, + "grad_norm": 1.1099270582199097, + "learning_rate": 5.700938268252243e-06, + "loss": 0.8098, + "step": 9176 + }, + { + "epoch": 0.4716312056737589, + "grad_norm": 0.7387608289718628, + "learning_rate": 5.700114223026965e-06, + "loss": 0.6638, + "step": 9177 + }, + { + "epoch": 0.4716825984171035, + "grad_norm": 1.0869698524475098, + "learning_rate": 5.699290158404412e-06, + "loss": 0.7451, + "step": 9178 + }, + { + "epoch": 0.4717339911604481, + "grad_norm": 0.7070497870445251, + "learning_rate": 5.698466074407416e-06, + "loss": 0.6838, + "step": 9179 + }, + { + "epoch": 0.47178538390379277, + "grad_norm": 1.0209150314331055, + "learning_rate": 5.697641971058806e-06, + "loss": 0.67, + "step": 9180 + }, + { + "epoch": 0.4718367766471374, + "grad_norm": 1.1599454879760742, + "learning_rate": 5.696817848381417e-06, + "loss": 0.6882, + "step": 9181 + }, + { + "epoch": 0.47188816939048206, + "grad_norm": 0.7231242656707764, + "learning_rate": 5.695993706398081e-06, + "loss": 0.675, + "step": 9182 + }, + { + "epoch": 0.4719395621338267, + "grad_norm": 1.169425129890442, + "learning_rate": 5.69516954513163e-06, + "loss": 0.756, + "step": 9183 + }, + { + "epoch": 0.47199095487717135, + "grad_norm": 1.0638024806976318, + "learning_rate": 5.6943453646049e-06, + "loss": 0.7265, + "step": 9184 + }, + { + "epoch": 0.472042347620516, + "grad_norm": 0.7346010208129883, + "learning_rate": 5.693521164840725e-06, + "loss": 0.6943, + "step": 9185 + }, + { + "epoch": 0.47209374036386065, + "grad_norm": 1.1254210472106934, + "learning_rate": 5.6926969458619415e-06, + "loss": 0.7356, + "step": 9186 + }, + { + "epoch": 0.47214513310720524, + "grad_norm": 1.079274296760559, + "learning_rate": 5.691872707691386e-06, + "loss": 0.7296, + "step": 9187 + }, + { + "epoch": 0.4721965258505499, + "grad_norm": 1.1221206188201904, + "learning_rate": 5.69104845035189e-06, + "loss": 0.7215, + "step": 9188 + }, + { + "epoch": 0.47224791859389453, + "grad_norm": 1.0603994131088257, + "learning_rate": 5.690224173866294e-06, + "loss": 0.7675, + "step": 9189 + }, + { + "epoch": 0.4722993113372392, + "grad_norm": 1.0694677829742432, + "learning_rate": 5.689399878257437e-06, + "loss": 0.7758, + "step": 9190 + }, + { + "epoch": 0.4723507040805838, + "grad_norm": 0.7089584469795227, + "learning_rate": 5.688575563548151e-06, + "loss": 0.6788, + "step": 9191 + }, + { + "epoch": 0.4724020968239285, + "grad_norm": 0.7196356654167175, + "learning_rate": 5.68775122976128e-06, + "loss": 0.6584, + "step": 9192 + }, + { + "epoch": 0.4724534895672731, + "grad_norm": 1.0938193798065186, + "learning_rate": 5.686926876919659e-06, + "loss": 0.7301, + "step": 9193 + }, + { + "epoch": 0.4725048823106177, + "grad_norm": 1.1049164533615112, + "learning_rate": 5.686102505046129e-06, + "loss": 0.7275, + "step": 9194 + }, + { + "epoch": 0.47255627505396236, + "grad_norm": 0.9249621629714966, + "learning_rate": 5.68527811416353e-06, + "loss": 0.6617, + "step": 9195 + }, + { + "epoch": 0.472607667797307, + "grad_norm": 1.0655007362365723, + "learning_rate": 5.684453704294703e-06, + "loss": 0.7332, + "step": 9196 + }, + { + "epoch": 0.47265906054065165, + "grad_norm": 1.0716074705123901, + "learning_rate": 5.683629275462489e-06, + "loss": 0.7527, + "step": 9197 + }, + { + "epoch": 0.4727104532839963, + "grad_norm": 0.9782666563987732, + "learning_rate": 5.6828048276897295e-06, + "loss": 0.7065, + "step": 9198 + }, + { + "epoch": 0.47276184602734095, + "grad_norm": 0.6938285827636719, + "learning_rate": 5.681980360999264e-06, + "loss": 0.6788, + "step": 9199 + }, + { + "epoch": 0.4728132387706856, + "grad_norm": 1.1836494207382202, + "learning_rate": 5.681155875413938e-06, + "loss": 0.7248, + "step": 9200 + }, + { + "epoch": 0.47286463151403024, + "grad_norm": 1.1004406213760376, + "learning_rate": 5.680331370956593e-06, + "loss": 0.7521, + "step": 9201 + }, + { + "epoch": 0.47291602425737483, + "grad_norm": 1.032076120376587, + "learning_rate": 5.6795068476500745e-06, + "loss": 0.7492, + "step": 9202 + }, + { + "epoch": 0.4729674170007195, + "grad_norm": 1.1093873977661133, + "learning_rate": 5.6786823055172254e-06, + "loss": 0.7914, + "step": 9203 + }, + { + "epoch": 0.47301880974406413, + "grad_norm": 1.1851825714111328, + "learning_rate": 5.677857744580889e-06, + "loss": 0.8116, + "step": 9204 + }, + { + "epoch": 0.4730702024874088, + "grad_norm": 0.9952714443206787, + "learning_rate": 5.677033164863912e-06, + "loss": 0.7412, + "step": 9205 + }, + { + "epoch": 0.4731215952307534, + "grad_norm": 1.1736841201782227, + "learning_rate": 5.676208566389141e-06, + "loss": 0.7375, + "step": 9206 + }, + { + "epoch": 0.47317298797409807, + "grad_norm": 1.0979254245758057, + "learning_rate": 5.67538394917942e-06, + "loss": 0.7151, + "step": 9207 + }, + { + "epoch": 0.4732243807174427, + "grad_norm": 1.0516613721847534, + "learning_rate": 5.674559313257598e-06, + "loss": 0.737, + "step": 9208 + }, + { + "epoch": 0.47327577346078736, + "grad_norm": 1.0974156856536865, + "learning_rate": 5.673734658646521e-06, + "loss": 0.7941, + "step": 9209 + }, + { + "epoch": 0.47332716620413195, + "grad_norm": 1.0812278985977173, + "learning_rate": 5.6729099853690375e-06, + "loss": 0.7458, + "step": 9210 + }, + { + "epoch": 0.4733785589474766, + "grad_norm": 1.0341105461120605, + "learning_rate": 5.672085293447994e-06, + "loss": 0.7593, + "step": 9211 + }, + { + "epoch": 0.47342995169082125, + "grad_norm": 0.966256856918335, + "learning_rate": 5.671260582906242e-06, + "loss": 0.6602, + "step": 9212 + }, + { + "epoch": 0.4734813444341659, + "grad_norm": 0.6759054064750671, + "learning_rate": 5.670435853766628e-06, + "loss": 0.6802, + "step": 9213 + }, + { + "epoch": 0.47353273717751054, + "grad_norm": 1.0618385076522827, + "learning_rate": 5.669611106052003e-06, + "loss": 0.7326, + "step": 9214 + }, + { + "epoch": 0.4735841299208552, + "grad_norm": 1.0748823881149292, + "learning_rate": 5.668786339785219e-06, + "loss": 0.7816, + "step": 9215 + }, + { + "epoch": 0.47363552266419984, + "grad_norm": 1.0557425022125244, + "learning_rate": 5.667961554989124e-06, + "loss": 0.7246, + "step": 9216 + }, + { + "epoch": 0.47368691540754443, + "grad_norm": 1.0283653736114502, + "learning_rate": 5.6671367516865705e-06, + "loss": 0.7167, + "step": 9217 + }, + { + "epoch": 0.4737383081508891, + "grad_norm": 1.0345820188522339, + "learning_rate": 5.666311929900412e-06, + "loss": 0.7196, + "step": 9218 + }, + { + "epoch": 0.4737897008942337, + "grad_norm": 0.7294495105743408, + "learning_rate": 5.665487089653501e-06, + "loss": 0.6806, + "step": 9219 + }, + { + "epoch": 0.47384109363757837, + "grad_norm": 1.0568950176239014, + "learning_rate": 5.664662230968686e-06, + "loss": 0.7069, + "step": 9220 + }, + { + "epoch": 0.473892486380923, + "grad_norm": 1.0549429655075073, + "learning_rate": 5.663837353868826e-06, + "loss": 0.6943, + "step": 9221 + }, + { + "epoch": 0.47394387912426766, + "grad_norm": 0.826448917388916, + "learning_rate": 5.663012458376771e-06, + "loss": 0.6993, + "step": 9222 + }, + { + "epoch": 0.4739952718676123, + "grad_norm": 0.8376103639602661, + "learning_rate": 5.6621875445153775e-06, + "loss": 0.6577, + "step": 9223 + }, + { + "epoch": 0.47404666461095696, + "grad_norm": 1.0684700012207031, + "learning_rate": 5.661362612307499e-06, + "loss": 0.7469, + "step": 9224 + }, + { + "epoch": 0.47409805735430155, + "grad_norm": 1.058569073677063, + "learning_rate": 5.660537661775992e-06, + "loss": 0.7842, + "step": 9225 + }, + { + "epoch": 0.4741494500976462, + "grad_norm": 1.0559556484222412, + "learning_rate": 5.659712692943712e-06, + "loss": 0.7321, + "step": 9226 + }, + { + "epoch": 0.47420084284099084, + "grad_norm": 1.1208834648132324, + "learning_rate": 5.658887705833517e-06, + "loss": 0.7765, + "step": 9227 + }, + { + "epoch": 0.4742522355843355, + "grad_norm": 1.0904886722564697, + "learning_rate": 5.658062700468261e-06, + "loss": 0.7011, + "step": 9228 + }, + { + "epoch": 0.47430362832768014, + "grad_norm": 1.0916576385498047, + "learning_rate": 5.657237676870805e-06, + "loss": 0.7474, + "step": 9229 + }, + { + "epoch": 0.4743550210710248, + "grad_norm": 1.088936686515808, + "learning_rate": 5.6564126350640035e-06, + "loss": 0.7478, + "step": 9230 + }, + { + "epoch": 0.47440641381436943, + "grad_norm": 1.0974252223968506, + "learning_rate": 5.655587575070719e-06, + "loss": 0.7374, + "step": 9231 + }, + { + "epoch": 0.474457806557714, + "grad_norm": 1.0085128545761108, + "learning_rate": 5.654762496913807e-06, + "loss": 0.6862, + "step": 9232 + }, + { + "epoch": 0.47450919930105867, + "grad_norm": 1.082457184791565, + "learning_rate": 5.653937400616129e-06, + "loss": 0.7711, + "step": 9233 + }, + { + "epoch": 0.4745605920444033, + "grad_norm": 1.0591022968292236, + "learning_rate": 5.653112286200543e-06, + "loss": 0.7291, + "step": 9234 + }, + { + "epoch": 0.47461198478774796, + "grad_norm": 1.1501758098602295, + "learning_rate": 5.65228715368991e-06, + "loss": 0.757, + "step": 9235 + }, + { + "epoch": 0.4746633775310926, + "grad_norm": 1.012110948562622, + "learning_rate": 5.651462003107093e-06, + "loss": 0.7252, + "step": 9236 + }, + { + "epoch": 0.47471477027443726, + "grad_norm": 1.1001081466674805, + "learning_rate": 5.650636834474953e-06, + "loss": 0.7199, + "step": 9237 + }, + { + "epoch": 0.4747661630177819, + "grad_norm": 1.1054781675338745, + "learning_rate": 5.64981164781635e-06, + "loss": 0.7482, + "step": 9238 + }, + { + "epoch": 0.47481755576112655, + "grad_norm": 1.0530426502227783, + "learning_rate": 5.6489864431541495e-06, + "loss": 0.7367, + "step": 9239 + }, + { + "epoch": 0.47486894850447114, + "grad_norm": 1.0405614376068115, + "learning_rate": 5.648161220511212e-06, + "loss": 0.7247, + "step": 9240 + }, + { + "epoch": 0.4749203412478158, + "grad_norm": 1.0769857168197632, + "learning_rate": 5.647335979910402e-06, + "loss": 0.7116, + "step": 9241 + }, + { + "epoch": 0.47497173399116044, + "grad_norm": 1.116463541984558, + "learning_rate": 5.646510721374584e-06, + "loss": 0.7222, + "step": 9242 + }, + { + "epoch": 0.4750231267345051, + "grad_norm": 1.0808955430984497, + "learning_rate": 5.645685444926622e-06, + "loss": 0.7276, + "step": 9243 + }, + { + "epoch": 0.47507451947784973, + "grad_norm": 1.027607798576355, + "learning_rate": 5.644860150589379e-06, + "loss": 0.7095, + "step": 9244 + }, + { + "epoch": 0.4751259122211944, + "grad_norm": 1.1123250722885132, + "learning_rate": 5.644034838385724e-06, + "loss": 0.7638, + "step": 9245 + }, + { + "epoch": 0.475177304964539, + "grad_norm": 0.7411683797836304, + "learning_rate": 5.643209508338521e-06, + "loss": 0.6575, + "step": 9246 + }, + { + "epoch": 0.4752286977078837, + "grad_norm": 1.021462082862854, + "learning_rate": 5.642384160470635e-06, + "loss": 0.7061, + "step": 9247 + }, + { + "epoch": 0.47528009045122827, + "grad_norm": 0.853138267993927, + "learning_rate": 5.641558794804938e-06, + "loss": 0.6699, + "step": 9248 + }, + { + "epoch": 0.4753314831945729, + "grad_norm": 1.188086986541748, + "learning_rate": 5.640733411364292e-06, + "loss": 0.7402, + "step": 9249 + }, + { + "epoch": 0.47538287593791756, + "grad_norm": 1.187341570854187, + "learning_rate": 5.639908010171568e-06, + "loss": 0.7388, + "step": 9250 + }, + { + "epoch": 0.4754342686812622, + "grad_norm": 1.1461656093597412, + "learning_rate": 5.639082591249635e-06, + "loss": 0.8267, + "step": 9251 + }, + { + "epoch": 0.47548566142460685, + "grad_norm": 1.0076953172683716, + "learning_rate": 5.63825715462136e-06, + "loss": 0.7617, + "step": 9252 + }, + { + "epoch": 0.4755370541679515, + "grad_norm": 1.0705081224441528, + "learning_rate": 5.637431700309613e-06, + "loss": 0.7074, + "step": 9253 + }, + { + "epoch": 0.47558844691129615, + "grad_norm": 0.9094269871711731, + "learning_rate": 5.636606228337264e-06, + "loss": 0.6961, + "step": 9254 + }, + { + "epoch": 0.47563983965464074, + "grad_norm": 1.1163562536239624, + "learning_rate": 5.635780738727183e-06, + "loss": 0.7432, + "step": 9255 + }, + { + "epoch": 0.4756912323979854, + "grad_norm": 1.0883206129074097, + "learning_rate": 5.6349552315022424e-06, + "loss": 0.7015, + "step": 9256 + }, + { + "epoch": 0.47574262514133003, + "grad_norm": 1.1945425271987915, + "learning_rate": 5.6341297066853114e-06, + "loss": 0.799, + "step": 9257 + }, + { + "epoch": 0.4757940178846747, + "grad_norm": 0.6965017318725586, + "learning_rate": 5.6333041642992646e-06, + "loss": 0.6654, + "step": 9258 + }, + { + "epoch": 0.4758454106280193, + "grad_norm": 1.1033636331558228, + "learning_rate": 5.632478604366973e-06, + "loss": 0.7533, + "step": 9259 + }, + { + "epoch": 0.475896803371364, + "grad_norm": 0.885718822479248, + "learning_rate": 5.631653026911308e-06, + "loss": 0.7126, + "step": 9260 + }, + { + "epoch": 0.4759481961147086, + "grad_norm": 1.0306239128112793, + "learning_rate": 5.630827431955146e-06, + "loss": 0.7212, + "step": 9261 + }, + { + "epoch": 0.47599958885805327, + "grad_norm": 1.0844796895980835, + "learning_rate": 5.630001819521358e-06, + "loss": 0.7433, + "step": 9262 + }, + { + "epoch": 0.47605098160139786, + "grad_norm": 1.0109909772872925, + "learning_rate": 5.62917618963282e-06, + "loss": 0.7381, + "step": 9263 + }, + { + "epoch": 0.4761023743447425, + "grad_norm": 1.066633939743042, + "learning_rate": 5.628350542312407e-06, + "loss": 0.7288, + "step": 9264 + }, + { + "epoch": 0.47615376708808715, + "grad_norm": 1.0770617723464966, + "learning_rate": 5.62752487758299e-06, + "loss": 0.6651, + "step": 9265 + }, + { + "epoch": 0.4762051598314318, + "grad_norm": 1.0272241830825806, + "learning_rate": 5.6266991954674525e-06, + "loss": 0.702, + "step": 9266 + }, + { + "epoch": 0.47625655257477645, + "grad_norm": 1.0983392000198364, + "learning_rate": 5.625873495988663e-06, + "loss": 0.7202, + "step": 9267 + }, + { + "epoch": 0.4763079453181211, + "grad_norm": 1.1578168869018555, + "learning_rate": 5.625047779169503e-06, + "loss": 0.7339, + "step": 9268 + }, + { + "epoch": 0.47635933806146574, + "grad_norm": 0.8839200735092163, + "learning_rate": 5.62422204503285e-06, + "loss": 0.6894, + "step": 9269 + }, + { + "epoch": 0.47641073080481033, + "grad_norm": 1.0937029123306274, + "learning_rate": 5.623396293601578e-06, + "loss": 0.6823, + "step": 9270 + }, + { + "epoch": 0.476462123548155, + "grad_norm": 1.1051486730575562, + "learning_rate": 5.622570524898569e-06, + "loss": 0.7042, + "step": 9271 + }, + { + "epoch": 0.47651351629149963, + "grad_norm": 1.0830765962600708, + "learning_rate": 5.621744738946699e-06, + "loss": 0.7696, + "step": 9272 + }, + { + "epoch": 0.4765649090348443, + "grad_norm": 1.2515655755996704, + "learning_rate": 5.620918935768849e-06, + "loss": 0.7592, + "step": 9273 + }, + { + "epoch": 0.4766163017781889, + "grad_norm": 1.042804479598999, + "learning_rate": 5.6200931153878965e-06, + "loss": 0.7512, + "step": 9274 + }, + { + "epoch": 0.47666769452153357, + "grad_norm": 1.1719958782196045, + "learning_rate": 5.619267277826724e-06, + "loss": 0.7555, + "step": 9275 + }, + { + "epoch": 0.4767190872648782, + "grad_norm": 1.0380206108093262, + "learning_rate": 5.61844142310821e-06, + "loss": 0.7093, + "step": 9276 + }, + { + "epoch": 0.47677048000822286, + "grad_norm": 1.0361785888671875, + "learning_rate": 5.617615551255235e-06, + "loss": 0.7698, + "step": 9277 + }, + { + "epoch": 0.47682187275156745, + "grad_norm": 1.1024243831634521, + "learning_rate": 5.616789662290684e-06, + "loss": 0.7959, + "step": 9278 + }, + { + "epoch": 0.4768732654949121, + "grad_norm": 1.0970488786697388, + "learning_rate": 5.6159637562374355e-06, + "loss": 0.7563, + "step": 9279 + }, + { + "epoch": 0.47692465823825675, + "grad_norm": 0.6821733713150024, + "learning_rate": 5.615137833118375e-06, + "loss": 0.6571, + "step": 9280 + }, + { + "epoch": 0.4769760509816014, + "grad_norm": 1.1118887662887573, + "learning_rate": 5.6143118929563825e-06, + "loss": 0.778, + "step": 9281 + }, + { + "epoch": 0.47702744372494604, + "grad_norm": 1.0481951236724854, + "learning_rate": 5.613485935774342e-06, + "loss": 0.7284, + "step": 9282 + }, + { + "epoch": 0.4770788364682907, + "grad_norm": 1.0448375940322876, + "learning_rate": 5.612659961595139e-06, + "loss": 0.7286, + "step": 9283 + }, + { + "epoch": 0.47713022921163534, + "grad_norm": 1.0651406049728394, + "learning_rate": 5.611833970441656e-06, + "loss": 0.7377, + "step": 9284 + }, + { + "epoch": 0.47718162195498, + "grad_norm": 1.0012246370315552, + "learning_rate": 5.6110079623367795e-06, + "loss": 0.7084, + "step": 9285 + }, + { + "epoch": 0.4772330146983246, + "grad_norm": 1.0567457675933838, + "learning_rate": 5.610181937303392e-06, + "loss": 0.7865, + "step": 9286 + }, + { + "epoch": 0.4772844074416692, + "grad_norm": 1.0820527076721191, + "learning_rate": 5.609355895364382e-06, + "loss": 0.7158, + "step": 9287 + }, + { + "epoch": 0.47733580018501387, + "grad_norm": 1.0715365409851074, + "learning_rate": 5.6085298365426355e-06, + "loss": 0.7309, + "step": 9288 + }, + { + "epoch": 0.4773871929283585, + "grad_norm": 1.015673041343689, + "learning_rate": 5.6077037608610376e-06, + "loss": 0.6902, + "step": 9289 + }, + { + "epoch": 0.47743858567170316, + "grad_norm": 0.9144046902656555, + "learning_rate": 5.606877668342476e-06, + "loss": 0.6646, + "step": 9290 + }, + { + "epoch": 0.4774899784150478, + "grad_norm": 0.9954757690429688, + "learning_rate": 5.6060515590098405e-06, + "loss": 0.7603, + "step": 9291 + }, + { + "epoch": 0.47754137115839246, + "grad_norm": 1.0255017280578613, + "learning_rate": 5.605225432886016e-06, + "loss": 0.6763, + "step": 9292 + }, + { + "epoch": 0.47759276390173705, + "grad_norm": 1.0901261568069458, + "learning_rate": 5.604399289993893e-06, + "loss": 0.7378, + "step": 9293 + }, + { + "epoch": 0.4776441566450817, + "grad_norm": 1.0971847772598267, + "learning_rate": 5.603573130356359e-06, + "loss": 0.7691, + "step": 9294 + }, + { + "epoch": 0.47769554938842634, + "grad_norm": 1.1193262338638306, + "learning_rate": 5.602746953996306e-06, + "loss": 0.7049, + "step": 9295 + }, + { + "epoch": 0.477746942131771, + "grad_norm": 1.061022400856018, + "learning_rate": 5.601920760936621e-06, + "loss": 0.7624, + "step": 9296 + }, + { + "epoch": 0.47779833487511564, + "grad_norm": 1.0158711671829224, + "learning_rate": 5.601094551200194e-06, + "loss": 0.7258, + "step": 9297 + }, + { + "epoch": 0.4778497276184603, + "grad_norm": 0.7592194676399231, + "learning_rate": 5.60026832480992e-06, + "loss": 0.633, + "step": 9298 + }, + { + "epoch": 0.47790112036180493, + "grad_norm": 0.6777262091636658, + "learning_rate": 5.599442081788687e-06, + "loss": 0.7244, + "step": 9299 + }, + { + "epoch": 0.4779525131051496, + "grad_norm": 1.0996352434158325, + "learning_rate": 5.598615822159388e-06, + "loss": 0.7241, + "step": 9300 + }, + { + "epoch": 0.47800390584849417, + "grad_norm": 0.7368532419204712, + "learning_rate": 5.597789545944915e-06, + "loss": 0.7033, + "step": 9301 + }, + { + "epoch": 0.4780552985918388, + "grad_norm": 1.0993101596832275, + "learning_rate": 5.596963253168159e-06, + "loss": 0.7412, + "step": 9302 + }, + { + "epoch": 0.47810669133518346, + "grad_norm": 1.0420209169387817, + "learning_rate": 5.596136943852017e-06, + "loss": 0.7484, + "step": 9303 + }, + { + "epoch": 0.4781580840785281, + "grad_norm": 1.0188112258911133, + "learning_rate": 5.595310618019381e-06, + "loss": 0.7294, + "step": 9304 + }, + { + "epoch": 0.47820947682187276, + "grad_norm": 1.0193397998809814, + "learning_rate": 5.5944842756931425e-06, + "loss": 0.7733, + "step": 9305 + }, + { + "epoch": 0.4782608695652174, + "grad_norm": 1.035310983657837, + "learning_rate": 5.593657916896199e-06, + "loss": 0.7174, + "step": 9306 + }, + { + "epoch": 0.47831226230856205, + "grad_norm": 1.0632489919662476, + "learning_rate": 5.592831541651444e-06, + "loss": 0.7475, + "step": 9307 + }, + { + "epoch": 0.47836365505190664, + "grad_norm": 1.091317892074585, + "learning_rate": 5.592005149981773e-06, + "loss": 0.7007, + "step": 9308 + }, + { + "epoch": 0.4784150477952513, + "grad_norm": 1.1301343441009521, + "learning_rate": 5.5911787419100825e-06, + "loss": 0.7182, + "step": 9309 + }, + { + "epoch": 0.47846644053859594, + "grad_norm": 0.7572804689407349, + "learning_rate": 5.5903523174592685e-06, + "loss": 0.6974, + "step": 9310 + }, + { + "epoch": 0.4785178332819406, + "grad_norm": 1.152143955230713, + "learning_rate": 5.58952587665223e-06, + "loss": 0.7022, + "step": 9311 + }, + { + "epoch": 0.47856922602528523, + "grad_norm": 1.0656213760375977, + "learning_rate": 5.58869941951186e-06, + "loss": 0.6877, + "step": 9312 + }, + { + "epoch": 0.4786206187686299, + "grad_norm": 1.2253516912460327, + "learning_rate": 5.5878729460610594e-06, + "loss": 0.8016, + "step": 9313 + }, + { + "epoch": 0.4786720115119745, + "grad_norm": 1.129782795906067, + "learning_rate": 5.587046456322726e-06, + "loss": 0.776, + "step": 9314 + }, + { + "epoch": 0.4787234042553192, + "grad_norm": 1.1205326318740845, + "learning_rate": 5.586219950319758e-06, + "loss": 0.7311, + "step": 9315 + }, + { + "epoch": 0.47877479699866377, + "grad_norm": 1.1362173557281494, + "learning_rate": 5.585393428075053e-06, + "loss": 0.7616, + "step": 9316 + }, + { + "epoch": 0.4788261897420084, + "grad_norm": 1.0390986204147339, + "learning_rate": 5.584566889611513e-06, + "loss": 0.7227, + "step": 9317 + }, + { + "epoch": 0.47887758248535306, + "grad_norm": 1.108679175376892, + "learning_rate": 5.583740334952037e-06, + "loss": 0.7535, + "step": 9318 + }, + { + "epoch": 0.4789289752286977, + "grad_norm": 1.0474495887756348, + "learning_rate": 5.582913764119524e-06, + "loss": 0.7272, + "step": 9319 + }, + { + "epoch": 0.47898036797204235, + "grad_norm": 1.0956218242645264, + "learning_rate": 5.582087177136877e-06, + "loss": 0.7986, + "step": 9320 + }, + { + "epoch": 0.479031760715387, + "grad_norm": 0.7247957587242126, + "learning_rate": 5.5812605740269955e-06, + "loss": 0.6533, + "step": 9321 + }, + { + "epoch": 0.47908315345873165, + "grad_norm": 0.6783864498138428, + "learning_rate": 5.580433954812784e-06, + "loss": 0.6294, + "step": 9322 + }, + { + "epoch": 0.47913454620207624, + "grad_norm": 1.0336880683898926, + "learning_rate": 5.5796073195171414e-06, + "loss": 0.6682, + "step": 9323 + }, + { + "epoch": 0.4791859389454209, + "grad_norm": 1.0942810773849487, + "learning_rate": 5.578780668162973e-06, + "loss": 0.7341, + "step": 9324 + }, + { + "epoch": 0.47923733168876553, + "grad_norm": 1.0713120698928833, + "learning_rate": 5.57795400077318e-06, + "loss": 0.8021, + "step": 9325 + }, + { + "epoch": 0.4792887244321102, + "grad_norm": 1.0736093521118164, + "learning_rate": 5.577127317370668e-06, + "loss": 0.7408, + "step": 9326 + }, + { + "epoch": 0.4793401171754548, + "grad_norm": 1.1135426759719849, + "learning_rate": 5.576300617978339e-06, + "loss": 0.7053, + "step": 9327 + }, + { + "epoch": 0.4793915099187995, + "grad_norm": 0.7100291848182678, + "learning_rate": 5.575473902619096e-06, + "loss": 0.6956, + "step": 9328 + }, + { + "epoch": 0.4794429026621441, + "grad_norm": 1.0849339962005615, + "learning_rate": 5.574647171315848e-06, + "loss": 0.7425, + "step": 9329 + }, + { + "epoch": 0.47949429540548877, + "grad_norm": 1.0699530839920044, + "learning_rate": 5.5738204240914985e-06, + "loss": 0.7548, + "step": 9330 + }, + { + "epoch": 0.47954568814883336, + "grad_norm": 0.9828718304634094, + "learning_rate": 5.572993660968952e-06, + "loss": 0.7147, + "step": 9331 + }, + { + "epoch": 0.479597080892178, + "grad_norm": 1.0109267234802246, + "learning_rate": 5.572166881971118e-06, + "loss": 0.7218, + "step": 9332 + }, + { + "epoch": 0.47964847363552265, + "grad_norm": 1.0340708494186401, + "learning_rate": 5.571340087120899e-06, + "loss": 0.7172, + "step": 9333 + }, + { + "epoch": 0.4796998663788673, + "grad_norm": 1.0614937543869019, + "learning_rate": 5.570513276441203e-06, + "loss": 0.7586, + "step": 9334 + }, + { + "epoch": 0.47975125912221195, + "grad_norm": 1.0970309972763062, + "learning_rate": 5.56968644995494e-06, + "loss": 0.7797, + "step": 9335 + }, + { + "epoch": 0.4798026518655566, + "grad_norm": 0.7149417400360107, + "learning_rate": 5.5688596076850174e-06, + "loss": 0.6384, + "step": 9336 + }, + { + "epoch": 0.47985404460890124, + "grad_norm": 1.0598479509353638, + "learning_rate": 5.5680327496543406e-06, + "loss": 0.7106, + "step": 9337 + }, + { + "epoch": 0.4799054373522459, + "grad_norm": 0.784929633140564, + "learning_rate": 5.56720587588582e-06, + "loss": 0.6508, + "step": 9338 + }, + { + "epoch": 0.4799568300955905, + "grad_norm": 1.0527395009994507, + "learning_rate": 5.566378986402367e-06, + "loss": 0.6873, + "step": 9339 + }, + { + "epoch": 0.48000822283893513, + "grad_norm": 1.0780612230300903, + "learning_rate": 5.5655520812268895e-06, + "loss": 0.729, + "step": 9340 + }, + { + "epoch": 0.4800596155822798, + "grad_norm": 1.0857625007629395, + "learning_rate": 5.564725160382298e-06, + "loss": 0.7682, + "step": 9341 + }, + { + "epoch": 0.4801110083256244, + "grad_norm": 1.0418905019760132, + "learning_rate": 5.5638982238915016e-06, + "loss": 0.7297, + "step": 9342 + }, + { + "epoch": 0.48016240106896907, + "grad_norm": 1.1343281269073486, + "learning_rate": 5.563071271777413e-06, + "loss": 0.8151, + "step": 9343 + }, + { + "epoch": 0.4802137938123137, + "grad_norm": 1.0613363981246948, + "learning_rate": 5.5622443040629435e-06, + "loss": 0.7074, + "step": 9344 + }, + { + "epoch": 0.48026518655565836, + "grad_norm": 1.0590349435806274, + "learning_rate": 5.561417320771004e-06, + "loss": 0.7683, + "step": 9345 + }, + { + "epoch": 0.48031657929900295, + "grad_norm": 1.0071563720703125, + "learning_rate": 5.560590321924508e-06, + "loss": 0.6351, + "step": 9346 + }, + { + "epoch": 0.4803679720423476, + "grad_norm": 1.01652193069458, + "learning_rate": 5.559763307546366e-06, + "loss": 0.7025, + "step": 9347 + }, + { + "epoch": 0.48041936478569225, + "grad_norm": 1.1316579580307007, + "learning_rate": 5.558936277659496e-06, + "loss": 0.7676, + "step": 9348 + }, + { + "epoch": 0.4804707575290369, + "grad_norm": 1.1674885749816895, + "learning_rate": 5.558109232286804e-06, + "loss": 0.7172, + "step": 9349 + }, + { + "epoch": 0.48052215027238154, + "grad_norm": 1.11307954788208, + "learning_rate": 5.557282171451212e-06, + "loss": 0.7684, + "step": 9350 + }, + { + "epoch": 0.4805735430157262, + "grad_norm": 1.1166071891784668, + "learning_rate": 5.556455095175629e-06, + "loss": 0.6948, + "step": 9351 + }, + { + "epoch": 0.48062493575907084, + "grad_norm": 1.1304198503494263, + "learning_rate": 5.555628003482972e-06, + "loss": 0.7251, + "step": 9352 + }, + { + "epoch": 0.4806763285024155, + "grad_norm": 1.0502710342407227, + "learning_rate": 5.554800896396156e-06, + "loss": 0.7362, + "step": 9353 + }, + { + "epoch": 0.4807277212457601, + "grad_norm": 1.1019030809402466, + "learning_rate": 5.553973773938098e-06, + "loss": 0.7354, + "step": 9354 + }, + { + "epoch": 0.4807791139891047, + "grad_norm": 1.091068148612976, + "learning_rate": 5.553146636131711e-06, + "loss": 0.7869, + "step": 9355 + }, + { + "epoch": 0.48083050673244937, + "grad_norm": 0.897142767906189, + "learning_rate": 5.552319482999914e-06, + "loss": 0.6793, + "step": 9356 + }, + { + "epoch": 0.480881899475794, + "grad_norm": 1.1070574522018433, + "learning_rate": 5.551492314565624e-06, + "loss": 0.7477, + "step": 9357 + }, + { + "epoch": 0.48093329221913866, + "grad_norm": 1.0726337432861328, + "learning_rate": 5.550665130851756e-06, + "loss": 0.7389, + "step": 9358 + }, + { + "epoch": 0.4809846849624833, + "grad_norm": 1.0646748542785645, + "learning_rate": 5.549837931881231e-06, + "loss": 0.6984, + "step": 9359 + }, + { + "epoch": 0.48103607770582796, + "grad_norm": 0.8094071745872498, + "learning_rate": 5.549010717676967e-06, + "loss": 0.6983, + "step": 9360 + }, + { + "epoch": 0.48108747044917255, + "grad_norm": 1.0667892694473267, + "learning_rate": 5.5481834882618805e-06, + "loss": 0.729, + "step": 9361 + }, + { + "epoch": 0.4811388631925172, + "grad_norm": 0.7472618222236633, + "learning_rate": 5.547356243658891e-06, + "loss": 0.6709, + "step": 9362 + }, + { + "epoch": 0.48119025593586184, + "grad_norm": 0.8461788892745972, + "learning_rate": 5.546528983890919e-06, + "loss": 0.663, + "step": 9363 + }, + { + "epoch": 0.4812416486792065, + "grad_norm": 0.9896567463874817, + "learning_rate": 5.545701708980886e-06, + "loss": 0.7224, + "step": 9364 + }, + { + "epoch": 0.48129304142255114, + "grad_norm": 1.1093904972076416, + "learning_rate": 5.54487441895171e-06, + "loss": 0.8096, + "step": 9365 + }, + { + "epoch": 0.4813444341658958, + "grad_norm": 1.1064847707748413, + "learning_rate": 5.544047113826311e-06, + "loss": 0.7578, + "step": 9366 + }, + { + "epoch": 0.48139582690924043, + "grad_norm": 1.9331846237182617, + "learning_rate": 5.543219793627613e-06, + "loss": 0.66, + "step": 9367 + }, + { + "epoch": 0.4814472196525851, + "grad_norm": 1.0688841342926025, + "learning_rate": 5.542392458378535e-06, + "loss": 0.7226, + "step": 9368 + }, + { + "epoch": 0.48149861239592967, + "grad_norm": 1.0167242288589478, + "learning_rate": 5.541565108102002e-06, + "loss": 0.748, + "step": 9369 + }, + { + "epoch": 0.4815500051392743, + "grad_norm": 1.074195146560669, + "learning_rate": 5.540737742820933e-06, + "loss": 0.7771, + "step": 9370 + }, + { + "epoch": 0.48160139788261896, + "grad_norm": 0.8099533915519714, + "learning_rate": 5.5399103625582525e-06, + "loss": 0.7045, + "step": 9371 + }, + { + "epoch": 0.4816527906259636, + "grad_norm": 0.8186115026473999, + "learning_rate": 5.539082967336886e-06, + "loss": 0.7044, + "step": 9372 + }, + { + "epoch": 0.48170418336930826, + "grad_norm": 1.0901892185211182, + "learning_rate": 5.538255557179754e-06, + "loss": 0.7448, + "step": 9373 + }, + { + "epoch": 0.4817555761126529, + "grad_norm": 1.1268736124038696, + "learning_rate": 5.537428132109782e-06, + "loss": 0.7421, + "step": 9374 + }, + { + "epoch": 0.48180696885599755, + "grad_norm": 0.9127083420753479, + "learning_rate": 5.536600692149895e-06, + "loss": 0.6473, + "step": 9375 + }, + { + "epoch": 0.4818583615993422, + "grad_norm": 1.014736294746399, + "learning_rate": 5.535773237323017e-06, + "loss": 0.705, + "step": 9376 + }, + { + "epoch": 0.4819097543426868, + "grad_norm": 1.0789954662322998, + "learning_rate": 5.534945767652071e-06, + "loss": 0.6701, + "step": 9377 + }, + { + "epoch": 0.48196114708603144, + "grad_norm": 1.0487639904022217, + "learning_rate": 5.534118283159988e-06, + "loss": 0.7782, + "step": 9378 + }, + { + "epoch": 0.4820125398293761, + "grad_norm": 1.021418571472168, + "learning_rate": 5.533290783869691e-06, + "loss": 0.7375, + "step": 9379 + }, + { + "epoch": 0.48206393257272073, + "grad_norm": 1.0228590965270996, + "learning_rate": 5.532463269804106e-06, + "loss": 0.7183, + "step": 9380 + }, + { + "epoch": 0.4821153253160654, + "grad_norm": 1.07512629032135, + "learning_rate": 5.531635740986163e-06, + "loss": 0.7936, + "step": 9381 + }, + { + "epoch": 0.48216671805941, + "grad_norm": 1.0449707508087158, + "learning_rate": 5.530808197438786e-06, + "loss": 0.798, + "step": 9382 + }, + { + "epoch": 0.4822181108027547, + "grad_norm": 1.104135513305664, + "learning_rate": 5.529980639184906e-06, + "loss": 0.7702, + "step": 9383 + }, + { + "epoch": 0.48226950354609927, + "grad_norm": 1.115519404411316, + "learning_rate": 5.529153066247449e-06, + "loss": 0.746, + "step": 9384 + }, + { + "epoch": 0.4823208962894439, + "grad_norm": 1.1583386659622192, + "learning_rate": 5.5283254786493435e-06, + "loss": 0.7519, + "step": 9385 + }, + { + "epoch": 0.48237228903278856, + "grad_norm": 1.0242222547531128, + "learning_rate": 5.52749787641352e-06, + "loss": 0.7469, + "step": 9386 + }, + { + "epoch": 0.4824236817761332, + "grad_norm": 1.0207234621047974, + "learning_rate": 5.5266702595629065e-06, + "loss": 0.7174, + "step": 9387 + }, + { + "epoch": 0.48247507451947785, + "grad_norm": 1.2652196884155273, + "learning_rate": 5.525842628120433e-06, + "loss": 0.7777, + "step": 9388 + }, + { + "epoch": 0.4825264672628225, + "grad_norm": 1.0312669277191162, + "learning_rate": 5.525014982109032e-06, + "loss": 0.7458, + "step": 9389 + }, + { + "epoch": 0.48257786000616715, + "grad_norm": 1.0260474681854248, + "learning_rate": 5.5241873215516315e-06, + "loss": 0.7336, + "step": 9390 + }, + { + "epoch": 0.4826292527495118, + "grad_norm": 1.0687013864517212, + "learning_rate": 5.523359646471164e-06, + "loss": 0.7487, + "step": 9391 + }, + { + "epoch": 0.4826806454928564, + "grad_norm": 1.1108461618423462, + "learning_rate": 5.52253195689056e-06, + "loss": 0.735, + "step": 9392 + }, + { + "epoch": 0.48273203823620103, + "grad_norm": 1.0323104858398438, + "learning_rate": 5.521704252832752e-06, + "loss": 0.6977, + "step": 9393 + }, + { + "epoch": 0.4827834309795457, + "grad_norm": 1.0239531993865967, + "learning_rate": 5.520876534320672e-06, + "loss": 0.6796, + "step": 9394 + }, + { + "epoch": 0.4828348237228903, + "grad_norm": 1.1626373529434204, + "learning_rate": 5.520048801377252e-06, + "loss": 0.7381, + "step": 9395 + }, + { + "epoch": 0.482886216466235, + "grad_norm": 1.0431400537490845, + "learning_rate": 5.519221054025428e-06, + "loss": 0.7599, + "step": 9396 + }, + { + "epoch": 0.4829376092095796, + "grad_norm": 0.7426326274871826, + "learning_rate": 5.51839329228813e-06, + "loss": 0.6716, + "step": 9397 + }, + { + "epoch": 0.48298900195292427, + "grad_norm": 1.0871039628982544, + "learning_rate": 5.517565516188292e-06, + "loss": 0.7422, + "step": 9398 + }, + { + "epoch": 0.48304039469626886, + "grad_norm": 1.1728631258010864, + "learning_rate": 5.51673772574885e-06, + "loss": 0.7809, + "step": 9399 + }, + { + "epoch": 0.4830917874396135, + "grad_norm": 1.1152043342590332, + "learning_rate": 5.515909920992739e-06, + "loss": 0.733, + "step": 9400 + }, + { + "epoch": 0.48314318018295815, + "grad_norm": 1.0957964658737183, + "learning_rate": 5.515082101942893e-06, + "loss": 0.7188, + "step": 9401 + }, + { + "epoch": 0.4831945729263028, + "grad_norm": 1.0207455158233643, + "learning_rate": 5.514254268622247e-06, + "loss": 0.7203, + "step": 9402 + }, + { + "epoch": 0.48324596566964745, + "grad_norm": 1.0612454414367676, + "learning_rate": 5.513426421053737e-06, + "loss": 0.7339, + "step": 9403 + }, + { + "epoch": 0.4832973584129921, + "grad_norm": 1.0991202592849731, + "learning_rate": 5.512598559260302e-06, + "loss": 0.7648, + "step": 9404 + }, + { + "epoch": 0.48334875115633674, + "grad_norm": 1.0323444604873657, + "learning_rate": 5.511770683264874e-06, + "loss": 0.7526, + "step": 9405 + }, + { + "epoch": 0.4834001438996814, + "grad_norm": 1.035022258758545, + "learning_rate": 5.5109427930903926e-06, + "loss": 0.7472, + "step": 9406 + }, + { + "epoch": 0.483451536643026, + "grad_norm": 1.113804578781128, + "learning_rate": 5.510114888759795e-06, + "loss": 0.7098, + "step": 9407 + }, + { + "epoch": 0.48350292938637063, + "grad_norm": 1.0342614650726318, + "learning_rate": 5.509286970296021e-06, + "loss": 0.718, + "step": 9408 + }, + { + "epoch": 0.4835543221297153, + "grad_norm": 1.1069092750549316, + "learning_rate": 5.5084590377220035e-06, + "loss": 0.7418, + "step": 9409 + }, + { + "epoch": 0.4836057148730599, + "grad_norm": 1.0686357021331787, + "learning_rate": 5.5076310910606855e-06, + "loss": 0.749, + "step": 9410 + }, + { + "epoch": 0.48365710761640457, + "grad_norm": 1.043076515197754, + "learning_rate": 5.5068031303350046e-06, + "loss": 0.7341, + "step": 9411 + }, + { + "epoch": 0.4837085003597492, + "grad_norm": 0.8362899422645569, + "learning_rate": 5.505975155567902e-06, + "loss": 0.6765, + "step": 9412 + }, + { + "epoch": 0.48375989310309386, + "grad_norm": 0.7150397300720215, + "learning_rate": 5.505147166782316e-06, + "loss": 0.647, + "step": 9413 + }, + { + "epoch": 0.4838112858464385, + "grad_norm": 1.1081665754318237, + "learning_rate": 5.504319164001184e-06, + "loss": 0.7098, + "step": 9414 + }, + { + "epoch": 0.4838626785897831, + "grad_norm": 1.0340710878372192, + "learning_rate": 5.5034911472474514e-06, + "loss": 0.7336, + "step": 9415 + }, + { + "epoch": 0.48391407133312775, + "grad_norm": 0.7537180781364441, + "learning_rate": 5.502663116544057e-06, + "loss": 0.6787, + "step": 9416 + }, + { + "epoch": 0.4839654640764724, + "grad_norm": 1.1423346996307373, + "learning_rate": 5.50183507191394e-06, + "loss": 0.8111, + "step": 9417 + }, + { + "epoch": 0.48401685681981704, + "grad_norm": 1.019592046737671, + "learning_rate": 5.501007013380046e-06, + "loss": 0.6829, + "step": 9418 + }, + { + "epoch": 0.4840682495631617, + "grad_norm": 1.0390514135360718, + "learning_rate": 5.500178940965314e-06, + "loss": 0.6712, + "step": 9419 + }, + { + "epoch": 0.48411964230650634, + "grad_norm": 1.1257941722869873, + "learning_rate": 5.499350854692689e-06, + "loss": 0.7841, + "step": 9420 + }, + { + "epoch": 0.484171035049851, + "grad_norm": 1.1053500175476074, + "learning_rate": 5.49852275458511e-06, + "loss": 0.7316, + "step": 9421 + }, + { + "epoch": 0.4842224277931956, + "grad_norm": 1.0224664211273193, + "learning_rate": 5.497694640665526e-06, + "loss": 0.745, + "step": 9422 + }, + { + "epoch": 0.4842738205365402, + "grad_norm": 1.0418940782546997, + "learning_rate": 5.496866512956876e-06, + "loss": 0.6939, + "step": 9423 + }, + { + "epoch": 0.48432521327988487, + "grad_norm": 1.0702447891235352, + "learning_rate": 5.496038371482104e-06, + "loss": 0.7418, + "step": 9424 + }, + { + "epoch": 0.4843766060232295, + "grad_norm": 1.049414873123169, + "learning_rate": 5.4952102162641575e-06, + "loss": 0.7635, + "step": 9425 + }, + { + "epoch": 0.48442799876657416, + "grad_norm": 0.9871751070022583, + "learning_rate": 5.494382047325979e-06, + "loss": 0.7107, + "step": 9426 + }, + { + "epoch": 0.4844793915099188, + "grad_norm": 1.0417555570602417, + "learning_rate": 5.493553864690513e-06, + "loss": 0.7369, + "step": 9427 + }, + { + "epoch": 0.48453078425326346, + "grad_norm": 1.1234605312347412, + "learning_rate": 5.492725668380709e-06, + "loss": 0.7634, + "step": 9428 + }, + { + "epoch": 0.4845821769966081, + "grad_norm": 1.023833155632019, + "learning_rate": 5.491897458419508e-06, + "loss": 0.753, + "step": 9429 + }, + { + "epoch": 0.4846335697399527, + "grad_norm": 1.1806488037109375, + "learning_rate": 5.4910692348298575e-06, + "loss": 0.7822, + "step": 9430 + }, + { + "epoch": 0.48468496248329734, + "grad_norm": 0.9322224259376526, + "learning_rate": 5.490240997634706e-06, + "loss": 0.686, + "step": 9431 + }, + { + "epoch": 0.484736355226642, + "grad_norm": 1.041218638420105, + "learning_rate": 5.4894127468570014e-06, + "loss": 0.7126, + "step": 9432 + }, + { + "epoch": 0.48478774796998664, + "grad_norm": 1.0199620723724365, + "learning_rate": 5.4885844825196866e-06, + "loss": 0.7359, + "step": 9433 + }, + { + "epoch": 0.4848391407133313, + "grad_norm": 1.0956038236618042, + "learning_rate": 5.487756204645714e-06, + "loss": 0.7252, + "step": 9434 + }, + { + "epoch": 0.48489053345667593, + "grad_norm": 1.035573124885559, + "learning_rate": 5.486927913258028e-06, + "loss": 0.6881, + "step": 9435 + }, + { + "epoch": 0.4849419262000206, + "grad_norm": 1.030729055404663, + "learning_rate": 5.48609960837958e-06, + "loss": 0.729, + "step": 9436 + }, + { + "epoch": 0.48499331894336517, + "grad_norm": 1.0253101587295532, + "learning_rate": 5.4852712900333185e-06, + "loss": 0.6982, + "step": 9437 + }, + { + "epoch": 0.4850447116867098, + "grad_norm": 1.075419545173645, + "learning_rate": 5.484442958242191e-06, + "loss": 0.7428, + "step": 9438 + }, + { + "epoch": 0.48509610443005446, + "grad_norm": 1.1414252519607544, + "learning_rate": 5.483614613029149e-06, + "loss": 0.7657, + "step": 9439 + }, + { + "epoch": 0.4851474971733991, + "grad_norm": 1.1154879331588745, + "learning_rate": 5.4827862544171414e-06, + "loss": 0.716, + "step": 9440 + }, + { + "epoch": 0.48519888991674376, + "grad_norm": 1.0697299242019653, + "learning_rate": 5.48195788242912e-06, + "loss": 0.7405, + "step": 9441 + }, + { + "epoch": 0.4852502826600884, + "grad_norm": 0.9866037368774414, + "learning_rate": 5.4811294970880335e-06, + "loss": 0.6897, + "step": 9442 + }, + { + "epoch": 0.48530167540343305, + "grad_norm": 1.1108030080795288, + "learning_rate": 5.480301098416834e-06, + "loss": 0.7395, + "step": 9443 + }, + { + "epoch": 0.4853530681467777, + "grad_norm": 1.1003204584121704, + "learning_rate": 5.479472686438474e-06, + "loss": 0.7614, + "step": 9444 + }, + { + "epoch": 0.4854044608901223, + "grad_norm": 0.8542898893356323, + "learning_rate": 5.478644261175904e-06, + "loss": 0.6569, + "step": 9445 + }, + { + "epoch": 0.48545585363346694, + "grad_norm": 1.0212955474853516, + "learning_rate": 5.477815822652076e-06, + "loss": 0.7506, + "step": 9446 + }, + { + "epoch": 0.4855072463768116, + "grad_norm": 1.0662847757339478, + "learning_rate": 5.476987370889945e-06, + "loss": 0.7031, + "step": 9447 + }, + { + "epoch": 0.48555863912015623, + "grad_norm": 1.0966449975967407, + "learning_rate": 5.476158905912461e-06, + "loss": 0.7127, + "step": 9448 + }, + { + "epoch": 0.4856100318635009, + "grad_norm": 1.1148920059204102, + "learning_rate": 5.47533042774258e-06, + "loss": 0.7564, + "step": 9449 + }, + { + "epoch": 0.4856614246068455, + "grad_norm": 1.0633505582809448, + "learning_rate": 5.474501936403255e-06, + "loss": 0.7693, + "step": 9450 + }, + { + "epoch": 0.4857128173501902, + "grad_norm": 1.0824952125549316, + "learning_rate": 5.473673431917437e-06, + "loss": 0.7183, + "step": 9451 + }, + { + "epoch": 0.4857642100935348, + "grad_norm": 1.0508842468261719, + "learning_rate": 5.472844914308084e-06, + "loss": 0.7375, + "step": 9452 + }, + { + "epoch": 0.4858156028368794, + "grad_norm": 0.7544159293174744, + "learning_rate": 5.47201638359815e-06, + "loss": 0.6689, + "step": 9453 + }, + { + "epoch": 0.48586699558022406, + "grad_norm": 0.9461003541946411, + "learning_rate": 5.47118783981059e-06, + "loss": 0.681, + "step": 9454 + }, + { + "epoch": 0.4859183883235687, + "grad_norm": 1.0841604471206665, + "learning_rate": 5.470359282968359e-06, + "loss": 0.7852, + "step": 9455 + }, + { + "epoch": 0.48596978106691335, + "grad_norm": 1.2208683490753174, + "learning_rate": 5.4695307130944135e-06, + "loss": 0.6862, + "step": 9456 + }, + { + "epoch": 0.486021173810258, + "grad_norm": 1.054756760597229, + "learning_rate": 5.4687021302117105e-06, + "loss": 0.6971, + "step": 9457 + }, + { + "epoch": 0.48607256655360265, + "grad_norm": 1.0326743125915527, + "learning_rate": 5.467873534343206e-06, + "loss": 0.716, + "step": 9458 + }, + { + "epoch": 0.4861239592969473, + "grad_norm": 0.7408831119537354, + "learning_rate": 5.467044925511854e-06, + "loss": 0.7048, + "step": 9459 + }, + { + "epoch": 0.4861753520402919, + "grad_norm": 1.0980217456817627, + "learning_rate": 5.466216303740617e-06, + "loss": 0.7241, + "step": 9460 + }, + { + "epoch": 0.48622674478363653, + "grad_norm": 1.0268486738204956, + "learning_rate": 5.4653876690524486e-06, + "loss": 0.7361, + "step": 9461 + }, + { + "epoch": 0.4862781375269812, + "grad_norm": 1.0121040344238281, + "learning_rate": 5.464559021470308e-06, + "loss": 0.6675, + "step": 9462 + }, + { + "epoch": 0.4863295302703258, + "grad_norm": 1.1095714569091797, + "learning_rate": 5.463730361017155e-06, + "loss": 0.7158, + "step": 9463 + }, + { + "epoch": 0.4863809230136705, + "grad_norm": 1.0649281740188599, + "learning_rate": 5.462901687715948e-06, + "loss": 0.7667, + "step": 9464 + }, + { + "epoch": 0.4864323157570151, + "grad_norm": 1.0386686325073242, + "learning_rate": 5.4620730015896454e-06, + "loss": 0.7056, + "step": 9465 + }, + { + "epoch": 0.48648370850035977, + "grad_norm": 1.0437140464782715, + "learning_rate": 5.461244302661207e-06, + "loss": 0.7163, + "step": 9466 + }, + { + "epoch": 0.4865351012437044, + "grad_norm": 0.8819904327392578, + "learning_rate": 5.460415590953592e-06, + "loss": 0.6776, + "step": 9467 + }, + { + "epoch": 0.486586493987049, + "grad_norm": 1.064073920249939, + "learning_rate": 5.45958686648976e-06, + "loss": 0.6793, + "step": 9468 + }, + { + "epoch": 0.48663788673039365, + "grad_norm": 1.0936199426651, + "learning_rate": 5.458758129292674e-06, + "loss": 0.707, + "step": 9469 + }, + { + "epoch": 0.4866892794737383, + "grad_norm": 0.9179823398590088, + "learning_rate": 5.457929379385293e-06, + "loss": 0.7087, + "step": 9470 + }, + { + "epoch": 0.48674067221708295, + "grad_norm": 1.0610612630844116, + "learning_rate": 5.4571006167905786e-06, + "loss": 0.7201, + "step": 9471 + }, + { + "epoch": 0.4867920649604276, + "grad_norm": 1.04362952709198, + "learning_rate": 5.4562718415314916e-06, + "loss": 0.7035, + "step": 9472 + }, + { + "epoch": 0.48684345770377224, + "grad_norm": 1.0488141775131226, + "learning_rate": 5.455443053630995e-06, + "loss": 0.7697, + "step": 9473 + }, + { + "epoch": 0.4868948504471169, + "grad_norm": 1.0690515041351318, + "learning_rate": 5.454614253112052e-06, + "loss": 0.7591, + "step": 9474 + }, + { + "epoch": 0.4869462431904615, + "grad_norm": 1.081305742263794, + "learning_rate": 5.453785439997623e-06, + "loss": 0.6973, + "step": 9475 + }, + { + "epoch": 0.48699763593380613, + "grad_norm": 0.7594033479690552, + "learning_rate": 5.452956614310673e-06, + "loss": 0.67, + "step": 9476 + }, + { + "epoch": 0.4870490286771508, + "grad_norm": 0.7347837090492249, + "learning_rate": 5.452127776074164e-06, + "loss": 0.6944, + "step": 9477 + }, + { + "epoch": 0.4871004214204954, + "grad_norm": 1.087239146232605, + "learning_rate": 5.45129892531106e-06, + "loss": 0.7405, + "step": 9478 + }, + { + "epoch": 0.48715181416384007, + "grad_norm": 1.3487238883972168, + "learning_rate": 5.450470062044326e-06, + "loss": 0.7149, + "step": 9479 + }, + { + "epoch": 0.4872032069071847, + "grad_norm": 1.0602219104766846, + "learning_rate": 5.4496411862969244e-06, + "loss": 0.7193, + "step": 9480 + }, + { + "epoch": 0.48725459965052936, + "grad_norm": 1.0953388214111328, + "learning_rate": 5.448812298091822e-06, + "loss": 0.742, + "step": 9481 + }, + { + "epoch": 0.487305992393874, + "grad_norm": 1.0527359247207642, + "learning_rate": 5.447983397451983e-06, + "loss": 0.7173, + "step": 9482 + }, + { + "epoch": 0.4873573851372186, + "grad_norm": 1.0570027828216553, + "learning_rate": 5.447154484400371e-06, + "loss": 0.6761, + "step": 9483 + }, + { + "epoch": 0.48740877788056325, + "grad_norm": 1.0538517236709595, + "learning_rate": 5.446325558959955e-06, + "loss": 0.7841, + "step": 9484 + }, + { + "epoch": 0.4874601706239079, + "grad_norm": 0.8465815186500549, + "learning_rate": 5.445496621153701e-06, + "loss": 0.6656, + "step": 9485 + }, + { + "epoch": 0.48751156336725254, + "grad_norm": 0.721488893032074, + "learning_rate": 5.444667671004572e-06, + "loss": 0.6731, + "step": 9486 + }, + { + "epoch": 0.4875629561105972, + "grad_norm": 1.089809775352478, + "learning_rate": 5.443838708535538e-06, + "loss": 0.7267, + "step": 9487 + }, + { + "epoch": 0.48761434885394184, + "grad_norm": 0.7786794900894165, + "learning_rate": 5.443009733769563e-06, + "loss": 0.7051, + "step": 9488 + }, + { + "epoch": 0.4876657415972865, + "grad_norm": 0.764552116394043, + "learning_rate": 5.442180746729619e-06, + "loss": 0.6638, + "step": 9489 + }, + { + "epoch": 0.4877171343406311, + "grad_norm": 1.057086706161499, + "learning_rate": 5.441351747438671e-06, + "loss": 0.7144, + "step": 9490 + }, + { + "epoch": 0.4877685270839757, + "grad_norm": 1.1409790515899658, + "learning_rate": 5.440522735919685e-06, + "loss": 0.7225, + "step": 9491 + }, + { + "epoch": 0.48781991982732037, + "grad_norm": 0.8731694221496582, + "learning_rate": 5.439693712195635e-06, + "loss": 0.673, + "step": 9492 + }, + { + "epoch": 0.487871312570665, + "grad_norm": 0.6754937767982483, + "learning_rate": 5.438864676289484e-06, + "loss": 0.6318, + "step": 9493 + }, + { + "epoch": 0.48792270531400966, + "grad_norm": 1.1085034608840942, + "learning_rate": 5.438035628224205e-06, + "loss": 0.7108, + "step": 9494 + }, + { + "epoch": 0.4879740980573543, + "grad_norm": 1.0729222297668457, + "learning_rate": 5.437206568022768e-06, + "loss": 0.7791, + "step": 9495 + }, + { + "epoch": 0.48802549080069896, + "grad_norm": 1.0918159484863281, + "learning_rate": 5.436377495708139e-06, + "loss": 0.7112, + "step": 9496 + }, + { + "epoch": 0.4880768835440436, + "grad_norm": 1.0716404914855957, + "learning_rate": 5.435548411303292e-06, + "loss": 0.7601, + "step": 9497 + }, + { + "epoch": 0.4881282762873882, + "grad_norm": 1.1228057146072388, + "learning_rate": 5.434719314831196e-06, + "loss": 0.7364, + "step": 9498 + }, + { + "epoch": 0.48817966903073284, + "grad_norm": 1.076778531074524, + "learning_rate": 5.43389020631482e-06, + "loss": 0.6947, + "step": 9499 + }, + { + "epoch": 0.4882310617740775, + "grad_norm": 1.0357011556625366, + "learning_rate": 5.433061085777138e-06, + "loss": 0.7226, + "step": 9500 + }, + { + "epoch": 0.48828245451742214, + "grad_norm": 1.1128188371658325, + "learning_rate": 5.432231953241122e-06, + "loss": 0.7778, + "step": 9501 + }, + { + "epoch": 0.4883338472607668, + "grad_norm": 1.0051047801971436, + "learning_rate": 5.43140280872974e-06, + "loss": 0.6871, + "step": 9502 + }, + { + "epoch": 0.48838524000411143, + "grad_norm": 0.8065167665481567, + "learning_rate": 5.4305736522659665e-06, + "loss": 0.6781, + "step": 9503 + }, + { + "epoch": 0.4884366327474561, + "grad_norm": 0.7258554100990295, + "learning_rate": 5.429744483872774e-06, + "loss": 0.6467, + "step": 9504 + }, + { + "epoch": 0.4884880254908007, + "grad_norm": 0.989908754825592, + "learning_rate": 5.428915303573137e-06, + "loss": 0.7021, + "step": 9505 + }, + { + "epoch": 0.4885394182341453, + "grad_norm": 1.0529425144195557, + "learning_rate": 5.428086111390026e-06, + "loss": 0.743, + "step": 9506 + }, + { + "epoch": 0.48859081097748996, + "grad_norm": 0.7950737476348877, + "learning_rate": 5.427256907346415e-06, + "loss": 0.6514, + "step": 9507 + }, + { + "epoch": 0.4886422037208346, + "grad_norm": 1.0267528295516968, + "learning_rate": 5.4264276914652794e-06, + "loss": 0.7475, + "step": 9508 + }, + { + "epoch": 0.48869359646417926, + "grad_norm": 1.0431227684020996, + "learning_rate": 5.4255984637695915e-06, + "loss": 0.6702, + "step": 9509 + }, + { + "epoch": 0.4887449892075239, + "grad_norm": 1.0637890100479126, + "learning_rate": 5.424769224282326e-06, + "loss": 0.7082, + "step": 9510 + }, + { + "epoch": 0.48879638195086855, + "grad_norm": 1.1046890020370483, + "learning_rate": 5.423939973026459e-06, + "loss": 0.7177, + "step": 9511 + }, + { + "epoch": 0.4888477746942132, + "grad_norm": 0.7119315266609192, + "learning_rate": 5.423110710024964e-06, + "loss": 0.6912, + "step": 9512 + }, + { + "epoch": 0.4888991674375578, + "grad_norm": 1.0632457733154297, + "learning_rate": 5.422281435300817e-06, + "loss": 0.8007, + "step": 9513 + }, + { + "epoch": 0.48895056018090244, + "grad_norm": 1.1114763021469116, + "learning_rate": 5.421452148876995e-06, + "loss": 0.7446, + "step": 9514 + }, + { + "epoch": 0.4890019529242471, + "grad_norm": 1.0081735849380493, + "learning_rate": 5.420622850776473e-06, + "loss": 0.7081, + "step": 9515 + }, + { + "epoch": 0.48905334566759173, + "grad_norm": 1.0235774517059326, + "learning_rate": 5.419793541022229e-06, + "loss": 0.7135, + "step": 9516 + }, + { + "epoch": 0.4891047384109364, + "grad_norm": 1.0041431188583374, + "learning_rate": 5.418964219637237e-06, + "loss": 0.7083, + "step": 9517 + }, + { + "epoch": 0.489156131154281, + "grad_norm": 0.9953317046165466, + "learning_rate": 5.418134886644475e-06, + "loss": 0.7067, + "step": 9518 + }, + { + "epoch": 0.4892075238976257, + "grad_norm": 1.015871286392212, + "learning_rate": 5.417305542066922e-06, + "loss": 0.6647, + "step": 9519 + }, + { + "epoch": 0.4892589166409703, + "grad_norm": 0.9987191557884216, + "learning_rate": 5.416476185927553e-06, + "loss": 0.7052, + "step": 9520 + }, + { + "epoch": 0.4893103093843149, + "grad_norm": 1.0949862003326416, + "learning_rate": 5.415646818249349e-06, + "loss": 0.7357, + "step": 9521 + }, + { + "epoch": 0.48936170212765956, + "grad_norm": 0.9590392112731934, + "learning_rate": 5.414817439055288e-06, + "loss": 0.7013, + "step": 9522 + }, + { + "epoch": 0.4894130948710042, + "grad_norm": 1.1485745906829834, + "learning_rate": 5.413988048368345e-06, + "loss": 0.696, + "step": 9523 + }, + { + "epoch": 0.48946448761434885, + "grad_norm": 1.066043734550476, + "learning_rate": 5.413158646211502e-06, + "loss": 0.7712, + "step": 9524 + }, + { + "epoch": 0.4895158803576935, + "grad_norm": 1.0126806497573853, + "learning_rate": 5.412329232607739e-06, + "loss": 0.7445, + "step": 9525 + }, + { + "epoch": 0.48956727310103815, + "grad_norm": 0.762461245059967, + "learning_rate": 5.411499807580034e-06, + "loss": 0.6701, + "step": 9526 + }, + { + "epoch": 0.4896186658443828, + "grad_norm": 1.063962697982788, + "learning_rate": 5.410670371151367e-06, + "loss": 0.7519, + "step": 9527 + }, + { + "epoch": 0.4896700585877274, + "grad_norm": 1.2112607955932617, + "learning_rate": 5.409840923344719e-06, + "loss": 0.7086, + "step": 9528 + }, + { + "epoch": 0.48972145133107203, + "grad_norm": 1.100990653038025, + "learning_rate": 5.409011464183071e-06, + "loss": 0.7695, + "step": 9529 + }, + { + "epoch": 0.4897728440744167, + "grad_norm": 0.9387048482894897, + "learning_rate": 5.408181993689404e-06, + "loss": 0.7116, + "step": 9530 + }, + { + "epoch": 0.4898242368177613, + "grad_norm": 1.0274606943130493, + "learning_rate": 5.407352511886696e-06, + "loss": 0.632, + "step": 9531 + }, + { + "epoch": 0.489875629561106, + "grad_norm": 1.0875567197799683, + "learning_rate": 5.406523018797933e-06, + "loss": 0.7345, + "step": 9532 + }, + { + "epoch": 0.4899270223044506, + "grad_norm": 1.0277342796325684, + "learning_rate": 5.405693514446094e-06, + "loss": 0.7155, + "step": 9533 + }, + { + "epoch": 0.48997841504779527, + "grad_norm": 1.0742981433868408, + "learning_rate": 5.404863998854162e-06, + "loss": 0.7277, + "step": 9534 + }, + { + "epoch": 0.4900298077911399, + "grad_norm": 1.1260440349578857, + "learning_rate": 5.404034472045119e-06, + "loss": 0.7476, + "step": 9535 + }, + { + "epoch": 0.4900812005344845, + "grad_norm": 1.0490459203720093, + "learning_rate": 5.403204934041948e-06, + "loss": 0.7474, + "step": 9536 + }, + { + "epoch": 0.49013259327782915, + "grad_norm": 1.0062735080718994, + "learning_rate": 5.4023753848676334e-06, + "loss": 0.7582, + "step": 9537 + }, + { + "epoch": 0.4901839860211738, + "grad_norm": 1.026776909828186, + "learning_rate": 5.401545824545157e-06, + "loss": 0.6594, + "step": 9538 + }, + { + "epoch": 0.49023537876451845, + "grad_norm": 1.0998259782791138, + "learning_rate": 5.400716253097502e-06, + "loss": 0.7063, + "step": 9539 + }, + { + "epoch": 0.4902867715078631, + "grad_norm": 1.0422205924987793, + "learning_rate": 5.399886670547655e-06, + "loss": 0.7256, + "step": 9540 + }, + { + "epoch": 0.49033816425120774, + "grad_norm": 1.0290815830230713, + "learning_rate": 5.399057076918598e-06, + "loss": 0.7556, + "step": 9541 + }, + { + "epoch": 0.4903895569945524, + "grad_norm": 1.0331223011016846, + "learning_rate": 5.398227472233315e-06, + "loss": 0.7588, + "step": 9542 + }, + { + "epoch": 0.49044094973789704, + "grad_norm": 1.0452048778533936, + "learning_rate": 5.397397856514792e-06, + "loss": 0.6972, + "step": 9543 + }, + { + "epoch": 0.4904923424812416, + "grad_norm": 1.0479238033294678, + "learning_rate": 5.396568229786015e-06, + "loss": 0.7378, + "step": 9544 + }, + { + "epoch": 0.4905437352245863, + "grad_norm": 1.0410690307617188, + "learning_rate": 5.395738592069969e-06, + "loss": 0.7262, + "step": 9545 + }, + { + "epoch": 0.4905951279679309, + "grad_norm": 1.0231990814208984, + "learning_rate": 5.39490894338964e-06, + "loss": 0.7253, + "step": 9546 + }, + { + "epoch": 0.49064652071127557, + "grad_norm": 1.0775612592697144, + "learning_rate": 5.394079283768013e-06, + "loss": 0.7267, + "step": 9547 + }, + { + "epoch": 0.4906979134546202, + "grad_norm": 1.1124353408813477, + "learning_rate": 5.393249613228076e-06, + "loss": 0.7743, + "step": 9548 + }, + { + "epoch": 0.49074930619796486, + "grad_norm": 1.1986351013183594, + "learning_rate": 5.392419931792816e-06, + "loss": 0.6769, + "step": 9549 + }, + { + "epoch": 0.4908006989413095, + "grad_norm": 1.0849008560180664, + "learning_rate": 5.391590239485217e-06, + "loss": 0.6916, + "step": 9550 + }, + { + "epoch": 0.4908520916846541, + "grad_norm": 0.7937957048416138, + "learning_rate": 5.390760536328271e-06, + "loss": 0.6646, + "step": 9551 + }, + { + "epoch": 0.49090348442799875, + "grad_norm": 1.3388746976852417, + "learning_rate": 5.389930822344961e-06, + "loss": 0.7954, + "step": 9552 + }, + { + "epoch": 0.4909548771713434, + "grad_norm": 1.1110291481018066, + "learning_rate": 5.389101097558278e-06, + "loss": 0.7382, + "step": 9553 + }, + { + "epoch": 0.49100626991468804, + "grad_norm": 1.0181349515914917, + "learning_rate": 5.3882713619912085e-06, + "loss": 0.6832, + "step": 9554 + }, + { + "epoch": 0.4910576626580327, + "grad_norm": 0.9963048696517944, + "learning_rate": 5.387441615666742e-06, + "loss": 0.6808, + "step": 9555 + }, + { + "epoch": 0.49110905540137734, + "grad_norm": 1.064102292060852, + "learning_rate": 5.386611858607867e-06, + "loss": 0.7363, + "step": 9556 + }, + { + "epoch": 0.491160448144722, + "grad_norm": 1.025748610496521, + "learning_rate": 5.3857820908375735e-06, + "loss": 0.7675, + "step": 9557 + }, + { + "epoch": 0.49121184088806663, + "grad_norm": 1.1190896034240723, + "learning_rate": 5.384952312378851e-06, + "loss": 0.7558, + "step": 9558 + }, + { + "epoch": 0.4912632336314112, + "grad_norm": 1.0449038743972778, + "learning_rate": 5.384122523254689e-06, + "loss": 0.7436, + "step": 9559 + }, + { + "epoch": 0.49131462637475587, + "grad_norm": 1.007791519165039, + "learning_rate": 5.383292723488075e-06, + "loss": 0.7424, + "step": 9560 + }, + { + "epoch": 0.4913660191181005, + "grad_norm": 1.0655417442321777, + "learning_rate": 5.382462913102002e-06, + "loss": 0.7508, + "step": 9561 + }, + { + "epoch": 0.49141741186144516, + "grad_norm": 1.0937548875808716, + "learning_rate": 5.381633092119462e-06, + "loss": 0.6862, + "step": 9562 + }, + { + "epoch": 0.4914688046047898, + "grad_norm": 1.0049195289611816, + "learning_rate": 5.380803260563441e-06, + "loss": 0.6774, + "step": 9563 + }, + { + "epoch": 0.49152019734813446, + "grad_norm": 1.0798002481460571, + "learning_rate": 5.379973418456934e-06, + "loss": 0.7574, + "step": 9564 + }, + { + "epoch": 0.4915715900914791, + "grad_norm": 1.1226277351379395, + "learning_rate": 5.379143565822932e-06, + "loss": 0.7289, + "step": 9565 + }, + { + "epoch": 0.4916229828348237, + "grad_norm": 1.043548822402954, + "learning_rate": 5.378313702684425e-06, + "loss": 0.6853, + "step": 9566 + }, + { + "epoch": 0.49167437557816834, + "grad_norm": 1.202102780342102, + "learning_rate": 5.3774838290644095e-06, + "loss": 0.7415, + "step": 9567 + }, + { + "epoch": 0.491725768321513, + "grad_norm": 1.0347708463668823, + "learning_rate": 5.376653944985872e-06, + "loss": 0.7296, + "step": 9568 + }, + { + "epoch": 0.49177716106485764, + "grad_norm": 1.1020474433898926, + "learning_rate": 5.375824050471811e-06, + "loss": 0.7663, + "step": 9569 + }, + { + "epoch": 0.4918285538082023, + "grad_norm": 1.070357322692871, + "learning_rate": 5.374994145545214e-06, + "loss": 0.6992, + "step": 9570 + }, + { + "epoch": 0.49187994655154693, + "grad_norm": 1.0060158967971802, + "learning_rate": 5.374164230229078e-06, + "loss": 0.7173, + "step": 9571 + }, + { + "epoch": 0.4919313392948916, + "grad_norm": 1.0634430646896362, + "learning_rate": 5.3733343045463945e-06, + "loss": 0.7174, + "step": 9572 + }, + { + "epoch": 0.4919827320382362, + "grad_norm": 1.14021897315979, + "learning_rate": 5.372504368520158e-06, + "loss": 0.7603, + "step": 9573 + }, + { + "epoch": 0.4920341247815808, + "grad_norm": 1.1049827337265015, + "learning_rate": 5.371674422173364e-06, + "loss": 0.722, + "step": 9574 + }, + { + "epoch": 0.49208551752492546, + "grad_norm": 1.042099118232727, + "learning_rate": 5.3708444655290036e-06, + "loss": 0.6942, + "step": 9575 + }, + { + "epoch": 0.4921369102682701, + "grad_norm": 1.02151358127594, + "learning_rate": 5.370014498610075e-06, + "loss": 0.7515, + "step": 9576 + }, + { + "epoch": 0.49218830301161476, + "grad_norm": 0.978813111782074, + "learning_rate": 5.369184521439571e-06, + "loss": 0.7145, + "step": 9577 + }, + { + "epoch": 0.4922396957549594, + "grad_norm": 0.862740159034729, + "learning_rate": 5.368354534040487e-06, + "loss": 0.7193, + "step": 9578 + }, + { + "epoch": 0.49229108849830405, + "grad_norm": 1.1445499658584595, + "learning_rate": 5.3675245364358195e-06, + "loss": 0.7195, + "step": 9579 + }, + { + "epoch": 0.4923424812416487, + "grad_norm": 1.0370557308197021, + "learning_rate": 5.366694528648564e-06, + "loss": 0.7544, + "step": 9580 + }, + { + "epoch": 0.49239387398499335, + "grad_norm": 1.1446062326431274, + "learning_rate": 5.365864510701716e-06, + "loss": 0.7302, + "step": 9581 + }, + { + "epoch": 0.49244526672833794, + "grad_norm": 0.9933484196662903, + "learning_rate": 5.365034482618273e-06, + "loss": 0.7739, + "step": 9582 + }, + { + "epoch": 0.4924966594716826, + "grad_norm": 1.0212934017181396, + "learning_rate": 5.36420444442123e-06, + "loss": 0.6992, + "step": 9583 + }, + { + "epoch": 0.49254805221502723, + "grad_norm": 1.083933711051941, + "learning_rate": 5.363374396133585e-06, + "loss": 0.7044, + "step": 9584 + }, + { + "epoch": 0.4925994449583719, + "grad_norm": 1.0710134506225586, + "learning_rate": 5.3625443377783345e-06, + "loss": 0.7062, + "step": 9585 + }, + { + "epoch": 0.4926508377017165, + "grad_norm": 1.1001828908920288, + "learning_rate": 5.361714269378477e-06, + "loss": 0.6699, + "step": 9586 + }, + { + "epoch": 0.4927022304450612, + "grad_norm": 1.1703779697418213, + "learning_rate": 5.360884190957009e-06, + "loss": 0.6955, + "step": 9587 + }, + { + "epoch": 0.4927536231884058, + "grad_norm": 1.0764039754867554, + "learning_rate": 5.360054102536932e-06, + "loss": 0.7366, + "step": 9588 + }, + { + "epoch": 0.4928050159317504, + "grad_norm": 0.8195311427116394, + "learning_rate": 5.359224004141239e-06, + "loss": 0.6617, + "step": 9589 + }, + { + "epoch": 0.49285640867509506, + "grad_norm": 1.093002200126648, + "learning_rate": 5.358393895792934e-06, + "loss": 0.7736, + "step": 9590 + }, + { + "epoch": 0.4929078014184397, + "grad_norm": 1.0097301006317139, + "learning_rate": 5.357563777515012e-06, + "loss": 0.7501, + "step": 9591 + }, + { + "epoch": 0.49295919416178435, + "grad_norm": 0.8267471194267273, + "learning_rate": 5.356733649330471e-06, + "loss": 0.6674, + "step": 9592 + }, + { + "epoch": 0.493010586905129, + "grad_norm": 1.0620254278182983, + "learning_rate": 5.3559035112623155e-06, + "loss": 0.6703, + "step": 9593 + }, + { + "epoch": 0.49306197964847365, + "grad_norm": 1.049376368522644, + "learning_rate": 5.35507336333354e-06, + "loss": 0.7327, + "step": 9594 + }, + { + "epoch": 0.4931133723918183, + "grad_norm": 1.0751487016677856, + "learning_rate": 5.35424320556715e-06, + "loss": 0.7658, + "step": 9595 + }, + { + "epoch": 0.49316476513516294, + "grad_norm": 1.0827155113220215, + "learning_rate": 5.35341303798614e-06, + "loss": 0.7221, + "step": 9596 + }, + { + "epoch": 0.49321615787850753, + "grad_norm": 0.735813558101654, + "learning_rate": 5.352582860613513e-06, + "loss": 0.6938, + "step": 9597 + }, + { + "epoch": 0.4932675506218522, + "grad_norm": 1.0243816375732422, + "learning_rate": 5.3517526734722724e-06, + "loss": 0.6826, + "step": 9598 + }, + { + "epoch": 0.4933189433651968, + "grad_norm": 1.0461496114730835, + "learning_rate": 5.350922476585416e-06, + "loss": 0.7456, + "step": 9599 + }, + { + "epoch": 0.4933703361085415, + "grad_norm": 1.1016942262649536, + "learning_rate": 5.350092269975945e-06, + "loss": 0.7964, + "step": 9600 + }, + { + "epoch": 0.4934217288518861, + "grad_norm": 1.0775535106658936, + "learning_rate": 5.349262053666862e-06, + "loss": 0.7673, + "step": 9601 + }, + { + "epoch": 0.49347312159523077, + "grad_norm": 1.0956441164016724, + "learning_rate": 5.348431827681169e-06, + "loss": 0.7941, + "step": 9602 + }, + { + "epoch": 0.4935245143385754, + "grad_norm": 1.108159065246582, + "learning_rate": 5.347601592041869e-06, + "loss": 0.7566, + "step": 9603 + }, + { + "epoch": 0.49357590708192, + "grad_norm": 1.030697226524353, + "learning_rate": 5.346771346771963e-06, + "loss": 0.749, + "step": 9604 + }, + { + "epoch": 0.49362729982526465, + "grad_norm": 0.7208949327468872, + "learning_rate": 5.345941091894453e-06, + "loss": 0.6412, + "step": 9605 + }, + { + "epoch": 0.4936786925686093, + "grad_norm": 1.0395797491073608, + "learning_rate": 5.345110827432343e-06, + "loss": 0.7254, + "step": 9606 + }, + { + "epoch": 0.49373008531195395, + "grad_norm": 1.0050891637802124, + "learning_rate": 5.3442805534086365e-06, + "loss": 0.6659, + "step": 9607 + }, + { + "epoch": 0.4937814780552986, + "grad_norm": 1.1142754554748535, + "learning_rate": 5.343450269846336e-06, + "loss": 0.7544, + "step": 9608 + }, + { + "epoch": 0.49383287079864324, + "grad_norm": 1.0315179824829102, + "learning_rate": 5.342619976768447e-06, + "loss": 0.7355, + "step": 9609 + }, + { + "epoch": 0.4938842635419879, + "grad_norm": 1.0394960641860962, + "learning_rate": 5.341789674197973e-06, + "loss": 0.7096, + "step": 9610 + }, + { + "epoch": 0.49393565628533254, + "grad_norm": 1.135819673538208, + "learning_rate": 5.340959362157916e-06, + "loss": 0.801, + "step": 9611 + }, + { + "epoch": 0.4939870490286771, + "grad_norm": 1.0144143104553223, + "learning_rate": 5.340129040671283e-06, + "loss": 0.716, + "step": 9612 + }, + { + "epoch": 0.4940384417720218, + "grad_norm": 1.031213641166687, + "learning_rate": 5.339298709761077e-06, + "loss": 0.7399, + "step": 9613 + }, + { + "epoch": 0.4940898345153664, + "grad_norm": 1.077452301979065, + "learning_rate": 5.3384683694503045e-06, + "loss": 0.7526, + "step": 9614 + }, + { + "epoch": 0.49414122725871107, + "grad_norm": 5.250999927520752, + "learning_rate": 5.33763801976197e-06, + "loss": 0.8154, + "step": 9615 + }, + { + "epoch": 0.4941926200020557, + "grad_norm": 0.738052248954773, + "learning_rate": 5.336807660719081e-06, + "loss": 0.6513, + "step": 9616 + }, + { + "epoch": 0.49424401274540036, + "grad_norm": 0.684563159942627, + "learning_rate": 5.3359772923446395e-06, + "loss": 0.6772, + "step": 9617 + }, + { + "epoch": 0.494295405488745, + "grad_norm": 1.1076298952102661, + "learning_rate": 5.335146914661655e-06, + "loss": 0.6777, + "step": 9618 + }, + { + "epoch": 0.4943467982320896, + "grad_norm": 1.0343161821365356, + "learning_rate": 5.3343165276931315e-06, + "loss": 0.7311, + "step": 9619 + }, + { + "epoch": 0.49439819097543425, + "grad_norm": 1.0697482824325562, + "learning_rate": 5.333486131462078e-06, + "loss": 0.7595, + "step": 9620 + }, + { + "epoch": 0.4944495837187789, + "grad_norm": 0.8122111558914185, + "learning_rate": 5.332655725991498e-06, + "loss": 0.673, + "step": 9621 + }, + { + "epoch": 0.49450097646212354, + "grad_norm": 0.7715480923652649, + "learning_rate": 5.331825311304403e-06, + "loss": 0.6603, + "step": 9622 + }, + { + "epoch": 0.4945523692054682, + "grad_norm": 1.0459703207015991, + "learning_rate": 5.330994887423797e-06, + "loss": 0.7273, + "step": 9623 + }, + { + "epoch": 0.49460376194881284, + "grad_norm": 1.1299817562103271, + "learning_rate": 5.330164454372688e-06, + "loss": 0.7502, + "step": 9624 + }, + { + "epoch": 0.4946551546921575, + "grad_norm": 1.1158483028411865, + "learning_rate": 5.329334012174086e-06, + "loss": 0.7756, + "step": 9625 + }, + { + "epoch": 0.49470654743550213, + "grad_norm": 1.011510968208313, + "learning_rate": 5.328503560850996e-06, + "loss": 0.7395, + "step": 9626 + }, + { + "epoch": 0.4947579401788467, + "grad_norm": 1.1215327978134155, + "learning_rate": 5.327673100426428e-06, + "loss": 0.724, + "step": 9627 + }, + { + "epoch": 0.49480933292219137, + "grad_norm": 1.0712478160858154, + "learning_rate": 5.3268426309233915e-06, + "loss": 0.7393, + "step": 9628 + }, + { + "epoch": 0.494860725665536, + "grad_norm": 0.9891983270645142, + "learning_rate": 5.326012152364894e-06, + "loss": 0.7271, + "step": 9629 + }, + { + "epoch": 0.49491211840888066, + "grad_norm": 0.728307843208313, + "learning_rate": 5.325181664773945e-06, + "loss": 0.669, + "step": 9630 + }, + { + "epoch": 0.4949635111522253, + "grad_norm": 0.8278331160545349, + "learning_rate": 5.324351168173555e-06, + "loss": 0.6617, + "step": 9631 + }, + { + "epoch": 0.49501490389556996, + "grad_norm": 1.0681143999099731, + "learning_rate": 5.323520662586732e-06, + "loss": 0.773, + "step": 9632 + }, + { + "epoch": 0.4950662966389146, + "grad_norm": 1.1373975276947021, + "learning_rate": 5.322690148036487e-06, + "loss": 0.6991, + "step": 9633 + }, + { + "epoch": 0.49511768938225925, + "grad_norm": 1.1680774688720703, + "learning_rate": 5.321859624545829e-06, + "loss": 0.6914, + "step": 9634 + }, + { + "epoch": 0.49516908212560384, + "grad_norm": 1.0779521465301514, + "learning_rate": 5.321029092137769e-06, + "loss": 0.7437, + "step": 9635 + }, + { + "epoch": 0.4952204748689485, + "grad_norm": 1.073246717453003, + "learning_rate": 5.320198550835317e-06, + "loss": 0.7275, + "step": 9636 + }, + { + "epoch": 0.49527186761229314, + "grad_norm": 1.1071537733078003, + "learning_rate": 5.319368000661485e-06, + "loss": 0.7194, + "step": 9637 + }, + { + "epoch": 0.4953232603556378, + "grad_norm": 0.8047956824302673, + "learning_rate": 5.318537441639284e-06, + "loss": 0.6999, + "step": 9638 + }, + { + "epoch": 0.49537465309898243, + "grad_norm": 1.1221832036972046, + "learning_rate": 5.317706873791726e-06, + "loss": 0.7097, + "step": 9639 + }, + { + "epoch": 0.4954260458423271, + "grad_norm": 1.1357895135879517, + "learning_rate": 5.316876297141819e-06, + "loss": 0.7437, + "step": 9640 + }, + { + "epoch": 0.4954774385856717, + "grad_norm": 1.0529829263687134, + "learning_rate": 5.316045711712581e-06, + "loss": 0.7253, + "step": 9641 + }, + { + "epoch": 0.4955288313290163, + "grad_norm": 1.062212586402893, + "learning_rate": 5.31521511752702e-06, + "loss": 0.7968, + "step": 9642 + }, + { + "epoch": 0.49558022407236096, + "grad_norm": 1.1542739868164062, + "learning_rate": 5.314384514608147e-06, + "loss": 0.7364, + "step": 9643 + }, + { + "epoch": 0.4956316168157056, + "grad_norm": 1.1220766305923462, + "learning_rate": 5.313553902978976e-06, + "loss": 0.7826, + "step": 9644 + }, + { + "epoch": 0.49568300955905026, + "grad_norm": 1.0064659118652344, + "learning_rate": 5.312723282662522e-06, + "loss": 0.717, + "step": 9645 + }, + { + "epoch": 0.4957344023023949, + "grad_norm": 1.0356231927871704, + "learning_rate": 5.311892653681796e-06, + "loss": 0.7153, + "step": 9646 + }, + { + "epoch": 0.49578579504573955, + "grad_norm": 1.1202448606491089, + "learning_rate": 5.31106201605981e-06, + "loss": 0.7895, + "step": 9647 + }, + { + "epoch": 0.4958371877890842, + "grad_norm": 0.703321099281311, + "learning_rate": 5.310231369819581e-06, + "loss": 0.6845, + "step": 9648 + }, + { + "epoch": 0.49588858053242885, + "grad_norm": 1.1695502996444702, + "learning_rate": 5.309400714984121e-06, + "loss": 0.7455, + "step": 9649 + }, + { + "epoch": 0.49593997327577344, + "grad_norm": 1.094068169593811, + "learning_rate": 5.308570051576443e-06, + "loss": 0.7101, + "step": 9650 + }, + { + "epoch": 0.4959913660191181, + "grad_norm": 1.1081759929656982, + "learning_rate": 5.307739379619563e-06, + "loss": 0.7709, + "step": 9651 + }, + { + "epoch": 0.49604275876246273, + "grad_norm": 0.7252534627914429, + "learning_rate": 5.306908699136496e-06, + "loss": 0.6998, + "step": 9652 + }, + { + "epoch": 0.4960941515058074, + "grad_norm": 1.0350507497787476, + "learning_rate": 5.306078010150254e-06, + "loss": 0.7511, + "step": 9653 + }, + { + "epoch": 0.496145544249152, + "grad_norm": 0.706834077835083, + "learning_rate": 5.305247312683853e-06, + "loss": 0.6506, + "step": 9654 + }, + { + "epoch": 0.4961969369924967, + "grad_norm": 1.027999758720398, + "learning_rate": 5.304416606760311e-06, + "loss": 0.7025, + "step": 9655 + }, + { + "epoch": 0.4962483297358413, + "grad_norm": 1.042326807975769, + "learning_rate": 5.303585892402638e-06, + "loss": 0.6973, + "step": 9656 + }, + { + "epoch": 0.4962997224791859, + "grad_norm": 1.2624150514602661, + "learning_rate": 5.302755169633854e-06, + "loss": 0.7833, + "step": 9657 + }, + { + "epoch": 0.49635111522253056, + "grad_norm": 1.0304924249649048, + "learning_rate": 5.301924438476974e-06, + "loss": 0.6808, + "step": 9658 + }, + { + "epoch": 0.4964025079658752, + "grad_norm": 0.9943413734436035, + "learning_rate": 5.301093698955013e-06, + "loss": 0.6542, + "step": 9659 + }, + { + "epoch": 0.49645390070921985, + "grad_norm": 1.1055750846862793, + "learning_rate": 5.30026295109099e-06, + "loss": 0.7367, + "step": 9660 + }, + { + "epoch": 0.4965052934525645, + "grad_norm": 1.1270208358764648, + "learning_rate": 5.299432194907918e-06, + "loss": 0.7885, + "step": 9661 + }, + { + "epoch": 0.49655668619590915, + "grad_norm": 1.0890278816223145, + "learning_rate": 5.298601430428816e-06, + "loss": 0.7337, + "step": 9662 + }, + { + "epoch": 0.4966080789392538, + "grad_norm": 1.017724871635437, + "learning_rate": 5.297770657676701e-06, + "loss": 0.7582, + "step": 9663 + }, + { + "epoch": 0.49665947168259844, + "grad_norm": 1.0810452699661255, + "learning_rate": 5.296939876674588e-06, + "loss": 0.6933, + "step": 9664 + }, + { + "epoch": 0.49671086442594303, + "grad_norm": 1.093916893005371, + "learning_rate": 5.296109087445499e-06, + "loss": 0.7374, + "step": 9665 + }, + { + "epoch": 0.4967622571692877, + "grad_norm": 1.1080012321472168, + "learning_rate": 5.295278290012448e-06, + "loss": 0.7033, + "step": 9666 + }, + { + "epoch": 0.4968136499126323, + "grad_norm": 1.0958491563796997, + "learning_rate": 5.294447484398454e-06, + "loss": 0.6928, + "step": 9667 + }, + { + "epoch": 0.496865042655977, + "grad_norm": 1.1174042224884033, + "learning_rate": 5.293616670626536e-06, + "loss": 0.6757, + "step": 9668 + }, + { + "epoch": 0.4969164353993216, + "grad_norm": 1.0633642673492432, + "learning_rate": 5.29278584871971e-06, + "loss": 0.7605, + "step": 9669 + }, + { + "epoch": 0.49696782814266627, + "grad_norm": 1.0971599817276, + "learning_rate": 5.291955018700998e-06, + "loss": 0.764, + "step": 9670 + }, + { + "epoch": 0.4970192208860109, + "grad_norm": 0.748594343662262, + "learning_rate": 5.291124180593418e-06, + "loss": 0.6579, + "step": 9671 + }, + { + "epoch": 0.49707061362935556, + "grad_norm": 1.0452440977096558, + "learning_rate": 5.290293334419986e-06, + "loss": 0.7701, + "step": 9672 + }, + { + "epoch": 0.49712200637270015, + "grad_norm": 0.8301875591278076, + "learning_rate": 5.289462480203725e-06, + "loss": 0.7051, + "step": 9673 + }, + { + "epoch": 0.4971733991160448, + "grad_norm": 1.078459620475769, + "learning_rate": 5.288631617967653e-06, + "loss": 0.7268, + "step": 9674 + }, + { + "epoch": 0.49722479185938945, + "grad_norm": 0.9991085529327393, + "learning_rate": 5.287800747734792e-06, + "loss": 0.7196, + "step": 9675 + }, + { + "epoch": 0.4972761846027341, + "grad_norm": 1.060250997543335, + "learning_rate": 5.286969869528158e-06, + "loss": 0.7105, + "step": 9676 + }, + { + "epoch": 0.49732757734607874, + "grad_norm": 1.0783272981643677, + "learning_rate": 5.286138983370772e-06, + "loss": 0.7821, + "step": 9677 + }, + { + "epoch": 0.4973789700894234, + "grad_norm": 1.1080095767974854, + "learning_rate": 5.285308089285657e-06, + "loss": 0.7422, + "step": 9678 + }, + { + "epoch": 0.49743036283276804, + "grad_norm": 1.0426934957504272, + "learning_rate": 5.2844771872958325e-06, + "loss": 0.7761, + "step": 9679 + }, + { + "epoch": 0.4974817555761126, + "grad_norm": 0.7346612215042114, + "learning_rate": 5.283646277424318e-06, + "loss": 0.6799, + "step": 9680 + }, + { + "epoch": 0.4975331483194573, + "grad_norm": 1.2342984676361084, + "learning_rate": 5.282815359694137e-06, + "loss": 0.7685, + "step": 9681 + }, + { + "epoch": 0.4975845410628019, + "grad_norm": 0.726105809211731, + "learning_rate": 5.281984434128307e-06, + "loss": 0.6509, + "step": 9682 + }, + { + "epoch": 0.49763593380614657, + "grad_norm": 0.7369107007980347, + "learning_rate": 5.281153500749856e-06, + "loss": 0.703, + "step": 9683 + }, + { + "epoch": 0.4976873265494912, + "grad_norm": 1.0366274118423462, + "learning_rate": 5.2803225595818e-06, + "loss": 0.7238, + "step": 9684 + }, + { + "epoch": 0.49773871929283586, + "grad_norm": 1.0933395624160767, + "learning_rate": 5.279491610647162e-06, + "loss": 0.7605, + "step": 9685 + }, + { + "epoch": 0.4977901120361805, + "grad_norm": 0.7025826573371887, + "learning_rate": 5.278660653968965e-06, + "loss": 0.6564, + "step": 9686 + }, + { + "epoch": 0.49784150477952516, + "grad_norm": 0.988102376461029, + "learning_rate": 5.277829689570231e-06, + "loss": 0.7503, + "step": 9687 + }, + { + "epoch": 0.49789289752286975, + "grad_norm": 1.0864988565444946, + "learning_rate": 5.2769987174739835e-06, + "loss": 0.7011, + "step": 9688 + }, + { + "epoch": 0.4979442902662144, + "grad_norm": 1.0948187112808228, + "learning_rate": 5.276167737703244e-06, + "loss": 0.745, + "step": 9689 + }, + { + "epoch": 0.49799568300955904, + "grad_norm": 1.082934856414795, + "learning_rate": 5.275336750281036e-06, + "loss": 0.6679, + "step": 9690 + }, + { + "epoch": 0.4980470757529037, + "grad_norm": 0.7197828888893127, + "learning_rate": 5.274505755230384e-06, + "loss": 0.6347, + "step": 9691 + }, + { + "epoch": 0.49809846849624834, + "grad_norm": 1.0458934307098389, + "learning_rate": 5.27367475257431e-06, + "loss": 0.7365, + "step": 9692 + }, + { + "epoch": 0.498149861239593, + "grad_norm": 1.0925427675247192, + "learning_rate": 5.272843742335838e-06, + "loss": 0.7196, + "step": 9693 + }, + { + "epoch": 0.49820125398293763, + "grad_norm": 1.093076229095459, + "learning_rate": 5.272012724537993e-06, + "loss": 0.7344, + "step": 9694 + }, + { + "epoch": 0.4982526467262822, + "grad_norm": 0.8250869512557983, + "learning_rate": 5.271181699203796e-06, + "loss": 0.6848, + "step": 9695 + }, + { + "epoch": 0.49830403946962687, + "grad_norm": 1.0631132125854492, + "learning_rate": 5.270350666356274e-06, + "loss": 0.8014, + "step": 9696 + }, + { + "epoch": 0.4983554322129715, + "grad_norm": 0.7209571003913879, + "learning_rate": 5.269519626018451e-06, + "loss": 0.657, + "step": 9697 + }, + { + "epoch": 0.49840682495631616, + "grad_norm": 1.065102458000183, + "learning_rate": 5.26868857821335e-06, + "loss": 0.6533, + "step": 9698 + }, + { + "epoch": 0.4984582176996608, + "grad_norm": 1.0978947877883911, + "learning_rate": 5.267857522963999e-06, + "loss": 0.765, + "step": 9699 + }, + { + "epoch": 0.49850961044300546, + "grad_norm": 1.1280215978622437, + "learning_rate": 5.267026460293421e-06, + "loss": 0.744, + "step": 9700 + }, + { + "epoch": 0.4985610031863501, + "grad_norm": 1.062023401260376, + "learning_rate": 5.26619539022464e-06, + "loss": 0.695, + "step": 9701 + }, + { + "epoch": 0.49861239592969475, + "grad_norm": 1.0390610694885254, + "learning_rate": 5.2653643127806854e-06, + "loss": 0.754, + "step": 9702 + }, + { + "epoch": 0.49866378867303934, + "grad_norm": 1.0924310684204102, + "learning_rate": 5.264533227984581e-06, + "loss": 0.6825, + "step": 9703 + }, + { + "epoch": 0.498715181416384, + "grad_norm": 0.9988656640052795, + "learning_rate": 5.2637021358593496e-06, + "loss": 0.6697, + "step": 9704 + }, + { + "epoch": 0.49876657415972864, + "grad_norm": 1.0447070598602295, + "learning_rate": 5.262871036428023e-06, + "loss": 0.7346, + "step": 9705 + }, + { + "epoch": 0.4988179669030733, + "grad_norm": 0.7650237083435059, + "learning_rate": 5.262039929713624e-06, + "loss": 0.6881, + "step": 9706 + }, + { + "epoch": 0.49886935964641793, + "grad_norm": 1.054854154586792, + "learning_rate": 5.26120881573918e-06, + "loss": 0.7329, + "step": 9707 + }, + { + "epoch": 0.4989207523897626, + "grad_norm": 0.996692955493927, + "learning_rate": 5.260377694527717e-06, + "loss": 0.7191, + "step": 9708 + }, + { + "epoch": 0.4989721451331072, + "grad_norm": 1.1335670948028564, + "learning_rate": 5.259546566102263e-06, + "loss": 0.7073, + "step": 9709 + }, + { + "epoch": 0.4990235378764519, + "grad_norm": 1.023481845855713, + "learning_rate": 5.258715430485846e-06, + "loss": 0.7638, + "step": 9710 + }, + { + "epoch": 0.49907493061979646, + "grad_norm": 1.0852361917495728, + "learning_rate": 5.257884287701491e-06, + "loss": 0.7277, + "step": 9711 + }, + { + "epoch": 0.4991263233631411, + "grad_norm": 0.7520588040351868, + "learning_rate": 5.257053137772227e-06, + "loss": 0.6633, + "step": 9712 + }, + { + "epoch": 0.49917771610648576, + "grad_norm": 1.060171127319336, + "learning_rate": 5.256221980721081e-06, + "loss": 0.6772, + "step": 9713 + }, + { + "epoch": 0.4992291088498304, + "grad_norm": 1.0898305177688599, + "learning_rate": 5.255390816571081e-06, + "loss": 0.7518, + "step": 9714 + }, + { + "epoch": 0.49928050159317505, + "grad_norm": 0.8825089931488037, + "learning_rate": 5.254559645345255e-06, + "loss": 0.6416, + "step": 9715 + }, + { + "epoch": 0.4993318943365197, + "grad_norm": 1.0771045684814453, + "learning_rate": 5.253728467066633e-06, + "loss": 0.7767, + "step": 9716 + }, + { + "epoch": 0.49938328707986435, + "grad_norm": 1.0837466716766357, + "learning_rate": 5.252897281758241e-06, + "loss": 0.6988, + "step": 9717 + }, + { + "epoch": 0.49943467982320894, + "grad_norm": 1.1109775304794312, + "learning_rate": 5.252066089443109e-06, + "loss": 0.7643, + "step": 9718 + }, + { + "epoch": 0.4994860725665536, + "grad_norm": 0.7742611169815063, + "learning_rate": 5.2512348901442674e-06, + "loss": 0.6486, + "step": 9719 + }, + { + "epoch": 0.49953746530989823, + "grad_norm": 1.0676828622817993, + "learning_rate": 5.2504036838847425e-06, + "loss": 0.748, + "step": 9720 + }, + { + "epoch": 0.4995888580532429, + "grad_norm": 1.0265713930130005, + "learning_rate": 5.249572470687566e-06, + "loss": 0.6595, + "step": 9721 + }, + { + "epoch": 0.4996402507965875, + "grad_norm": 1.0895823240280151, + "learning_rate": 5.248741250575765e-06, + "loss": 0.6794, + "step": 9722 + }, + { + "epoch": 0.4996916435399322, + "grad_norm": 1.1792957782745361, + "learning_rate": 5.247910023572371e-06, + "loss": 0.6982, + "step": 9723 + }, + { + "epoch": 0.4997430362832768, + "grad_norm": 1.0468356609344482, + "learning_rate": 5.247078789700414e-06, + "loss": 0.7734, + "step": 9724 + }, + { + "epoch": 0.49979442902662147, + "grad_norm": 0.9966729283332825, + "learning_rate": 5.246247548982922e-06, + "loss": 0.761, + "step": 9725 + }, + { + "epoch": 0.49984582176996606, + "grad_norm": 1.0957149267196655, + "learning_rate": 5.245416301442928e-06, + "loss": 0.7252, + "step": 9726 + }, + { + "epoch": 0.4998972145133107, + "grad_norm": 1.0969775915145874, + "learning_rate": 5.244585047103461e-06, + "loss": 0.7572, + "step": 9727 + }, + { + "epoch": 0.49994860725665535, + "grad_norm": 1.0184416770935059, + "learning_rate": 5.243753785987551e-06, + "loss": 0.7151, + "step": 9728 + }, + { + "epoch": 0.5, + "grad_norm": 0.8881608247756958, + "learning_rate": 5.242922518118229e-06, + "loss": 0.6984, + "step": 9729 + }, + { + "epoch": 0.5000513927433446, + "grad_norm": 0.9978525638580322, + "learning_rate": 5.2420912435185255e-06, + "loss": 0.7395, + "step": 9730 + }, + { + "epoch": 0.5001027854866893, + "grad_norm": 1.0411361455917358, + "learning_rate": 5.241259962211475e-06, + "loss": 0.7679, + "step": 9731 + }, + { + "epoch": 0.5001541782300339, + "grad_norm": 1.0489391088485718, + "learning_rate": 5.240428674220106e-06, + "loss": 0.6484, + "step": 9732 + }, + { + "epoch": 0.5002055709733786, + "grad_norm": 0.8208889961242676, + "learning_rate": 5.239597379567451e-06, + "loss": 0.7005, + "step": 9733 + }, + { + "epoch": 0.5002569637167232, + "grad_norm": 1.0971370935440063, + "learning_rate": 5.238766078276541e-06, + "loss": 0.7348, + "step": 9734 + }, + { + "epoch": 0.5003083564600679, + "grad_norm": 1.0362783670425415, + "learning_rate": 5.237934770370408e-06, + "loss": 0.7171, + "step": 9735 + }, + { + "epoch": 0.5003597492034125, + "grad_norm": 1.1381134986877441, + "learning_rate": 5.2371034558720845e-06, + "loss": 0.7567, + "step": 9736 + }, + { + "epoch": 0.5004111419467571, + "grad_norm": 1.0195916891098022, + "learning_rate": 5.2362721348046035e-06, + "loss": 0.7427, + "step": 9737 + }, + { + "epoch": 0.5004625346901017, + "grad_norm": 1.15377676486969, + "learning_rate": 5.235440807190994e-06, + "loss": 0.7576, + "step": 9738 + }, + { + "epoch": 0.5005139274334464, + "grad_norm": 1.032058596611023, + "learning_rate": 5.234609473054293e-06, + "loss": 0.7367, + "step": 9739 + }, + { + "epoch": 0.500565320176791, + "grad_norm": 1.0951296091079712, + "learning_rate": 5.233778132417533e-06, + "loss": 0.7481, + "step": 9740 + }, + { + "epoch": 0.5006167129201357, + "grad_norm": 1.0305310487747192, + "learning_rate": 5.2329467853037444e-06, + "loss": 0.6947, + "step": 9741 + }, + { + "epoch": 0.5006681056634803, + "grad_norm": 1.077505350112915, + "learning_rate": 5.2321154317359615e-06, + "loss": 0.7395, + "step": 9742 + }, + { + "epoch": 0.500719498406825, + "grad_norm": 1.0372668504714966, + "learning_rate": 5.231284071737219e-06, + "loss": 0.7567, + "step": 9743 + }, + { + "epoch": 0.5007708911501696, + "grad_norm": 1.0231887102127075, + "learning_rate": 5.230452705330548e-06, + "loss": 0.6947, + "step": 9744 + }, + { + "epoch": 0.5008222838935142, + "grad_norm": 0.737357497215271, + "learning_rate": 5.229621332538985e-06, + "loss": 0.7297, + "step": 9745 + }, + { + "epoch": 0.5008736766368589, + "grad_norm": 1.098196268081665, + "learning_rate": 5.228789953385561e-06, + "loss": 0.7409, + "step": 9746 + }, + { + "epoch": 0.5009250693802035, + "grad_norm": 1.1107432842254639, + "learning_rate": 5.227958567893312e-06, + "loss": 0.7301, + "step": 9747 + }, + { + "epoch": 0.5009764621235482, + "grad_norm": 1.1124063730239868, + "learning_rate": 5.227127176085272e-06, + "loss": 0.7323, + "step": 9748 + }, + { + "epoch": 0.5010278548668928, + "grad_norm": 1.1481200456619263, + "learning_rate": 5.2262957779844756e-06, + "loss": 0.7028, + "step": 9749 + }, + { + "epoch": 0.5010792476102375, + "grad_norm": 1.040295124053955, + "learning_rate": 5.225464373613956e-06, + "loss": 0.6624, + "step": 9750 + }, + { + "epoch": 0.5011306403535821, + "grad_norm": 1.0675523281097412, + "learning_rate": 5.224632962996751e-06, + "loss": 0.7519, + "step": 9751 + }, + { + "epoch": 0.5011820330969267, + "grad_norm": 1.0806888341903687, + "learning_rate": 5.223801546155891e-06, + "loss": 0.7278, + "step": 9752 + }, + { + "epoch": 0.5012334258402713, + "grad_norm": 1.166495442390442, + "learning_rate": 5.2229701231144155e-06, + "loss": 0.7187, + "step": 9753 + }, + { + "epoch": 0.501284818583616, + "grad_norm": 1.123867154121399, + "learning_rate": 5.222138693895358e-06, + "loss": 0.7166, + "step": 9754 + }, + { + "epoch": 0.5013362113269606, + "grad_norm": 1.07319974899292, + "learning_rate": 5.221307258521754e-06, + "loss": 0.7327, + "step": 9755 + }, + { + "epoch": 0.5013876040703052, + "grad_norm": 1.0976871252059937, + "learning_rate": 5.220475817016639e-06, + "loss": 0.7442, + "step": 9756 + }, + { + "epoch": 0.5014389968136499, + "grad_norm": 0.9777010679244995, + "learning_rate": 5.219644369403047e-06, + "loss": 0.689, + "step": 9757 + }, + { + "epoch": 0.5014903895569945, + "grad_norm": 1.0784786939620972, + "learning_rate": 5.218812915704019e-06, + "loss": 0.6815, + "step": 9758 + }, + { + "epoch": 0.5015417823003392, + "grad_norm": 1.1116551160812378, + "learning_rate": 5.217981455942586e-06, + "loss": 0.7102, + "step": 9759 + }, + { + "epoch": 0.5015931750436838, + "grad_norm": 1.0390983819961548, + "learning_rate": 5.217149990141786e-06, + "loss": 0.7218, + "step": 9760 + }, + { + "epoch": 0.5016445677870285, + "grad_norm": 0.9617928862571716, + "learning_rate": 5.2163185183246575e-06, + "loss": 0.6825, + "step": 9761 + }, + { + "epoch": 0.5016959605303731, + "grad_norm": 1.035317063331604, + "learning_rate": 5.215487040514235e-06, + "loss": 0.75, + "step": 9762 + }, + { + "epoch": 0.5017473532737178, + "grad_norm": 1.0783419609069824, + "learning_rate": 5.214655556733556e-06, + "loss": 0.7051, + "step": 9763 + }, + { + "epoch": 0.5017987460170624, + "grad_norm": 1.084179162979126, + "learning_rate": 5.213824067005658e-06, + "loss": 0.7347, + "step": 9764 + }, + { + "epoch": 0.5018501387604071, + "grad_norm": 1.1248584985733032, + "learning_rate": 5.212992571353577e-06, + "loss": 0.7589, + "step": 9765 + }, + { + "epoch": 0.5019015315037517, + "grad_norm": 1.1401808261871338, + "learning_rate": 5.212161069800351e-06, + "loss": 0.7782, + "step": 9766 + }, + { + "epoch": 0.5019529242470963, + "grad_norm": 1.0196930170059204, + "learning_rate": 5.211329562369017e-06, + "loss": 0.6927, + "step": 9767 + }, + { + "epoch": 0.5020043169904409, + "grad_norm": 1.1252310276031494, + "learning_rate": 5.210498049082613e-06, + "loss": 0.7573, + "step": 9768 + }, + { + "epoch": 0.5020557097337855, + "grad_norm": 1.024275541305542, + "learning_rate": 5.209666529964175e-06, + "loss": 0.6932, + "step": 9769 + }, + { + "epoch": 0.5021071024771302, + "grad_norm": 1.0425715446472168, + "learning_rate": 5.208835005036745e-06, + "loss": 0.7136, + "step": 9770 + }, + { + "epoch": 0.5021584952204748, + "grad_norm": 1.0093886852264404, + "learning_rate": 5.208003474323359e-06, + "loss": 0.7201, + "step": 9771 + }, + { + "epoch": 0.5022098879638195, + "grad_norm": 1.058971643447876, + "learning_rate": 5.207171937847054e-06, + "loss": 0.7678, + "step": 9772 + }, + { + "epoch": 0.5022612807071641, + "grad_norm": 1.0527020692825317, + "learning_rate": 5.206340395630869e-06, + "loss": 0.7302, + "step": 9773 + }, + { + "epoch": 0.5023126734505088, + "grad_norm": 1.0632209777832031, + "learning_rate": 5.205508847697844e-06, + "loss": 0.7109, + "step": 9774 + }, + { + "epoch": 0.5023640661938534, + "grad_norm": 1.0606930255889893, + "learning_rate": 5.204677294071018e-06, + "loss": 0.7473, + "step": 9775 + }, + { + "epoch": 0.5024154589371981, + "grad_norm": 1.1294353008270264, + "learning_rate": 5.203845734773428e-06, + "loss": 0.7257, + "step": 9776 + }, + { + "epoch": 0.5024668516805427, + "grad_norm": 1.1610182523727417, + "learning_rate": 5.203014169828114e-06, + "loss": 0.7297, + "step": 9777 + }, + { + "epoch": 0.5025182444238874, + "grad_norm": 1.1174776554107666, + "learning_rate": 5.202182599258113e-06, + "loss": 0.7721, + "step": 9778 + }, + { + "epoch": 0.502569637167232, + "grad_norm": 1.077193021774292, + "learning_rate": 5.201351023086469e-06, + "loss": 0.7883, + "step": 9779 + }, + { + "epoch": 0.5026210299105767, + "grad_norm": 1.0956318378448486, + "learning_rate": 5.200519441336218e-06, + "loss": 0.7533, + "step": 9780 + }, + { + "epoch": 0.5026724226539213, + "grad_norm": 1.056382179260254, + "learning_rate": 5.199687854030401e-06, + "loss": 0.7378, + "step": 9781 + }, + { + "epoch": 0.502723815397266, + "grad_norm": 1.1149080991744995, + "learning_rate": 5.198856261192058e-06, + "loss": 0.7838, + "step": 9782 + }, + { + "epoch": 0.5027752081406105, + "grad_norm": 1.1333467960357666, + "learning_rate": 5.198024662844229e-06, + "loss": 0.8131, + "step": 9783 + }, + { + "epoch": 0.5028266008839551, + "grad_norm": 1.0871139764785767, + "learning_rate": 5.197193059009953e-06, + "loss": 0.673, + "step": 9784 + }, + { + "epoch": 0.5028779936272998, + "grad_norm": 1.2073267698287964, + "learning_rate": 5.196361449712272e-06, + "loss": 0.7961, + "step": 9785 + }, + { + "epoch": 0.5029293863706444, + "grad_norm": 0.8785910606384277, + "learning_rate": 5.1955298349742235e-06, + "loss": 0.7065, + "step": 9786 + }, + { + "epoch": 0.5029807791139891, + "grad_norm": 1.110816240310669, + "learning_rate": 5.194698214818852e-06, + "loss": 0.7355, + "step": 9787 + }, + { + "epoch": 0.5030321718573337, + "grad_norm": 1.1505292654037476, + "learning_rate": 5.1938665892691966e-06, + "loss": 0.6293, + "step": 9788 + }, + { + "epoch": 0.5030835646006784, + "grad_norm": 1.11864173412323, + "learning_rate": 5.193034958348297e-06, + "loss": 0.739, + "step": 9789 + }, + { + "epoch": 0.503134957344023, + "grad_norm": 1.0772300958633423, + "learning_rate": 5.192203322079194e-06, + "loss": 0.7926, + "step": 9790 + }, + { + "epoch": 0.5031863500873677, + "grad_norm": 1.0961942672729492, + "learning_rate": 5.191371680484934e-06, + "loss": 0.7535, + "step": 9791 + }, + { + "epoch": 0.5032377428307123, + "grad_norm": 0.8938323259353638, + "learning_rate": 5.190540033588551e-06, + "loss": 0.691, + "step": 9792 + }, + { + "epoch": 0.503289135574057, + "grad_norm": 0.7154179215431213, + "learning_rate": 5.189708381413092e-06, + "loss": 0.6826, + "step": 9793 + }, + { + "epoch": 0.5033405283174016, + "grad_norm": 0.9982395768165588, + "learning_rate": 5.188876723981595e-06, + "loss": 0.7256, + "step": 9794 + }, + { + "epoch": 0.5033919210607463, + "grad_norm": 1.0275741815567017, + "learning_rate": 5.1880450613171054e-06, + "loss": 0.6308, + "step": 9795 + }, + { + "epoch": 0.5034433138040909, + "grad_norm": 1.0949006080627441, + "learning_rate": 5.187213393442663e-06, + "loss": 0.7752, + "step": 9796 + }, + { + "epoch": 0.5034947065474356, + "grad_norm": 1.186237096786499, + "learning_rate": 5.186381720381309e-06, + "loss": 0.7493, + "step": 9797 + }, + { + "epoch": 0.5035460992907801, + "grad_norm": 1.2436245679855347, + "learning_rate": 5.185550042156087e-06, + "loss": 0.6943, + "step": 9798 + }, + { + "epoch": 0.5035974920341247, + "grad_norm": 1.0442477464675903, + "learning_rate": 5.184718358790037e-06, + "loss": 0.7736, + "step": 9799 + }, + { + "epoch": 0.5036488847774694, + "grad_norm": 1.0788089036941528, + "learning_rate": 5.183886670306207e-06, + "loss": 0.7142, + "step": 9800 + }, + { + "epoch": 0.503700277520814, + "grad_norm": 0.7839220762252808, + "learning_rate": 5.183054976727634e-06, + "loss": 0.6696, + "step": 9801 + }, + { + "epoch": 0.5037516702641587, + "grad_norm": 0.8155900239944458, + "learning_rate": 5.182223278077363e-06, + "loss": 0.6816, + "step": 9802 + }, + { + "epoch": 0.5038030630075033, + "grad_norm": 1.1803193092346191, + "learning_rate": 5.1813915743784384e-06, + "loss": 0.7111, + "step": 9803 + }, + { + "epoch": 0.503854455750848, + "grad_norm": 1.0620794296264648, + "learning_rate": 5.180559865653901e-06, + "loss": 0.7454, + "step": 9804 + }, + { + "epoch": 0.5039058484941926, + "grad_norm": 1.0659598112106323, + "learning_rate": 5.179728151926795e-06, + "loss": 0.7483, + "step": 9805 + }, + { + "epoch": 0.5039572412375373, + "grad_norm": 1.0828704833984375, + "learning_rate": 5.178896433220164e-06, + "loss": 0.7106, + "step": 9806 + }, + { + "epoch": 0.5040086339808819, + "grad_norm": 1.0648293495178223, + "learning_rate": 5.178064709557051e-06, + "loss": 0.7219, + "step": 9807 + }, + { + "epoch": 0.5040600267242266, + "grad_norm": 1.0650081634521484, + "learning_rate": 5.177232980960499e-06, + "loss": 0.7155, + "step": 9808 + }, + { + "epoch": 0.5041114194675712, + "grad_norm": 0.7046471834182739, + "learning_rate": 5.176401247453553e-06, + "loss": 0.627, + "step": 9809 + }, + { + "epoch": 0.5041628122109159, + "grad_norm": 1.0383120775222778, + "learning_rate": 5.175569509059256e-06, + "loss": 0.7392, + "step": 9810 + }, + { + "epoch": 0.5042142049542605, + "grad_norm": 1.1036735773086548, + "learning_rate": 5.174737765800652e-06, + "loss": 0.7808, + "step": 9811 + }, + { + "epoch": 0.5042655976976052, + "grad_norm": 1.081710696220398, + "learning_rate": 5.1739060177007875e-06, + "loss": 0.7693, + "step": 9812 + }, + { + "epoch": 0.5043169904409497, + "grad_norm": 1.0076935291290283, + "learning_rate": 5.173074264782703e-06, + "loss": 0.7361, + "step": 9813 + }, + { + "epoch": 0.5043683831842943, + "grad_norm": 1.17203950881958, + "learning_rate": 5.172242507069446e-06, + "loss": 0.7011, + "step": 9814 + }, + { + "epoch": 0.504419775927639, + "grad_norm": 1.0138545036315918, + "learning_rate": 5.171410744584059e-06, + "loss": 0.7181, + "step": 9815 + }, + { + "epoch": 0.5044711686709836, + "grad_norm": 1.0348193645477295, + "learning_rate": 5.170578977349589e-06, + "loss": 0.6914, + "step": 9816 + }, + { + "epoch": 0.5045225614143283, + "grad_norm": 0.6981536746025085, + "learning_rate": 5.1697472053890785e-06, + "loss": 0.6297, + "step": 9817 + }, + { + "epoch": 0.5045739541576729, + "grad_norm": 0.7072228193283081, + "learning_rate": 5.168915428725574e-06, + "loss": 0.6388, + "step": 9818 + }, + { + "epoch": 0.5046253469010176, + "grad_norm": 1.1172759532928467, + "learning_rate": 5.16808364738212e-06, + "loss": 0.7241, + "step": 9819 + }, + { + "epoch": 0.5046767396443622, + "grad_norm": 0.7374457716941833, + "learning_rate": 5.1672518613817605e-06, + "loss": 0.6859, + "step": 9820 + }, + { + "epoch": 0.5047281323877069, + "grad_norm": 0.9920042157173157, + "learning_rate": 5.166420070747543e-06, + "loss": 0.708, + "step": 9821 + }, + { + "epoch": 0.5047795251310515, + "grad_norm": 0.7187145352363586, + "learning_rate": 5.1655882755025125e-06, + "loss": 0.7196, + "step": 9822 + }, + { + "epoch": 0.5048309178743962, + "grad_norm": 0.810431182384491, + "learning_rate": 5.164756475669713e-06, + "loss": 0.6609, + "step": 9823 + }, + { + "epoch": 0.5048823106177408, + "grad_norm": 1.058018684387207, + "learning_rate": 5.163924671272192e-06, + "loss": 0.7585, + "step": 9824 + }, + { + "epoch": 0.5049337033610855, + "grad_norm": 1.0703145265579224, + "learning_rate": 5.163092862332997e-06, + "loss": 0.6841, + "step": 9825 + }, + { + "epoch": 0.5049850961044301, + "grad_norm": 1.0309933423995972, + "learning_rate": 5.162261048875169e-06, + "loss": 0.696, + "step": 9826 + }, + { + "epoch": 0.5050364888477747, + "grad_norm": 1.0483273267745972, + "learning_rate": 5.161429230921759e-06, + "loss": 0.7515, + "step": 9827 + }, + { + "epoch": 0.5050878815911193, + "grad_norm": 0.8678324222564697, + "learning_rate": 5.16059740849581e-06, + "loss": 0.6417, + "step": 9828 + }, + { + "epoch": 0.5051392743344639, + "grad_norm": 1.1109814643859863, + "learning_rate": 5.159765581620369e-06, + "loss": 0.8359, + "step": 9829 + }, + { + "epoch": 0.5051906670778086, + "grad_norm": 1.0828970670700073, + "learning_rate": 5.158933750318484e-06, + "loss": 0.7397, + "step": 9830 + }, + { + "epoch": 0.5052420598211532, + "grad_norm": 1.1663058996200562, + "learning_rate": 5.158101914613201e-06, + "loss": 0.7285, + "step": 9831 + }, + { + "epoch": 0.5052934525644979, + "grad_norm": 1.0458813905715942, + "learning_rate": 5.157270074527565e-06, + "loss": 0.6395, + "step": 9832 + }, + { + "epoch": 0.5053448453078425, + "grad_norm": 1.175789713859558, + "learning_rate": 5.156438230084625e-06, + "loss": 0.7267, + "step": 9833 + }, + { + "epoch": 0.5053962380511872, + "grad_norm": 1.0505309104919434, + "learning_rate": 5.155606381307427e-06, + "loss": 0.7154, + "step": 9834 + }, + { + "epoch": 0.5054476307945318, + "grad_norm": 1.005852460861206, + "learning_rate": 5.154774528219019e-06, + "loss": 0.706, + "step": 9835 + }, + { + "epoch": 0.5054990235378765, + "grad_norm": 0.7402378916740417, + "learning_rate": 5.153942670842448e-06, + "loss": 0.6656, + "step": 9836 + }, + { + "epoch": 0.5055504162812211, + "grad_norm": 1.0661238431930542, + "learning_rate": 5.153110809200759e-06, + "loss": 0.6759, + "step": 9837 + }, + { + "epoch": 0.5056018090245658, + "grad_norm": 0.9975918531417847, + "learning_rate": 5.152278943317003e-06, + "loss": 0.7051, + "step": 9838 + }, + { + "epoch": 0.5056532017679104, + "grad_norm": 1.0874539613723755, + "learning_rate": 5.151447073214224e-06, + "loss": 0.7548, + "step": 9839 + }, + { + "epoch": 0.505704594511255, + "grad_norm": 1.1117216348648071, + "learning_rate": 5.150615198915474e-06, + "loss": 0.7214, + "step": 9840 + }, + { + "epoch": 0.5057559872545997, + "grad_norm": 1.1202377080917358, + "learning_rate": 5.149783320443796e-06, + "loss": 0.742, + "step": 9841 + }, + { + "epoch": 0.5058073799979443, + "grad_norm": 1.064525842666626, + "learning_rate": 5.148951437822241e-06, + "loss": 0.6909, + "step": 9842 + }, + { + "epoch": 0.5058587727412889, + "grad_norm": 1.0754978656768799, + "learning_rate": 5.148119551073858e-06, + "loss": 0.765, + "step": 9843 + }, + { + "epoch": 0.5059101654846335, + "grad_norm": 1.0283032655715942, + "learning_rate": 5.147287660221693e-06, + "loss": 0.7053, + "step": 9844 + }, + { + "epoch": 0.5059615582279782, + "grad_norm": 1.0967696905136108, + "learning_rate": 5.1464557652887935e-06, + "loss": 0.7832, + "step": 9845 + }, + { + "epoch": 0.5060129509713228, + "grad_norm": 0.7381188869476318, + "learning_rate": 5.145623866298211e-06, + "loss": 0.6268, + "step": 9846 + }, + { + "epoch": 0.5060643437146675, + "grad_norm": 1.0572466850280762, + "learning_rate": 5.1447919632729894e-06, + "loss": 0.7453, + "step": 9847 + }, + { + "epoch": 0.5061157364580121, + "grad_norm": 0.7554457783699036, + "learning_rate": 5.143960056236183e-06, + "loss": 0.6579, + "step": 9848 + }, + { + "epoch": 0.5061671292013568, + "grad_norm": 1.0734033584594727, + "learning_rate": 5.1431281452108365e-06, + "loss": 0.7078, + "step": 9849 + }, + { + "epoch": 0.5062185219447014, + "grad_norm": 1.057795524597168, + "learning_rate": 5.142296230219997e-06, + "loss": 0.7036, + "step": 9850 + }, + { + "epoch": 0.506269914688046, + "grad_norm": 1.1338948011398315, + "learning_rate": 5.14146431128672e-06, + "loss": 0.7102, + "step": 9851 + }, + { + "epoch": 0.5063213074313907, + "grad_norm": 1.0521376132965088, + "learning_rate": 5.140632388434048e-06, + "loss": 0.7244, + "step": 9852 + }, + { + "epoch": 0.5063727001747353, + "grad_norm": 0.8181077837944031, + "learning_rate": 5.139800461685034e-06, + "loss": 0.6847, + "step": 9853 + }, + { + "epoch": 0.50642409291808, + "grad_norm": 1.0888718366622925, + "learning_rate": 5.138968531062727e-06, + "loss": 0.7596, + "step": 9854 + }, + { + "epoch": 0.5064754856614246, + "grad_norm": 1.0510457754135132, + "learning_rate": 5.138136596590174e-06, + "loss": 0.7058, + "step": 9855 + }, + { + "epoch": 0.5065268784047693, + "grad_norm": 1.0899425745010376, + "learning_rate": 5.137304658290427e-06, + "loss": 0.7139, + "step": 9856 + }, + { + "epoch": 0.5065782711481139, + "grad_norm": 1.230408787727356, + "learning_rate": 5.136472716186535e-06, + "loss": 0.7262, + "step": 9857 + }, + { + "epoch": 0.5066296638914586, + "grad_norm": 1.0192619562149048, + "learning_rate": 5.135640770301545e-06, + "loss": 0.7649, + "step": 9858 + }, + { + "epoch": 0.5066810566348031, + "grad_norm": 0.8518606424331665, + "learning_rate": 5.134808820658511e-06, + "loss": 0.6881, + "step": 9859 + }, + { + "epoch": 0.5067324493781478, + "grad_norm": 0.7164705395698547, + "learning_rate": 5.133976867280481e-06, + "loss": 0.6915, + "step": 9860 + }, + { + "epoch": 0.5067838421214924, + "grad_norm": 1.0537047386169434, + "learning_rate": 5.133144910190504e-06, + "loss": 0.7172, + "step": 9861 + }, + { + "epoch": 0.5068352348648371, + "grad_norm": 1.061172604560852, + "learning_rate": 5.132312949411631e-06, + "loss": 0.7249, + "step": 9862 + }, + { + "epoch": 0.5068866276081817, + "grad_norm": 1.153988242149353, + "learning_rate": 5.131480984966911e-06, + "loss": 0.7244, + "step": 9863 + }, + { + "epoch": 0.5069380203515264, + "grad_norm": 1.0733693838119507, + "learning_rate": 5.130649016879398e-06, + "loss": 0.7108, + "step": 9864 + }, + { + "epoch": 0.506989413094871, + "grad_norm": 2.552502155303955, + "learning_rate": 5.1298170451721394e-06, + "loss": 0.7569, + "step": 9865 + }, + { + "epoch": 0.5070408058382156, + "grad_norm": 1.0848277807235718, + "learning_rate": 5.128985069868185e-06, + "loss": 0.7734, + "step": 9866 + }, + { + "epoch": 0.5070921985815603, + "grad_norm": 1.121888518333435, + "learning_rate": 5.128153090990587e-06, + "loss": 0.6832, + "step": 9867 + }, + { + "epoch": 0.5071435913249049, + "grad_norm": 1.10399329662323, + "learning_rate": 5.127321108562398e-06, + "loss": 0.7478, + "step": 9868 + }, + { + "epoch": 0.5071949840682496, + "grad_norm": 1.0932260751724243, + "learning_rate": 5.126489122606663e-06, + "loss": 0.7018, + "step": 9869 + }, + { + "epoch": 0.5072463768115942, + "grad_norm": 1.073297142982483, + "learning_rate": 5.125657133146439e-06, + "loss": 0.7086, + "step": 9870 + }, + { + "epoch": 0.5072977695549389, + "grad_norm": 1.020331621170044, + "learning_rate": 5.1248251402047725e-06, + "loss": 0.7026, + "step": 9871 + }, + { + "epoch": 0.5073491622982835, + "grad_norm": 1.0914050340652466, + "learning_rate": 5.1239931438047175e-06, + "loss": 0.728, + "step": 9872 + }, + { + "epoch": 0.5074005550416282, + "grad_norm": 1.0729299783706665, + "learning_rate": 5.123161143969323e-06, + "loss": 0.7288, + "step": 9873 + }, + { + "epoch": 0.5074519477849727, + "grad_norm": 1.0422276258468628, + "learning_rate": 5.122329140721641e-06, + "loss": 0.7662, + "step": 9874 + }, + { + "epoch": 0.5075033405283174, + "grad_norm": 1.1172897815704346, + "learning_rate": 5.121497134084726e-06, + "loss": 0.7177, + "step": 9875 + }, + { + "epoch": 0.507554733271662, + "grad_norm": 1.034440279006958, + "learning_rate": 5.120665124081626e-06, + "loss": 0.739, + "step": 9876 + }, + { + "epoch": 0.5076061260150067, + "grad_norm": 1.0757503509521484, + "learning_rate": 5.119833110735393e-06, + "loss": 0.731, + "step": 9877 + }, + { + "epoch": 0.5076575187583513, + "grad_norm": 1.1239045858383179, + "learning_rate": 5.1190010940690785e-06, + "loss": 0.7504, + "step": 9878 + }, + { + "epoch": 0.507708911501696, + "grad_norm": 1.0658067464828491, + "learning_rate": 5.118169074105734e-06, + "loss": 0.7231, + "step": 9879 + }, + { + "epoch": 0.5077603042450406, + "grad_norm": 0.7545881867408752, + "learning_rate": 5.117337050868415e-06, + "loss": 0.6886, + "step": 9880 + }, + { + "epoch": 0.5078116969883852, + "grad_norm": 1.0129677057266235, + "learning_rate": 5.116505024380168e-06, + "loss": 0.6922, + "step": 9881 + }, + { + "epoch": 0.5078630897317299, + "grad_norm": 1.0667717456817627, + "learning_rate": 5.1156729946640485e-06, + "loss": 0.7622, + "step": 9882 + }, + { + "epoch": 0.5079144824750745, + "grad_norm": 1.1132174730300903, + "learning_rate": 5.1148409617431075e-06, + "loss": 0.7872, + "step": 9883 + }, + { + "epoch": 0.5079658752184192, + "grad_norm": 1.0740832090377808, + "learning_rate": 5.114008925640398e-06, + "loss": 0.7618, + "step": 9884 + }, + { + "epoch": 0.5080172679617638, + "grad_norm": 1.092882513999939, + "learning_rate": 5.11317688637897e-06, + "loss": 0.6922, + "step": 9885 + }, + { + "epoch": 0.5080686607051085, + "grad_norm": 1.0890393257141113, + "learning_rate": 5.112344843981879e-06, + "loss": 0.7061, + "step": 9886 + }, + { + "epoch": 0.5081200534484531, + "grad_norm": 1.139702558517456, + "learning_rate": 5.111512798472176e-06, + "loss": 0.8031, + "step": 9887 + }, + { + "epoch": 0.5081714461917978, + "grad_norm": 1.0248024463653564, + "learning_rate": 5.110680749872913e-06, + "loss": 0.7195, + "step": 9888 + }, + { + "epoch": 0.5082228389351423, + "grad_norm": 0.7777139544487, + "learning_rate": 5.109848698207144e-06, + "loss": 0.6722, + "step": 9889 + }, + { + "epoch": 0.508274231678487, + "grad_norm": 1.1264641284942627, + "learning_rate": 5.109016643497919e-06, + "loss": 0.7731, + "step": 9890 + }, + { + "epoch": 0.5083256244218316, + "grad_norm": 0.7348080277442932, + "learning_rate": 5.108184585768294e-06, + "loss": 0.6771, + "step": 9891 + }, + { + "epoch": 0.5083770171651762, + "grad_norm": 1.0893429517745972, + "learning_rate": 5.10735252504132e-06, + "loss": 0.6846, + "step": 9892 + }, + { + "epoch": 0.5084284099085209, + "grad_norm": 1.1225247383117676, + "learning_rate": 5.106520461340051e-06, + "loss": 0.7067, + "step": 9893 + }, + { + "epoch": 0.5084798026518655, + "grad_norm": 1.0337660312652588, + "learning_rate": 5.10568839468754e-06, + "loss": 0.7696, + "step": 9894 + }, + { + "epoch": 0.5085311953952102, + "grad_norm": 1.0286237001419067, + "learning_rate": 5.1048563251068395e-06, + "loss": 0.6988, + "step": 9895 + }, + { + "epoch": 0.5085825881385548, + "grad_norm": 1.1379164457321167, + "learning_rate": 5.104024252621002e-06, + "loss": 0.7518, + "step": 9896 + }, + { + "epoch": 0.5086339808818995, + "grad_norm": 1.0808881521224976, + "learning_rate": 5.103192177253084e-06, + "loss": 0.6951, + "step": 9897 + }, + { + "epoch": 0.5086853736252441, + "grad_norm": 1.0295133590698242, + "learning_rate": 5.102360099026134e-06, + "loss": 0.7057, + "step": 9898 + }, + { + "epoch": 0.5087367663685888, + "grad_norm": 1.051543116569519, + "learning_rate": 5.10152801796321e-06, + "loss": 0.7242, + "step": 9899 + }, + { + "epoch": 0.5087881591119334, + "grad_norm": 1.0434367656707764, + "learning_rate": 5.100695934087363e-06, + "loss": 0.7192, + "step": 9900 + }, + { + "epoch": 0.5088395518552781, + "grad_norm": 1.0412479639053345, + "learning_rate": 5.099863847421647e-06, + "loss": 0.7107, + "step": 9901 + }, + { + "epoch": 0.5088909445986227, + "grad_norm": 1.094910979270935, + "learning_rate": 5.0990317579891165e-06, + "loss": 0.7107, + "step": 9902 + }, + { + "epoch": 0.5089423373419674, + "grad_norm": 1.0198018550872803, + "learning_rate": 5.098199665812823e-06, + "loss": 0.7135, + "step": 9903 + }, + { + "epoch": 0.5089937300853119, + "grad_norm": 0.7979608178138733, + "learning_rate": 5.0973675709158234e-06, + "loss": 0.6523, + "step": 9904 + }, + { + "epoch": 0.5090451228286565, + "grad_norm": 0.7448724508285522, + "learning_rate": 5.09653547332117e-06, + "loss": 0.6832, + "step": 9905 + }, + { + "epoch": 0.5090965155720012, + "grad_norm": 1.1511354446411133, + "learning_rate": 5.095703373051917e-06, + "loss": 0.7184, + "step": 9906 + }, + { + "epoch": 0.5091479083153458, + "grad_norm": 0.7776548266410828, + "learning_rate": 5.0948712701311185e-06, + "loss": 0.6957, + "step": 9907 + }, + { + "epoch": 0.5091993010586905, + "grad_norm": 1.082369327545166, + "learning_rate": 5.094039164581828e-06, + "loss": 0.682, + "step": 9908 + }, + { + "epoch": 0.5092506938020351, + "grad_norm": 1.1514898538589478, + "learning_rate": 5.093207056427101e-06, + "loss": 0.816, + "step": 9909 + }, + { + "epoch": 0.5093020865453798, + "grad_norm": 0.984919011592865, + "learning_rate": 5.092374945689992e-06, + "loss": 0.6843, + "step": 9910 + }, + { + "epoch": 0.5093534792887244, + "grad_norm": 1.0366929769515991, + "learning_rate": 5.0915428323935525e-06, + "loss": 0.7004, + "step": 9911 + }, + { + "epoch": 0.5094048720320691, + "grad_norm": 1.102841854095459, + "learning_rate": 5.0907107165608406e-06, + "loss": 0.7391, + "step": 9912 + }, + { + "epoch": 0.5094562647754137, + "grad_norm": 1.0569156408309937, + "learning_rate": 5.089878598214908e-06, + "loss": 0.6792, + "step": 9913 + }, + { + "epoch": 0.5095076575187584, + "grad_norm": 1.0401074886322021, + "learning_rate": 5.08904647737881e-06, + "loss": 0.7325, + "step": 9914 + }, + { + "epoch": 0.509559050262103, + "grad_norm": 1.025696873664856, + "learning_rate": 5.088214354075603e-06, + "loss": 0.7125, + "step": 9915 + }, + { + "epoch": 0.5096104430054477, + "grad_norm": 0.809995174407959, + "learning_rate": 5.087382228328338e-06, + "loss": 0.6852, + "step": 9916 + }, + { + "epoch": 0.5096618357487923, + "grad_norm": 1.0682092905044556, + "learning_rate": 5.086550100160074e-06, + "loss": 0.7195, + "step": 9917 + }, + { + "epoch": 0.509713228492137, + "grad_norm": 1.1462408304214478, + "learning_rate": 5.0857179695938655e-06, + "loss": 0.7639, + "step": 9918 + }, + { + "epoch": 0.5097646212354815, + "grad_norm": 0.9014225602149963, + "learning_rate": 5.084885836652762e-06, + "loss": 0.7075, + "step": 9919 + }, + { + "epoch": 0.5098160139788261, + "grad_norm": 0.7621055841445923, + "learning_rate": 5.0840537013598245e-06, + "loss": 0.7053, + "step": 9920 + }, + { + "epoch": 0.5098674067221708, + "grad_norm": 1.0539437532424927, + "learning_rate": 5.0832215637381065e-06, + "loss": 0.7304, + "step": 9921 + }, + { + "epoch": 0.5099187994655154, + "grad_norm": 0.6832962036132812, + "learning_rate": 5.082389423810661e-06, + "loss": 0.6825, + "step": 9922 + }, + { + "epoch": 0.5099701922088601, + "grad_norm": 1.0831444263458252, + "learning_rate": 5.081557281600546e-06, + "loss": 0.7533, + "step": 9923 + }, + { + "epoch": 0.5100215849522047, + "grad_norm": 0.8382664322853088, + "learning_rate": 5.080725137130813e-06, + "loss": 0.6879, + "step": 9924 + }, + { + "epoch": 0.5100729776955494, + "grad_norm": 1.068884015083313, + "learning_rate": 5.07989299042452e-06, + "loss": 0.7419, + "step": 9925 + }, + { + "epoch": 0.510124370438894, + "grad_norm": 0.7375056743621826, + "learning_rate": 5.079060841504722e-06, + "loss": 0.6332, + "step": 9926 + }, + { + "epoch": 0.5101757631822387, + "grad_norm": 1.2274845838546753, + "learning_rate": 5.0782286903944756e-06, + "loss": 0.6946, + "step": 9927 + }, + { + "epoch": 0.5102271559255833, + "grad_norm": 1.1557543277740479, + "learning_rate": 5.077396537116834e-06, + "loss": 0.7152, + "step": 9928 + }, + { + "epoch": 0.510278548668928, + "grad_norm": 1.0374456644058228, + "learning_rate": 5.076564381694855e-06, + "loss": 0.7224, + "step": 9929 + }, + { + "epoch": 0.5103299414122726, + "grad_norm": 1.1077392101287842, + "learning_rate": 5.075732224151591e-06, + "loss": 0.7681, + "step": 9930 + }, + { + "epoch": 0.5103813341556173, + "grad_norm": 1.0155898332595825, + "learning_rate": 5.0749000645101024e-06, + "loss": 0.6725, + "step": 9931 + }, + { + "epoch": 0.5104327268989619, + "grad_norm": 1.1397408246994019, + "learning_rate": 5.0740679027934396e-06, + "loss": 0.7295, + "step": 9932 + }, + { + "epoch": 0.5104841196423066, + "grad_norm": 1.0489726066589355, + "learning_rate": 5.073235739024662e-06, + "loss": 0.7521, + "step": 9933 + }, + { + "epoch": 0.5105355123856511, + "grad_norm": 1.1073660850524902, + "learning_rate": 5.072403573226824e-06, + "loss": 0.7808, + "step": 9934 + }, + { + "epoch": 0.5105869051289957, + "grad_norm": 0.9545060396194458, + "learning_rate": 5.0715714054229805e-06, + "loss": 0.6692, + "step": 9935 + }, + { + "epoch": 0.5106382978723404, + "grad_norm": 1.1086797714233398, + "learning_rate": 5.070739235636191e-06, + "loss": 0.7343, + "step": 9936 + }, + { + "epoch": 0.510689690615685, + "grad_norm": 1.048006534576416, + "learning_rate": 5.0699070638895085e-06, + "loss": 0.704, + "step": 9937 + }, + { + "epoch": 0.5107410833590297, + "grad_norm": 1.081774115562439, + "learning_rate": 5.069074890205988e-06, + "loss": 0.7776, + "step": 9938 + }, + { + "epoch": 0.5107924761023743, + "grad_norm": 1.0957489013671875, + "learning_rate": 5.0682427146086895e-06, + "loss": 0.705, + "step": 9939 + }, + { + "epoch": 0.510843868845719, + "grad_norm": 1.0766584873199463, + "learning_rate": 5.067410537120666e-06, + "loss": 0.6759, + "step": 9940 + }, + { + "epoch": 0.5108952615890636, + "grad_norm": 0.7760927677154541, + "learning_rate": 5.066578357764977e-06, + "loss": 0.6445, + "step": 9941 + }, + { + "epoch": 0.5109466543324083, + "grad_norm": 1.0265936851501465, + "learning_rate": 5.065746176564674e-06, + "loss": 0.6772, + "step": 9942 + }, + { + "epoch": 0.5109980470757529, + "grad_norm": 1.101129174232483, + "learning_rate": 5.064913993542816e-06, + "loss": 0.6589, + "step": 9943 + }, + { + "epoch": 0.5110494398190976, + "grad_norm": 1.0290597677230835, + "learning_rate": 5.0640818087224585e-06, + "loss": 0.7251, + "step": 9944 + }, + { + "epoch": 0.5111008325624422, + "grad_norm": 1.0331300497055054, + "learning_rate": 5.063249622126659e-06, + "loss": 0.7202, + "step": 9945 + }, + { + "epoch": 0.5111522253057869, + "grad_norm": 1.0417733192443848, + "learning_rate": 5.062417433778474e-06, + "loss": 0.7423, + "step": 9946 + }, + { + "epoch": 0.5112036180491315, + "grad_norm": 1.0495240688323975, + "learning_rate": 5.0615852437009595e-06, + "loss": 0.7096, + "step": 9947 + }, + { + "epoch": 0.5112550107924761, + "grad_norm": 0.7170805931091309, + "learning_rate": 5.060753051917171e-06, + "loss": 0.6781, + "step": 9948 + }, + { + "epoch": 0.5113064035358208, + "grad_norm": 1.0996216535568237, + "learning_rate": 5.059920858450168e-06, + "loss": 0.7297, + "step": 9949 + }, + { + "epoch": 0.5113577962791653, + "grad_norm": 0.9236396551132202, + "learning_rate": 5.0590886633230055e-06, + "loss": 0.7292, + "step": 9950 + }, + { + "epoch": 0.51140918902251, + "grad_norm": 1.0355949401855469, + "learning_rate": 5.058256466558737e-06, + "loss": 0.6779, + "step": 9951 + }, + { + "epoch": 0.5114605817658546, + "grad_norm": 0.7145227789878845, + "learning_rate": 5.057424268180425e-06, + "loss": 0.6418, + "step": 9952 + }, + { + "epoch": 0.5115119745091993, + "grad_norm": 0.7311011552810669, + "learning_rate": 5.056592068211123e-06, + "loss": 0.6776, + "step": 9953 + }, + { + "epoch": 0.5115633672525439, + "grad_norm": 1.0962895154953003, + "learning_rate": 5.055759866673887e-06, + "loss": 0.78, + "step": 9954 + }, + { + "epoch": 0.5116147599958886, + "grad_norm": 1.0451743602752686, + "learning_rate": 5.054927663591777e-06, + "loss": 0.7727, + "step": 9955 + }, + { + "epoch": 0.5116661527392332, + "grad_norm": 1.0709999799728394, + "learning_rate": 5.054095458987845e-06, + "loss": 0.733, + "step": 9956 + }, + { + "epoch": 0.5117175454825779, + "grad_norm": 1.0972120761871338, + "learning_rate": 5.053263252885154e-06, + "loss": 0.7979, + "step": 9957 + }, + { + "epoch": 0.5117689382259225, + "grad_norm": 0.9974589943885803, + "learning_rate": 5.052431045306758e-06, + "loss": 0.6952, + "step": 9958 + }, + { + "epoch": 0.5118203309692672, + "grad_norm": 1.06667959690094, + "learning_rate": 5.051598836275713e-06, + "loss": 0.7011, + "step": 9959 + }, + { + "epoch": 0.5118717237126118, + "grad_norm": 1.2247523069381714, + "learning_rate": 5.050766625815078e-06, + "loss": 0.6712, + "step": 9960 + }, + { + "epoch": 0.5119231164559565, + "grad_norm": 1.089448094367981, + "learning_rate": 5.04993441394791e-06, + "loss": 0.7714, + "step": 9961 + }, + { + "epoch": 0.5119745091993011, + "grad_norm": 1.1249605417251587, + "learning_rate": 5.049102200697263e-06, + "loss": 0.7324, + "step": 9962 + }, + { + "epoch": 0.5120259019426457, + "grad_norm": 1.035601019859314, + "learning_rate": 5.0482699860862e-06, + "loss": 0.7162, + "step": 9963 + }, + { + "epoch": 0.5120772946859904, + "grad_norm": 1.060017466545105, + "learning_rate": 5.047437770137772e-06, + "loss": 0.6778, + "step": 9964 + }, + { + "epoch": 0.5121286874293349, + "grad_norm": 1.140423059463501, + "learning_rate": 5.046605552875039e-06, + "loss": 0.7114, + "step": 9965 + }, + { + "epoch": 0.5121800801726796, + "grad_norm": 1.0284943580627441, + "learning_rate": 5.04577333432106e-06, + "loss": 0.7291, + "step": 9966 + }, + { + "epoch": 0.5122314729160242, + "grad_norm": 1.0369336605072021, + "learning_rate": 5.04494111449889e-06, + "loss": 0.7148, + "step": 9967 + }, + { + "epoch": 0.5122828656593689, + "grad_norm": 0.7402137517929077, + "learning_rate": 5.0441088934315875e-06, + "loss": 0.687, + "step": 9968 + }, + { + "epoch": 0.5123342584027135, + "grad_norm": 0.7451249361038208, + "learning_rate": 5.0432766711422095e-06, + "loss": 0.6985, + "step": 9969 + }, + { + "epoch": 0.5123856511460582, + "grad_norm": 1.0351440906524658, + "learning_rate": 5.042444447653814e-06, + "loss": 0.709, + "step": 9970 + }, + { + "epoch": 0.5124370438894028, + "grad_norm": 1.1339695453643799, + "learning_rate": 5.041612222989458e-06, + "loss": 0.754, + "step": 9971 + }, + { + "epoch": 0.5124884366327475, + "grad_norm": 1.0996034145355225, + "learning_rate": 5.040779997172198e-06, + "loss": 0.7807, + "step": 9972 + }, + { + "epoch": 0.5125398293760921, + "grad_norm": 1.024046540260315, + "learning_rate": 5.039947770225094e-06, + "loss": 0.7229, + "step": 9973 + }, + { + "epoch": 0.5125912221194368, + "grad_norm": 1.1051123142242432, + "learning_rate": 5.039115542171201e-06, + "loss": 0.7109, + "step": 9974 + }, + { + "epoch": 0.5126426148627814, + "grad_norm": 1.1773992776870728, + "learning_rate": 5.0382833130335785e-06, + "loss": 0.7709, + "step": 9975 + }, + { + "epoch": 0.512694007606126, + "grad_norm": 1.102802038192749, + "learning_rate": 5.037451082835282e-06, + "loss": 0.7984, + "step": 9976 + }, + { + "epoch": 0.5127454003494707, + "grad_norm": 1.1007990837097168, + "learning_rate": 5.036618851599372e-06, + "loss": 0.7723, + "step": 9977 + }, + { + "epoch": 0.5127967930928153, + "grad_norm": 1.059372901916504, + "learning_rate": 5.035786619348904e-06, + "loss": 0.724, + "step": 9978 + }, + { + "epoch": 0.51284818583616, + "grad_norm": 0.7930027842521667, + "learning_rate": 5.0349543861069375e-06, + "loss": 0.659, + "step": 9979 + }, + { + "epoch": 0.5128995785795045, + "grad_norm": 1.0394139289855957, + "learning_rate": 5.034122151896528e-06, + "loss": 0.7419, + "step": 9980 + }, + { + "epoch": 0.5129509713228492, + "grad_norm": 1.0532726049423218, + "learning_rate": 5.0332899167407345e-06, + "loss": 0.7668, + "step": 9981 + }, + { + "epoch": 0.5130023640661938, + "grad_norm": 1.030001163482666, + "learning_rate": 5.032457680662617e-06, + "loss": 0.7421, + "step": 9982 + }, + { + "epoch": 0.5130537568095385, + "grad_norm": 1.0722559690475464, + "learning_rate": 5.031625443685229e-06, + "loss": 0.7684, + "step": 9983 + }, + { + "epoch": 0.5131051495528831, + "grad_norm": 1.0781745910644531, + "learning_rate": 5.03079320583163e-06, + "loss": 0.6827, + "step": 9984 + }, + { + "epoch": 0.5131565422962278, + "grad_norm": 0.8634644150733948, + "learning_rate": 5.0299609671248794e-06, + "loss": 0.6325, + "step": 9985 + }, + { + "epoch": 0.5132079350395724, + "grad_norm": 1.0503097772598267, + "learning_rate": 5.029128727588033e-06, + "loss": 0.7377, + "step": 9986 + }, + { + "epoch": 0.513259327782917, + "grad_norm": 1.04427969455719, + "learning_rate": 5.028296487244151e-06, + "loss": 0.7441, + "step": 9987 + }, + { + "epoch": 0.5133107205262617, + "grad_norm": 1.1315069198608398, + "learning_rate": 5.027464246116289e-06, + "loss": 0.7332, + "step": 9988 + }, + { + "epoch": 0.5133621132696063, + "grad_norm": 1.2521798610687256, + "learning_rate": 5.026632004227507e-06, + "loss": 0.6841, + "step": 9989 + }, + { + "epoch": 0.513413506012951, + "grad_norm": 1.0465410947799683, + "learning_rate": 5.025799761600863e-06, + "loss": 0.6803, + "step": 9990 + }, + { + "epoch": 0.5134648987562956, + "grad_norm": 1.0811731815338135, + "learning_rate": 5.024967518259412e-06, + "loss": 0.7408, + "step": 9991 + }, + { + "epoch": 0.5135162914996403, + "grad_norm": 1.020810604095459, + "learning_rate": 5.024135274226215e-06, + "loss": 0.7477, + "step": 9992 + }, + { + "epoch": 0.5135676842429849, + "grad_norm": 1.0928665399551392, + "learning_rate": 5.02330302952433e-06, + "loss": 0.7356, + "step": 9993 + }, + { + "epoch": 0.5136190769863296, + "grad_norm": 1.1087265014648438, + "learning_rate": 5.022470784176813e-06, + "loss": 0.6909, + "step": 9994 + }, + { + "epoch": 0.5136704697296741, + "grad_norm": 1.0444000959396362, + "learning_rate": 5.021638538206722e-06, + "loss": 0.7003, + "step": 9995 + }, + { + "epoch": 0.5137218624730188, + "grad_norm": 0.978169322013855, + "learning_rate": 5.020806291637119e-06, + "loss": 0.6627, + "step": 9996 + }, + { + "epoch": 0.5137732552163634, + "grad_norm": 1.0952918529510498, + "learning_rate": 5.0199740444910585e-06, + "loss": 0.714, + "step": 9997 + }, + { + "epoch": 0.5138246479597081, + "grad_norm": 1.0135573148727417, + "learning_rate": 5.0191417967916e-06, + "loss": 0.7325, + "step": 9998 + }, + { + "epoch": 0.5138760407030527, + "grad_norm": 1.144079327583313, + "learning_rate": 5.018309548561801e-06, + "loss": 0.716, + "step": 9999 + }, + { + "epoch": 0.5139274334463974, + "grad_norm": 1.0562576055526733, + "learning_rate": 5.01747729982472e-06, + "loss": 0.7315, + "step": 10000 + }, + { + "epoch": 0.513978826189742, + "grad_norm": 1.0727083683013916, + "learning_rate": 5.016645050603416e-06, + "loss": 0.6809, + "step": 10001 + }, + { + "epoch": 0.5140302189330866, + "grad_norm": 0.7119978070259094, + "learning_rate": 5.015812800920945e-06, + "loss": 0.6537, + "step": 10002 + }, + { + "epoch": 0.5140816116764313, + "grad_norm": 0.7321502566337585, + "learning_rate": 5.014980550800368e-06, + "loss": 0.6737, + "step": 10003 + }, + { + "epoch": 0.5141330044197759, + "grad_norm": 1.125680923461914, + "learning_rate": 5.01414830026474e-06, + "loss": 0.7386, + "step": 10004 + }, + { + "epoch": 0.5141843971631206, + "grad_norm": 1.0520412921905518, + "learning_rate": 5.0133160493371225e-06, + "loss": 0.7153, + "step": 10005 + }, + { + "epoch": 0.5142357899064652, + "grad_norm": 1.0806152820587158, + "learning_rate": 5.01248379804057e-06, + "loss": 0.7748, + "step": 10006 + }, + { + "epoch": 0.5142871826498099, + "grad_norm": 0.7093769907951355, + "learning_rate": 5.0116515463981445e-06, + "loss": 0.644, + "step": 10007 + }, + { + "epoch": 0.5143385753931545, + "grad_norm": 1.1175665855407715, + "learning_rate": 5.010819294432903e-06, + "loss": 0.7956, + "step": 10008 + }, + { + "epoch": 0.5143899681364992, + "grad_norm": 1.0346875190734863, + "learning_rate": 5.0099870421679045e-06, + "loss": 0.8035, + "step": 10009 + }, + { + "epoch": 0.5144413608798437, + "grad_norm": 1.0701708793640137, + "learning_rate": 5.0091547896262035e-06, + "loss": 0.781, + "step": 10010 + }, + { + "epoch": 0.5144927536231884, + "grad_norm": 1.1261237859725952, + "learning_rate": 5.008322536830863e-06, + "loss": 0.7357, + "step": 10011 + }, + { + "epoch": 0.514544146366533, + "grad_norm": 1.097928524017334, + "learning_rate": 5.007490283804939e-06, + "loss": 0.7564, + "step": 10012 + }, + { + "epoch": 0.5145955391098777, + "grad_norm": 1.0451159477233887, + "learning_rate": 5.006658030571489e-06, + "loss": 0.6905, + "step": 10013 + }, + { + "epoch": 0.5146469318532223, + "grad_norm": 1.0826547145843506, + "learning_rate": 5.005825777153576e-06, + "loss": 0.7205, + "step": 10014 + }, + { + "epoch": 0.514698324596567, + "grad_norm": 0.7968506813049316, + "learning_rate": 5.004993523574251e-06, + "loss": 0.6967, + "step": 10015 + }, + { + "epoch": 0.5147497173399116, + "grad_norm": 1.021269679069519, + "learning_rate": 5.004161269856577e-06, + "loss": 0.7051, + "step": 10016 + }, + { + "epoch": 0.5148011100832562, + "grad_norm": 1.056077480316162, + "learning_rate": 5.003329016023611e-06, + "loss": 0.6972, + "step": 10017 + }, + { + "epoch": 0.5148525028266009, + "grad_norm": 0.735413670539856, + "learning_rate": 5.002496762098412e-06, + "loss": 0.6744, + "step": 10018 + }, + { + "epoch": 0.5149038955699455, + "grad_norm": 0.754229724407196, + "learning_rate": 5.00166450810404e-06, + "loss": 0.6715, + "step": 10019 + }, + { + "epoch": 0.5149552883132902, + "grad_norm": 1.1047674417495728, + "learning_rate": 5.000832254063549e-06, + "loss": 0.7997, + "step": 10020 + }, + { + "epoch": 0.5150066810566348, + "grad_norm": 1.0972453355789185, + "learning_rate": 5e-06, + "loss": 0.7358, + "step": 10021 + }, + { + "epoch": 0.5150580737999795, + "grad_norm": 1.1739020347595215, + "learning_rate": 4.999167745936452e-06, + "loss": 0.8159, + "step": 10022 + }, + { + "epoch": 0.5151094665433241, + "grad_norm": 1.0756162405014038, + "learning_rate": 4.998335491895963e-06, + "loss": 0.7041, + "step": 10023 + }, + { + "epoch": 0.5151608592866688, + "grad_norm": 1.083341360092163, + "learning_rate": 4.9975032379015884e-06, + "loss": 0.7095, + "step": 10024 + }, + { + "epoch": 0.5152122520300133, + "grad_norm": 1.0261797904968262, + "learning_rate": 4.9966709839763895e-06, + "loss": 0.7285, + "step": 10025 + }, + { + "epoch": 0.515263644773358, + "grad_norm": 1.1664340496063232, + "learning_rate": 4.995838730143425e-06, + "loss": 0.751, + "step": 10026 + }, + { + "epoch": 0.5153150375167026, + "grad_norm": 1.1296825408935547, + "learning_rate": 4.995006476425751e-06, + "loss": 0.7619, + "step": 10027 + }, + { + "epoch": 0.5153664302600472, + "grad_norm": 0.9979335069656372, + "learning_rate": 4.994174222846426e-06, + "loss": 0.7322, + "step": 10028 + }, + { + "epoch": 0.5154178230033919, + "grad_norm": 0.8124058246612549, + "learning_rate": 4.99334196942851e-06, + "loss": 0.6606, + "step": 10029 + }, + { + "epoch": 0.5154692157467365, + "grad_norm": 0.8053849339485168, + "learning_rate": 4.992509716195063e-06, + "loss": 0.6602, + "step": 10030 + }, + { + "epoch": 0.5155206084900812, + "grad_norm": 1.1213752031326294, + "learning_rate": 4.991677463169138e-06, + "loss": 0.7262, + "step": 10031 + }, + { + "epoch": 0.5155720012334258, + "grad_norm": 0.7765405774116516, + "learning_rate": 4.9908452103737965e-06, + "loss": 0.7036, + "step": 10032 + }, + { + "epoch": 0.5156233939767705, + "grad_norm": 1.114177942276001, + "learning_rate": 4.990012957832099e-06, + "loss": 0.7338, + "step": 10033 + }, + { + "epoch": 0.5156747867201151, + "grad_norm": 1.068792700767517, + "learning_rate": 4.989180705567098e-06, + "loss": 0.7376, + "step": 10034 + }, + { + "epoch": 0.5157261794634598, + "grad_norm": 1.0835320949554443, + "learning_rate": 4.988348453601856e-06, + "loss": 0.7111, + "step": 10035 + }, + { + "epoch": 0.5157775722068044, + "grad_norm": 1.068804144859314, + "learning_rate": 4.987516201959431e-06, + "loss": 0.6811, + "step": 10036 + }, + { + "epoch": 0.5158289649501491, + "grad_norm": 1.0691921710968018, + "learning_rate": 4.986683950662879e-06, + "loss": 0.7853, + "step": 10037 + }, + { + "epoch": 0.5158803576934937, + "grad_norm": 1.025018572807312, + "learning_rate": 4.98585169973526e-06, + "loss": 0.7909, + "step": 10038 + }, + { + "epoch": 0.5159317504368384, + "grad_norm": 0.9912868738174438, + "learning_rate": 4.985019449199635e-06, + "loss": 0.6044, + "step": 10039 + }, + { + "epoch": 0.515983143180183, + "grad_norm": 1.0241923332214355, + "learning_rate": 4.9841871990790565e-06, + "loss": 0.7355, + "step": 10040 + }, + { + "epoch": 0.5160345359235275, + "grad_norm": 0.734035849571228, + "learning_rate": 4.9833549493965854e-06, + "loss": 0.6246, + "step": 10041 + }, + { + "epoch": 0.5160859286668722, + "grad_norm": 1.035252332687378, + "learning_rate": 4.982522700175282e-06, + "loss": 0.7083, + "step": 10042 + }, + { + "epoch": 0.5161373214102168, + "grad_norm": 1.0407353639602661, + "learning_rate": 4.981690451438201e-06, + "loss": 0.7477, + "step": 10043 + }, + { + "epoch": 0.5161887141535615, + "grad_norm": 1.0906702280044556, + "learning_rate": 4.980858203208402e-06, + "loss": 0.7333, + "step": 10044 + }, + { + "epoch": 0.5162401068969061, + "grad_norm": 0.9903335571289062, + "learning_rate": 4.980025955508942e-06, + "loss": 0.6734, + "step": 10045 + }, + { + "epoch": 0.5162914996402508, + "grad_norm": 1.0677039623260498, + "learning_rate": 4.979193708362882e-06, + "loss": 0.6967, + "step": 10046 + }, + { + "epoch": 0.5163428923835954, + "grad_norm": 1.1202876567840576, + "learning_rate": 4.978361461793279e-06, + "loss": 0.7315, + "step": 10047 + }, + { + "epoch": 0.5163942851269401, + "grad_norm": 1.0525214672088623, + "learning_rate": 4.977529215823189e-06, + "loss": 0.7153, + "step": 10048 + }, + { + "epoch": 0.5164456778702847, + "grad_norm": 1.147531509399414, + "learning_rate": 4.9766969704756725e-06, + "loss": 0.7864, + "step": 10049 + }, + { + "epoch": 0.5164970706136294, + "grad_norm": 1.084831953048706, + "learning_rate": 4.9758647257737865e-06, + "loss": 0.7198, + "step": 10050 + }, + { + "epoch": 0.516548463356974, + "grad_norm": 0.8449286818504333, + "learning_rate": 4.975032481740589e-06, + "loss": 0.7134, + "step": 10051 + }, + { + "epoch": 0.5165998561003187, + "grad_norm": 1.09345543384552, + "learning_rate": 4.97420023839914e-06, + "loss": 0.7746, + "step": 10052 + }, + { + "epoch": 0.5166512488436633, + "grad_norm": 1.1523234844207764, + "learning_rate": 4.973367995772494e-06, + "loss": 0.7652, + "step": 10053 + }, + { + "epoch": 0.516702641587008, + "grad_norm": 1.1210484504699707, + "learning_rate": 4.972535753883712e-06, + "loss": 0.7441, + "step": 10054 + }, + { + "epoch": 0.5167540343303526, + "grad_norm": 0.9923346042633057, + "learning_rate": 4.971703512755852e-06, + "loss": 0.6943, + "step": 10055 + }, + { + "epoch": 0.5168054270736971, + "grad_norm": 1.0453273057937622, + "learning_rate": 4.970871272411968e-06, + "loss": 0.7679, + "step": 10056 + }, + { + "epoch": 0.5168568198170418, + "grad_norm": 1.0931272506713867, + "learning_rate": 4.970039032875122e-06, + "loss": 0.71, + "step": 10057 + }, + { + "epoch": 0.5169082125603864, + "grad_norm": 1.0573817491531372, + "learning_rate": 4.969206794168372e-06, + "loss": 0.6858, + "step": 10058 + }, + { + "epoch": 0.5169596053037311, + "grad_norm": 1.043793797492981, + "learning_rate": 4.968374556314774e-06, + "loss": 0.7537, + "step": 10059 + }, + { + "epoch": 0.5170109980470757, + "grad_norm": 1.0709426403045654, + "learning_rate": 4.967542319337385e-06, + "loss": 0.6552, + "step": 10060 + }, + { + "epoch": 0.5170623907904204, + "grad_norm": 1.0774037837982178, + "learning_rate": 4.966710083259265e-06, + "loss": 0.758, + "step": 10061 + }, + { + "epoch": 0.517113783533765, + "grad_norm": 1.1162967681884766, + "learning_rate": 4.965877848103474e-06, + "loss": 0.7456, + "step": 10062 + }, + { + "epoch": 0.5171651762771097, + "grad_norm": 1.0241361856460571, + "learning_rate": 4.965045613893064e-06, + "loss": 0.654, + "step": 10063 + }, + { + "epoch": 0.5172165690204543, + "grad_norm": 1.0501713752746582, + "learning_rate": 4.964213380651096e-06, + "loss": 0.6959, + "step": 10064 + }, + { + "epoch": 0.517267961763799, + "grad_norm": 1.083525538444519, + "learning_rate": 4.96338114840063e-06, + "loss": 0.667, + "step": 10065 + }, + { + "epoch": 0.5173193545071436, + "grad_norm": 1.0726380348205566, + "learning_rate": 4.962548917164719e-06, + "loss": 0.7419, + "step": 10066 + }, + { + "epoch": 0.5173707472504883, + "grad_norm": 0.7506343722343445, + "learning_rate": 4.961716686966423e-06, + "loss": 0.6315, + "step": 10067 + }, + { + "epoch": 0.5174221399938329, + "grad_norm": 0.7659317255020142, + "learning_rate": 4.960884457828801e-06, + "loss": 0.6765, + "step": 10068 + }, + { + "epoch": 0.5174735327371776, + "grad_norm": 1.0174864530563354, + "learning_rate": 4.960052229774908e-06, + "loss": 0.6993, + "step": 10069 + }, + { + "epoch": 0.5175249254805222, + "grad_norm": 1.0752745866775513, + "learning_rate": 4.959220002827802e-06, + "loss": 0.7202, + "step": 10070 + }, + { + "epoch": 0.5175763182238667, + "grad_norm": 1.0877786874771118, + "learning_rate": 4.9583877770105446e-06, + "loss": 0.7045, + "step": 10071 + }, + { + "epoch": 0.5176277109672114, + "grad_norm": 1.0791257619857788, + "learning_rate": 4.957555552346188e-06, + "loss": 0.7238, + "step": 10072 + }, + { + "epoch": 0.517679103710556, + "grad_norm": 1.1036862134933472, + "learning_rate": 4.956723328857791e-06, + "loss": 0.7397, + "step": 10073 + }, + { + "epoch": 0.5177304964539007, + "grad_norm": 1.1329838037490845, + "learning_rate": 4.955891106568414e-06, + "loss": 0.7505, + "step": 10074 + }, + { + "epoch": 0.5177818891972453, + "grad_norm": 1.0794751644134521, + "learning_rate": 4.9550588855011115e-06, + "loss": 0.7816, + "step": 10075 + }, + { + "epoch": 0.51783328194059, + "grad_norm": 1.0275871753692627, + "learning_rate": 4.954226665678941e-06, + "loss": 0.696, + "step": 10076 + }, + { + "epoch": 0.5178846746839346, + "grad_norm": 1.032483696937561, + "learning_rate": 4.953394447124961e-06, + "loss": 0.7308, + "step": 10077 + }, + { + "epoch": 0.5179360674272793, + "grad_norm": 1.056928277015686, + "learning_rate": 4.95256222986223e-06, + "loss": 0.6688, + "step": 10078 + }, + { + "epoch": 0.5179874601706239, + "grad_norm": 1.0809650421142578, + "learning_rate": 4.951730013913803e-06, + "loss": 0.7119, + "step": 10079 + }, + { + "epoch": 0.5180388529139686, + "grad_norm": 0.840225338935852, + "learning_rate": 4.950897799302737e-06, + "loss": 0.6552, + "step": 10080 + }, + { + "epoch": 0.5180902456573132, + "grad_norm": 1.2608189582824707, + "learning_rate": 4.950065586052093e-06, + "loss": 0.7485, + "step": 10081 + }, + { + "epoch": 0.5181416384006579, + "grad_norm": 1.1171305179595947, + "learning_rate": 4.949233374184923e-06, + "loss": 0.6957, + "step": 10082 + }, + { + "epoch": 0.5181930311440025, + "grad_norm": 1.106289029121399, + "learning_rate": 4.948401163724288e-06, + "loss": 0.7655, + "step": 10083 + }, + { + "epoch": 0.5182444238873471, + "grad_norm": 1.152886986732483, + "learning_rate": 4.9475689546932445e-06, + "loss": 0.7348, + "step": 10084 + }, + { + "epoch": 0.5182958166306918, + "grad_norm": 1.0269783735275269, + "learning_rate": 4.946736747114847e-06, + "loss": 0.6873, + "step": 10085 + }, + { + "epoch": 0.5183472093740363, + "grad_norm": 1.0755980014801025, + "learning_rate": 4.945904541012155e-06, + "loss": 0.7754, + "step": 10086 + }, + { + "epoch": 0.518398602117381, + "grad_norm": 1.166402816772461, + "learning_rate": 4.945072336408226e-06, + "loss": 0.7383, + "step": 10087 + }, + { + "epoch": 0.5184499948607256, + "grad_norm": 1.0920078754425049, + "learning_rate": 4.9442401333261134e-06, + "loss": 0.7521, + "step": 10088 + }, + { + "epoch": 0.5185013876040703, + "grad_norm": 1.0589168071746826, + "learning_rate": 4.943407931788878e-06, + "loss": 0.7312, + "step": 10089 + }, + { + "epoch": 0.5185527803474149, + "grad_norm": 1.1120007038116455, + "learning_rate": 4.942575731819577e-06, + "loss": 0.7695, + "step": 10090 + }, + { + "epoch": 0.5186041730907596, + "grad_norm": 1.0632996559143066, + "learning_rate": 4.941743533441264e-06, + "loss": 0.7013, + "step": 10091 + }, + { + "epoch": 0.5186555658341042, + "grad_norm": 1.064518928527832, + "learning_rate": 4.940911336676996e-06, + "loss": 0.6918, + "step": 10092 + }, + { + "epoch": 0.5187069585774489, + "grad_norm": 0.9917042851448059, + "learning_rate": 4.940079141549832e-06, + "loss": 0.693, + "step": 10093 + }, + { + "epoch": 0.5187583513207935, + "grad_norm": 1.0558648109436035, + "learning_rate": 4.93924694808283e-06, + "loss": 0.6743, + "step": 10094 + }, + { + "epoch": 0.5188097440641382, + "grad_norm": 1.0437923669815063, + "learning_rate": 4.938414756299041e-06, + "loss": 0.7187, + "step": 10095 + }, + { + "epoch": 0.5188611368074828, + "grad_norm": 1.1169308423995972, + "learning_rate": 4.937582566221527e-06, + "loss": 0.7063, + "step": 10096 + }, + { + "epoch": 0.5189125295508275, + "grad_norm": 1.0844709873199463, + "learning_rate": 4.9367503778733416e-06, + "loss": 0.7864, + "step": 10097 + }, + { + "epoch": 0.5189639222941721, + "grad_norm": 1.0871585607528687, + "learning_rate": 4.935918191277543e-06, + "loss": 0.747, + "step": 10098 + }, + { + "epoch": 0.5190153150375167, + "grad_norm": 1.0182937383651733, + "learning_rate": 4.935086006457186e-06, + "loss": 0.6907, + "step": 10099 + }, + { + "epoch": 0.5190667077808614, + "grad_norm": 1.0490105152130127, + "learning_rate": 4.934253823435329e-06, + "loss": 0.7234, + "step": 10100 + }, + { + "epoch": 0.5191181005242059, + "grad_norm": 1.078484058380127, + "learning_rate": 4.933421642235026e-06, + "loss": 0.7675, + "step": 10101 + }, + { + "epoch": 0.5191694932675506, + "grad_norm": 1.065463662147522, + "learning_rate": 4.932589462879334e-06, + "loss": 0.6461, + "step": 10102 + }, + { + "epoch": 0.5192208860108952, + "grad_norm": 1.0733187198638916, + "learning_rate": 4.931757285391312e-06, + "loss": 0.7449, + "step": 10103 + }, + { + "epoch": 0.5192722787542399, + "grad_norm": 1.0455801486968994, + "learning_rate": 4.930925109794013e-06, + "loss": 0.6703, + "step": 10104 + }, + { + "epoch": 0.5193236714975845, + "grad_norm": 1.0816839933395386, + "learning_rate": 4.930092936110493e-06, + "loss": 0.7065, + "step": 10105 + }, + { + "epoch": 0.5193750642409292, + "grad_norm": 1.0817430019378662, + "learning_rate": 4.929260764363812e-06, + "loss": 0.7376, + "step": 10106 + }, + { + "epoch": 0.5194264569842738, + "grad_norm": 1.0709303617477417, + "learning_rate": 4.92842859457702e-06, + "loss": 0.7111, + "step": 10107 + }, + { + "epoch": 0.5194778497276185, + "grad_norm": 1.1121569871902466, + "learning_rate": 4.927596426773178e-06, + "loss": 0.7007, + "step": 10108 + }, + { + "epoch": 0.5195292424709631, + "grad_norm": 0.9839836955070496, + "learning_rate": 4.926764260975339e-06, + "loss": 0.6718, + "step": 10109 + }, + { + "epoch": 0.5195806352143078, + "grad_norm": 1.1132720708847046, + "learning_rate": 4.925932097206562e-06, + "loss": 0.7776, + "step": 10110 + }, + { + "epoch": 0.5196320279576524, + "grad_norm": 0.9853299260139465, + "learning_rate": 4.925099935489899e-06, + "loss": 0.6324, + "step": 10111 + }, + { + "epoch": 0.519683420700997, + "grad_norm": 2.1348841190338135, + "learning_rate": 4.924267775848409e-06, + "loss": 0.7693, + "step": 10112 + }, + { + "epoch": 0.5197348134443417, + "grad_norm": 1.0486620664596558, + "learning_rate": 4.923435618305147e-06, + "loss": 0.6807, + "step": 10113 + }, + { + "epoch": 0.5197862061876863, + "grad_norm": 1.0264983177185059, + "learning_rate": 4.922603462883167e-06, + "loss": 0.7206, + "step": 10114 + }, + { + "epoch": 0.519837598931031, + "grad_norm": 1.076831340789795, + "learning_rate": 4.921771309605525e-06, + "loss": 0.7243, + "step": 10115 + }, + { + "epoch": 0.5198889916743756, + "grad_norm": 1.3218351602554321, + "learning_rate": 4.92093915849528e-06, + "loss": 0.7257, + "step": 10116 + }, + { + "epoch": 0.5199403844177202, + "grad_norm": 1.0666029453277588, + "learning_rate": 4.920107009575482e-06, + "loss": 0.7127, + "step": 10117 + }, + { + "epoch": 0.5199917771610648, + "grad_norm": 1.143746256828308, + "learning_rate": 4.919274862869189e-06, + "loss": 0.6984, + "step": 10118 + }, + { + "epoch": 0.5200431699044095, + "grad_norm": 0.7287662029266357, + "learning_rate": 4.918442718399458e-06, + "loss": 0.7039, + "step": 10119 + }, + { + "epoch": 0.5200945626477541, + "grad_norm": 1.0016412734985352, + "learning_rate": 4.917610576189341e-06, + "loss": 0.6697, + "step": 10120 + }, + { + "epoch": 0.5201459553910988, + "grad_norm": 1.0646061897277832, + "learning_rate": 4.916778436261895e-06, + "loss": 0.7257, + "step": 10121 + }, + { + "epoch": 0.5201973481344434, + "grad_norm": 1.1687465906143188, + "learning_rate": 4.915946298640177e-06, + "loss": 0.7019, + "step": 10122 + }, + { + "epoch": 0.520248740877788, + "grad_norm": 1.0763282775878906, + "learning_rate": 4.915114163347239e-06, + "loss": 0.795, + "step": 10123 + }, + { + "epoch": 0.5203001336211327, + "grad_norm": 1.0665556192398071, + "learning_rate": 4.914282030406137e-06, + "loss": 0.7865, + "step": 10124 + }, + { + "epoch": 0.5203515263644773, + "grad_norm": 0.7754822969436646, + "learning_rate": 4.913449899839926e-06, + "loss": 0.6761, + "step": 10125 + }, + { + "epoch": 0.520402919107822, + "grad_norm": 1.0359448194503784, + "learning_rate": 4.912617771671663e-06, + "loss": 0.7521, + "step": 10126 + }, + { + "epoch": 0.5204543118511666, + "grad_norm": 1.0206087827682495, + "learning_rate": 4.911785645924399e-06, + "loss": 0.6975, + "step": 10127 + }, + { + "epoch": 0.5205057045945113, + "grad_norm": 1.1768959760665894, + "learning_rate": 4.910953522621191e-06, + "loss": 0.6475, + "step": 10128 + }, + { + "epoch": 0.5205570973378559, + "grad_norm": 1.079666256904602, + "learning_rate": 4.910121401785094e-06, + "loss": 0.6817, + "step": 10129 + }, + { + "epoch": 0.5206084900812006, + "grad_norm": 0.7699293494224548, + "learning_rate": 4.909289283439161e-06, + "loss": 0.671, + "step": 10130 + }, + { + "epoch": 0.5206598828245452, + "grad_norm": 1.0507467985153198, + "learning_rate": 4.9084571676064475e-06, + "loss": 0.7393, + "step": 10131 + }, + { + "epoch": 0.5207112755678898, + "grad_norm": 1.1871628761291504, + "learning_rate": 4.9076250543100105e-06, + "loss": 0.7654, + "step": 10132 + }, + { + "epoch": 0.5207626683112344, + "grad_norm": 0.8325191140174866, + "learning_rate": 4.9067929435729e-06, + "loss": 0.6644, + "step": 10133 + }, + { + "epoch": 0.5208140610545791, + "grad_norm": 1.1358674764633179, + "learning_rate": 4.905960835418173e-06, + "loss": 0.7668, + "step": 10134 + }, + { + "epoch": 0.5208654537979237, + "grad_norm": 1.1057240962982178, + "learning_rate": 4.905128729868884e-06, + "loss": 0.7026, + "step": 10135 + }, + { + "epoch": 0.5209168465412684, + "grad_norm": 0.7924284338951111, + "learning_rate": 4.904296626948085e-06, + "loss": 0.6731, + "step": 10136 + }, + { + "epoch": 0.520968239284613, + "grad_norm": 1.0748714208602905, + "learning_rate": 4.903464526678831e-06, + "loss": 0.681, + "step": 10137 + }, + { + "epoch": 0.5210196320279576, + "grad_norm": 1.0847370624542236, + "learning_rate": 4.902632429084177e-06, + "loss": 0.7345, + "step": 10138 + }, + { + "epoch": 0.5210710247713023, + "grad_norm": 1.0287234783172607, + "learning_rate": 4.901800334187178e-06, + "loss": 0.745, + "step": 10139 + }, + { + "epoch": 0.5211224175146469, + "grad_norm": 1.1694872379302979, + "learning_rate": 4.900968242010885e-06, + "loss": 0.7201, + "step": 10140 + }, + { + "epoch": 0.5211738102579916, + "grad_norm": 1.1210083961486816, + "learning_rate": 4.900136152578354e-06, + "loss": 0.7147, + "step": 10141 + }, + { + "epoch": 0.5212252030013362, + "grad_norm": 1.1188421249389648, + "learning_rate": 4.899304065912639e-06, + "loss": 0.7554, + "step": 10142 + }, + { + "epoch": 0.5212765957446809, + "grad_norm": 1.0639983415603638, + "learning_rate": 4.898471982036792e-06, + "loss": 0.7317, + "step": 10143 + }, + { + "epoch": 0.5213279884880255, + "grad_norm": 0.739722490310669, + "learning_rate": 4.897639900973866e-06, + "loss": 0.6638, + "step": 10144 + }, + { + "epoch": 0.5213793812313702, + "grad_norm": 1.1600335836410522, + "learning_rate": 4.896807822746919e-06, + "loss": 0.7262, + "step": 10145 + }, + { + "epoch": 0.5214307739747148, + "grad_norm": 1.085508108139038, + "learning_rate": 4.8959757473789986e-06, + "loss": 0.7449, + "step": 10146 + }, + { + "epoch": 0.5214821667180594, + "grad_norm": 1.0488207340240479, + "learning_rate": 4.895143674893161e-06, + "loss": 0.7278, + "step": 10147 + }, + { + "epoch": 0.521533559461404, + "grad_norm": 1.114403486251831, + "learning_rate": 4.8943116053124615e-06, + "loss": 0.7415, + "step": 10148 + }, + { + "epoch": 0.5215849522047487, + "grad_norm": 1.0503954887390137, + "learning_rate": 4.89347953865995e-06, + "loss": 0.7192, + "step": 10149 + }, + { + "epoch": 0.5216363449480933, + "grad_norm": 1.0436384677886963, + "learning_rate": 4.892647474958681e-06, + "loss": 0.706, + "step": 10150 + }, + { + "epoch": 0.521687737691438, + "grad_norm": 1.090611219406128, + "learning_rate": 4.891815414231707e-06, + "loss": 0.7629, + "step": 10151 + }, + { + "epoch": 0.5217391304347826, + "grad_norm": 1.0910191535949707, + "learning_rate": 4.890983356502082e-06, + "loss": 0.687, + "step": 10152 + }, + { + "epoch": 0.5217905231781272, + "grad_norm": 1.1816563606262207, + "learning_rate": 4.890151301792857e-06, + "loss": 0.6715, + "step": 10153 + }, + { + "epoch": 0.5218419159214719, + "grad_norm": 0.9829698801040649, + "learning_rate": 4.889319250127087e-06, + "loss": 0.724, + "step": 10154 + }, + { + "epoch": 0.5218933086648165, + "grad_norm": 1.0997509956359863, + "learning_rate": 4.888487201527826e-06, + "loss": 0.7435, + "step": 10155 + }, + { + "epoch": 0.5219447014081612, + "grad_norm": 1.037282109260559, + "learning_rate": 4.8876551560181225e-06, + "loss": 0.6444, + "step": 10156 + }, + { + "epoch": 0.5219960941515058, + "grad_norm": 0.6939500570297241, + "learning_rate": 4.88682311362103e-06, + "loss": 0.6768, + "step": 10157 + }, + { + "epoch": 0.5220474868948505, + "grad_norm": 0.7951695322990417, + "learning_rate": 4.885991074359605e-06, + "loss": 0.6591, + "step": 10158 + }, + { + "epoch": 0.5220988796381951, + "grad_norm": 1.0964356660842896, + "learning_rate": 4.885159038256894e-06, + "loss": 0.7285, + "step": 10159 + }, + { + "epoch": 0.5221502723815398, + "grad_norm": 1.0419641733169556, + "learning_rate": 4.884327005335952e-06, + "loss": 0.7291, + "step": 10160 + }, + { + "epoch": 0.5222016651248844, + "grad_norm": 1.0542296171188354, + "learning_rate": 4.883494975619833e-06, + "loss": 0.7241, + "step": 10161 + }, + { + "epoch": 0.522253057868229, + "grad_norm": 1.0588669776916504, + "learning_rate": 4.882662949131587e-06, + "loss": 0.7139, + "step": 10162 + }, + { + "epoch": 0.5223044506115736, + "grad_norm": 1.071363091468811, + "learning_rate": 4.881830925894265e-06, + "loss": 0.7674, + "step": 10163 + }, + { + "epoch": 0.5223558433549182, + "grad_norm": 1.142183542251587, + "learning_rate": 4.880998905930924e-06, + "loss": 0.6654, + "step": 10164 + }, + { + "epoch": 0.5224072360982629, + "grad_norm": 1.1020852327346802, + "learning_rate": 4.88016688926461e-06, + "loss": 0.7294, + "step": 10165 + }, + { + "epoch": 0.5224586288416075, + "grad_norm": 1.1549524068832397, + "learning_rate": 4.8793348759183756e-06, + "loss": 0.7544, + "step": 10166 + }, + { + "epoch": 0.5225100215849522, + "grad_norm": 1.0551776885986328, + "learning_rate": 4.878502865915276e-06, + "loss": 0.698, + "step": 10167 + }, + { + "epoch": 0.5225614143282968, + "grad_norm": 0.7616724371910095, + "learning_rate": 4.87767085927836e-06, + "loss": 0.668, + "step": 10168 + }, + { + "epoch": 0.5226128070716415, + "grad_norm": 1.1036018133163452, + "learning_rate": 4.876838856030679e-06, + "loss": 0.7435, + "step": 10169 + }, + { + "epoch": 0.5226641998149861, + "grad_norm": 1.1078691482543945, + "learning_rate": 4.876006856195284e-06, + "loss": 0.7477, + "step": 10170 + }, + { + "epoch": 0.5227155925583308, + "grad_norm": 1.1335638761520386, + "learning_rate": 4.875174859795229e-06, + "loss": 0.7281, + "step": 10171 + }, + { + "epoch": 0.5227669853016754, + "grad_norm": 0.7079783082008362, + "learning_rate": 4.874342866853563e-06, + "loss": 0.6881, + "step": 10172 + }, + { + "epoch": 0.5228183780450201, + "grad_norm": 1.086212396621704, + "learning_rate": 4.873510877393337e-06, + "loss": 0.7363, + "step": 10173 + }, + { + "epoch": 0.5228697707883647, + "grad_norm": 1.057417392730713, + "learning_rate": 4.872678891437606e-06, + "loss": 0.7446, + "step": 10174 + }, + { + "epoch": 0.5229211635317094, + "grad_norm": 1.0979691743850708, + "learning_rate": 4.871846909009414e-06, + "loss": 0.7654, + "step": 10175 + }, + { + "epoch": 0.522972556275054, + "grad_norm": 0.6658157110214233, + "learning_rate": 4.8710149301318155e-06, + "loss": 0.6953, + "step": 10176 + }, + { + "epoch": 0.5230239490183985, + "grad_norm": 0.931196391582489, + "learning_rate": 4.870182954827863e-06, + "loss": 0.6932, + "step": 10177 + }, + { + "epoch": 0.5230753417617432, + "grad_norm": 0.7058324217796326, + "learning_rate": 4.869350983120603e-06, + "loss": 0.6797, + "step": 10178 + }, + { + "epoch": 0.5231267345050878, + "grad_norm": 1.1077871322631836, + "learning_rate": 4.868519015033089e-06, + "loss": 0.7127, + "step": 10179 + }, + { + "epoch": 0.5231781272484325, + "grad_norm": 1.0670933723449707, + "learning_rate": 4.8676870505883705e-06, + "loss": 0.7328, + "step": 10180 + }, + { + "epoch": 0.5232295199917771, + "grad_norm": 1.0528850555419922, + "learning_rate": 4.8668550898094975e-06, + "loss": 0.7198, + "step": 10181 + }, + { + "epoch": 0.5232809127351218, + "grad_norm": 0.7773083448410034, + "learning_rate": 4.86602313271952e-06, + "loss": 0.6487, + "step": 10182 + }, + { + "epoch": 0.5233323054784664, + "grad_norm": 1.0999242067337036, + "learning_rate": 4.8651911793414905e-06, + "loss": 0.752, + "step": 10183 + }, + { + "epoch": 0.5233836982218111, + "grad_norm": 1.0907025337219238, + "learning_rate": 4.864359229698456e-06, + "loss": 0.7515, + "step": 10184 + }, + { + "epoch": 0.5234350909651557, + "grad_norm": 0.6740009784698486, + "learning_rate": 4.863527283813467e-06, + "loss": 0.6762, + "step": 10185 + }, + { + "epoch": 0.5234864837085004, + "grad_norm": 1.061476707458496, + "learning_rate": 4.862695341709574e-06, + "loss": 0.7547, + "step": 10186 + }, + { + "epoch": 0.523537876451845, + "grad_norm": 1.0332081317901611, + "learning_rate": 4.861863403409828e-06, + "loss": 0.736, + "step": 10187 + }, + { + "epoch": 0.5235892691951897, + "grad_norm": 1.1380336284637451, + "learning_rate": 4.861031468937275e-06, + "loss": 0.7106, + "step": 10188 + }, + { + "epoch": 0.5236406619385343, + "grad_norm": 1.0341191291809082, + "learning_rate": 4.860199538314966e-06, + "loss": 0.6609, + "step": 10189 + }, + { + "epoch": 0.523692054681879, + "grad_norm": 0.7224943041801453, + "learning_rate": 4.859367611565953e-06, + "loss": 0.6675, + "step": 10190 + }, + { + "epoch": 0.5237434474252236, + "grad_norm": 1.0201328992843628, + "learning_rate": 4.858535688713281e-06, + "loss": 0.6438, + "step": 10191 + }, + { + "epoch": 0.5237948401685681, + "grad_norm": 1.0973433256149292, + "learning_rate": 4.857703769780002e-06, + "loss": 0.729, + "step": 10192 + }, + { + "epoch": 0.5238462329119128, + "grad_norm": 1.0729687213897705, + "learning_rate": 4.856871854789167e-06, + "loss": 0.7575, + "step": 10193 + }, + { + "epoch": 0.5238976256552574, + "grad_norm": 1.0226545333862305, + "learning_rate": 4.856039943763819e-06, + "loss": 0.7059, + "step": 10194 + }, + { + "epoch": 0.5239490183986021, + "grad_norm": 1.0748804807662964, + "learning_rate": 4.8552080367270105e-06, + "loss": 0.7484, + "step": 10195 + }, + { + "epoch": 0.5240004111419467, + "grad_norm": 1.5080205202102661, + "learning_rate": 4.854376133701792e-06, + "loss": 0.6819, + "step": 10196 + }, + { + "epoch": 0.5240518038852914, + "grad_norm": 1.1196929216384888, + "learning_rate": 4.853544234711207e-06, + "loss": 0.7374, + "step": 10197 + }, + { + "epoch": 0.524103196628636, + "grad_norm": 0.6745384335517883, + "learning_rate": 4.852712339778308e-06, + "loss": 0.6508, + "step": 10198 + }, + { + "epoch": 0.5241545893719807, + "grad_norm": 1.0236445665359497, + "learning_rate": 4.851880448926144e-06, + "loss": 0.7673, + "step": 10199 + }, + { + "epoch": 0.5242059821153253, + "grad_norm": 1.4175792932510376, + "learning_rate": 4.85104856217776e-06, + "loss": 0.7595, + "step": 10200 + }, + { + "epoch": 0.52425737485867, + "grad_norm": 1.0875300168991089, + "learning_rate": 4.850216679556205e-06, + "loss": 0.6993, + "step": 10201 + }, + { + "epoch": 0.5243087676020146, + "grad_norm": 1.0749180316925049, + "learning_rate": 4.849384801084527e-06, + "loss": 0.6939, + "step": 10202 + }, + { + "epoch": 0.5243601603453593, + "grad_norm": 1.117702603340149, + "learning_rate": 4.8485529267857765e-06, + "loss": 0.6837, + "step": 10203 + }, + { + "epoch": 0.5244115530887039, + "grad_norm": 0.8182233572006226, + "learning_rate": 4.847721056682999e-06, + "loss": 0.651, + "step": 10204 + }, + { + "epoch": 0.5244629458320486, + "grad_norm": 0.6755691766738892, + "learning_rate": 4.846889190799241e-06, + "loss": 0.6355, + "step": 10205 + }, + { + "epoch": 0.5245143385753932, + "grad_norm": 1.0055376291275024, + "learning_rate": 4.846057329157555e-06, + "loss": 0.7129, + "step": 10206 + }, + { + "epoch": 0.5245657313187378, + "grad_norm": 1.1391932964324951, + "learning_rate": 4.8452254717809826e-06, + "loss": 0.7123, + "step": 10207 + }, + { + "epoch": 0.5246171240620824, + "grad_norm": 1.0397183895111084, + "learning_rate": 4.844393618692573e-06, + "loss": 0.7576, + "step": 10208 + }, + { + "epoch": 0.524668516805427, + "grad_norm": 1.0056294202804565, + "learning_rate": 4.843561769915378e-06, + "loss": 0.7177, + "step": 10209 + }, + { + "epoch": 0.5247199095487717, + "grad_norm": 1.0432686805725098, + "learning_rate": 4.842729925472437e-06, + "loss": 0.6911, + "step": 10210 + }, + { + "epoch": 0.5247713022921163, + "grad_norm": 1.0614737272262573, + "learning_rate": 4.841898085386802e-06, + "loss": 0.6934, + "step": 10211 + }, + { + "epoch": 0.524822695035461, + "grad_norm": 1.086755394935608, + "learning_rate": 4.8410662496815185e-06, + "loss": 0.7108, + "step": 10212 + }, + { + "epoch": 0.5248740877788056, + "grad_norm": 0.7585174441337585, + "learning_rate": 4.8402344183796325e-06, + "loss": 0.6749, + "step": 10213 + }, + { + "epoch": 0.5249254805221503, + "grad_norm": 0.9998639225959778, + "learning_rate": 4.839402591504192e-06, + "loss": 0.7207, + "step": 10214 + }, + { + "epoch": 0.5249768732654949, + "grad_norm": 1.1510059833526611, + "learning_rate": 4.838570769078244e-06, + "loss": 0.7575, + "step": 10215 + }, + { + "epoch": 0.5250282660088396, + "grad_norm": 1.228126883506775, + "learning_rate": 4.837738951124832e-06, + "loss": 0.7772, + "step": 10216 + }, + { + "epoch": 0.5250796587521842, + "grad_norm": 1.0061830282211304, + "learning_rate": 4.836907137667005e-06, + "loss": 0.7468, + "step": 10217 + }, + { + "epoch": 0.5251310514955289, + "grad_norm": 0.7103826999664307, + "learning_rate": 4.836075328727808e-06, + "loss": 0.675, + "step": 10218 + }, + { + "epoch": 0.5251824442388735, + "grad_norm": 1.0398668050765991, + "learning_rate": 4.8352435243302884e-06, + "loss": 0.7278, + "step": 10219 + }, + { + "epoch": 0.5252338369822181, + "grad_norm": 1.0127153396606445, + "learning_rate": 4.834411724497489e-06, + "loss": 0.6853, + "step": 10220 + }, + { + "epoch": 0.5252852297255628, + "grad_norm": 1.0530105829238892, + "learning_rate": 4.833579929252458e-06, + "loss": 0.7219, + "step": 10221 + }, + { + "epoch": 0.5253366224689074, + "grad_norm": 0.8183920979499817, + "learning_rate": 4.832748138618241e-06, + "loss": 0.6258, + "step": 10222 + }, + { + "epoch": 0.525388015212252, + "grad_norm": 1.0908894538879395, + "learning_rate": 4.831916352617882e-06, + "loss": 0.7374, + "step": 10223 + }, + { + "epoch": 0.5254394079555966, + "grad_norm": 1.1123764514923096, + "learning_rate": 4.831084571274427e-06, + "loss": 0.6751, + "step": 10224 + }, + { + "epoch": 0.5254908006989413, + "grad_norm": 1.2426478862762451, + "learning_rate": 4.830252794610923e-06, + "loss": 0.7505, + "step": 10225 + }, + { + "epoch": 0.5255421934422859, + "grad_norm": 1.0600411891937256, + "learning_rate": 4.829421022650413e-06, + "loss": 0.7078, + "step": 10226 + }, + { + "epoch": 0.5255935861856306, + "grad_norm": 1.0181952714920044, + "learning_rate": 4.828589255415942e-06, + "loss": 0.7063, + "step": 10227 + }, + { + "epoch": 0.5256449789289752, + "grad_norm": 1.1194193363189697, + "learning_rate": 4.8277574929305565e-06, + "loss": 0.7232, + "step": 10228 + }, + { + "epoch": 0.5256963716723199, + "grad_norm": 1.054977297782898, + "learning_rate": 4.826925735217299e-06, + "loss": 0.7088, + "step": 10229 + }, + { + "epoch": 0.5257477644156645, + "grad_norm": 0.798984944820404, + "learning_rate": 4.826093982299214e-06, + "loss": 0.6856, + "step": 10230 + }, + { + "epoch": 0.5257991571590092, + "grad_norm": 1.0804462432861328, + "learning_rate": 4.825262234199349e-06, + "loss": 0.7682, + "step": 10231 + }, + { + "epoch": 0.5258505499023538, + "grad_norm": 1.0957400798797607, + "learning_rate": 4.8244304909407455e-06, + "loss": 0.7417, + "step": 10232 + }, + { + "epoch": 0.5259019426456985, + "grad_norm": 1.0321849584579468, + "learning_rate": 4.823598752546448e-06, + "loss": 0.7316, + "step": 10233 + }, + { + "epoch": 0.5259533353890431, + "grad_norm": 1.106554388999939, + "learning_rate": 4.822767019039502e-06, + "loss": 0.7384, + "step": 10234 + }, + { + "epoch": 0.5260047281323877, + "grad_norm": 1.0579158067703247, + "learning_rate": 4.821935290442951e-06, + "loss": 0.7738, + "step": 10235 + }, + { + "epoch": 0.5260561208757324, + "grad_norm": 1.1413239240646362, + "learning_rate": 4.821103566779837e-06, + "loss": 0.6916, + "step": 10236 + }, + { + "epoch": 0.526107513619077, + "grad_norm": 1.0151246786117554, + "learning_rate": 4.8202718480732054e-06, + "loss": 0.7383, + "step": 10237 + }, + { + "epoch": 0.5261589063624216, + "grad_norm": 1.1452213525772095, + "learning_rate": 4.819440134346101e-06, + "loss": 0.7968, + "step": 10238 + }, + { + "epoch": 0.5262102991057662, + "grad_norm": 1.0373313426971436, + "learning_rate": 4.818608425621563e-06, + "loss": 0.7407, + "step": 10239 + }, + { + "epoch": 0.5262616918491109, + "grad_norm": 1.0357334613800049, + "learning_rate": 4.817776721922637e-06, + "loss": 0.7358, + "step": 10240 + }, + { + "epoch": 0.5263130845924555, + "grad_norm": 1.0234582424163818, + "learning_rate": 4.816945023272368e-06, + "loss": 0.7112, + "step": 10241 + }, + { + "epoch": 0.5263644773358002, + "grad_norm": 1.0189048051834106, + "learning_rate": 4.816113329693794e-06, + "loss": 0.7221, + "step": 10242 + }, + { + "epoch": 0.5264158700791448, + "grad_norm": 0.9779747128486633, + "learning_rate": 4.815281641209963e-06, + "loss": 0.6886, + "step": 10243 + }, + { + "epoch": 0.5264672628224895, + "grad_norm": 1.1011875867843628, + "learning_rate": 4.814449957843916e-06, + "loss": 0.7168, + "step": 10244 + }, + { + "epoch": 0.5265186555658341, + "grad_norm": 1.0463697910308838, + "learning_rate": 4.813618279618693e-06, + "loss": 0.6907, + "step": 10245 + }, + { + "epoch": 0.5265700483091788, + "grad_norm": 1.132140040397644, + "learning_rate": 4.812786606557339e-06, + "loss": 0.7163, + "step": 10246 + }, + { + "epoch": 0.5266214410525234, + "grad_norm": 1.0805333852767944, + "learning_rate": 4.811954938682897e-06, + "loss": 0.7183, + "step": 10247 + }, + { + "epoch": 0.526672833795868, + "grad_norm": 1.066996693611145, + "learning_rate": 4.811123276018407e-06, + "loss": 0.7228, + "step": 10248 + }, + { + "epoch": 0.5267242265392127, + "grad_norm": 1.0911773443222046, + "learning_rate": 4.810291618586909e-06, + "loss": 0.697, + "step": 10249 + }, + { + "epoch": 0.5267756192825573, + "grad_norm": 1.148009181022644, + "learning_rate": 4.80945996641145e-06, + "loss": 0.7217, + "step": 10250 + }, + { + "epoch": 0.526827012025902, + "grad_norm": 1.0908042192459106, + "learning_rate": 4.808628319515068e-06, + "loss": 0.7276, + "step": 10251 + }, + { + "epoch": 0.5268784047692466, + "grad_norm": 1.0533244609832764, + "learning_rate": 4.807796677920807e-06, + "loss": 0.6403, + "step": 10252 + }, + { + "epoch": 0.5269297975125912, + "grad_norm": 0.8397180438041687, + "learning_rate": 4.806965041651704e-06, + "loss": 0.7166, + "step": 10253 + }, + { + "epoch": 0.5269811902559358, + "grad_norm": 1.136452555656433, + "learning_rate": 4.806133410730806e-06, + "loss": 0.7344, + "step": 10254 + }, + { + "epoch": 0.5270325829992805, + "grad_norm": 0.7566211819648743, + "learning_rate": 4.805301785181149e-06, + "loss": 0.6179, + "step": 10255 + }, + { + "epoch": 0.5270839757426251, + "grad_norm": 1.1047295331954956, + "learning_rate": 4.8044701650257765e-06, + "loss": 0.7254, + "step": 10256 + }, + { + "epoch": 0.5271353684859698, + "grad_norm": 0.7362626194953918, + "learning_rate": 4.8036385502877315e-06, + "loss": 0.6763, + "step": 10257 + }, + { + "epoch": 0.5271867612293144, + "grad_norm": 1.00727117061615, + "learning_rate": 4.802806940990049e-06, + "loss": 0.6939, + "step": 10258 + }, + { + "epoch": 0.527238153972659, + "grad_norm": 1.0371261835098267, + "learning_rate": 4.8019753371557725e-06, + "loss": 0.7149, + "step": 10259 + }, + { + "epoch": 0.5272895467160037, + "grad_norm": 0.7272729277610779, + "learning_rate": 4.801143738807945e-06, + "loss": 0.6567, + "step": 10260 + }, + { + "epoch": 0.5273409394593483, + "grad_norm": 1.1050797700881958, + "learning_rate": 4.8003121459696e-06, + "loss": 0.7665, + "step": 10261 + }, + { + "epoch": 0.527392332202693, + "grad_norm": 1.0831407308578491, + "learning_rate": 4.799480558663784e-06, + "loss": 0.6998, + "step": 10262 + }, + { + "epoch": 0.5274437249460376, + "grad_norm": 1.0474841594696045, + "learning_rate": 4.798648976913532e-06, + "loss": 0.6856, + "step": 10263 + }, + { + "epoch": 0.5274951176893823, + "grad_norm": 1.081330418586731, + "learning_rate": 4.797817400741888e-06, + "loss": 0.7484, + "step": 10264 + }, + { + "epoch": 0.5275465104327269, + "grad_norm": 1.0493674278259277, + "learning_rate": 4.796985830171888e-06, + "loss": 0.7066, + "step": 10265 + }, + { + "epoch": 0.5275979031760716, + "grad_norm": 1.0850765705108643, + "learning_rate": 4.796154265226573e-06, + "loss": 0.6975, + "step": 10266 + }, + { + "epoch": 0.5276492959194162, + "grad_norm": 1.1307713985443115, + "learning_rate": 4.795322705928984e-06, + "loss": 0.7218, + "step": 10267 + }, + { + "epoch": 0.5277006886627608, + "grad_norm": 1.0447183847427368, + "learning_rate": 4.794491152302157e-06, + "loss": 0.682, + "step": 10268 + }, + { + "epoch": 0.5277520814061054, + "grad_norm": 0.9921669960021973, + "learning_rate": 4.793659604369131e-06, + "loss": 0.7082, + "step": 10269 + }, + { + "epoch": 0.5278034741494501, + "grad_norm": 1.1212100982666016, + "learning_rate": 4.792828062152948e-06, + "loss": 0.7227, + "step": 10270 + }, + { + "epoch": 0.5278548668927947, + "grad_norm": 0.8607807755470276, + "learning_rate": 4.791996525676643e-06, + "loss": 0.6612, + "step": 10271 + }, + { + "epoch": 0.5279062596361394, + "grad_norm": 1.0803894996643066, + "learning_rate": 4.791164994963256e-06, + "loss": 0.7714, + "step": 10272 + }, + { + "epoch": 0.527957652379484, + "grad_norm": 0.9982879161834717, + "learning_rate": 4.7903334700358254e-06, + "loss": 0.7502, + "step": 10273 + }, + { + "epoch": 0.5280090451228286, + "grad_norm": 1.014747977256775, + "learning_rate": 4.7895019509173885e-06, + "loss": 0.708, + "step": 10274 + }, + { + "epoch": 0.5280604378661733, + "grad_norm": 1.0326505899429321, + "learning_rate": 4.788670437630984e-06, + "loss": 0.6256, + "step": 10275 + }, + { + "epoch": 0.5281118306095179, + "grad_norm": 1.1371957063674927, + "learning_rate": 4.787838930199651e-06, + "loss": 0.7921, + "step": 10276 + }, + { + "epoch": 0.5281632233528626, + "grad_norm": 1.0599125623703003, + "learning_rate": 4.7870074286464245e-06, + "loss": 0.7302, + "step": 10277 + }, + { + "epoch": 0.5282146160962072, + "grad_norm": 1.029662013053894, + "learning_rate": 4.7861759329943425e-06, + "loss": 0.7183, + "step": 10278 + }, + { + "epoch": 0.5282660088395519, + "grad_norm": 0.8277777433395386, + "learning_rate": 4.785344443266444e-06, + "loss": 0.7097, + "step": 10279 + }, + { + "epoch": 0.5283174015828965, + "grad_norm": 1.1486098766326904, + "learning_rate": 4.7845129594857656e-06, + "loss": 0.7438, + "step": 10280 + }, + { + "epoch": 0.5283687943262412, + "grad_norm": 1.0795249938964844, + "learning_rate": 4.783681481675343e-06, + "loss": 0.7004, + "step": 10281 + }, + { + "epoch": 0.5284201870695858, + "grad_norm": 1.1701425313949585, + "learning_rate": 4.782850009858214e-06, + "loss": 0.7015, + "step": 10282 + }, + { + "epoch": 0.5284715798129304, + "grad_norm": 1.0508663654327393, + "learning_rate": 4.782018544057415e-06, + "loss": 0.7707, + "step": 10283 + }, + { + "epoch": 0.528522972556275, + "grad_norm": 1.2026253938674927, + "learning_rate": 4.781187084295982e-06, + "loss": 0.7821, + "step": 10284 + }, + { + "epoch": 0.5285743652996197, + "grad_norm": 1.0074617862701416, + "learning_rate": 4.780355630596953e-06, + "loss": 0.7428, + "step": 10285 + }, + { + "epoch": 0.5286257580429643, + "grad_norm": 0.7750651836395264, + "learning_rate": 4.779524182983364e-06, + "loss": 0.7325, + "step": 10286 + }, + { + "epoch": 0.528677150786309, + "grad_norm": 0.9617918133735657, + "learning_rate": 4.778692741478248e-06, + "loss": 0.7238, + "step": 10287 + }, + { + "epoch": 0.5287285435296536, + "grad_norm": 0.992879331111908, + "learning_rate": 4.777861306104643e-06, + "loss": 0.7373, + "step": 10288 + }, + { + "epoch": 0.5287799362729982, + "grad_norm": 1.0186505317687988, + "learning_rate": 4.777029876885586e-06, + "loss": 0.7686, + "step": 10289 + }, + { + "epoch": 0.5288313290163429, + "grad_norm": 1.0073426961898804, + "learning_rate": 4.77619845384411e-06, + "loss": 0.7346, + "step": 10290 + }, + { + "epoch": 0.5288827217596875, + "grad_norm": 1.0588667392730713, + "learning_rate": 4.7753670370032516e-06, + "loss": 0.7166, + "step": 10291 + }, + { + "epoch": 0.5289341145030322, + "grad_norm": 1.1066269874572754, + "learning_rate": 4.7745356263860455e-06, + "loss": 0.7017, + "step": 10292 + }, + { + "epoch": 0.5289855072463768, + "grad_norm": 0.7340662479400635, + "learning_rate": 4.773704222015525e-06, + "loss": 0.6767, + "step": 10293 + }, + { + "epoch": 0.5290368999897215, + "grad_norm": 1.051391363143921, + "learning_rate": 4.7728728239147285e-06, + "loss": 0.7189, + "step": 10294 + }, + { + "epoch": 0.5290882927330661, + "grad_norm": 1.0894246101379395, + "learning_rate": 4.772041432106689e-06, + "loss": 0.7085, + "step": 10295 + }, + { + "epoch": 0.5291396854764108, + "grad_norm": 0.9623305201530457, + "learning_rate": 4.771210046614441e-06, + "loss": 0.6336, + "step": 10296 + }, + { + "epoch": 0.5291910782197554, + "grad_norm": 1.1843966245651245, + "learning_rate": 4.770378667461016e-06, + "loss": 0.7706, + "step": 10297 + }, + { + "epoch": 0.5292424709631001, + "grad_norm": 1.008323311805725, + "learning_rate": 4.769547294669452e-06, + "loss": 0.7694, + "step": 10298 + }, + { + "epoch": 0.5292938637064446, + "grad_norm": 1.0887399911880493, + "learning_rate": 4.7687159282627834e-06, + "loss": 0.7312, + "step": 10299 + }, + { + "epoch": 0.5293452564497892, + "grad_norm": 0.8242732882499695, + "learning_rate": 4.767884568264039e-06, + "loss": 0.681, + "step": 10300 + }, + { + "epoch": 0.5293966491931339, + "grad_norm": 1.1898654699325562, + "learning_rate": 4.767053214696256e-06, + "loss": 0.7314, + "step": 10301 + }, + { + "epoch": 0.5294480419364785, + "grad_norm": 0.675067663192749, + "learning_rate": 4.766221867582469e-06, + "loss": 0.6621, + "step": 10302 + }, + { + "epoch": 0.5294994346798232, + "grad_norm": 1.0693490505218506, + "learning_rate": 4.765390526945708e-06, + "loss": 0.7473, + "step": 10303 + }, + { + "epoch": 0.5295508274231678, + "grad_norm": 0.7189530730247498, + "learning_rate": 4.7645591928090065e-06, + "loss": 0.653, + "step": 10304 + }, + { + "epoch": 0.5296022201665125, + "grad_norm": 0.7237748503684998, + "learning_rate": 4.7637278651954e-06, + "loss": 0.6675, + "step": 10305 + }, + { + "epoch": 0.5296536129098571, + "grad_norm": 0.7539510726928711, + "learning_rate": 4.762896544127917e-06, + "loss": 0.6449, + "step": 10306 + }, + { + "epoch": 0.5297050056532018, + "grad_norm": 1.0273839235305786, + "learning_rate": 4.762065229629593e-06, + "loss": 0.7508, + "step": 10307 + }, + { + "epoch": 0.5297563983965464, + "grad_norm": 1.063821792602539, + "learning_rate": 4.761233921723462e-06, + "loss": 0.7068, + "step": 10308 + }, + { + "epoch": 0.5298077911398911, + "grad_norm": 8.256381034851074, + "learning_rate": 4.760402620432551e-06, + "loss": 0.7892, + "step": 10309 + }, + { + "epoch": 0.5298591838832357, + "grad_norm": 1.0470741987228394, + "learning_rate": 4.759571325779895e-06, + "loss": 0.7787, + "step": 10310 + }, + { + "epoch": 0.5299105766265804, + "grad_norm": 1.084010124206543, + "learning_rate": 4.758740037788525e-06, + "loss": 0.6789, + "step": 10311 + }, + { + "epoch": 0.529961969369925, + "grad_norm": 1.11545729637146, + "learning_rate": 4.757908756481475e-06, + "loss": 0.7282, + "step": 10312 + }, + { + "epoch": 0.5300133621132697, + "grad_norm": 1.0468107461929321, + "learning_rate": 4.757077481881774e-06, + "loss": 0.7078, + "step": 10313 + }, + { + "epoch": 0.5300647548566142, + "grad_norm": 1.0252079963684082, + "learning_rate": 4.756246214012451e-06, + "loss": 0.7009, + "step": 10314 + }, + { + "epoch": 0.5301161475999588, + "grad_norm": 1.0048270225524902, + "learning_rate": 4.7554149528965414e-06, + "loss": 0.717, + "step": 10315 + }, + { + "epoch": 0.5301675403433035, + "grad_norm": 1.0224692821502686, + "learning_rate": 4.754583698557073e-06, + "loss": 0.6806, + "step": 10316 + }, + { + "epoch": 0.5302189330866481, + "grad_norm": 1.0386464595794678, + "learning_rate": 4.7537524510170776e-06, + "loss": 0.7489, + "step": 10317 + }, + { + "epoch": 0.5302703258299928, + "grad_norm": 0.9968034029006958, + "learning_rate": 4.752921210299588e-06, + "loss": 0.6956, + "step": 10318 + }, + { + "epoch": 0.5303217185733374, + "grad_norm": 1.0880216360092163, + "learning_rate": 4.75208997642763e-06, + "loss": 0.7823, + "step": 10319 + }, + { + "epoch": 0.5303731113166821, + "grad_norm": 1.0688482522964478, + "learning_rate": 4.7512587494242355e-06, + "loss": 0.711, + "step": 10320 + }, + { + "epoch": 0.5304245040600267, + "grad_norm": 1.050085186958313, + "learning_rate": 4.750427529312436e-06, + "loss": 0.7676, + "step": 10321 + }, + { + "epoch": 0.5304758968033714, + "grad_norm": 1.0743380784988403, + "learning_rate": 4.749596316115258e-06, + "loss": 0.7693, + "step": 10322 + }, + { + "epoch": 0.530527289546716, + "grad_norm": 1.1947100162506104, + "learning_rate": 4.748765109855733e-06, + "loss": 0.7482, + "step": 10323 + }, + { + "epoch": 0.5305786822900607, + "grad_norm": 1.0616408586502075, + "learning_rate": 4.747933910556891e-06, + "loss": 0.6889, + "step": 10324 + }, + { + "epoch": 0.5306300750334053, + "grad_norm": 1.140880823135376, + "learning_rate": 4.74710271824176e-06, + "loss": 0.7335, + "step": 10325 + }, + { + "epoch": 0.53068146777675, + "grad_norm": 1.1313060522079468, + "learning_rate": 4.746271532933368e-06, + "loss": 0.7111, + "step": 10326 + }, + { + "epoch": 0.5307328605200946, + "grad_norm": 0.9172868728637695, + "learning_rate": 4.745440354654745e-06, + "loss": 0.704, + "step": 10327 + }, + { + "epoch": 0.5307842532634393, + "grad_norm": 1.1060254573822021, + "learning_rate": 4.744609183428921e-06, + "loss": 0.702, + "step": 10328 + }, + { + "epoch": 0.5308356460067838, + "grad_norm": 1.069342017173767, + "learning_rate": 4.7437780192789205e-06, + "loss": 0.7726, + "step": 10329 + }, + { + "epoch": 0.5308870387501284, + "grad_norm": 0.7403047680854797, + "learning_rate": 4.742946862227774e-06, + "loss": 0.6554, + "step": 10330 + }, + { + "epoch": 0.5309384314934731, + "grad_norm": 1.0570991039276123, + "learning_rate": 4.7421157122985115e-06, + "loss": 0.7455, + "step": 10331 + }, + { + "epoch": 0.5309898242368177, + "grad_norm": 1.0414022207260132, + "learning_rate": 4.741284569514156e-06, + "loss": 0.7043, + "step": 10332 + }, + { + "epoch": 0.5310412169801624, + "grad_norm": 1.1439743041992188, + "learning_rate": 4.740453433897737e-06, + "loss": 0.7093, + "step": 10333 + }, + { + "epoch": 0.531092609723507, + "grad_norm": 1.0015448331832886, + "learning_rate": 4.739622305472284e-06, + "loss": 0.7086, + "step": 10334 + }, + { + "epoch": 0.5311440024668517, + "grad_norm": 1.0276317596435547, + "learning_rate": 4.7387911842608206e-06, + "loss": 0.7034, + "step": 10335 + }, + { + "epoch": 0.5311953952101963, + "grad_norm": 1.0847787857055664, + "learning_rate": 4.737960070286376e-06, + "loss": 0.741, + "step": 10336 + }, + { + "epoch": 0.531246787953541, + "grad_norm": 0.9731278419494629, + "learning_rate": 4.737128963571979e-06, + "loss": 0.6941, + "step": 10337 + }, + { + "epoch": 0.5312981806968856, + "grad_norm": 1.1218215227127075, + "learning_rate": 4.736297864140651e-06, + "loss": 0.7018, + "step": 10338 + }, + { + "epoch": 0.5313495734402303, + "grad_norm": 1.1538740396499634, + "learning_rate": 4.735466772015421e-06, + "loss": 0.7594, + "step": 10339 + }, + { + "epoch": 0.5314009661835749, + "grad_norm": 1.0631097555160522, + "learning_rate": 4.734635687219317e-06, + "loss": 0.7043, + "step": 10340 + }, + { + "epoch": 0.5314523589269196, + "grad_norm": 1.0648765563964844, + "learning_rate": 4.733804609775362e-06, + "loss": 0.7742, + "step": 10341 + }, + { + "epoch": 0.5315037516702642, + "grad_norm": 1.1639678478240967, + "learning_rate": 4.732973539706581e-06, + "loss": 0.7954, + "step": 10342 + }, + { + "epoch": 0.5315551444136088, + "grad_norm": 1.1686532497406006, + "learning_rate": 4.732142477036002e-06, + "loss": 0.7192, + "step": 10343 + }, + { + "epoch": 0.5316065371569534, + "grad_norm": 1.0241726636886597, + "learning_rate": 4.731311421786651e-06, + "loss": 0.702, + "step": 10344 + }, + { + "epoch": 0.531657929900298, + "grad_norm": 0.9771272540092468, + "learning_rate": 4.730480373981551e-06, + "loss": 0.6897, + "step": 10345 + }, + { + "epoch": 0.5317093226436427, + "grad_norm": 0.9966562390327454, + "learning_rate": 4.729649333643728e-06, + "loss": 0.742, + "step": 10346 + }, + { + "epoch": 0.5317607153869873, + "grad_norm": 1.0112990140914917, + "learning_rate": 4.728818300796206e-06, + "loss": 0.6957, + "step": 10347 + }, + { + "epoch": 0.531812108130332, + "grad_norm": 1.060575246810913, + "learning_rate": 4.727987275462009e-06, + "loss": 0.7295, + "step": 10348 + }, + { + "epoch": 0.5318635008736766, + "grad_norm": 1.0529887676239014, + "learning_rate": 4.727156257664162e-06, + "loss": 0.7058, + "step": 10349 + }, + { + "epoch": 0.5319148936170213, + "grad_norm": 1.0608493089675903, + "learning_rate": 4.726325247425691e-06, + "loss": 0.747, + "step": 10350 + }, + { + "epoch": 0.5319662863603659, + "grad_norm": 1.175390601158142, + "learning_rate": 4.725494244769617e-06, + "loss": 0.6832, + "step": 10351 + }, + { + "epoch": 0.5320176791037106, + "grad_norm": 1.0679497718811035, + "learning_rate": 4.724663249718964e-06, + "loss": 0.7381, + "step": 10352 + }, + { + "epoch": 0.5320690718470552, + "grad_norm": 1.0728440284729004, + "learning_rate": 4.723832262296757e-06, + "loss": 0.7428, + "step": 10353 + }, + { + "epoch": 0.5321204645903999, + "grad_norm": 1.0997991561889648, + "learning_rate": 4.723001282526018e-06, + "loss": 0.7459, + "step": 10354 + }, + { + "epoch": 0.5321718573337445, + "grad_norm": 1.067649483680725, + "learning_rate": 4.72217031042977e-06, + "loss": 0.6444, + "step": 10355 + }, + { + "epoch": 0.5322232500770891, + "grad_norm": 1.0473803281784058, + "learning_rate": 4.721339346031037e-06, + "loss": 0.7296, + "step": 10356 + }, + { + "epoch": 0.5322746428204338, + "grad_norm": 1.011902928352356, + "learning_rate": 4.72050838935284e-06, + "loss": 0.7365, + "step": 10357 + }, + { + "epoch": 0.5323260355637784, + "grad_norm": 1.0421733856201172, + "learning_rate": 4.7196774404182016e-06, + "loss": 0.7038, + "step": 10358 + }, + { + "epoch": 0.532377428307123, + "grad_norm": 1.0585135221481323, + "learning_rate": 4.718846499250145e-06, + "loss": 0.7368, + "step": 10359 + }, + { + "epoch": 0.5324288210504676, + "grad_norm": 1.1533455848693848, + "learning_rate": 4.7180155658716935e-06, + "loss": 0.7233, + "step": 10360 + }, + { + "epoch": 0.5324802137938123, + "grad_norm": 1.0762910842895508, + "learning_rate": 4.717184640305865e-06, + "loss": 0.7217, + "step": 10361 + }, + { + "epoch": 0.5325316065371569, + "grad_norm": 1.035648226737976, + "learning_rate": 4.7163537225756825e-06, + "loss": 0.6751, + "step": 10362 + }, + { + "epoch": 0.5325829992805016, + "grad_norm": 1.0751457214355469, + "learning_rate": 4.71552281270417e-06, + "loss": 0.7823, + "step": 10363 + }, + { + "epoch": 0.5326343920238462, + "grad_norm": 1.0853872299194336, + "learning_rate": 4.714691910714345e-06, + "loss": 0.7104, + "step": 10364 + }, + { + "epoch": 0.5326857847671909, + "grad_norm": 1.078939437866211, + "learning_rate": 4.713861016629229e-06, + "loss": 0.7263, + "step": 10365 + }, + { + "epoch": 0.5327371775105355, + "grad_norm": 1.0856150388717651, + "learning_rate": 4.713030130471845e-06, + "loss": 0.7005, + "step": 10366 + }, + { + "epoch": 0.5327885702538802, + "grad_norm": 0.7965691089630127, + "learning_rate": 4.712199252265211e-06, + "loss": 0.6635, + "step": 10367 + }, + { + "epoch": 0.5328399629972248, + "grad_norm": 1.077125072479248, + "learning_rate": 4.711368382032347e-06, + "loss": 0.7211, + "step": 10368 + }, + { + "epoch": 0.5328913557405695, + "grad_norm": 1.1458086967468262, + "learning_rate": 4.7105375197962765e-06, + "loss": 0.7097, + "step": 10369 + }, + { + "epoch": 0.5329427484839141, + "grad_norm": 1.0119352340698242, + "learning_rate": 4.709706665580015e-06, + "loss": 0.7196, + "step": 10370 + }, + { + "epoch": 0.5329941412272587, + "grad_norm": 1.0383985042572021, + "learning_rate": 4.708875819406584e-06, + "loss": 0.6638, + "step": 10371 + }, + { + "epoch": 0.5330455339706034, + "grad_norm": 1.0867875814437866, + "learning_rate": 4.708044981299002e-06, + "loss": 0.7011, + "step": 10372 + }, + { + "epoch": 0.533096926713948, + "grad_norm": 1.0672067403793335, + "learning_rate": 4.7072141512802915e-06, + "loss": 0.7662, + "step": 10373 + }, + { + "epoch": 0.5331483194572927, + "grad_norm": 1.0415112972259521, + "learning_rate": 4.7063833293734665e-06, + "loss": 0.7055, + "step": 10374 + }, + { + "epoch": 0.5331997122006372, + "grad_norm": 1.07720947265625, + "learning_rate": 4.705552515601547e-06, + "loss": 0.7397, + "step": 10375 + }, + { + "epoch": 0.5332511049439819, + "grad_norm": 0.7575289011001587, + "learning_rate": 4.704721709987554e-06, + "loss": 0.6784, + "step": 10376 + }, + { + "epoch": 0.5333024976873265, + "grad_norm": 1.1129956245422363, + "learning_rate": 4.7038909125545025e-06, + "loss": 0.7165, + "step": 10377 + }, + { + "epoch": 0.5333538904306712, + "grad_norm": 1.0422471761703491, + "learning_rate": 4.703060123325412e-06, + "loss": 0.7206, + "step": 10378 + }, + { + "epoch": 0.5334052831740158, + "grad_norm": 1.0564301013946533, + "learning_rate": 4.702229342323302e-06, + "loss": 0.7652, + "step": 10379 + }, + { + "epoch": 0.5334566759173605, + "grad_norm": 0.7374210357666016, + "learning_rate": 4.7013985695711854e-06, + "loss": 0.6618, + "step": 10380 + }, + { + "epoch": 0.5335080686607051, + "grad_norm": 1.0290828943252563, + "learning_rate": 4.700567805092083e-06, + "loss": 0.705, + "step": 10381 + }, + { + "epoch": 0.5335594614040498, + "grad_norm": 1.0550882816314697, + "learning_rate": 4.6997370489090135e-06, + "loss": 0.7131, + "step": 10382 + }, + { + "epoch": 0.5336108541473944, + "grad_norm": 0.6664023399353027, + "learning_rate": 4.698906301044988e-06, + "loss": 0.6571, + "step": 10383 + }, + { + "epoch": 0.533662246890739, + "grad_norm": 1.0975451469421387, + "learning_rate": 4.698075561523027e-06, + "loss": 0.8014, + "step": 10384 + }, + { + "epoch": 0.5337136396340837, + "grad_norm": 1.128504991531372, + "learning_rate": 4.697244830366148e-06, + "loss": 0.7923, + "step": 10385 + }, + { + "epoch": 0.5337650323774283, + "grad_norm": 1.0647557973861694, + "learning_rate": 4.696414107597363e-06, + "loss": 0.7446, + "step": 10386 + }, + { + "epoch": 0.533816425120773, + "grad_norm": 1.050640344619751, + "learning_rate": 4.695583393239691e-06, + "loss": 0.6876, + "step": 10387 + }, + { + "epoch": 0.5338678178641176, + "grad_norm": 0.8202245831489563, + "learning_rate": 4.694752687316146e-06, + "loss": 0.6634, + "step": 10388 + }, + { + "epoch": 0.5339192106074623, + "grad_norm": 1.0535153150558472, + "learning_rate": 4.693921989849748e-06, + "loss": 0.7107, + "step": 10389 + }, + { + "epoch": 0.5339706033508068, + "grad_norm": 0.7564883232116699, + "learning_rate": 4.693091300863505e-06, + "loss": 0.6732, + "step": 10390 + }, + { + "epoch": 0.5340219960941515, + "grad_norm": 1.0116223096847534, + "learning_rate": 4.692260620380436e-06, + "loss": 0.6948, + "step": 10391 + }, + { + "epoch": 0.5340733888374961, + "grad_norm": 0.7743096947669983, + "learning_rate": 4.691429948423558e-06, + "loss": 0.669, + "step": 10392 + }, + { + "epoch": 0.5341247815808408, + "grad_norm": 1.0886726379394531, + "learning_rate": 4.6905992850158805e-06, + "loss": 0.7098, + "step": 10393 + }, + { + "epoch": 0.5341761743241854, + "grad_norm": 1.0373095273971558, + "learning_rate": 4.68976863018042e-06, + "loss": 0.7369, + "step": 10394 + }, + { + "epoch": 0.53422756706753, + "grad_norm": 0.8782917261123657, + "learning_rate": 4.688937983940191e-06, + "loss": 0.6345, + "step": 10395 + }, + { + "epoch": 0.5342789598108747, + "grad_norm": 1.0406688451766968, + "learning_rate": 4.688107346318206e-06, + "loss": 0.675, + "step": 10396 + }, + { + "epoch": 0.5343303525542193, + "grad_norm": 1.1739256381988525, + "learning_rate": 4.68727671733748e-06, + "loss": 0.7606, + "step": 10397 + }, + { + "epoch": 0.534381745297564, + "grad_norm": 0.7114465236663818, + "learning_rate": 4.6864460970210254e-06, + "loss": 0.6392, + "step": 10398 + }, + { + "epoch": 0.5344331380409086, + "grad_norm": 1.0907105207443237, + "learning_rate": 4.685615485391856e-06, + "loss": 0.7979, + "step": 10399 + }, + { + "epoch": 0.5344845307842533, + "grad_norm": 0.8148313760757446, + "learning_rate": 4.684784882472983e-06, + "loss": 0.6983, + "step": 10400 + }, + { + "epoch": 0.5345359235275979, + "grad_norm": 1.0856566429138184, + "learning_rate": 4.683954288287422e-06, + "loss": 0.7031, + "step": 10401 + }, + { + "epoch": 0.5345873162709426, + "grad_norm": 1.0352681875228882, + "learning_rate": 4.6831237028581815e-06, + "loss": 0.6817, + "step": 10402 + }, + { + "epoch": 0.5346387090142872, + "grad_norm": 1.0405305624008179, + "learning_rate": 4.682293126208275e-06, + "loss": 0.77, + "step": 10403 + }, + { + "epoch": 0.5346901017576319, + "grad_norm": 1.0445367097854614, + "learning_rate": 4.681462558360716e-06, + "loss": 0.7129, + "step": 10404 + }, + { + "epoch": 0.5347414945009764, + "grad_norm": 1.0563595294952393, + "learning_rate": 4.680631999338517e-06, + "loss": 0.7885, + "step": 10405 + }, + { + "epoch": 0.5347928872443211, + "grad_norm": 1.0365999937057495, + "learning_rate": 4.6798014491646845e-06, + "loss": 0.7276, + "step": 10406 + }, + { + "epoch": 0.5348442799876657, + "grad_norm": 1.0962785482406616, + "learning_rate": 4.678970907862232e-06, + "loss": 0.766, + "step": 10407 + }, + { + "epoch": 0.5348956727310104, + "grad_norm": 1.048727035522461, + "learning_rate": 4.678140375454173e-06, + "loss": 0.7257, + "step": 10408 + }, + { + "epoch": 0.534947065474355, + "grad_norm": 1.0664485692977905, + "learning_rate": 4.677309851963516e-06, + "loss": 0.7797, + "step": 10409 + }, + { + "epoch": 0.5349984582176996, + "grad_norm": 0.8399204015731812, + "learning_rate": 4.676479337413269e-06, + "loss": 0.6421, + "step": 10410 + }, + { + "epoch": 0.5350498509610443, + "grad_norm": 1.209705114364624, + "learning_rate": 4.675648831826448e-06, + "loss": 0.8031, + "step": 10411 + }, + { + "epoch": 0.5351012437043889, + "grad_norm": 1.0256298780441284, + "learning_rate": 4.674818335226056e-06, + "loss": 0.6778, + "step": 10412 + }, + { + "epoch": 0.5351526364477336, + "grad_norm": 1.0213309526443481, + "learning_rate": 4.673987847635107e-06, + "loss": 0.7648, + "step": 10413 + }, + { + "epoch": 0.5352040291910782, + "grad_norm": 0.896597146987915, + "learning_rate": 4.673157369076611e-06, + "loss": 0.643, + "step": 10414 + }, + { + "epoch": 0.5352554219344229, + "grad_norm": 1.0874860286712646, + "learning_rate": 4.672326899573574e-06, + "loss": 0.7262, + "step": 10415 + }, + { + "epoch": 0.5353068146777675, + "grad_norm": 1.0786807537078857, + "learning_rate": 4.671496439149006e-06, + "loss": 0.741, + "step": 10416 + }, + { + "epoch": 0.5353582074211122, + "grad_norm": 1.1190440654754639, + "learning_rate": 4.670665987825917e-06, + "loss": 0.7801, + "step": 10417 + }, + { + "epoch": 0.5354096001644568, + "grad_norm": 1.0380901098251343, + "learning_rate": 4.669835545627313e-06, + "loss": 0.7344, + "step": 10418 + }, + { + "epoch": 0.5354609929078015, + "grad_norm": 1.1197763681411743, + "learning_rate": 4.669005112576204e-06, + "loss": 0.6671, + "step": 10419 + }, + { + "epoch": 0.535512385651146, + "grad_norm": 1.0650744438171387, + "learning_rate": 4.668174688695597e-06, + "loss": 0.7408, + "step": 10420 + }, + { + "epoch": 0.5355637783944907, + "grad_norm": 1.0149824619293213, + "learning_rate": 4.667344274008503e-06, + "loss": 0.7432, + "step": 10421 + }, + { + "epoch": 0.5356151711378353, + "grad_norm": 1.055267572402954, + "learning_rate": 4.6665138685379236e-06, + "loss": 0.7176, + "step": 10422 + }, + { + "epoch": 0.53566656388118, + "grad_norm": 1.0338681936264038, + "learning_rate": 4.665683472306869e-06, + "loss": 0.8122, + "step": 10423 + }, + { + "epoch": 0.5357179566245246, + "grad_norm": 1.0707435607910156, + "learning_rate": 4.664853085338348e-06, + "loss": 0.7333, + "step": 10424 + }, + { + "epoch": 0.5357693493678692, + "grad_norm": 1.0820441246032715, + "learning_rate": 4.664022707655362e-06, + "loss": 0.7082, + "step": 10425 + }, + { + "epoch": 0.5358207421112139, + "grad_norm": 1.1046950817108154, + "learning_rate": 4.663192339280921e-06, + "loss": 0.7334, + "step": 10426 + }, + { + "epoch": 0.5358721348545585, + "grad_norm": 0.6828492879867554, + "learning_rate": 4.662361980238031e-06, + "loss": 0.6424, + "step": 10427 + }, + { + "epoch": 0.5359235275979032, + "grad_norm": 1.030519723892212, + "learning_rate": 4.661531630549696e-06, + "loss": 0.6895, + "step": 10428 + }, + { + "epoch": 0.5359749203412478, + "grad_norm": 1.0477770566940308, + "learning_rate": 4.660701290238923e-06, + "loss": 0.6857, + "step": 10429 + }, + { + "epoch": 0.5360263130845925, + "grad_norm": 1.0766668319702148, + "learning_rate": 4.659870959328719e-06, + "loss": 0.7591, + "step": 10430 + }, + { + "epoch": 0.5360777058279371, + "grad_norm": 1.157882809638977, + "learning_rate": 4.659040637842085e-06, + "loss": 0.7332, + "step": 10431 + }, + { + "epoch": 0.5361290985712818, + "grad_norm": 1.0176080465316772, + "learning_rate": 4.658210325802028e-06, + "loss": 0.7363, + "step": 10432 + }, + { + "epoch": 0.5361804913146264, + "grad_norm": 1.0790412425994873, + "learning_rate": 4.657380023231556e-06, + "loss": 0.7267, + "step": 10433 + }, + { + "epoch": 0.5362318840579711, + "grad_norm": 1.1094882488250732, + "learning_rate": 4.656549730153665e-06, + "loss": 0.7133, + "step": 10434 + }, + { + "epoch": 0.5362832768013156, + "grad_norm": 1.136902928352356, + "learning_rate": 4.655719446591364e-06, + "loss": 0.7407, + "step": 10435 + }, + { + "epoch": 0.5363346695446602, + "grad_norm": 1.0712867975234985, + "learning_rate": 4.654889172567658e-06, + "loss": 0.7605, + "step": 10436 + }, + { + "epoch": 0.5363860622880049, + "grad_norm": 1.0519243478775024, + "learning_rate": 4.654058908105548e-06, + "loss": 0.6973, + "step": 10437 + }, + { + "epoch": 0.5364374550313495, + "grad_norm": 1.1641972064971924, + "learning_rate": 4.6532286532280385e-06, + "loss": 0.7291, + "step": 10438 + }, + { + "epoch": 0.5364888477746942, + "grad_norm": 1.0016021728515625, + "learning_rate": 4.652398407958132e-06, + "loss": 0.6809, + "step": 10439 + }, + { + "epoch": 0.5365402405180388, + "grad_norm": 1.022668480873108, + "learning_rate": 4.651568172318832e-06, + "loss": 0.7341, + "step": 10440 + }, + { + "epoch": 0.5365916332613835, + "grad_norm": 1.185032606124878, + "learning_rate": 4.650737946333139e-06, + "loss": 0.7567, + "step": 10441 + }, + { + "epoch": 0.5366430260047281, + "grad_norm": 1.1219017505645752, + "learning_rate": 4.649907730024056e-06, + "loss": 0.7512, + "step": 10442 + }, + { + "epoch": 0.5366944187480728, + "grad_norm": 1.0617390871047974, + "learning_rate": 4.649077523414587e-06, + "loss": 0.7386, + "step": 10443 + }, + { + "epoch": 0.5367458114914174, + "grad_norm": 1.080819010734558, + "learning_rate": 4.648247326527729e-06, + "loss": 0.7609, + "step": 10444 + }, + { + "epoch": 0.5367972042347621, + "grad_norm": 1.0205076932907104, + "learning_rate": 4.647417139386487e-06, + "loss": 0.7139, + "step": 10445 + }, + { + "epoch": 0.5368485969781067, + "grad_norm": 1.0862131118774414, + "learning_rate": 4.646586962013861e-06, + "loss": 0.7192, + "step": 10446 + }, + { + "epoch": 0.5368999897214514, + "grad_norm": 1.1342219114303589, + "learning_rate": 4.645756794432852e-06, + "loss": 0.772, + "step": 10447 + }, + { + "epoch": 0.536951382464796, + "grad_norm": 1.0725730657577515, + "learning_rate": 4.644926636666461e-06, + "loss": 0.6864, + "step": 10448 + }, + { + "epoch": 0.5370027752081407, + "grad_norm": 1.062612771987915, + "learning_rate": 4.644096488737687e-06, + "loss": 0.7076, + "step": 10449 + }, + { + "epoch": 0.5370541679514852, + "grad_norm": 1.066922903060913, + "learning_rate": 4.64326635066953e-06, + "loss": 0.7139, + "step": 10450 + }, + { + "epoch": 0.5371055606948298, + "grad_norm": 1.0566930770874023, + "learning_rate": 4.64243622248499e-06, + "loss": 0.765, + "step": 10451 + }, + { + "epoch": 0.5371569534381745, + "grad_norm": 1.1445295810699463, + "learning_rate": 4.641606104207067e-06, + "loss": 0.7379, + "step": 10452 + }, + { + "epoch": 0.5372083461815191, + "grad_norm": 1.1217474937438965, + "learning_rate": 4.640775995858762e-06, + "loss": 0.6979, + "step": 10453 + }, + { + "epoch": 0.5372597389248638, + "grad_norm": 1.1136583089828491, + "learning_rate": 4.63994589746307e-06, + "loss": 0.7596, + "step": 10454 + }, + { + "epoch": 0.5373111316682084, + "grad_norm": 1.1378822326660156, + "learning_rate": 4.63911580904299e-06, + "loss": 0.7225, + "step": 10455 + }, + { + "epoch": 0.5373625244115531, + "grad_norm": 1.1026440858840942, + "learning_rate": 4.638285730621525e-06, + "loss": 0.6889, + "step": 10456 + }, + { + "epoch": 0.5374139171548977, + "grad_norm": 1.0863673686981201, + "learning_rate": 4.637455662221666e-06, + "loss": 0.7586, + "step": 10457 + }, + { + "epoch": 0.5374653098982424, + "grad_norm": 1.096804141998291, + "learning_rate": 4.636625603866417e-06, + "loss": 0.7488, + "step": 10458 + }, + { + "epoch": 0.537516702641587, + "grad_norm": 1.1065821647644043, + "learning_rate": 4.635795555578772e-06, + "loss": 0.701, + "step": 10459 + }, + { + "epoch": 0.5375680953849317, + "grad_norm": 1.1217025518417358, + "learning_rate": 4.634965517381729e-06, + "loss": 0.7053, + "step": 10460 + }, + { + "epoch": 0.5376194881282763, + "grad_norm": 1.0425738096237183, + "learning_rate": 4.634135489298284e-06, + "loss": 0.699, + "step": 10461 + }, + { + "epoch": 0.537670880871621, + "grad_norm": 1.0263397693634033, + "learning_rate": 4.6333054713514384e-06, + "loss": 0.7232, + "step": 10462 + }, + { + "epoch": 0.5377222736149656, + "grad_norm": 1.1022967100143433, + "learning_rate": 4.632475463564182e-06, + "loss": 0.698, + "step": 10463 + }, + { + "epoch": 0.5377736663583103, + "grad_norm": 0.9844361543655396, + "learning_rate": 4.631645465959514e-06, + "loss": 0.6991, + "step": 10464 + }, + { + "epoch": 0.5378250591016549, + "grad_norm": 1.078611135482788, + "learning_rate": 4.630815478560432e-06, + "loss": 0.7363, + "step": 10465 + }, + { + "epoch": 0.5378764518449994, + "grad_norm": 1.1333802938461304, + "learning_rate": 4.629985501389928e-06, + "loss": 0.7787, + "step": 10466 + }, + { + "epoch": 0.5379278445883441, + "grad_norm": 1.0465352535247803, + "learning_rate": 4.629155534470997e-06, + "loss": 0.6898, + "step": 10467 + }, + { + "epoch": 0.5379792373316887, + "grad_norm": 1.1531829833984375, + "learning_rate": 4.628325577826638e-06, + "loss": 0.7499, + "step": 10468 + }, + { + "epoch": 0.5380306300750334, + "grad_norm": 1.084073543548584, + "learning_rate": 4.627495631479843e-06, + "loss": 0.7204, + "step": 10469 + }, + { + "epoch": 0.538082022818378, + "grad_norm": 1.0935046672821045, + "learning_rate": 4.626665695453606e-06, + "loss": 0.6936, + "step": 10470 + }, + { + "epoch": 0.5381334155617227, + "grad_norm": 1.030538558959961, + "learning_rate": 4.625835769770923e-06, + "loss": 0.7003, + "step": 10471 + }, + { + "epoch": 0.5381848083050673, + "grad_norm": 1.0489671230316162, + "learning_rate": 4.625005854454788e-06, + "loss": 0.7521, + "step": 10472 + }, + { + "epoch": 0.538236201048412, + "grad_norm": 1.0836923122406006, + "learning_rate": 4.6241759495281916e-06, + "loss": 0.7137, + "step": 10473 + }, + { + "epoch": 0.5382875937917566, + "grad_norm": 1.1485892534255981, + "learning_rate": 4.623346055014128e-06, + "loss": 0.7482, + "step": 10474 + }, + { + "epoch": 0.5383389865351013, + "grad_norm": 1.1004501581192017, + "learning_rate": 4.622516170935593e-06, + "loss": 0.721, + "step": 10475 + }, + { + "epoch": 0.5383903792784459, + "grad_norm": 1.1818166971206665, + "learning_rate": 4.6216862973155756e-06, + "loss": 0.708, + "step": 10476 + }, + { + "epoch": 0.5384417720217906, + "grad_norm": 0.7811095714569092, + "learning_rate": 4.62085643417707e-06, + "loss": 0.6724, + "step": 10477 + }, + { + "epoch": 0.5384931647651352, + "grad_norm": 1.1231791973114014, + "learning_rate": 4.620026581543068e-06, + "loss": 0.75, + "step": 10478 + }, + { + "epoch": 0.5385445575084798, + "grad_norm": 1.0823745727539062, + "learning_rate": 4.619196739436561e-06, + "loss": 0.7248, + "step": 10479 + }, + { + "epoch": 0.5385959502518245, + "grad_norm": 1.0404369831085205, + "learning_rate": 4.618366907880541e-06, + "loss": 0.7332, + "step": 10480 + }, + { + "epoch": 0.538647342995169, + "grad_norm": 1.0908387899398804, + "learning_rate": 4.617537086898e-06, + "loss": 0.6897, + "step": 10481 + }, + { + "epoch": 0.5386987357385137, + "grad_norm": 1.050247073173523, + "learning_rate": 4.616707276511927e-06, + "loss": 0.6876, + "step": 10482 + }, + { + "epoch": 0.5387501284818583, + "grad_norm": 1.2456105947494507, + "learning_rate": 4.615877476745313e-06, + "loss": 0.7396, + "step": 10483 + }, + { + "epoch": 0.538801521225203, + "grad_norm": 1.017628788948059, + "learning_rate": 4.615047687621149e-06, + "loss": 0.7094, + "step": 10484 + }, + { + "epoch": 0.5388529139685476, + "grad_norm": 1.0478549003601074, + "learning_rate": 4.614217909162428e-06, + "loss": 0.7151, + "step": 10485 + }, + { + "epoch": 0.5389043067118923, + "grad_norm": 1.0526535511016846, + "learning_rate": 4.613388141392134e-06, + "loss": 0.7315, + "step": 10486 + }, + { + "epoch": 0.5389556994552369, + "grad_norm": 1.0699610710144043, + "learning_rate": 4.612558384333259e-06, + "loss": 0.6779, + "step": 10487 + }, + { + "epoch": 0.5390070921985816, + "grad_norm": 0.713754415512085, + "learning_rate": 4.611728638008792e-06, + "loss": 0.6433, + "step": 10488 + }, + { + "epoch": 0.5390584849419262, + "grad_norm": 0.6681198477745056, + "learning_rate": 4.610898902441723e-06, + "loss": 0.6715, + "step": 10489 + }, + { + "epoch": 0.5391098776852709, + "grad_norm": 1.075799822807312, + "learning_rate": 4.61006917765504e-06, + "loss": 0.699, + "step": 10490 + }, + { + "epoch": 0.5391612704286155, + "grad_norm": 1.0532586574554443, + "learning_rate": 4.609239463671732e-06, + "loss": 0.7361, + "step": 10491 + }, + { + "epoch": 0.5392126631719601, + "grad_norm": 1.116230845451355, + "learning_rate": 4.608409760514784e-06, + "loss": 0.6918, + "step": 10492 + }, + { + "epoch": 0.5392640559153048, + "grad_norm": 1.04694402217865, + "learning_rate": 4.607580068207185e-06, + "loss": 0.7287, + "step": 10493 + }, + { + "epoch": 0.5393154486586494, + "grad_norm": 1.0567365884780884, + "learning_rate": 4.606750386771926e-06, + "loss": 0.7328, + "step": 10494 + }, + { + "epoch": 0.5393668414019941, + "grad_norm": 1.0793025493621826, + "learning_rate": 4.605920716231988e-06, + "loss": 0.6959, + "step": 10495 + }, + { + "epoch": 0.5394182341453386, + "grad_norm": 1.0591254234313965, + "learning_rate": 4.605091056610361e-06, + "loss": 0.7167, + "step": 10496 + }, + { + "epoch": 0.5394696268886833, + "grad_norm": 1.094467282295227, + "learning_rate": 4.6042614079300316e-06, + "loss": 0.7391, + "step": 10497 + }, + { + "epoch": 0.5395210196320279, + "grad_norm": 1.1201578378677368, + "learning_rate": 4.6034317702139865e-06, + "loss": 0.7154, + "step": 10498 + }, + { + "epoch": 0.5395724123753726, + "grad_norm": 1.0707334280014038, + "learning_rate": 4.60260214348521e-06, + "loss": 0.7424, + "step": 10499 + }, + { + "epoch": 0.5396238051187172, + "grad_norm": 1.2802926301956177, + "learning_rate": 4.601772527766687e-06, + "loss": 0.7252, + "step": 10500 + }, + { + "epoch": 0.5396751978620619, + "grad_norm": 0.8095126748085022, + "learning_rate": 4.600942923081405e-06, + "loss": 0.6501, + "step": 10501 + }, + { + "epoch": 0.5397265906054065, + "grad_norm": 1.0807127952575684, + "learning_rate": 4.600113329452347e-06, + "loss": 0.7407, + "step": 10502 + }, + { + "epoch": 0.5397779833487512, + "grad_norm": 1.0562797784805298, + "learning_rate": 4.599283746902498e-06, + "loss": 0.7015, + "step": 10503 + }, + { + "epoch": 0.5398293760920958, + "grad_norm": 1.066268801689148, + "learning_rate": 4.598454175454845e-06, + "loss": 0.7697, + "step": 10504 + }, + { + "epoch": 0.5398807688354404, + "grad_norm": 1.0533407926559448, + "learning_rate": 4.597624615132367e-06, + "loss": 0.7127, + "step": 10505 + }, + { + "epoch": 0.5399321615787851, + "grad_norm": 0.6977935433387756, + "learning_rate": 4.596795065958053e-06, + "loss": 0.6423, + "step": 10506 + }, + { + "epoch": 0.5399835543221297, + "grad_norm": 0.7883232831954956, + "learning_rate": 4.595965527954883e-06, + "loss": 0.6595, + "step": 10507 + }, + { + "epoch": 0.5400349470654744, + "grad_norm": 1.1504698991775513, + "learning_rate": 4.59513600114584e-06, + "loss": 0.6964, + "step": 10508 + }, + { + "epoch": 0.540086339808819, + "grad_norm": 1.0998119115829468, + "learning_rate": 4.594306485553907e-06, + "loss": 0.7463, + "step": 10509 + }, + { + "epoch": 0.5401377325521637, + "grad_norm": 1.0223561525344849, + "learning_rate": 4.5934769812020694e-06, + "loss": 0.694, + "step": 10510 + }, + { + "epoch": 0.5401891252955082, + "grad_norm": 1.0451717376708984, + "learning_rate": 4.592647488113304e-06, + "loss": 0.7294, + "step": 10511 + }, + { + "epoch": 0.5402405180388529, + "grad_norm": 1.0131254196166992, + "learning_rate": 4.591818006310597e-06, + "loss": 0.7333, + "step": 10512 + }, + { + "epoch": 0.5402919107821975, + "grad_norm": 1.0558116436004639, + "learning_rate": 4.590988535816929e-06, + "loss": 0.7775, + "step": 10513 + }, + { + "epoch": 0.5403433035255422, + "grad_norm": 1.0845632553100586, + "learning_rate": 4.5901590766552815e-06, + "loss": 0.7219, + "step": 10514 + }, + { + "epoch": 0.5403946962688868, + "grad_norm": 1.0390470027923584, + "learning_rate": 4.589329628848634e-06, + "loss": 0.6995, + "step": 10515 + }, + { + "epoch": 0.5404460890122315, + "grad_norm": 1.0534014701843262, + "learning_rate": 4.588500192419967e-06, + "loss": 0.6922, + "step": 10516 + }, + { + "epoch": 0.5404974817555761, + "grad_norm": 0.8329530954360962, + "learning_rate": 4.587670767392264e-06, + "loss": 0.7116, + "step": 10517 + }, + { + "epoch": 0.5405488744989208, + "grad_norm": 1.092004656791687, + "learning_rate": 4.5868413537885e-06, + "loss": 0.7492, + "step": 10518 + }, + { + "epoch": 0.5406002672422654, + "grad_norm": 0.7006005048751831, + "learning_rate": 4.586011951631657e-06, + "loss": 0.6398, + "step": 10519 + }, + { + "epoch": 0.54065165998561, + "grad_norm": 1.5161834955215454, + "learning_rate": 4.585182560944715e-06, + "loss": 0.6713, + "step": 10520 + }, + { + "epoch": 0.5407030527289547, + "grad_norm": 0.6968628168106079, + "learning_rate": 4.5843531817506524e-06, + "loss": 0.6589, + "step": 10521 + }, + { + "epoch": 0.5407544454722993, + "grad_norm": 1.0662380456924438, + "learning_rate": 4.583523814072447e-06, + "loss": 0.773, + "step": 10522 + }, + { + "epoch": 0.540805838215644, + "grad_norm": 0.7012926340103149, + "learning_rate": 4.58269445793308e-06, + "loss": 0.6212, + "step": 10523 + }, + { + "epoch": 0.5408572309589886, + "grad_norm": 1.1276826858520508, + "learning_rate": 4.581865113355526e-06, + "loss": 0.6581, + "step": 10524 + }, + { + "epoch": 0.5409086237023333, + "grad_norm": 1.0877556800842285, + "learning_rate": 4.581035780362764e-06, + "loss": 0.6767, + "step": 10525 + }, + { + "epoch": 0.5409600164456778, + "grad_norm": 1.0615692138671875, + "learning_rate": 4.580206458977774e-06, + "loss": 0.7316, + "step": 10526 + }, + { + "epoch": 0.5410114091890225, + "grad_norm": 1.1296268701553345, + "learning_rate": 4.579377149223528e-06, + "loss": 0.7644, + "step": 10527 + }, + { + "epoch": 0.5410628019323671, + "grad_norm": 1.3686052560806274, + "learning_rate": 4.578547851123006e-06, + "loss": 0.7025, + "step": 10528 + }, + { + "epoch": 0.5411141946757118, + "grad_norm": 1.0313549041748047, + "learning_rate": 4.577718564699183e-06, + "loss": 0.6916, + "step": 10529 + }, + { + "epoch": 0.5411655874190564, + "grad_norm": 12.718899726867676, + "learning_rate": 4.576889289975037e-06, + "loss": 0.7649, + "step": 10530 + }, + { + "epoch": 0.541216980162401, + "grad_norm": 0.8783236742019653, + "learning_rate": 4.576060026973542e-06, + "loss": 0.6953, + "step": 10531 + }, + { + "epoch": 0.5412683729057457, + "grad_norm": 0.7239128351211548, + "learning_rate": 4.575230775717674e-06, + "loss": 0.6934, + "step": 10532 + }, + { + "epoch": 0.5413197656490903, + "grad_norm": 0.9787454009056091, + "learning_rate": 4.574401536230411e-06, + "loss": 0.705, + "step": 10533 + }, + { + "epoch": 0.541371158392435, + "grad_norm": 0.8964075446128845, + "learning_rate": 4.573572308534723e-06, + "loss": 0.6879, + "step": 10534 + }, + { + "epoch": 0.5414225511357796, + "grad_norm": 1.1308505535125732, + "learning_rate": 4.572743092653586e-06, + "loss": 0.7013, + "step": 10535 + }, + { + "epoch": 0.5414739438791243, + "grad_norm": 0.7774497270584106, + "learning_rate": 4.571913888609977e-06, + "loss": 0.6941, + "step": 10536 + }, + { + "epoch": 0.5415253366224689, + "grad_norm": 1.0057257413864136, + "learning_rate": 4.571084696426865e-06, + "loss": 0.6928, + "step": 10537 + }, + { + "epoch": 0.5415767293658136, + "grad_norm": 0.7181929349899292, + "learning_rate": 4.570255516127227e-06, + "loss": 0.6553, + "step": 10538 + }, + { + "epoch": 0.5416281221091582, + "grad_norm": 1.1059304475784302, + "learning_rate": 4.569426347734035e-06, + "loss": 0.739, + "step": 10539 + }, + { + "epoch": 0.5416795148525029, + "grad_norm": 1.07304048538208, + "learning_rate": 4.568597191270261e-06, + "loss": 0.6747, + "step": 10540 + }, + { + "epoch": 0.5417309075958475, + "grad_norm": 1.0642088651657104, + "learning_rate": 4.56776804675888e-06, + "loss": 0.7592, + "step": 10541 + }, + { + "epoch": 0.5417823003391921, + "grad_norm": 1.059482455253601, + "learning_rate": 4.5669389142228634e-06, + "loss": 0.7299, + "step": 10542 + }, + { + "epoch": 0.5418336930825367, + "grad_norm": 1.064316987991333, + "learning_rate": 4.5661097936851815e-06, + "loss": 0.7224, + "step": 10543 + }, + { + "epoch": 0.5418850858258814, + "grad_norm": 1.140000581741333, + "learning_rate": 4.5652806851688055e-06, + "loss": 0.7243, + "step": 10544 + }, + { + "epoch": 0.541936478569226, + "grad_norm": 1.0654624700546265, + "learning_rate": 4.5644515886967086e-06, + "loss": 0.7317, + "step": 10545 + }, + { + "epoch": 0.5419878713125706, + "grad_norm": 1.0676462650299072, + "learning_rate": 4.563622504291862e-06, + "loss": 0.7393, + "step": 10546 + }, + { + "epoch": 0.5420392640559153, + "grad_norm": 1.0956944227218628, + "learning_rate": 4.562793431977234e-06, + "loss": 0.7294, + "step": 10547 + }, + { + "epoch": 0.5420906567992599, + "grad_norm": 1.0937620401382446, + "learning_rate": 4.561964371775795e-06, + "loss": 0.7673, + "step": 10548 + }, + { + "epoch": 0.5421420495426046, + "grad_norm": 1.1047075986862183, + "learning_rate": 4.561135323710516e-06, + "loss": 0.7172, + "step": 10549 + }, + { + "epoch": 0.5421934422859492, + "grad_norm": 1.049577236175537, + "learning_rate": 4.560306287804366e-06, + "loss": 0.7404, + "step": 10550 + }, + { + "epoch": 0.5422448350292939, + "grad_norm": 0.7473107576370239, + "learning_rate": 4.5594772640803155e-06, + "loss": 0.6402, + "step": 10551 + }, + { + "epoch": 0.5422962277726385, + "grad_norm": 1.1504651308059692, + "learning_rate": 4.558648252561332e-06, + "loss": 0.7579, + "step": 10552 + }, + { + "epoch": 0.5423476205159832, + "grad_norm": 1.0655155181884766, + "learning_rate": 4.557819253270382e-06, + "loss": 0.7109, + "step": 10553 + }, + { + "epoch": 0.5423990132593278, + "grad_norm": 0.7419631481170654, + "learning_rate": 4.556990266230437e-06, + "loss": 0.6603, + "step": 10554 + }, + { + "epoch": 0.5424504060026725, + "grad_norm": 1.0555150508880615, + "learning_rate": 4.5561612914644646e-06, + "loss": 0.7067, + "step": 10555 + }, + { + "epoch": 0.5425017987460171, + "grad_norm": 0.7106221914291382, + "learning_rate": 4.5553323289954295e-06, + "loss": 0.6676, + "step": 10556 + }, + { + "epoch": 0.5425531914893617, + "grad_norm": 1.0670651197433472, + "learning_rate": 4.5545033788463015e-06, + "loss": 0.7154, + "step": 10557 + }, + { + "epoch": 0.5426045842327063, + "grad_norm": 1.2171638011932373, + "learning_rate": 4.553674441040047e-06, + "loss": 0.7477, + "step": 10558 + }, + { + "epoch": 0.542655976976051, + "grad_norm": 1.1148573160171509, + "learning_rate": 4.552845515599631e-06, + "loss": 0.7902, + "step": 10559 + }, + { + "epoch": 0.5427073697193956, + "grad_norm": 1.125661849975586, + "learning_rate": 4.55201660254802e-06, + "loss": 0.6832, + "step": 10560 + }, + { + "epoch": 0.5427587624627402, + "grad_norm": 0.7490405440330505, + "learning_rate": 4.551187701908179e-06, + "loss": 0.6739, + "step": 10561 + }, + { + "epoch": 0.5428101552060849, + "grad_norm": 1.113082766532898, + "learning_rate": 4.550358813703077e-06, + "loss": 0.761, + "step": 10562 + }, + { + "epoch": 0.5428615479494295, + "grad_norm": 1.028760313987732, + "learning_rate": 4.549529937955676e-06, + "loss": 0.707, + "step": 10563 + }, + { + "epoch": 0.5429129406927742, + "grad_norm": 1.1576337814331055, + "learning_rate": 4.548701074688941e-06, + "loss": 0.7206, + "step": 10564 + }, + { + "epoch": 0.5429643334361188, + "grad_norm": 1.0066628456115723, + "learning_rate": 4.547872223925838e-06, + "loss": 0.6641, + "step": 10565 + }, + { + "epoch": 0.5430157261794635, + "grad_norm": 1.1145844459533691, + "learning_rate": 4.547043385689329e-06, + "loss": 0.7278, + "step": 10566 + }, + { + "epoch": 0.5430671189228081, + "grad_norm": 0.7140308022499084, + "learning_rate": 4.546214560002378e-06, + "loss": 0.6166, + "step": 10567 + }, + { + "epoch": 0.5431185116661528, + "grad_norm": 1.1552926301956177, + "learning_rate": 4.545385746887951e-06, + "loss": 0.7096, + "step": 10568 + }, + { + "epoch": 0.5431699044094974, + "grad_norm": 1.1254518032073975, + "learning_rate": 4.5445569463690065e-06, + "loss": 0.7284, + "step": 10569 + }, + { + "epoch": 0.5432212971528421, + "grad_norm": 1.1393241882324219, + "learning_rate": 4.54372815846851e-06, + "loss": 0.7422, + "step": 10570 + }, + { + "epoch": 0.5432726898961867, + "grad_norm": 0.8248307108879089, + "learning_rate": 4.542899383209424e-06, + "loss": 0.6839, + "step": 10571 + }, + { + "epoch": 0.5433240826395312, + "grad_norm": 1.0755550861358643, + "learning_rate": 4.5420706206147094e-06, + "loss": 0.7685, + "step": 10572 + }, + { + "epoch": 0.5433754753828759, + "grad_norm": 1.0673508644104004, + "learning_rate": 4.541241870707327e-06, + "loss": 0.7436, + "step": 10573 + }, + { + "epoch": 0.5434268681262205, + "grad_norm": 1.147019863128662, + "learning_rate": 4.540413133510242e-06, + "loss": 0.7231, + "step": 10574 + }, + { + "epoch": 0.5434782608695652, + "grad_norm": 1.0845848321914673, + "learning_rate": 4.53958440904641e-06, + "loss": 0.7445, + "step": 10575 + }, + { + "epoch": 0.5435296536129098, + "grad_norm": 1.2527992725372314, + "learning_rate": 4.538755697338795e-06, + "loss": 0.8395, + "step": 10576 + }, + { + "epoch": 0.5435810463562545, + "grad_norm": 1.0688257217407227, + "learning_rate": 4.5379269984103545e-06, + "loss": 0.7293, + "step": 10577 + }, + { + "epoch": 0.5436324390995991, + "grad_norm": 1.0712201595306396, + "learning_rate": 4.537098312284053e-06, + "loss": 0.727, + "step": 10578 + }, + { + "epoch": 0.5436838318429438, + "grad_norm": 1.0583878755569458, + "learning_rate": 4.536269638982846e-06, + "loss": 0.6695, + "step": 10579 + }, + { + "epoch": 0.5437352245862884, + "grad_norm": 1.0115925073623657, + "learning_rate": 4.535440978529692e-06, + "loss": 0.7264, + "step": 10580 + }, + { + "epoch": 0.5437866173296331, + "grad_norm": 1.0664162635803223, + "learning_rate": 4.534612330947553e-06, + "loss": 0.7213, + "step": 10581 + }, + { + "epoch": 0.5438380100729777, + "grad_norm": 1.0657728910446167, + "learning_rate": 4.533783696259384e-06, + "loss": 0.7413, + "step": 10582 + }, + { + "epoch": 0.5438894028163224, + "grad_norm": 1.066246747970581, + "learning_rate": 4.532955074488146e-06, + "loss": 0.7057, + "step": 10583 + }, + { + "epoch": 0.543940795559667, + "grad_norm": 1.015142798423767, + "learning_rate": 4.532126465656798e-06, + "loss": 0.71, + "step": 10584 + }, + { + "epoch": 0.5439921883030117, + "grad_norm": 1.0686124563217163, + "learning_rate": 4.531297869788291e-06, + "loss": 0.7174, + "step": 10585 + }, + { + "epoch": 0.5440435810463563, + "grad_norm": 1.0495305061340332, + "learning_rate": 4.5304692869055865e-06, + "loss": 0.7284, + "step": 10586 + }, + { + "epoch": 0.5440949737897008, + "grad_norm": 1.096439242362976, + "learning_rate": 4.5296407170316435e-06, + "loss": 0.6757, + "step": 10587 + }, + { + "epoch": 0.5441463665330455, + "grad_norm": 1.0084543228149414, + "learning_rate": 4.528812160189412e-06, + "loss": 0.6978, + "step": 10588 + }, + { + "epoch": 0.5441977592763901, + "grad_norm": 0.786526083946228, + "learning_rate": 4.527983616401851e-06, + "loss": 0.6527, + "step": 10589 + }, + { + "epoch": 0.5442491520197348, + "grad_norm": 0.7137128114700317, + "learning_rate": 4.5271550856919176e-06, + "loss": 0.6503, + "step": 10590 + }, + { + "epoch": 0.5443005447630794, + "grad_norm": 1.0851043462753296, + "learning_rate": 4.526326568082564e-06, + "loss": 0.7444, + "step": 10591 + }, + { + "epoch": 0.5443519375064241, + "grad_norm": 1.0560194253921509, + "learning_rate": 4.525498063596747e-06, + "loss": 0.7349, + "step": 10592 + }, + { + "epoch": 0.5444033302497687, + "grad_norm": 1.1076501607894897, + "learning_rate": 4.52466957225742e-06, + "loss": 0.7046, + "step": 10593 + }, + { + "epoch": 0.5444547229931134, + "grad_norm": 1.0844630002975464, + "learning_rate": 4.52384109408754e-06, + "loss": 0.7474, + "step": 10594 + }, + { + "epoch": 0.544506115736458, + "grad_norm": 1.1662654876708984, + "learning_rate": 4.523012629110056e-06, + "loss": 0.6892, + "step": 10595 + }, + { + "epoch": 0.5445575084798027, + "grad_norm": 1.0642030239105225, + "learning_rate": 4.522184177347923e-06, + "loss": 0.7001, + "step": 10596 + }, + { + "epoch": 0.5446089012231473, + "grad_norm": 1.1036603450775146, + "learning_rate": 4.5213557388240985e-06, + "loss": 0.7524, + "step": 10597 + }, + { + "epoch": 0.544660293966492, + "grad_norm": 1.138271689414978, + "learning_rate": 4.520527313561528e-06, + "loss": 0.7396, + "step": 10598 + }, + { + "epoch": 0.5447116867098366, + "grad_norm": 1.0434203147888184, + "learning_rate": 4.519698901583167e-06, + "loss": 0.7112, + "step": 10599 + }, + { + "epoch": 0.5447630794531813, + "grad_norm": 1.0473898649215698, + "learning_rate": 4.518870502911968e-06, + "loss": 0.7371, + "step": 10600 + }, + { + "epoch": 0.5448144721965259, + "grad_norm": 1.0573638677597046, + "learning_rate": 4.5180421175708816e-06, + "loss": 0.6618, + "step": 10601 + }, + { + "epoch": 0.5448658649398704, + "grad_norm": 1.0986590385437012, + "learning_rate": 4.51721374558286e-06, + "loss": 0.7602, + "step": 10602 + }, + { + "epoch": 0.5449172576832151, + "grad_norm": 0.8032775521278381, + "learning_rate": 4.516385386970853e-06, + "loss": 0.6567, + "step": 10603 + }, + { + "epoch": 0.5449686504265597, + "grad_norm": 1.1532690525054932, + "learning_rate": 4.515557041757811e-06, + "loss": 0.6528, + "step": 10604 + }, + { + "epoch": 0.5450200431699044, + "grad_norm": 1.1178356409072876, + "learning_rate": 4.514728709966682e-06, + "loss": 0.7511, + "step": 10605 + }, + { + "epoch": 0.545071435913249, + "grad_norm": 1.1708389520645142, + "learning_rate": 4.51390039162042e-06, + "loss": 0.7331, + "step": 10606 + }, + { + "epoch": 0.5451228286565937, + "grad_norm": 1.0684096813201904, + "learning_rate": 4.5130720867419735e-06, + "loss": 0.7115, + "step": 10607 + }, + { + "epoch": 0.5451742213999383, + "grad_norm": 0.6783982515335083, + "learning_rate": 4.512243795354288e-06, + "loss": 0.6582, + "step": 10608 + }, + { + "epoch": 0.545225614143283, + "grad_norm": 1.0814152956008911, + "learning_rate": 4.511415517480313e-06, + "loss": 0.7748, + "step": 10609 + }, + { + "epoch": 0.5452770068866276, + "grad_norm": 1.036346673965454, + "learning_rate": 4.510587253143002e-06, + "loss": 0.7264, + "step": 10610 + }, + { + "epoch": 0.5453283996299723, + "grad_norm": 1.080217957496643, + "learning_rate": 4.5097590023652945e-06, + "loss": 0.7752, + "step": 10611 + }, + { + "epoch": 0.5453797923733169, + "grad_norm": 1.0431418418884277, + "learning_rate": 4.508930765170143e-06, + "loss": 0.6888, + "step": 10612 + }, + { + "epoch": 0.5454311851166616, + "grad_norm": 0.7632734775543213, + "learning_rate": 4.5081025415804944e-06, + "loss": 0.6571, + "step": 10613 + }, + { + "epoch": 0.5454825778600062, + "grad_norm": 1.1495115756988525, + "learning_rate": 4.507274331619293e-06, + "loss": 0.7203, + "step": 10614 + }, + { + "epoch": 0.5455339706033508, + "grad_norm": 1.0857124328613281, + "learning_rate": 4.506446135309487e-06, + "loss": 0.7413, + "step": 10615 + }, + { + "epoch": 0.5455853633466955, + "grad_norm": 1.0376615524291992, + "learning_rate": 4.5056179526740235e-06, + "loss": 0.69, + "step": 10616 + }, + { + "epoch": 0.54563675609004, + "grad_norm": 1.0994131565093994, + "learning_rate": 4.504789783735844e-06, + "loss": 0.7257, + "step": 10617 + }, + { + "epoch": 0.5456881488333847, + "grad_norm": 0.972282350063324, + "learning_rate": 4.503961628517897e-06, + "loss": 0.7818, + "step": 10618 + }, + { + "epoch": 0.5457395415767293, + "grad_norm": 1.0303558111190796, + "learning_rate": 4.503133487043128e-06, + "loss": 0.678, + "step": 10619 + }, + { + "epoch": 0.545790934320074, + "grad_norm": 1.0216469764709473, + "learning_rate": 4.502305359334477e-06, + "loss": 0.7433, + "step": 10620 + }, + { + "epoch": 0.5458423270634186, + "grad_norm": 1.0060765743255615, + "learning_rate": 4.501477245414891e-06, + "loss": 0.7064, + "step": 10621 + }, + { + "epoch": 0.5458937198067633, + "grad_norm": 1.0621914863586426, + "learning_rate": 4.5006491453073125e-06, + "loss": 0.7021, + "step": 10622 + }, + { + "epoch": 0.5459451125501079, + "grad_norm": 1.2142199277877808, + "learning_rate": 4.499821059034688e-06, + "loss": 0.7415, + "step": 10623 + }, + { + "epoch": 0.5459965052934526, + "grad_norm": 1.0349504947662354, + "learning_rate": 4.4989929866199555e-06, + "loss": 0.7128, + "step": 10624 + }, + { + "epoch": 0.5460478980367972, + "grad_norm": 1.0740280151367188, + "learning_rate": 4.4981649280860605e-06, + "loss": 0.7782, + "step": 10625 + }, + { + "epoch": 0.5460992907801419, + "grad_norm": 1.088388442993164, + "learning_rate": 4.497336883455945e-06, + "loss": 0.7515, + "step": 10626 + }, + { + "epoch": 0.5461506835234865, + "grad_norm": 0.8848722577095032, + "learning_rate": 4.49650885275255e-06, + "loss": 0.692, + "step": 10627 + }, + { + "epoch": 0.5462020762668311, + "grad_norm": 1.0878801345825195, + "learning_rate": 4.495680835998815e-06, + "loss": 0.7177, + "step": 10628 + }, + { + "epoch": 0.5462534690101758, + "grad_norm": 1.0452316999435425, + "learning_rate": 4.494852833217687e-06, + "loss": 0.689, + "step": 10629 + }, + { + "epoch": 0.5463048617535204, + "grad_norm": 1.0819647312164307, + "learning_rate": 4.494024844432099e-06, + "loss": 0.663, + "step": 10630 + }, + { + "epoch": 0.5463562544968651, + "grad_norm": 1.043624997138977, + "learning_rate": 4.493196869664995e-06, + "loss": 0.7192, + "step": 10631 + }, + { + "epoch": 0.5464076472402097, + "grad_norm": 1.0298439264297485, + "learning_rate": 4.492368908939316e-06, + "loss": 0.7128, + "step": 10632 + }, + { + "epoch": 0.5464590399835543, + "grad_norm": 1.0974156856536865, + "learning_rate": 4.491540962277997e-06, + "loss": 0.7095, + "step": 10633 + }, + { + "epoch": 0.5465104327268989, + "grad_norm": 1.0415605306625366, + "learning_rate": 4.490713029703981e-06, + "loss": 0.7722, + "step": 10634 + }, + { + "epoch": 0.5465618254702436, + "grad_norm": 1.1123496294021606, + "learning_rate": 4.4898851112402066e-06, + "loss": 0.839, + "step": 10635 + }, + { + "epoch": 0.5466132182135882, + "grad_norm": 1.152778148651123, + "learning_rate": 4.489057206909609e-06, + "loss": 0.7462, + "step": 10636 + }, + { + "epoch": 0.5466646109569329, + "grad_norm": 1.079625129699707, + "learning_rate": 4.488229316735127e-06, + "loss": 0.7172, + "step": 10637 + }, + { + "epoch": 0.5467160037002775, + "grad_norm": 1.1122641563415527, + "learning_rate": 4.487401440739699e-06, + "loss": 0.7413, + "step": 10638 + }, + { + "epoch": 0.5467673964436222, + "grad_norm": 1.0967655181884766, + "learning_rate": 4.486573578946264e-06, + "loss": 0.7317, + "step": 10639 + }, + { + "epoch": 0.5468187891869668, + "grad_norm": 1.1058483123779297, + "learning_rate": 4.485745731377754e-06, + "loss": 0.7572, + "step": 10640 + }, + { + "epoch": 0.5468701819303114, + "grad_norm": 1.0113062858581543, + "learning_rate": 4.484917898057108e-06, + "loss": 0.7636, + "step": 10641 + }, + { + "epoch": 0.5469215746736561, + "grad_norm": 1.076033353805542, + "learning_rate": 4.4840900790072626e-06, + "loss": 0.7558, + "step": 10642 + }, + { + "epoch": 0.5469729674170007, + "grad_norm": 0.997805118560791, + "learning_rate": 4.4832622742511504e-06, + "loss": 0.675, + "step": 10643 + }, + { + "epoch": 0.5470243601603454, + "grad_norm": 1.115605354309082, + "learning_rate": 4.482434483811708e-06, + "loss": 0.6423, + "step": 10644 + }, + { + "epoch": 0.54707575290369, + "grad_norm": 1.0271010398864746, + "learning_rate": 4.481606707711873e-06, + "loss": 0.7278, + "step": 10645 + }, + { + "epoch": 0.5471271456470347, + "grad_norm": 1.1900086402893066, + "learning_rate": 4.480778945974575e-06, + "loss": 0.7568, + "step": 10646 + }, + { + "epoch": 0.5471785383903793, + "grad_norm": 0.9834240078926086, + "learning_rate": 4.4799511986227485e-06, + "loss": 0.712, + "step": 10647 + }, + { + "epoch": 0.5472299311337239, + "grad_norm": 1.0759097337722778, + "learning_rate": 4.4791234656793305e-06, + "loss": 0.7529, + "step": 10648 + }, + { + "epoch": 0.5472813238770685, + "grad_norm": 0.7260752320289612, + "learning_rate": 4.47829574716725e-06, + "loss": 0.6678, + "step": 10649 + }, + { + "epoch": 0.5473327166204132, + "grad_norm": 1.131471037864685, + "learning_rate": 4.477468043109441e-06, + "loss": 0.7585, + "step": 10650 + }, + { + "epoch": 0.5473841093637578, + "grad_norm": 1.0357584953308105, + "learning_rate": 4.476640353528838e-06, + "loss": 0.7842, + "step": 10651 + }, + { + "epoch": 0.5474355021071025, + "grad_norm": 1.116679310798645, + "learning_rate": 4.475812678448369e-06, + "loss": 0.69, + "step": 10652 + }, + { + "epoch": 0.5474868948504471, + "grad_norm": 0.7543790936470032, + "learning_rate": 4.47498501789097e-06, + "loss": 0.6624, + "step": 10653 + }, + { + "epoch": 0.5475382875937918, + "grad_norm": 1.011736273765564, + "learning_rate": 4.474157371879567e-06, + "loss": 0.6862, + "step": 10654 + }, + { + "epoch": 0.5475896803371364, + "grad_norm": 0.7670367956161499, + "learning_rate": 4.473329740437096e-06, + "loss": 0.6899, + "step": 10655 + }, + { + "epoch": 0.547641073080481, + "grad_norm": 1.1023555994033813, + "learning_rate": 4.4725021235864815e-06, + "loss": 0.7279, + "step": 10656 + }, + { + "epoch": 0.5476924658238257, + "grad_norm": 0.6802577376365662, + "learning_rate": 4.471674521350657e-06, + "loss": 0.6448, + "step": 10657 + }, + { + "epoch": 0.5477438585671703, + "grad_norm": 0.7912315726280212, + "learning_rate": 4.4708469337525535e-06, + "loss": 0.6582, + "step": 10658 + }, + { + "epoch": 0.547795251310515, + "grad_norm": 0.9843100309371948, + "learning_rate": 4.470019360815095e-06, + "loss": 0.669, + "step": 10659 + }, + { + "epoch": 0.5478466440538596, + "grad_norm": 1.1052237749099731, + "learning_rate": 4.469191802561214e-06, + "loss": 0.7583, + "step": 10660 + }, + { + "epoch": 0.5478980367972043, + "grad_norm": 1.303665041923523, + "learning_rate": 4.468364259013839e-06, + "loss": 0.6935, + "step": 10661 + }, + { + "epoch": 0.5479494295405489, + "grad_norm": 1.0570329427719116, + "learning_rate": 4.467536730195895e-06, + "loss": 0.7541, + "step": 10662 + }, + { + "epoch": 0.5480008222838935, + "grad_norm": 1.0624116659164429, + "learning_rate": 4.4667092161303104e-06, + "loss": 0.6755, + "step": 10663 + }, + { + "epoch": 0.5480522150272381, + "grad_norm": 1.0217534303665161, + "learning_rate": 4.465881716840014e-06, + "loss": 0.7119, + "step": 10664 + }, + { + "epoch": 0.5481036077705828, + "grad_norm": 1.056186556816101, + "learning_rate": 4.46505423234793e-06, + "loss": 0.714, + "step": 10665 + }, + { + "epoch": 0.5481550005139274, + "grad_norm": 1.056942343711853, + "learning_rate": 4.464226762676985e-06, + "loss": 0.7198, + "step": 10666 + }, + { + "epoch": 0.548206393257272, + "grad_norm": 1.0565210580825806, + "learning_rate": 4.463399307850108e-06, + "loss": 0.7017, + "step": 10667 + }, + { + "epoch": 0.5482577860006167, + "grad_norm": 1.080053687095642, + "learning_rate": 4.46257186789022e-06, + "loss": 0.7424, + "step": 10668 + }, + { + "epoch": 0.5483091787439613, + "grad_norm": 0.7140719890594482, + "learning_rate": 4.4617444428202464e-06, + "loss": 0.6641, + "step": 10669 + }, + { + "epoch": 0.548360571487306, + "grad_norm": 1.0117716789245605, + "learning_rate": 4.460917032663114e-06, + "loss": 0.7004, + "step": 10670 + }, + { + "epoch": 0.5484119642306506, + "grad_norm": 1.001701831817627, + "learning_rate": 4.460089637441748e-06, + "loss": 0.6945, + "step": 10671 + }, + { + "epoch": 0.5484633569739953, + "grad_norm": 1.0643306970596313, + "learning_rate": 4.459262257179068e-06, + "loss": 0.7039, + "step": 10672 + }, + { + "epoch": 0.5485147497173399, + "grad_norm": 1.0727132558822632, + "learning_rate": 4.458434891897999e-06, + "loss": 0.7899, + "step": 10673 + }, + { + "epoch": 0.5485661424606846, + "grad_norm": 0.7940266132354736, + "learning_rate": 4.457607541621466e-06, + "loss": 0.6834, + "step": 10674 + }, + { + "epoch": 0.5486175352040292, + "grad_norm": 1.0557655096054077, + "learning_rate": 4.456780206372388e-06, + "loss": 0.7664, + "step": 10675 + }, + { + "epoch": 0.5486689279473739, + "grad_norm": 0.9803177118301392, + "learning_rate": 4.455952886173689e-06, + "loss": 0.7003, + "step": 10676 + }, + { + "epoch": 0.5487203206907185, + "grad_norm": 1.1900302171707153, + "learning_rate": 4.455125581048293e-06, + "loss": 0.7249, + "step": 10677 + }, + { + "epoch": 0.5487717134340631, + "grad_norm": 0.683269202709198, + "learning_rate": 4.4542982910191155e-06, + "loss": 0.6918, + "step": 10678 + }, + { + "epoch": 0.5488231061774077, + "grad_norm": 1.1276438236236572, + "learning_rate": 4.4534710161090814e-06, + "loss": 0.709, + "step": 10679 + }, + { + "epoch": 0.5488744989207524, + "grad_norm": 1.0157021284103394, + "learning_rate": 4.4526437563411114e-06, + "loss": 0.6645, + "step": 10680 + }, + { + "epoch": 0.548925891664097, + "grad_norm": 1.107812523841858, + "learning_rate": 4.451816511738122e-06, + "loss": 0.7325, + "step": 10681 + }, + { + "epoch": 0.5489772844074416, + "grad_norm": 1.0462309122085571, + "learning_rate": 4.450989282323035e-06, + "loss": 0.7089, + "step": 10682 + }, + { + "epoch": 0.5490286771507863, + "grad_norm": 0.6964511871337891, + "learning_rate": 4.45016206811877e-06, + "loss": 0.6092, + "step": 10683 + }, + { + "epoch": 0.5490800698941309, + "grad_norm": 1.1735060214996338, + "learning_rate": 4.449334869148244e-06, + "loss": 0.759, + "step": 10684 + }, + { + "epoch": 0.5491314626374756, + "grad_norm": 1.0112004280090332, + "learning_rate": 4.448507685434378e-06, + "loss": 0.7302, + "step": 10685 + }, + { + "epoch": 0.5491828553808202, + "grad_norm": 1.0510773658752441, + "learning_rate": 4.4476805170000866e-06, + "loss": 0.7159, + "step": 10686 + }, + { + "epoch": 0.5492342481241649, + "grad_norm": 1.1503227949142456, + "learning_rate": 4.446853363868291e-06, + "loss": 0.7093, + "step": 10687 + }, + { + "epoch": 0.5492856408675095, + "grad_norm": 1.0337512493133545, + "learning_rate": 4.446026226061903e-06, + "loss": 0.7178, + "step": 10688 + }, + { + "epoch": 0.5493370336108542, + "grad_norm": 1.1303627490997314, + "learning_rate": 4.445199103603844e-06, + "loss": 0.7502, + "step": 10689 + }, + { + "epoch": 0.5493884263541988, + "grad_norm": 1.1557204723358154, + "learning_rate": 4.4443719965170296e-06, + "loss": 0.6793, + "step": 10690 + }, + { + "epoch": 0.5494398190975435, + "grad_norm": 1.1321524381637573, + "learning_rate": 4.4435449048243724e-06, + "loss": 0.7108, + "step": 10691 + }, + { + "epoch": 0.5494912118408881, + "grad_norm": 0.7390400171279907, + "learning_rate": 4.442717828548789e-06, + "loss": 0.6329, + "step": 10692 + }, + { + "epoch": 0.5495426045842327, + "grad_norm": 1.178370475769043, + "learning_rate": 4.4418907677131965e-06, + "loss": 0.7761, + "step": 10693 + }, + { + "epoch": 0.5495939973275773, + "grad_norm": 1.013204574584961, + "learning_rate": 4.4410637223405064e-06, + "loss": 0.6744, + "step": 10694 + }, + { + "epoch": 0.549645390070922, + "grad_norm": 1.0659483671188354, + "learning_rate": 4.4402366924536335e-06, + "loss": 0.7176, + "step": 10695 + }, + { + "epoch": 0.5496967828142666, + "grad_norm": 0.8338388204574585, + "learning_rate": 4.439409678075494e-06, + "loss": 0.6632, + "step": 10696 + }, + { + "epoch": 0.5497481755576112, + "grad_norm": 1.110988736152649, + "learning_rate": 4.438582679228997e-06, + "loss": 0.717, + "step": 10697 + }, + { + "epoch": 0.5497995683009559, + "grad_norm": 1.0613336563110352, + "learning_rate": 4.437755695937057e-06, + "loss": 0.7201, + "step": 10698 + }, + { + "epoch": 0.5498509610443005, + "grad_norm": 1.077130675315857, + "learning_rate": 4.436928728222589e-06, + "loss": 0.6953, + "step": 10699 + }, + { + "epoch": 0.5499023537876452, + "grad_norm": 0.9596279859542847, + "learning_rate": 4.4361017761085e-06, + "loss": 0.6801, + "step": 10700 + }, + { + "epoch": 0.5499537465309898, + "grad_norm": 0.7890768647193909, + "learning_rate": 4.4352748396177034e-06, + "loss": 0.6904, + "step": 10701 + }, + { + "epoch": 0.5500051392743345, + "grad_norm": 1.0956487655639648, + "learning_rate": 4.434447918773111e-06, + "loss": 0.7589, + "step": 10702 + }, + { + "epoch": 0.5500565320176791, + "grad_norm": 1.0249221324920654, + "learning_rate": 4.4336210135976335e-06, + "loss": 0.698, + "step": 10703 + }, + { + "epoch": 0.5501079247610238, + "grad_norm": 0.8186615705490112, + "learning_rate": 4.4327941241141805e-06, + "loss": 0.7116, + "step": 10704 + }, + { + "epoch": 0.5501593175043684, + "grad_norm": 0.7293616533279419, + "learning_rate": 4.43196725034566e-06, + "loss": 0.6848, + "step": 10705 + }, + { + "epoch": 0.5502107102477131, + "grad_norm": 0.7331995368003845, + "learning_rate": 4.431140392314985e-06, + "loss": 0.6834, + "step": 10706 + }, + { + "epoch": 0.5502621029910577, + "grad_norm": 0.7464468479156494, + "learning_rate": 4.4303135500450604e-06, + "loss": 0.6372, + "step": 10707 + }, + { + "epoch": 0.5503134957344022, + "grad_norm": 1.0478460788726807, + "learning_rate": 4.429486723558797e-06, + "loss": 0.7135, + "step": 10708 + }, + { + "epoch": 0.5503648884777469, + "grad_norm": 1.0936639308929443, + "learning_rate": 4.428659912879104e-06, + "loss": 0.7173, + "step": 10709 + }, + { + "epoch": 0.5504162812210915, + "grad_norm": 1.091056227684021, + "learning_rate": 4.427833118028885e-06, + "loss": 0.7534, + "step": 10710 + }, + { + "epoch": 0.5504676739644362, + "grad_norm": 1.055601954460144, + "learning_rate": 4.427006339031048e-06, + "loss": 0.7471, + "step": 10711 + }, + { + "epoch": 0.5505190667077808, + "grad_norm": 0.7356933355331421, + "learning_rate": 4.426179575908504e-06, + "loss": 0.6325, + "step": 10712 + }, + { + "epoch": 0.5505704594511255, + "grad_norm": 1.018147349357605, + "learning_rate": 4.425352828684154e-06, + "loss": 0.7804, + "step": 10713 + }, + { + "epoch": 0.5506218521944701, + "grad_norm": 1.048281192779541, + "learning_rate": 4.4245260973809055e-06, + "loss": 0.7115, + "step": 10714 + }, + { + "epoch": 0.5506732449378148, + "grad_norm": 0.9898094534873962, + "learning_rate": 4.423699382021665e-06, + "loss": 0.6796, + "step": 10715 + }, + { + "epoch": 0.5507246376811594, + "grad_norm": 0.7895371317863464, + "learning_rate": 4.422872682629335e-06, + "loss": 0.6959, + "step": 10716 + }, + { + "epoch": 0.5507760304245041, + "grad_norm": 1.1945199966430664, + "learning_rate": 4.422045999226821e-06, + "loss": 0.6928, + "step": 10717 + }, + { + "epoch": 0.5508274231678487, + "grad_norm": 0.7323358654975891, + "learning_rate": 4.421219331837027e-06, + "loss": 0.6849, + "step": 10718 + }, + { + "epoch": 0.5508788159111934, + "grad_norm": 1.1355222463607788, + "learning_rate": 4.42039268048286e-06, + "loss": 0.7553, + "step": 10719 + }, + { + "epoch": 0.550930208654538, + "grad_norm": 1.0402915477752686, + "learning_rate": 4.4195660451872175e-06, + "loss": 0.7106, + "step": 10720 + }, + { + "epoch": 0.5509816013978827, + "grad_norm": 1.1234004497528076, + "learning_rate": 4.4187394259730044e-06, + "loss": 0.6836, + "step": 10721 + }, + { + "epoch": 0.5510329941412273, + "grad_norm": 1.0415055751800537, + "learning_rate": 4.4179128228631255e-06, + "loss": 0.6834, + "step": 10722 + }, + { + "epoch": 0.551084386884572, + "grad_norm": 1.0618886947631836, + "learning_rate": 4.417086235880478e-06, + "loss": 0.7072, + "step": 10723 + }, + { + "epoch": 0.5511357796279165, + "grad_norm": 11.103466033935547, + "learning_rate": 4.416259665047965e-06, + "loss": 0.6903, + "step": 10724 + }, + { + "epoch": 0.5511871723712611, + "grad_norm": 1.159605860710144, + "learning_rate": 4.415433110388489e-06, + "loss": 0.7612, + "step": 10725 + }, + { + "epoch": 0.5512385651146058, + "grad_norm": 1.057541012763977, + "learning_rate": 4.414606571924948e-06, + "loss": 0.7863, + "step": 10726 + }, + { + "epoch": 0.5512899578579504, + "grad_norm": 1.0768178701400757, + "learning_rate": 4.413780049680243e-06, + "loss": 0.6737, + "step": 10727 + }, + { + "epoch": 0.5513413506012951, + "grad_norm": 1.142947793006897, + "learning_rate": 4.412953543677276e-06, + "loss": 0.7368, + "step": 10728 + }, + { + "epoch": 0.5513927433446397, + "grad_norm": 1.0690091848373413, + "learning_rate": 4.412127053938941e-06, + "loss": 0.7309, + "step": 10729 + }, + { + "epoch": 0.5514441360879844, + "grad_norm": 1.0647177696228027, + "learning_rate": 4.4113005804881404e-06, + "loss": 0.7471, + "step": 10730 + }, + { + "epoch": 0.551495528831329, + "grad_norm": 1.0899895429611206, + "learning_rate": 4.410474123347771e-06, + "loss": 0.7327, + "step": 10731 + }, + { + "epoch": 0.5515469215746737, + "grad_norm": 1.1271027326583862, + "learning_rate": 4.409647682540732e-06, + "loss": 0.7575, + "step": 10732 + }, + { + "epoch": 0.5515983143180183, + "grad_norm": 1.1342661380767822, + "learning_rate": 4.408821258089918e-06, + "loss": 0.8081, + "step": 10733 + }, + { + "epoch": 0.551649707061363, + "grad_norm": 1.0233356952667236, + "learning_rate": 4.407994850018228e-06, + "loss": 0.7154, + "step": 10734 + }, + { + "epoch": 0.5517010998047076, + "grad_norm": 0.7880732417106628, + "learning_rate": 4.407168458348558e-06, + "loss": 0.6555, + "step": 10735 + }, + { + "epoch": 0.5517524925480523, + "grad_norm": 1.5140451192855835, + "learning_rate": 4.406342083103802e-06, + "loss": 0.7895, + "step": 10736 + }, + { + "epoch": 0.5518038852913969, + "grad_norm": 1.0206595659255981, + "learning_rate": 4.405515724306858e-06, + "loss": 0.7037, + "step": 10737 + }, + { + "epoch": 0.5518552780347415, + "grad_norm": 1.0632920265197754, + "learning_rate": 4.404689381980622e-06, + "loss": 0.7316, + "step": 10738 + }, + { + "epoch": 0.5519066707780861, + "grad_norm": 1.092975378036499, + "learning_rate": 4.4038630561479836e-06, + "loss": 0.671, + "step": 10739 + }, + { + "epoch": 0.5519580635214307, + "grad_norm": 1.203824520111084, + "learning_rate": 4.40303674683184e-06, + "loss": 0.714, + "step": 10740 + }, + { + "epoch": 0.5520094562647754, + "grad_norm": 0.7501099109649658, + "learning_rate": 4.402210454055088e-06, + "loss": 0.6612, + "step": 10741 + }, + { + "epoch": 0.55206084900812, + "grad_norm": 1.0705556869506836, + "learning_rate": 4.401384177840614e-06, + "loss": 0.7577, + "step": 10742 + }, + { + "epoch": 0.5521122417514647, + "grad_norm": 1.0825904607772827, + "learning_rate": 4.400557918211314e-06, + "loss": 0.7695, + "step": 10743 + }, + { + "epoch": 0.5521636344948093, + "grad_norm": 0.8202421069145203, + "learning_rate": 4.399731675190081e-06, + "loss": 0.6976, + "step": 10744 + }, + { + "epoch": 0.552215027238154, + "grad_norm": 1.0230708122253418, + "learning_rate": 4.398905448799806e-06, + "loss": 0.7278, + "step": 10745 + }, + { + "epoch": 0.5522664199814986, + "grad_norm": 1.0411584377288818, + "learning_rate": 4.398079239063381e-06, + "loss": 0.6549, + "step": 10746 + }, + { + "epoch": 0.5523178127248433, + "grad_norm": 1.065542459487915, + "learning_rate": 4.397253046003696e-06, + "loss": 0.7506, + "step": 10747 + }, + { + "epoch": 0.5523692054681879, + "grad_norm": 1.110318660736084, + "learning_rate": 4.3964268696436425e-06, + "loss": 0.7507, + "step": 10748 + }, + { + "epoch": 0.5524205982115326, + "grad_norm": 1.082706332206726, + "learning_rate": 4.395600710006108e-06, + "loss": 0.7402, + "step": 10749 + }, + { + "epoch": 0.5524719909548772, + "grad_norm": 1.0900524854660034, + "learning_rate": 4.394774567113985e-06, + "loss": 0.7147, + "step": 10750 + }, + { + "epoch": 0.5525233836982218, + "grad_norm": 1.077059030532837, + "learning_rate": 4.393948440990162e-06, + "loss": 0.7555, + "step": 10751 + }, + { + "epoch": 0.5525747764415665, + "grad_norm": 1.0487898588180542, + "learning_rate": 4.393122331657525e-06, + "loss": 0.6919, + "step": 10752 + }, + { + "epoch": 0.5526261691849111, + "grad_norm": 1.1181272268295288, + "learning_rate": 4.392296239138963e-06, + "loss": 0.726, + "step": 10753 + }, + { + "epoch": 0.5526775619282557, + "grad_norm": 1.0952658653259277, + "learning_rate": 4.391470163457366e-06, + "loss": 0.7212, + "step": 10754 + }, + { + "epoch": 0.5527289546716003, + "grad_norm": 1.0867129564285278, + "learning_rate": 4.390644104635619e-06, + "loss": 0.7362, + "step": 10755 + }, + { + "epoch": 0.552780347414945, + "grad_norm": 1.0549699068069458, + "learning_rate": 4.389818062696609e-06, + "loss": 0.7506, + "step": 10756 + }, + { + "epoch": 0.5528317401582896, + "grad_norm": 1.1104376316070557, + "learning_rate": 4.388992037663223e-06, + "loss": 0.8272, + "step": 10757 + }, + { + "epoch": 0.5528831329016343, + "grad_norm": 1.1957920789718628, + "learning_rate": 4.388166029558346e-06, + "loss": 0.685, + "step": 10758 + }, + { + "epoch": 0.5529345256449789, + "grad_norm": 1.0681620836257935, + "learning_rate": 4.387340038404862e-06, + "loss": 0.7239, + "step": 10759 + }, + { + "epoch": 0.5529859183883236, + "grad_norm": 1.079546570777893, + "learning_rate": 4.38651406422566e-06, + "loss": 0.68, + "step": 10760 + }, + { + "epoch": 0.5530373111316682, + "grad_norm": 1.0699070692062378, + "learning_rate": 4.38568810704362e-06, + "loss": 0.7349, + "step": 10761 + }, + { + "epoch": 0.5530887038750129, + "grad_norm": 1.018877625465393, + "learning_rate": 4.384862166881627e-06, + "loss": 0.7077, + "step": 10762 + }, + { + "epoch": 0.5531400966183575, + "grad_norm": 1.120166540145874, + "learning_rate": 4.384036243762564e-06, + "loss": 0.7722, + "step": 10763 + }, + { + "epoch": 0.5531914893617021, + "grad_norm": 1.0192737579345703, + "learning_rate": 4.383210337709318e-06, + "loss": 0.7532, + "step": 10764 + }, + { + "epoch": 0.5532428821050468, + "grad_norm": 1.0826292037963867, + "learning_rate": 4.382384448744766e-06, + "loss": 0.7475, + "step": 10765 + }, + { + "epoch": 0.5532942748483914, + "grad_norm": 0.9969246983528137, + "learning_rate": 4.381558576891791e-06, + "loss": 0.6797, + "step": 10766 + }, + { + "epoch": 0.5533456675917361, + "grad_norm": 1.2029521465301514, + "learning_rate": 4.380732722173278e-06, + "loss": 0.7745, + "step": 10767 + }, + { + "epoch": 0.5533970603350807, + "grad_norm": 1.0097633600234985, + "learning_rate": 4.379906884612104e-06, + "loss": 0.6923, + "step": 10768 + }, + { + "epoch": 0.5534484530784253, + "grad_norm": 0.749476432800293, + "learning_rate": 4.379081064231151e-06, + "loss": 0.6584, + "step": 10769 + }, + { + "epoch": 0.5534998458217699, + "grad_norm": 0.9810890555381775, + "learning_rate": 4.378255261053302e-06, + "loss": 0.7225, + "step": 10770 + }, + { + "epoch": 0.5535512385651146, + "grad_norm": 1.0234977006912231, + "learning_rate": 4.377429475101432e-06, + "loss": 0.7264, + "step": 10771 + }, + { + "epoch": 0.5536026313084592, + "grad_norm": 1.089060664176941, + "learning_rate": 4.376603706398422e-06, + "loss": 0.7191, + "step": 10772 + }, + { + "epoch": 0.5536540240518039, + "grad_norm": 1.1215370893478394, + "learning_rate": 4.375777954967152e-06, + "loss": 0.7021, + "step": 10773 + }, + { + "epoch": 0.5537054167951485, + "grad_norm": 1.0508062839508057, + "learning_rate": 4.3749522208304975e-06, + "loss": 0.7321, + "step": 10774 + }, + { + "epoch": 0.5537568095384932, + "grad_norm": 1.063835859298706, + "learning_rate": 4.374126504011338e-06, + "loss": 0.7057, + "step": 10775 + }, + { + "epoch": 0.5538082022818378, + "grad_norm": 1.1080663204193115, + "learning_rate": 4.373300804532551e-06, + "loss": 0.7151, + "step": 10776 + }, + { + "epoch": 0.5538595950251824, + "grad_norm": 1.084398865699768, + "learning_rate": 4.372475122417011e-06, + "loss": 0.6901, + "step": 10777 + }, + { + "epoch": 0.5539109877685271, + "grad_norm": 0.7795893549919128, + "learning_rate": 4.371649457687596e-06, + "loss": 0.6656, + "step": 10778 + }, + { + "epoch": 0.5539623805118717, + "grad_norm": 1.1194603443145752, + "learning_rate": 4.370823810367181e-06, + "loss": 0.7441, + "step": 10779 + }, + { + "epoch": 0.5540137732552164, + "grad_norm": 1.0783047676086426, + "learning_rate": 4.369998180478644e-06, + "loss": 0.7096, + "step": 10780 + }, + { + "epoch": 0.554065165998561, + "grad_norm": 0.8340365886688232, + "learning_rate": 4.3691725680448555e-06, + "loss": 0.6768, + "step": 10781 + }, + { + "epoch": 0.5541165587419057, + "grad_norm": 0.7244275808334351, + "learning_rate": 4.368346973088692e-06, + "loss": 0.6651, + "step": 10782 + }, + { + "epoch": 0.5541679514852503, + "grad_norm": 1.045332908630371, + "learning_rate": 4.367521395633029e-06, + "loss": 0.6927, + "step": 10783 + }, + { + "epoch": 0.5542193442285949, + "grad_norm": 1.0329526662826538, + "learning_rate": 4.366695835700736e-06, + "loss": 0.6818, + "step": 10784 + }, + { + "epoch": 0.5542707369719395, + "grad_norm": 1.077038288116455, + "learning_rate": 4.3658702933146885e-06, + "loss": 0.7902, + "step": 10785 + }, + { + "epoch": 0.5543221297152842, + "grad_norm": 1.0502935647964478, + "learning_rate": 4.365044768497759e-06, + "loss": 0.7655, + "step": 10786 + }, + { + "epoch": 0.5543735224586288, + "grad_norm": 0.7210720181465149, + "learning_rate": 4.364219261272817e-06, + "loss": 0.6974, + "step": 10787 + }, + { + "epoch": 0.5544249152019735, + "grad_norm": 1.1018387079238892, + "learning_rate": 4.363393771662736e-06, + "loss": 0.7202, + "step": 10788 + }, + { + "epoch": 0.5544763079453181, + "grad_norm": 1.1479874849319458, + "learning_rate": 4.362568299690389e-06, + "loss": 0.7691, + "step": 10789 + }, + { + "epoch": 0.5545277006886628, + "grad_norm": 1.1982343196868896, + "learning_rate": 4.361742845378641e-06, + "loss": 0.7179, + "step": 10790 + }, + { + "epoch": 0.5545790934320074, + "grad_norm": 1.0024508237838745, + "learning_rate": 4.360917408750366e-06, + "loss": 0.6668, + "step": 10791 + }, + { + "epoch": 0.554630486175352, + "grad_norm": 1.507300615310669, + "learning_rate": 4.360091989828434e-06, + "loss": 0.7143, + "step": 10792 + }, + { + "epoch": 0.5546818789186967, + "grad_norm": 1.1222071647644043, + "learning_rate": 4.3592665886357095e-06, + "loss": 0.8018, + "step": 10793 + }, + { + "epoch": 0.5547332716620413, + "grad_norm": 1.0552480220794678, + "learning_rate": 4.358441205195064e-06, + "loss": 0.71, + "step": 10794 + }, + { + "epoch": 0.554784664405386, + "grad_norm": 1.0867812633514404, + "learning_rate": 4.357615839529365e-06, + "loss": 0.7568, + "step": 10795 + }, + { + "epoch": 0.5548360571487306, + "grad_norm": 1.0883057117462158, + "learning_rate": 4.356790491661481e-06, + "loss": 0.7587, + "step": 10796 + }, + { + "epoch": 0.5548874498920753, + "grad_norm": 0.7757565379142761, + "learning_rate": 4.355965161614277e-06, + "loss": 0.6841, + "step": 10797 + }, + { + "epoch": 0.5549388426354199, + "grad_norm": 1.0912585258483887, + "learning_rate": 4.355139849410622e-06, + "loss": 0.7008, + "step": 10798 + }, + { + "epoch": 0.5549902353787646, + "grad_norm": 1.0982303619384766, + "learning_rate": 4.354314555073381e-06, + "loss": 0.7354, + "step": 10799 + }, + { + "epoch": 0.5550416281221091, + "grad_norm": 1.1373717784881592, + "learning_rate": 4.353489278625418e-06, + "loss": 0.7492, + "step": 10800 + }, + { + "epoch": 0.5550930208654538, + "grad_norm": 1.125774621963501, + "learning_rate": 4.3526640200895985e-06, + "loss": 0.7186, + "step": 10801 + }, + { + "epoch": 0.5551444136087984, + "grad_norm": 1.130272388458252, + "learning_rate": 4.35183877948879e-06, + "loss": 0.7403, + "step": 10802 + }, + { + "epoch": 0.555195806352143, + "grad_norm": 1.0017223358154297, + "learning_rate": 4.351013556845851e-06, + "loss": 0.7075, + "step": 10803 + }, + { + "epoch": 0.5552471990954877, + "grad_norm": 1.0309321880340576, + "learning_rate": 4.35018835218365e-06, + "loss": 0.6835, + "step": 10804 + }, + { + "epoch": 0.5552985918388323, + "grad_norm": 1.0943472385406494, + "learning_rate": 4.349363165525048e-06, + "loss": 0.6406, + "step": 10805 + }, + { + "epoch": 0.555349984582177, + "grad_norm": 1.037827968597412, + "learning_rate": 4.348537996892908e-06, + "loss": 0.7708, + "step": 10806 + }, + { + "epoch": 0.5554013773255216, + "grad_norm": 1.009874701499939, + "learning_rate": 4.34771284631009e-06, + "loss": 0.707, + "step": 10807 + }, + { + "epoch": 0.5554527700688663, + "grad_norm": 1.0244871377944946, + "learning_rate": 4.346887713799459e-06, + "loss": 0.6938, + "step": 10808 + }, + { + "epoch": 0.5555041628122109, + "grad_norm": 1.0723369121551514, + "learning_rate": 4.346062599383874e-06, + "loss": 0.6962, + "step": 10809 + }, + { + "epoch": 0.5555555555555556, + "grad_norm": 1.0282995700836182, + "learning_rate": 4.345237503086194e-06, + "loss": 0.7723, + "step": 10810 + }, + { + "epoch": 0.5556069482989002, + "grad_norm": 1.0838866233825684, + "learning_rate": 4.344412424929281e-06, + "loss": 0.7677, + "step": 10811 + }, + { + "epoch": 0.5556583410422449, + "grad_norm": 1.2450435161590576, + "learning_rate": 4.343587364935997e-06, + "loss": 0.8431, + "step": 10812 + }, + { + "epoch": 0.5557097337855895, + "grad_norm": 1.1466076374053955, + "learning_rate": 4.342762323129196e-06, + "loss": 0.7335, + "step": 10813 + }, + { + "epoch": 0.5557611265289342, + "grad_norm": 1.0430716276168823, + "learning_rate": 4.341937299531739e-06, + "loss": 0.7385, + "step": 10814 + }, + { + "epoch": 0.5558125192722787, + "grad_norm": 1.0248312950134277, + "learning_rate": 4.341112294166486e-06, + "loss": 0.7003, + "step": 10815 + }, + { + "epoch": 0.5558639120156234, + "grad_norm": 1.0875623226165771, + "learning_rate": 4.340287307056289e-06, + "loss": 0.6972, + "step": 10816 + }, + { + "epoch": 0.555915304758968, + "grad_norm": 0.7583824992179871, + "learning_rate": 4.339462338224009e-06, + "loss": 0.6836, + "step": 10817 + }, + { + "epoch": 0.5559666975023126, + "grad_norm": 1.0924382209777832, + "learning_rate": 4.338637387692503e-06, + "loss": 0.7458, + "step": 10818 + }, + { + "epoch": 0.5560180902456573, + "grad_norm": 1.034134864807129, + "learning_rate": 4.337812455484625e-06, + "loss": 0.7277, + "step": 10819 + }, + { + "epoch": 0.5560694829890019, + "grad_norm": 0.7286117672920227, + "learning_rate": 4.33698754162323e-06, + "loss": 0.6603, + "step": 10820 + }, + { + "epoch": 0.5561208757323466, + "grad_norm": 1.0440044403076172, + "learning_rate": 4.336162646131177e-06, + "loss": 0.6971, + "step": 10821 + }, + { + "epoch": 0.5561722684756912, + "grad_norm": 1.3210711479187012, + "learning_rate": 4.3353377690313156e-06, + "loss": 0.7244, + "step": 10822 + }, + { + "epoch": 0.5562236612190359, + "grad_norm": 1.0478872060775757, + "learning_rate": 4.334512910346501e-06, + "loss": 0.7499, + "step": 10823 + }, + { + "epoch": 0.5562750539623805, + "grad_norm": 1.0514074563980103, + "learning_rate": 4.33368807009959e-06, + "loss": 0.6824, + "step": 10824 + }, + { + "epoch": 0.5563264467057252, + "grad_norm": 1.0398327112197876, + "learning_rate": 4.33286324831343e-06, + "loss": 0.7399, + "step": 10825 + }, + { + "epoch": 0.5563778394490698, + "grad_norm": 0.7362720370292664, + "learning_rate": 4.332038445010877e-06, + "loss": 0.6637, + "step": 10826 + }, + { + "epoch": 0.5564292321924145, + "grad_norm": 1.0675467252731323, + "learning_rate": 4.331213660214782e-06, + "loss": 0.7453, + "step": 10827 + }, + { + "epoch": 0.5564806249357591, + "grad_norm": 1.0229288339614868, + "learning_rate": 4.330388893947998e-06, + "loss": 0.7025, + "step": 10828 + }, + { + "epoch": 0.5565320176791038, + "grad_norm": 1.016847014427185, + "learning_rate": 4.329564146233373e-06, + "loss": 0.7177, + "step": 10829 + }, + { + "epoch": 0.5565834104224483, + "grad_norm": 1.0456981658935547, + "learning_rate": 4.328739417093759e-06, + "loss": 0.7461, + "step": 10830 + }, + { + "epoch": 0.556634803165793, + "grad_norm": 1.0804519653320312, + "learning_rate": 4.3279147065520074e-06, + "loss": 0.7449, + "step": 10831 + }, + { + "epoch": 0.5566861959091376, + "grad_norm": 1.106640100479126, + "learning_rate": 4.327090014630964e-06, + "loss": 0.7086, + "step": 10832 + }, + { + "epoch": 0.5567375886524822, + "grad_norm": 1.16204833984375, + "learning_rate": 4.3262653413534795e-06, + "loss": 0.7748, + "step": 10833 + }, + { + "epoch": 0.5567889813958269, + "grad_norm": 0.830194354057312, + "learning_rate": 4.325440686742404e-06, + "loss": 0.6554, + "step": 10834 + }, + { + "epoch": 0.5568403741391715, + "grad_norm": 1.0983648300170898, + "learning_rate": 4.324616050820581e-06, + "loss": 0.7072, + "step": 10835 + }, + { + "epoch": 0.5568917668825162, + "grad_norm": 1.1424726247787476, + "learning_rate": 4.3237914336108596e-06, + "loss": 0.7498, + "step": 10836 + }, + { + "epoch": 0.5569431596258608, + "grad_norm": 1.0313241481781006, + "learning_rate": 4.322966835136089e-06, + "loss": 0.6964, + "step": 10837 + }, + { + "epoch": 0.5569945523692055, + "grad_norm": 0.7365595698356628, + "learning_rate": 4.322142255419112e-06, + "loss": 0.6867, + "step": 10838 + }, + { + "epoch": 0.5570459451125501, + "grad_norm": 1.1180793046951294, + "learning_rate": 4.321317694482776e-06, + "loss": 0.7285, + "step": 10839 + }, + { + "epoch": 0.5570973378558948, + "grad_norm": 1.0917288064956665, + "learning_rate": 4.3204931523499255e-06, + "loss": 0.7688, + "step": 10840 + }, + { + "epoch": 0.5571487305992394, + "grad_norm": 1.1243914365768433, + "learning_rate": 4.319668629043408e-06, + "loss": 0.7179, + "step": 10841 + }, + { + "epoch": 0.5572001233425841, + "grad_norm": 1.0081278085708618, + "learning_rate": 4.3188441245860625e-06, + "loss": 0.6624, + "step": 10842 + }, + { + "epoch": 0.5572515160859287, + "grad_norm": 1.0330395698547363, + "learning_rate": 4.318019639000737e-06, + "loss": 0.7453, + "step": 10843 + }, + { + "epoch": 0.5573029088292734, + "grad_norm": 1.016631841659546, + "learning_rate": 4.317195172310273e-06, + "loss": 0.7431, + "step": 10844 + }, + { + "epoch": 0.5573543015726179, + "grad_norm": 1.0297139883041382, + "learning_rate": 4.316370724537512e-06, + "loss": 0.7007, + "step": 10845 + }, + { + "epoch": 0.5574056943159625, + "grad_norm": 0.7857033014297485, + "learning_rate": 4.3155462957052976e-06, + "loss": 0.6847, + "step": 10846 + }, + { + "epoch": 0.5574570870593072, + "grad_norm": 1.0852837562561035, + "learning_rate": 4.3147218858364705e-06, + "loss": 0.6979, + "step": 10847 + }, + { + "epoch": 0.5575084798026518, + "grad_norm": 1.0930466651916504, + "learning_rate": 4.313897494953872e-06, + "loss": 0.6894, + "step": 10848 + }, + { + "epoch": 0.5575598725459965, + "grad_norm": 1.0994106531143188, + "learning_rate": 4.313073123080342e-06, + "loss": 0.7399, + "step": 10849 + }, + { + "epoch": 0.5576112652893411, + "grad_norm": 1.2448499202728271, + "learning_rate": 4.312248770238723e-06, + "loss": 0.7095, + "step": 10850 + }, + { + "epoch": 0.5576626580326858, + "grad_norm": 0.7117018103599548, + "learning_rate": 4.311424436451851e-06, + "loss": 0.7052, + "step": 10851 + }, + { + "epoch": 0.5577140507760304, + "grad_norm": 1.0360850095748901, + "learning_rate": 4.310600121742566e-06, + "loss": 0.6877, + "step": 10852 + }, + { + "epoch": 0.5577654435193751, + "grad_norm": 1.0797063112258911, + "learning_rate": 4.3097758261337075e-06, + "loss": 0.7199, + "step": 10853 + }, + { + "epoch": 0.5578168362627197, + "grad_norm": 1.0390173196792603, + "learning_rate": 4.3089515496481116e-06, + "loss": 0.6873, + "step": 10854 + }, + { + "epoch": 0.5578682290060644, + "grad_norm": 0.8859268426895142, + "learning_rate": 4.308127292308616e-06, + "loss": 0.6546, + "step": 10855 + }, + { + "epoch": 0.557919621749409, + "grad_norm": 0.7278128266334534, + "learning_rate": 4.307303054138058e-06, + "loss": 0.6482, + "step": 10856 + }, + { + "epoch": 0.5579710144927537, + "grad_norm": 1.0542969703674316, + "learning_rate": 4.306478835159276e-06, + "loss": 0.6939, + "step": 10857 + }, + { + "epoch": 0.5580224072360983, + "grad_norm": 1.133650541305542, + "learning_rate": 4.305654635395101e-06, + "loss": 0.773, + "step": 10858 + }, + { + "epoch": 0.558073799979443, + "grad_norm": 1.0271515846252441, + "learning_rate": 4.304830454868372e-06, + "loss": 0.7248, + "step": 10859 + }, + { + "epoch": 0.5581251927227875, + "grad_norm": 1.0311022996902466, + "learning_rate": 4.304006293601922e-06, + "loss": 0.7105, + "step": 10860 + }, + { + "epoch": 0.5581765854661321, + "grad_norm": 0.802905261516571, + "learning_rate": 4.303182151618585e-06, + "loss": 0.6952, + "step": 10861 + }, + { + "epoch": 0.5582279782094768, + "grad_norm": 1.1078637838363647, + "learning_rate": 4.3023580289411945e-06, + "loss": 0.6971, + "step": 10862 + }, + { + "epoch": 0.5582793709528214, + "grad_norm": 1.0535496473312378, + "learning_rate": 4.301533925592586e-06, + "loss": 0.7196, + "step": 10863 + }, + { + "epoch": 0.5583307636961661, + "grad_norm": 1.0529030561447144, + "learning_rate": 4.3007098415955884e-06, + "loss": 0.6926, + "step": 10864 + }, + { + "epoch": 0.5583821564395107, + "grad_norm": 1.0854411125183105, + "learning_rate": 4.299885776973035e-06, + "loss": 0.6672, + "step": 10865 + }, + { + "epoch": 0.5584335491828554, + "grad_norm": 1.0727567672729492, + "learning_rate": 4.29906173174776e-06, + "loss": 0.7279, + "step": 10866 + }, + { + "epoch": 0.5584849419262, + "grad_norm": 1.1993035078048706, + "learning_rate": 4.29823770594259e-06, + "loss": 0.7409, + "step": 10867 + }, + { + "epoch": 0.5585363346695447, + "grad_norm": 1.045276403427124, + "learning_rate": 4.297413699580356e-06, + "loss": 0.673, + "step": 10868 + }, + { + "epoch": 0.5585877274128893, + "grad_norm": 1.0450563430786133, + "learning_rate": 4.296589712683891e-06, + "loss": 0.7188, + "step": 10869 + }, + { + "epoch": 0.558639120156234, + "grad_norm": 0.7919402718544006, + "learning_rate": 4.295765745276021e-06, + "loss": 0.6953, + "step": 10870 + }, + { + "epoch": 0.5586905128995786, + "grad_norm": 1.091357707977295, + "learning_rate": 4.294941797379576e-06, + "loss": 0.6813, + "step": 10871 + }, + { + "epoch": 0.5587419056429233, + "grad_norm": 1.139304757118225, + "learning_rate": 4.294117869017384e-06, + "loss": 0.7432, + "step": 10872 + }, + { + "epoch": 0.5587932983862679, + "grad_norm": 1.0857518911361694, + "learning_rate": 4.293293960212275e-06, + "loss": 0.7152, + "step": 10873 + }, + { + "epoch": 0.5588446911296125, + "grad_norm": 1.033813714981079, + "learning_rate": 4.292470070987072e-06, + "loss": 0.7175, + "step": 10874 + }, + { + "epoch": 0.5588960838729571, + "grad_norm": 1.107561707496643, + "learning_rate": 4.2916462013646035e-06, + "loss": 0.7022, + "step": 10875 + }, + { + "epoch": 0.5589474766163017, + "grad_norm": 1.0633291006088257, + "learning_rate": 4.290822351367697e-06, + "loss": 0.7361, + "step": 10876 + }, + { + "epoch": 0.5589988693596464, + "grad_norm": 1.0579557418823242, + "learning_rate": 4.289998521019176e-06, + "loss": 0.6944, + "step": 10877 + }, + { + "epoch": 0.559050262102991, + "grad_norm": 1.080997109413147, + "learning_rate": 4.2891747103418645e-06, + "loss": 0.7485, + "step": 10878 + }, + { + "epoch": 0.5591016548463357, + "grad_norm": 0.9321607947349548, + "learning_rate": 4.2883509193585895e-06, + "loss": 0.6649, + "step": 10879 + }, + { + "epoch": 0.5591530475896803, + "grad_norm": 1.020646333694458, + "learning_rate": 4.287527148092173e-06, + "loss": 0.7294, + "step": 10880 + }, + { + "epoch": 0.559204440333025, + "grad_norm": 1.0576215982437134, + "learning_rate": 4.286703396565439e-06, + "loss": 0.7687, + "step": 10881 + }, + { + "epoch": 0.5592558330763696, + "grad_norm": 1.1232056617736816, + "learning_rate": 4.285879664801213e-06, + "loss": 0.7099, + "step": 10882 + }, + { + "epoch": 0.5593072258197143, + "grad_norm": 0.769159197807312, + "learning_rate": 4.2850559528223124e-06, + "loss": 0.6081, + "step": 10883 + }, + { + "epoch": 0.5593586185630589, + "grad_norm": 1.1259429454803467, + "learning_rate": 4.28423226065156e-06, + "loss": 0.7477, + "step": 10884 + }, + { + "epoch": 0.5594100113064036, + "grad_norm": 0.6801486611366272, + "learning_rate": 4.283408588311781e-06, + "loss": 0.7073, + "step": 10885 + }, + { + "epoch": 0.5594614040497482, + "grad_norm": 1.060115098953247, + "learning_rate": 4.28258493582579e-06, + "loss": 0.7705, + "step": 10886 + }, + { + "epoch": 0.5595127967930928, + "grad_norm": 0.8084056973457336, + "learning_rate": 4.2817613032164094e-06, + "loss": 0.6218, + "step": 10887 + }, + { + "epoch": 0.5595641895364375, + "grad_norm": 0.726311206817627, + "learning_rate": 4.28093769050646e-06, + "loss": 0.6172, + "step": 10888 + }, + { + "epoch": 0.5596155822797821, + "grad_norm": 0.6586894392967224, + "learning_rate": 4.2801140977187605e-06, + "loss": 0.6673, + "step": 10889 + }, + { + "epoch": 0.5596669750231268, + "grad_norm": 1.1524149179458618, + "learning_rate": 4.2792905248761275e-06, + "loss": 0.7329, + "step": 10890 + }, + { + "epoch": 0.5597183677664713, + "grad_norm": 0.7418955564498901, + "learning_rate": 4.27846697200138e-06, + "loss": 0.6604, + "step": 10891 + }, + { + "epoch": 0.559769760509816, + "grad_norm": 1.1461540460586548, + "learning_rate": 4.277643439117337e-06, + "loss": 0.7818, + "step": 10892 + }, + { + "epoch": 0.5598211532531606, + "grad_norm": 1.064190149307251, + "learning_rate": 4.276819926246811e-06, + "loss": 0.7055, + "step": 10893 + }, + { + "epoch": 0.5598725459965053, + "grad_norm": 1.0946849584579468, + "learning_rate": 4.2759964334126206e-06, + "loss": 0.7415, + "step": 10894 + }, + { + "epoch": 0.5599239387398499, + "grad_norm": 1.0719082355499268, + "learning_rate": 4.275172960637583e-06, + "loss": 0.7158, + "step": 10895 + }, + { + "epoch": 0.5599753314831946, + "grad_norm": 0.7232871651649475, + "learning_rate": 4.27434950794451e-06, + "loss": 0.6617, + "step": 10896 + }, + { + "epoch": 0.5600267242265392, + "grad_norm": 1.0991734266281128, + "learning_rate": 4.273526075356218e-06, + "loss": 0.7213, + "step": 10897 + }, + { + "epoch": 0.5600781169698839, + "grad_norm": 0.8204271197319031, + "learning_rate": 4.272702662895521e-06, + "loss": 0.645, + "step": 10898 + }, + { + "epoch": 0.5601295097132285, + "grad_norm": 0.7794287800788879, + "learning_rate": 4.27187927058523e-06, + "loss": 0.6511, + "step": 10899 + }, + { + "epoch": 0.5601809024565731, + "grad_norm": 0.7154523730278015, + "learning_rate": 4.271055898448161e-06, + "loss": 0.6411, + "step": 10900 + }, + { + "epoch": 0.5602322951999178, + "grad_norm": 1.1501259803771973, + "learning_rate": 4.2702325465071266e-06, + "loss": 0.7286, + "step": 10901 + }, + { + "epoch": 0.5602836879432624, + "grad_norm": 1.219252109527588, + "learning_rate": 4.269409214784934e-06, + "loss": 0.681, + "step": 10902 + }, + { + "epoch": 0.5603350806866071, + "grad_norm": 1.021567940711975, + "learning_rate": 4.268585903304398e-06, + "loss": 0.7136, + "step": 10903 + }, + { + "epoch": 0.5603864734299517, + "grad_norm": 0.9399927258491516, + "learning_rate": 4.267762612088327e-06, + "loss": 0.6859, + "step": 10904 + }, + { + "epoch": 0.5604378661732964, + "grad_norm": 1.0040158033370972, + "learning_rate": 4.266939341159535e-06, + "loss": 0.6616, + "step": 10905 + }, + { + "epoch": 0.5604892589166409, + "grad_norm": 1.042953372001648, + "learning_rate": 4.266116090540827e-06, + "loss": 0.7074, + "step": 10906 + }, + { + "epoch": 0.5605406516599856, + "grad_norm": 1.1135450601577759, + "learning_rate": 4.265292860255013e-06, + "loss": 0.8105, + "step": 10907 + }, + { + "epoch": 0.5605920444033302, + "grad_norm": 1.07875394821167, + "learning_rate": 4.264469650324904e-06, + "loss": 0.7178, + "step": 10908 + }, + { + "epoch": 0.5606434371466749, + "grad_norm": 1.16061532497406, + "learning_rate": 4.263646460773304e-06, + "loss": 0.7192, + "step": 10909 + }, + { + "epoch": 0.5606948298900195, + "grad_norm": 1.0205036401748657, + "learning_rate": 4.262823291623022e-06, + "loss": 0.7489, + "step": 10910 + }, + { + "epoch": 0.5607462226333642, + "grad_norm": 1.0088468790054321, + "learning_rate": 4.262000142896865e-06, + "loss": 0.7324, + "step": 10911 + }, + { + "epoch": 0.5607976153767088, + "grad_norm": 1.0353134870529175, + "learning_rate": 4.261177014617638e-06, + "loss": 0.7536, + "step": 10912 + }, + { + "epoch": 0.5608490081200534, + "grad_norm": 0.9968785643577576, + "learning_rate": 4.260353906808146e-06, + "loss": 0.732, + "step": 10913 + }, + { + "epoch": 0.5609004008633981, + "grad_norm": 1.09078848361969, + "learning_rate": 4.2595308194911975e-06, + "loss": 0.7079, + "step": 10914 + }, + { + "epoch": 0.5609517936067427, + "grad_norm": 0.82099449634552, + "learning_rate": 4.258707752689593e-06, + "loss": 0.6534, + "step": 10915 + }, + { + "epoch": 0.5610031863500874, + "grad_norm": 1.041460394859314, + "learning_rate": 4.257884706426136e-06, + "loss": 0.7423, + "step": 10916 + }, + { + "epoch": 0.561054579093432, + "grad_norm": 1.1150918006896973, + "learning_rate": 4.257061680723634e-06, + "loss": 0.7062, + "step": 10917 + }, + { + "epoch": 0.5611059718367767, + "grad_norm": 1.0044100284576416, + "learning_rate": 4.2562386756048855e-06, + "loss": 0.6841, + "step": 10918 + }, + { + "epoch": 0.5611573645801213, + "grad_norm": 1.0380232334136963, + "learning_rate": 4.255415691092693e-06, + "loss": 0.7216, + "step": 10919 + }, + { + "epoch": 0.561208757323466, + "grad_norm": 1.0614938735961914, + "learning_rate": 4.2545927272098595e-06, + "loss": 0.7242, + "step": 10920 + }, + { + "epoch": 0.5612601500668105, + "grad_norm": 0.7795531153678894, + "learning_rate": 4.253769783979186e-06, + "loss": 0.6142, + "step": 10921 + }, + { + "epoch": 0.5613115428101552, + "grad_norm": 1.075221061706543, + "learning_rate": 4.252946861423472e-06, + "loss": 0.7308, + "step": 10922 + }, + { + "epoch": 0.5613629355534998, + "grad_norm": 1.0202656984329224, + "learning_rate": 4.252123959565517e-06, + "loss": 0.6351, + "step": 10923 + }, + { + "epoch": 0.5614143282968445, + "grad_norm": 1.0964025259017944, + "learning_rate": 4.251301078428123e-06, + "loss": 0.6909, + "step": 10924 + }, + { + "epoch": 0.5614657210401891, + "grad_norm": 1.060097098350525, + "learning_rate": 4.250478218034084e-06, + "loss": 0.75, + "step": 10925 + }, + { + "epoch": 0.5615171137835338, + "grad_norm": 0.9913344383239746, + "learning_rate": 4.249655378406201e-06, + "loss": 0.7108, + "step": 10926 + }, + { + "epoch": 0.5615685065268784, + "grad_norm": 1.0450727939605713, + "learning_rate": 4.248832559567273e-06, + "loss": 0.7362, + "step": 10927 + }, + { + "epoch": 0.561619899270223, + "grad_norm": 1.1383724212646484, + "learning_rate": 4.248009761540092e-06, + "loss": 0.7707, + "step": 10928 + }, + { + "epoch": 0.5616712920135677, + "grad_norm": 1.1207242012023926, + "learning_rate": 4.247186984347458e-06, + "loss": 0.7397, + "step": 10929 + }, + { + "epoch": 0.5617226847569123, + "grad_norm": 1.0426716804504395, + "learning_rate": 4.246364228012167e-06, + "loss": 0.7112, + "step": 10930 + }, + { + "epoch": 0.561774077500257, + "grad_norm": 1.0183974504470825, + "learning_rate": 4.245541492557012e-06, + "loss": 0.704, + "step": 10931 + }, + { + "epoch": 0.5618254702436016, + "grad_norm": 1.0449295043945312, + "learning_rate": 4.244718778004789e-06, + "loss": 0.7187, + "step": 10932 + }, + { + "epoch": 0.5618768629869463, + "grad_norm": 1.1089107990264893, + "learning_rate": 4.243896084378294e-06, + "loss": 0.734, + "step": 10933 + }, + { + "epoch": 0.5619282557302909, + "grad_norm": 0.8600550889968872, + "learning_rate": 4.243073411700315e-06, + "loss": 0.6606, + "step": 10934 + }, + { + "epoch": 0.5619796484736356, + "grad_norm": 1.0280994176864624, + "learning_rate": 4.242250759993649e-06, + "loss": 0.6897, + "step": 10935 + }, + { + "epoch": 0.5620310412169801, + "grad_norm": 1.0299351215362549, + "learning_rate": 4.2414281292810875e-06, + "loss": 0.677, + "step": 10936 + }, + { + "epoch": 0.5620824339603248, + "grad_norm": 1.1593406200408936, + "learning_rate": 4.240605519585424e-06, + "loss": 0.7699, + "step": 10937 + }, + { + "epoch": 0.5621338267036694, + "grad_norm": 1.1487523317337036, + "learning_rate": 4.239782930929446e-06, + "loss": 0.7264, + "step": 10938 + }, + { + "epoch": 0.562185219447014, + "grad_norm": 1.085103988647461, + "learning_rate": 4.238960363335946e-06, + "loss": 0.7375, + "step": 10939 + }, + { + "epoch": 0.5622366121903587, + "grad_norm": 1.0652331113815308, + "learning_rate": 4.238137816827714e-06, + "loss": 0.7077, + "step": 10940 + }, + { + "epoch": 0.5622880049337033, + "grad_norm": 1.1124215126037598, + "learning_rate": 4.237315291427538e-06, + "loss": 0.7225, + "step": 10941 + }, + { + "epoch": 0.562339397677048, + "grad_norm": 1.0701571702957153, + "learning_rate": 4.236492787158208e-06, + "loss": 0.711, + "step": 10942 + }, + { + "epoch": 0.5623907904203926, + "grad_norm": 1.0151680707931519, + "learning_rate": 4.235670304042514e-06, + "loss": 0.7009, + "step": 10943 + }, + { + "epoch": 0.5624421831637373, + "grad_norm": 1.0536092519760132, + "learning_rate": 4.23484784210324e-06, + "loss": 0.7426, + "step": 10944 + }, + { + "epoch": 0.5624935759070819, + "grad_norm": 1.0561531782150269, + "learning_rate": 4.2340254013631744e-06, + "loss": 0.7033, + "step": 10945 + }, + { + "epoch": 0.5625449686504266, + "grad_norm": 1.090956687927246, + "learning_rate": 4.233202981845106e-06, + "loss": 0.7297, + "step": 10946 + }, + { + "epoch": 0.5625963613937712, + "grad_norm": 1.052664875984192, + "learning_rate": 4.232380583571817e-06, + "loss": 0.7623, + "step": 10947 + }, + { + "epoch": 0.5626477541371159, + "grad_norm": 0.7026613354682922, + "learning_rate": 4.231558206566093e-06, + "loss": 0.6838, + "step": 10948 + }, + { + "epoch": 0.5626991468804605, + "grad_norm": 1.1899166107177734, + "learning_rate": 4.230735850850722e-06, + "loss": 0.6964, + "step": 10949 + }, + { + "epoch": 0.5627505396238052, + "grad_norm": 0.8542056083679199, + "learning_rate": 4.229913516448485e-06, + "loss": 0.6107, + "step": 10950 + }, + { + "epoch": 0.5628019323671497, + "grad_norm": 1.102874994277954, + "learning_rate": 4.229091203382166e-06, + "loss": 0.7877, + "step": 10951 + }, + { + "epoch": 0.5628533251104944, + "grad_norm": 1.0361963510513306, + "learning_rate": 4.22826891167455e-06, + "loss": 0.7374, + "step": 10952 + }, + { + "epoch": 0.562904717853839, + "grad_norm": 1.048791527748108, + "learning_rate": 4.227446641348418e-06, + "loss": 0.712, + "step": 10953 + }, + { + "epoch": 0.5629561105971836, + "grad_norm": 1.0534428358078003, + "learning_rate": 4.2266243924265496e-06, + "loss": 0.6611, + "step": 10954 + }, + { + "epoch": 0.5630075033405283, + "grad_norm": 1.0068188905715942, + "learning_rate": 4.225802164931728e-06, + "loss": 0.7647, + "step": 10955 + }, + { + "epoch": 0.5630588960838729, + "grad_norm": 1.0235264301300049, + "learning_rate": 4.224979958886736e-06, + "loss": 0.6533, + "step": 10956 + }, + { + "epoch": 0.5631102888272176, + "grad_norm": 1.0482769012451172, + "learning_rate": 4.224157774314348e-06, + "loss": 0.7444, + "step": 10957 + }, + { + "epoch": 0.5631616815705622, + "grad_norm": 1.1097075939178467, + "learning_rate": 4.223335611237347e-06, + "loss": 0.7266, + "step": 10958 + }, + { + "epoch": 0.5632130743139069, + "grad_norm": 1.046775221824646, + "learning_rate": 4.222513469678512e-06, + "loss": 0.7734, + "step": 10959 + }, + { + "epoch": 0.5632644670572515, + "grad_norm": 0.7375898361206055, + "learning_rate": 4.22169134966062e-06, + "loss": 0.689, + "step": 10960 + }, + { + "epoch": 0.5633158598005962, + "grad_norm": 1.0279228687286377, + "learning_rate": 4.220869251206448e-06, + "loss": 0.7163, + "step": 10961 + }, + { + "epoch": 0.5633672525439408, + "grad_norm": 1.03515625, + "learning_rate": 4.220047174338775e-06, + "loss": 0.7376, + "step": 10962 + }, + { + "epoch": 0.5634186452872855, + "grad_norm": 0.7687429785728455, + "learning_rate": 4.2192251190803745e-06, + "loss": 0.6572, + "step": 10963 + }, + { + "epoch": 0.5634700380306301, + "grad_norm": 1.0392944812774658, + "learning_rate": 4.218403085454025e-06, + "loss": 0.6674, + "step": 10964 + }, + { + "epoch": 0.5635214307739748, + "grad_norm": 1.0999081134796143, + "learning_rate": 4.2175810734825e-06, + "loss": 0.7551, + "step": 10965 + }, + { + "epoch": 0.5635728235173193, + "grad_norm": 0.7833535075187683, + "learning_rate": 4.2167590831885766e-06, + "loss": 0.6795, + "step": 10966 + }, + { + "epoch": 0.563624216260664, + "grad_norm": 1.1171650886535645, + "learning_rate": 4.215937114595024e-06, + "loss": 0.6722, + "step": 10967 + }, + { + "epoch": 0.5636756090040086, + "grad_norm": 1.1054211854934692, + "learning_rate": 4.215115167724619e-06, + "loss": 0.6316, + "step": 10968 + }, + { + "epoch": 0.5637270017473532, + "grad_norm": 1.1324361562728882, + "learning_rate": 4.214293242600136e-06, + "loss": 0.7447, + "step": 10969 + }, + { + "epoch": 0.5637783944906979, + "grad_norm": 1.0604054927825928, + "learning_rate": 4.213471339244342e-06, + "loss": 0.7348, + "step": 10970 + }, + { + "epoch": 0.5638297872340425, + "grad_norm": 1.2330487966537476, + "learning_rate": 4.212649457680012e-06, + "loss": 0.6863, + "step": 10971 + }, + { + "epoch": 0.5638811799773872, + "grad_norm": 1.082266092300415, + "learning_rate": 4.211827597929917e-06, + "loss": 0.6692, + "step": 10972 + }, + { + "epoch": 0.5639325727207318, + "grad_norm": 1.0797470808029175, + "learning_rate": 4.211005760016826e-06, + "loss": 0.6614, + "step": 10973 + }, + { + "epoch": 0.5639839654640765, + "grad_norm": 1.0578923225402832, + "learning_rate": 4.210183943963509e-06, + "loss": 0.7394, + "step": 10974 + }, + { + "epoch": 0.5640353582074211, + "grad_norm": 1.0117113590240479, + "learning_rate": 4.209362149792739e-06, + "loss": 0.7089, + "step": 10975 + }, + { + "epoch": 0.5640867509507658, + "grad_norm": 1.05546236038208, + "learning_rate": 4.208540377527277e-06, + "loss": 0.7337, + "step": 10976 + }, + { + "epoch": 0.5641381436941104, + "grad_norm": 1.1539944410324097, + "learning_rate": 4.207718627189896e-06, + "loss": 0.7284, + "step": 10977 + }, + { + "epoch": 0.5641895364374551, + "grad_norm": 0.7652021050453186, + "learning_rate": 4.206896898803364e-06, + "loss": 0.6619, + "step": 10978 + }, + { + "epoch": 0.5642409291807997, + "grad_norm": 1.0343468189239502, + "learning_rate": 4.206075192390444e-06, + "loss": 0.7405, + "step": 10979 + }, + { + "epoch": 0.5642923219241444, + "grad_norm": 1.0969241857528687, + "learning_rate": 4.2052535079739035e-06, + "loss": 0.7341, + "step": 10980 + }, + { + "epoch": 0.564343714667489, + "grad_norm": 1.0535887479782104, + "learning_rate": 4.20443184557651e-06, + "loss": 0.7278, + "step": 10981 + }, + { + "epoch": 0.5643951074108335, + "grad_norm": 1.0153982639312744, + "learning_rate": 4.203610205221027e-06, + "loss": 0.7031, + "step": 10982 + }, + { + "epoch": 0.5644465001541782, + "grad_norm": 1.0559056997299194, + "learning_rate": 4.202788586930217e-06, + "loss": 0.7766, + "step": 10983 + }, + { + "epoch": 0.5644978928975228, + "grad_norm": 1.0815242528915405, + "learning_rate": 4.201966990726847e-06, + "loss": 0.6552, + "step": 10984 + }, + { + "epoch": 0.5645492856408675, + "grad_norm": 1.1948115825653076, + "learning_rate": 4.20114541663368e-06, + "loss": 0.7069, + "step": 10985 + }, + { + "epoch": 0.5646006783842121, + "grad_norm": 1.096306324005127, + "learning_rate": 4.200323864673474e-06, + "loss": 0.7564, + "step": 10986 + }, + { + "epoch": 0.5646520711275568, + "grad_norm": 1.010321021080017, + "learning_rate": 4.1995023348689935e-06, + "loss": 0.7105, + "step": 10987 + }, + { + "epoch": 0.5647034638709014, + "grad_norm": 1.0823198556900024, + "learning_rate": 4.198680827243004e-06, + "loss": 0.7261, + "step": 10988 + }, + { + "epoch": 0.5647548566142461, + "grad_norm": 0.9965549111366272, + "learning_rate": 4.197859341818257e-06, + "loss": 0.6983, + "step": 10989 + }, + { + "epoch": 0.5648062493575907, + "grad_norm": 1.101845145225525, + "learning_rate": 4.197037878617519e-06, + "loss": 0.7241, + "step": 10990 + }, + { + "epoch": 0.5648576421009354, + "grad_norm": 1.0959886312484741, + "learning_rate": 4.196216437663549e-06, + "loss": 0.7129, + "step": 10991 + }, + { + "epoch": 0.56490903484428, + "grad_norm": 1.0790483951568604, + "learning_rate": 4.195395018979102e-06, + "loss": 0.7573, + "step": 10992 + }, + { + "epoch": 0.5649604275876247, + "grad_norm": 1.087011694908142, + "learning_rate": 4.19457362258694e-06, + "loss": 0.7537, + "step": 10993 + }, + { + "epoch": 0.5650118203309693, + "grad_norm": 1.0050640106201172, + "learning_rate": 4.1937522485098215e-06, + "loss": 0.731, + "step": 10994 + }, + { + "epoch": 0.565063213074314, + "grad_norm": 0.6807528138160706, + "learning_rate": 4.192930896770498e-06, + "loss": 0.6855, + "step": 10995 + }, + { + "epoch": 0.5651146058176586, + "grad_norm": 0.7436098456382751, + "learning_rate": 4.192109567391729e-06, + "loss": 0.6514, + "step": 10996 + }, + { + "epoch": 0.5651659985610031, + "grad_norm": 1.0787243843078613, + "learning_rate": 4.19128826039627e-06, + "loss": 0.7702, + "step": 10997 + }, + { + "epoch": 0.5652173913043478, + "grad_norm": 1.0713164806365967, + "learning_rate": 4.190466975806878e-06, + "loss": 0.7768, + "step": 10998 + }, + { + "epoch": 0.5652687840476924, + "grad_norm": 1.1721802949905396, + "learning_rate": 4.189645713646303e-06, + "loss": 0.7479, + "step": 10999 + }, + { + "epoch": 0.5653201767910371, + "grad_norm": 1.0569698810577393, + "learning_rate": 4.188824473937301e-06, + "loss": 0.7025, + "step": 11000 + }, + { + "epoch": 0.5653715695343817, + "grad_norm": 1.3192484378814697, + "learning_rate": 4.188003256702628e-06, + "loss": 0.6671, + "step": 11001 + }, + { + "epoch": 0.5654229622777264, + "grad_norm": 1.0347923040390015, + "learning_rate": 4.187182061965031e-06, + "loss": 0.6839, + "step": 11002 + }, + { + "epoch": 0.565474355021071, + "grad_norm": 1.0944939851760864, + "learning_rate": 4.186360889747266e-06, + "loss": 0.776, + "step": 11003 + }, + { + "epoch": 0.5655257477644157, + "grad_norm": 1.1577610969543457, + "learning_rate": 4.185539740072083e-06, + "loss": 0.7471, + "step": 11004 + }, + { + "epoch": 0.5655771405077603, + "grad_norm": 1.1276535987854004, + "learning_rate": 4.184718612962232e-06, + "loss": 0.6993, + "step": 11005 + }, + { + "epoch": 0.565628533251105, + "grad_norm": 1.052153468132019, + "learning_rate": 4.1838975084404645e-06, + "loss": 0.7171, + "step": 11006 + }, + { + "epoch": 0.5656799259944496, + "grad_norm": 1.0479240417480469, + "learning_rate": 4.183076426529531e-06, + "loss": 0.7375, + "step": 11007 + }, + { + "epoch": 0.5657313187377943, + "grad_norm": 1.0511529445648193, + "learning_rate": 4.182255367252175e-06, + "loss": 0.6863, + "step": 11008 + }, + { + "epoch": 0.5657827114811389, + "grad_norm": 0.7077540159225464, + "learning_rate": 4.18143433063115e-06, + "loss": 0.6704, + "step": 11009 + }, + { + "epoch": 0.5658341042244835, + "grad_norm": 1.0110533237457275, + "learning_rate": 4.180613316689202e-06, + "loss": 0.73, + "step": 11010 + }, + { + "epoch": 0.5658854969678282, + "grad_norm": 1.1433515548706055, + "learning_rate": 4.179792325449077e-06, + "loss": 0.6777, + "step": 11011 + }, + { + "epoch": 0.5659368897111727, + "grad_norm": 1.0699175596237183, + "learning_rate": 4.178971356933521e-06, + "loss": 0.7538, + "step": 11012 + }, + { + "epoch": 0.5659882824545174, + "grad_norm": 1.1669446229934692, + "learning_rate": 4.1781504111652806e-06, + "loss": 0.7066, + "step": 11013 + }, + { + "epoch": 0.566039675197862, + "grad_norm": 1.0704914331436157, + "learning_rate": 4.177329488167102e-06, + "loss": 0.7082, + "step": 11014 + }, + { + "epoch": 0.5660910679412067, + "grad_norm": 1.0695264339447021, + "learning_rate": 4.176508587961728e-06, + "loss": 0.7001, + "step": 11015 + }, + { + "epoch": 0.5661424606845513, + "grad_norm": 1.090102195739746, + "learning_rate": 4.175687710571902e-06, + "loss": 0.6625, + "step": 11016 + }, + { + "epoch": 0.566193853427896, + "grad_norm": 1.1025681495666504, + "learning_rate": 4.174866856020369e-06, + "loss": 0.7657, + "step": 11017 + }, + { + "epoch": 0.5662452461712406, + "grad_norm": 1.0325428247451782, + "learning_rate": 4.174046024329869e-06, + "loss": 0.7016, + "step": 11018 + }, + { + "epoch": 0.5662966389145853, + "grad_norm": 1.0691859722137451, + "learning_rate": 4.173225215523146e-06, + "loss": 0.6608, + "step": 11019 + }, + { + "epoch": 0.5663480316579299, + "grad_norm": 1.178205966949463, + "learning_rate": 4.172404429622941e-06, + "loss": 0.7005, + "step": 11020 + }, + { + "epoch": 0.5663994244012746, + "grad_norm": 1.0893807411193848, + "learning_rate": 4.1715836666519924e-06, + "loss": 0.6973, + "step": 11021 + }, + { + "epoch": 0.5664508171446192, + "grad_norm": 1.113642930984497, + "learning_rate": 4.1707629266330414e-06, + "loss": 0.7835, + "step": 11022 + }, + { + "epoch": 0.5665022098879638, + "grad_norm": 1.0823112726211548, + "learning_rate": 4.16994220958883e-06, + "loss": 0.7172, + "step": 11023 + }, + { + "epoch": 0.5665536026313085, + "grad_norm": 1.1308764219284058, + "learning_rate": 4.1691215155420924e-06, + "loss": 0.7029, + "step": 11024 + }, + { + "epoch": 0.5666049953746531, + "grad_norm": 0.8499131202697754, + "learning_rate": 4.168300844515569e-06, + "loss": 0.6848, + "step": 11025 + }, + { + "epoch": 0.5666563881179978, + "grad_norm": 1.0882580280303955, + "learning_rate": 4.167480196531999e-06, + "loss": 0.7179, + "step": 11026 + }, + { + "epoch": 0.5667077808613423, + "grad_norm": 1.0615354776382446, + "learning_rate": 4.166659571614115e-06, + "loss": 0.7024, + "step": 11027 + }, + { + "epoch": 0.566759173604687, + "grad_norm": 1.1401622295379639, + "learning_rate": 4.165838969784656e-06, + "loss": 0.8025, + "step": 11028 + }, + { + "epoch": 0.5668105663480316, + "grad_norm": 1.0660475492477417, + "learning_rate": 4.165018391066355e-06, + "loss": 0.699, + "step": 11029 + }, + { + "epoch": 0.5668619590913763, + "grad_norm": 1.002280831336975, + "learning_rate": 4.164197835481952e-06, + "loss": 0.7021, + "step": 11030 + }, + { + "epoch": 0.5669133518347209, + "grad_norm": 0.7960710525512695, + "learning_rate": 4.1633773030541756e-06, + "loss": 0.6526, + "step": 11031 + }, + { + "epoch": 0.5669647445780656, + "grad_norm": 0.967799723148346, + "learning_rate": 4.162556793805762e-06, + "loss": 0.7273, + "step": 11032 + }, + { + "epoch": 0.5670161373214102, + "grad_norm": 1.10124671459198, + "learning_rate": 4.161736307759444e-06, + "loss": 0.7687, + "step": 11033 + }, + { + "epoch": 0.5670675300647549, + "grad_norm": 0.7569063901901245, + "learning_rate": 4.160915844937953e-06, + "loss": 0.6544, + "step": 11034 + }, + { + "epoch": 0.5671189228080995, + "grad_norm": 1.1301634311676025, + "learning_rate": 4.160095405364021e-06, + "loss": 0.7566, + "step": 11035 + }, + { + "epoch": 0.5671703155514441, + "grad_norm": 1.0733606815338135, + "learning_rate": 4.159274989060382e-06, + "loss": 0.7214, + "step": 11036 + }, + { + "epoch": 0.5672217082947888, + "grad_norm": 0.9614720344543457, + "learning_rate": 4.158454596049761e-06, + "loss": 0.6712, + "step": 11037 + }, + { + "epoch": 0.5672731010381334, + "grad_norm": 0.7964835166931152, + "learning_rate": 4.15763422635489e-06, + "loss": 0.7113, + "step": 11038 + }, + { + "epoch": 0.5673244937814781, + "grad_norm": 1.1233831644058228, + "learning_rate": 4.156813879998501e-06, + "loss": 0.7044, + "step": 11039 + }, + { + "epoch": 0.5673758865248227, + "grad_norm": 1.1169955730438232, + "learning_rate": 4.155993557003317e-06, + "loss": 0.6653, + "step": 11040 + }, + { + "epoch": 0.5674272792681674, + "grad_norm": 1.0280835628509521, + "learning_rate": 4.155173257392069e-06, + "loss": 0.7314, + "step": 11041 + }, + { + "epoch": 0.5674786720115119, + "grad_norm": 0.708518385887146, + "learning_rate": 4.154352981187485e-06, + "loss": 0.6711, + "step": 11042 + }, + { + "epoch": 0.5675300647548566, + "grad_norm": 1.1703786849975586, + "learning_rate": 4.1535327284122885e-06, + "loss": 0.7227, + "step": 11043 + }, + { + "epoch": 0.5675814574982012, + "grad_norm": 1.1340153217315674, + "learning_rate": 4.152712499089207e-06, + "loss": 0.6961, + "step": 11044 + }, + { + "epoch": 0.5676328502415459, + "grad_norm": 1.0886057615280151, + "learning_rate": 4.1518922932409655e-06, + "loss": 0.7653, + "step": 11045 + }, + { + "epoch": 0.5676842429848905, + "grad_norm": 1.1166150569915771, + "learning_rate": 4.1510721108902916e-06, + "loss": 0.7194, + "step": 11046 + }, + { + "epoch": 0.5677356357282352, + "grad_norm": 1.125437617301941, + "learning_rate": 4.150251952059903e-06, + "loss": 0.7652, + "step": 11047 + }, + { + "epoch": 0.5677870284715798, + "grad_norm": 1.1061116456985474, + "learning_rate": 4.149431816772526e-06, + "loss": 0.7274, + "step": 11048 + }, + { + "epoch": 0.5678384212149244, + "grad_norm": 1.0566638708114624, + "learning_rate": 4.148611705050886e-06, + "loss": 0.705, + "step": 11049 + }, + { + "epoch": 0.5678898139582691, + "grad_norm": 1.1856369972229004, + "learning_rate": 4.1477916169177005e-06, + "loss": 0.6572, + "step": 11050 + }, + { + "epoch": 0.5679412067016137, + "grad_norm": 1.1192147731781006, + "learning_rate": 4.146971552395692e-06, + "loss": 0.6844, + "step": 11051 + }, + { + "epoch": 0.5679925994449584, + "grad_norm": 0.9797646403312683, + "learning_rate": 4.146151511507582e-06, + "loss": 0.6754, + "step": 11052 + }, + { + "epoch": 0.568043992188303, + "grad_norm": 1.043342113494873, + "learning_rate": 4.14533149427609e-06, + "loss": 0.7951, + "step": 11053 + }, + { + "epoch": 0.5680953849316477, + "grad_norm": 1.0352638959884644, + "learning_rate": 4.144511500723936e-06, + "loss": 0.7125, + "step": 11054 + }, + { + "epoch": 0.5681467776749923, + "grad_norm": 1.108858346939087, + "learning_rate": 4.143691530873837e-06, + "loss": 0.7257, + "step": 11055 + }, + { + "epoch": 0.568198170418337, + "grad_norm": 1.123024344444275, + "learning_rate": 4.142871584748511e-06, + "loss": 0.7509, + "step": 11056 + }, + { + "epoch": 0.5682495631616816, + "grad_norm": 1.127915859222412, + "learning_rate": 4.142051662370678e-06, + "loss": 0.7346, + "step": 11057 + }, + { + "epoch": 0.5683009559050262, + "grad_norm": 1.0987194776535034, + "learning_rate": 4.1412317637630526e-06, + "loss": 0.7135, + "step": 11058 + }, + { + "epoch": 0.5683523486483708, + "grad_norm": 1.120337963104248, + "learning_rate": 4.140411888948351e-06, + "loss": 0.6603, + "step": 11059 + }, + { + "epoch": 0.5684037413917155, + "grad_norm": 0.7597155570983887, + "learning_rate": 4.139592037949287e-06, + "loss": 0.6982, + "step": 11060 + }, + { + "epoch": 0.5684551341350601, + "grad_norm": 1.1014034748077393, + "learning_rate": 4.138772210788578e-06, + "loss": 0.6997, + "step": 11061 + }, + { + "epoch": 0.5685065268784047, + "grad_norm": 1.0854308605194092, + "learning_rate": 4.137952407488939e-06, + "loss": 0.7421, + "step": 11062 + }, + { + "epoch": 0.5685579196217494, + "grad_norm": 1.0766063928604126, + "learning_rate": 4.1371326280730786e-06, + "loss": 0.6922, + "step": 11063 + }, + { + "epoch": 0.568609312365094, + "grad_norm": 1.138703465461731, + "learning_rate": 4.136312872563713e-06, + "loss": 0.7609, + "step": 11064 + }, + { + "epoch": 0.5686607051084387, + "grad_norm": 1.2177025079727173, + "learning_rate": 4.135493140983554e-06, + "loss": 0.7316, + "step": 11065 + }, + { + "epoch": 0.5687120978517833, + "grad_norm": 1.0706874132156372, + "learning_rate": 4.134673433355312e-06, + "loss": 0.7754, + "step": 11066 + }, + { + "epoch": 0.568763490595128, + "grad_norm": 1.150875449180603, + "learning_rate": 4.1338537497016975e-06, + "loss": 0.6622, + "step": 11067 + }, + { + "epoch": 0.5688148833384726, + "grad_norm": 1.1102443933486938, + "learning_rate": 4.133034090045424e-06, + "loss": 0.7811, + "step": 11068 + }, + { + "epoch": 0.5688662760818173, + "grad_norm": 1.1013644933700562, + "learning_rate": 4.132214454409195e-06, + "loss": 0.7081, + "step": 11069 + }, + { + "epoch": 0.5689176688251619, + "grad_norm": 1.1496880054473877, + "learning_rate": 4.131394842815723e-06, + "loss": 0.77, + "step": 11070 + }, + { + "epoch": 0.5689690615685066, + "grad_norm": 1.1390855312347412, + "learning_rate": 4.130575255287718e-06, + "loss": 0.7419, + "step": 11071 + }, + { + "epoch": 0.5690204543118512, + "grad_norm": 1.0418493747711182, + "learning_rate": 4.129755691847882e-06, + "loss": 0.6541, + "step": 11072 + }, + { + "epoch": 0.5690718470551958, + "grad_norm": 1.11246919631958, + "learning_rate": 4.128936152518924e-06, + "loss": 0.6826, + "step": 11073 + }, + { + "epoch": 0.5691232397985404, + "grad_norm": 0.7747859358787537, + "learning_rate": 4.128116637323552e-06, + "loss": 0.641, + "step": 11074 + }, + { + "epoch": 0.569174632541885, + "grad_norm": 1.0382137298583984, + "learning_rate": 4.127297146284469e-06, + "loss": 0.6783, + "step": 11075 + }, + { + "epoch": 0.5692260252852297, + "grad_norm": 0.6698956489562988, + "learning_rate": 4.126477679424381e-06, + "loss": 0.6544, + "step": 11076 + }, + { + "epoch": 0.5692774180285743, + "grad_norm": 1.09820556640625, + "learning_rate": 4.125658236765991e-06, + "loss": 0.7094, + "step": 11077 + }, + { + "epoch": 0.569328810771919, + "grad_norm": 1.089430570602417, + "learning_rate": 4.124838818332006e-06, + "loss": 0.6953, + "step": 11078 + }, + { + "epoch": 0.5693802035152636, + "grad_norm": 1.0253576040267944, + "learning_rate": 4.124019424145123e-06, + "loss": 0.7345, + "step": 11079 + }, + { + "epoch": 0.5694315962586083, + "grad_norm": 1.0222501754760742, + "learning_rate": 4.123200054228047e-06, + "loss": 0.6694, + "step": 11080 + }, + { + "epoch": 0.5694829890019529, + "grad_norm": 1.0777531862258911, + "learning_rate": 4.122380708603481e-06, + "loss": 0.7092, + "step": 11081 + }, + { + "epoch": 0.5695343817452976, + "grad_norm": 1.0338901281356812, + "learning_rate": 4.121561387294122e-06, + "loss": 0.6733, + "step": 11082 + }, + { + "epoch": 0.5695857744886422, + "grad_norm": 1.091964840888977, + "learning_rate": 4.120742090322672e-06, + "loss": 0.7169, + "step": 11083 + }, + { + "epoch": 0.5696371672319869, + "grad_norm": 1.0256109237670898, + "learning_rate": 4.1199228177118315e-06, + "loss": 0.6553, + "step": 11084 + }, + { + "epoch": 0.5696885599753315, + "grad_norm": 1.0932872295379639, + "learning_rate": 4.119103569484296e-06, + "loss": 0.7236, + "step": 11085 + }, + { + "epoch": 0.5697399527186762, + "grad_norm": 1.0545307397842407, + "learning_rate": 4.1182843456627655e-06, + "loss": 0.7133, + "step": 11086 + }, + { + "epoch": 0.5697913454620208, + "grad_norm": 1.0992408990859985, + "learning_rate": 4.11746514626994e-06, + "loss": 0.6983, + "step": 11087 + }, + { + "epoch": 0.5698427382053654, + "grad_norm": 0.7405276298522949, + "learning_rate": 4.11664597132851e-06, + "loss": 0.6516, + "step": 11088 + }, + { + "epoch": 0.56989413094871, + "grad_norm": 1.0907763242721558, + "learning_rate": 4.115826820861177e-06, + "loss": 0.6907, + "step": 11089 + }, + { + "epoch": 0.5699455236920546, + "grad_norm": 0.798585057258606, + "learning_rate": 4.115007694890631e-06, + "loss": 0.6318, + "step": 11090 + }, + { + "epoch": 0.5699969164353993, + "grad_norm": 1.4931803941726685, + "learning_rate": 4.114188593439574e-06, + "loss": 0.7302, + "step": 11091 + }, + { + "epoch": 0.5700483091787439, + "grad_norm": 0.7386412620544434, + "learning_rate": 4.113369516530693e-06, + "loss": 0.6402, + "step": 11092 + }, + { + "epoch": 0.5700997019220886, + "grad_norm": 1.1752276420593262, + "learning_rate": 4.112550464186683e-06, + "loss": 0.8014, + "step": 11093 + }, + { + "epoch": 0.5701510946654332, + "grad_norm": 1.0570437908172607, + "learning_rate": 4.111731436430239e-06, + "loss": 0.738, + "step": 11094 + }, + { + "epoch": 0.5702024874087779, + "grad_norm": 1.031988501548767, + "learning_rate": 4.11091243328405e-06, + "loss": 0.7353, + "step": 11095 + }, + { + "epoch": 0.5702538801521225, + "grad_norm": 1.2070701122283936, + "learning_rate": 4.1100934547708085e-06, + "loss": 0.7533, + "step": 11096 + }, + { + "epoch": 0.5703052728954672, + "grad_norm": 1.1088544130325317, + "learning_rate": 4.1092745009132066e-06, + "loss": 0.7167, + "step": 11097 + }, + { + "epoch": 0.5703566656388118, + "grad_norm": 0.655411958694458, + "learning_rate": 4.108455571733931e-06, + "loss": 0.641, + "step": 11098 + }, + { + "epoch": 0.5704080583821565, + "grad_norm": 1.1411075592041016, + "learning_rate": 4.107636667255671e-06, + "loss": 0.7698, + "step": 11099 + }, + { + "epoch": 0.5704594511255011, + "grad_norm": 1.0562725067138672, + "learning_rate": 4.10681778750112e-06, + "loss": 0.7221, + "step": 11100 + }, + { + "epoch": 0.5705108438688458, + "grad_norm": 1.0551602840423584, + "learning_rate": 4.10599893249296e-06, + "loss": 0.7187, + "step": 11101 + }, + { + "epoch": 0.5705622366121904, + "grad_norm": 1.1278977394104004, + "learning_rate": 4.1051801022538805e-06, + "loss": 0.7081, + "step": 11102 + }, + { + "epoch": 0.570613629355535, + "grad_norm": 1.0314875841140747, + "learning_rate": 4.104361296806568e-06, + "loss": 0.7068, + "step": 11103 + }, + { + "epoch": 0.5706650220988796, + "grad_norm": 1.1056450605392456, + "learning_rate": 4.103542516173706e-06, + "loss": 0.7586, + "step": 11104 + }, + { + "epoch": 0.5707164148422242, + "grad_norm": 1.0216871500015259, + "learning_rate": 4.102723760377984e-06, + "loss": 0.6757, + "step": 11105 + }, + { + "epoch": 0.5707678075855689, + "grad_norm": 1.1434584856033325, + "learning_rate": 4.101905029442082e-06, + "loss": 0.7134, + "step": 11106 + }, + { + "epoch": 0.5708192003289135, + "grad_norm": 1.0356143712997437, + "learning_rate": 4.1010863233886865e-06, + "loss": 0.6647, + "step": 11107 + }, + { + "epoch": 0.5708705930722582, + "grad_norm": 1.0801175832748413, + "learning_rate": 4.100267642240478e-06, + "loss": 0.6958, + "step": 11108 + }, + { + "epoch": 0.5709219858156028, + "grad_norm": 0.7265595197677612, + "learning_rate": 4.0994489860201404e-06, + "loss": 0.6429, + "step": 11109 + }, + { + "epoch": 0.5709733785589475, + "grad_norm": 0.6772834062576294, + "learning_rate": 4.098630354750358e-06, + "loss": 0.6284, + "step": 11110 + }, + { + "epoch": 0.5710247713022921, + "grad_norm": 1.0246405601501465, + "learning_rate": 4.097811748453806e-06, + "loss": 0.6914, + "step": 11111 + }, + { + "epoch": 0.5710761640456368, + "grad_norm": 0.7394922971725464, + "learning_rate": 4.0969931671531675e-06, + "loss": 0.6366, + "step": 11112 + }, + { + "epoch": 0.5711275567889814, + "grad_norm": 0.7672628164291382, + "learning_rate": 4.096174610871123e-06, + "loss": 0.6739, + "step": 11113 + }, + { + "epoch": 0.5711789495323261, + "grad_norm": 0.7954023480415344, + "learning_rate": 4.0953560796303494e-06, + "loss": 0.6551, + "step": 11114 + }, + { + "epoch": 0.5712303422756707, + "grad_norm": 1.0739543437957764, + "learning_rate": 4.0945375734535245e-06, + "loss": 0.724, + "step": 11115 + }, + { + "epoch": 0.5712817350190154, + "grad_norm": 0.7781845331192017, + "learning_rate": 4.0937190923633285e-06, + "loss": 0.6706, + "step": 11116 + }, + { + "epoch": 0.57133312776236, + "grad_norm": 1.1202223300933838, + "learning_rate": 4.092900636382435e-06, + "loss": 0.7076, + "step": 11117 + }, + { + "epoch": 0.5713845205057045, + "grad_norm": 1.1085606813430786, + "learning_rate": 4.092082205533522e-06, + "loss": 0.7123, + "step": 11118 + }, + { + "epoch": 0.5714359132490492, + "grad_norm": 1.130695104598999, + "learning_rate": 4.091263799839266e-06, + "loss": 0.6728, + "step": 11119 + }, + { + "epoch": 0.5714873059923938, + "grad_norm": 1.096818208694458, + "learning_rate": 4.090445419322338e-06, + "loss": 0.7559, + "step": 11120 + }, + { + "epoch": 0.5715386987357385, + "grad_norm": 1.071595549583435, + "learning_rate": 4.089627064005414e-06, + "loss": 0.7239, + "step": 11121 + }, + { + "epoch": 0.5715900914790831, + "grad_norm": 1.0662117004394531, + "learning_rate": 4.088808733911167e-06, + "loss": 0.7038, + "step": 11122 + }, + { + "epoch": 0.5716414842224278, + "grad_norm": 0.8210490942001343, + "learning_rate": 4.087990429062272e-06, + "loss": 0.6503, + "step": 11123 + }, + { + "epoch": 0.5716928769657724, + "grad_norm": 1.068395972251892, + "learning_rate": 4.087172149481397e-06, + "loss": 0.6696, + "step": 11124 + }, + { + "epoch": 0.5717442697091171, + "grad_norm": 1.0581468343734741, + "learning_rate": 4.086353895191213e-06, + "loss": 0.6948, + "step": 11125 + }, + { + "epoch": 0.5717956624524617, + "grad_norm": 1.0160549879074097, + "learning_rate": 4.0855356662143956e-06, + "loss": 0.6927, + "step": 11126 + }, + { + "epoch": 0.5718470551958064, + "grad_norm": 1.1025118827819824, + "learning_rate": 4.084717462573608e-06, + "loss": 0.7389, + "step": 11127 + }, + { + "epoch": 0.571898447939151, + "grad_norm": 1.1304795742034912, + "learning_rate": 4.083899284291523e-06, + "loss": 0.7732, + "step": 11128 + }, + { + "epoch": 0.5719498406824957, + "grad_norm": 1.076361060142517, + "learning_rate": 4.083081131390811e-06, + "loss": 0.7042, + "step": 11129 + }, + { + "epoch": 0.5720012334258403, + "grad_norm": 1.0693769454956055, + "learning_rate": 4.082263003894134e-06, + "loss": 0.765, + "step": 11130 + }, + { + "epoch": 0.572052626169185, + "grad_norm": 1.0774987936019897, + "learning_rate": 4.081444901824163e-06, + "loss": 0.7234, + "step": 11131 + }, + { + "epoch": 0.5721040189125296, + "grad_norm": 1.0646545886993408, + "learning_rate": 4.080626825203564e-06, + "loss": 0.724, + "step": 11132 + }, + { + "epoch": 0.5721554116558741, + "grad_norm": 1.1105787754058838, + "learning_rate": 4.0798087740550006e-06, + "loss": 0.6962, + "step": 11133 + }, + { + "epoch": 0.5722068043992188, + "grad_norm": 1.0818690061569214, + "learning_rate": 4.078990748401138e-06, + "loss": 0.7445, + "step": 11134 + }, + { + "epoch": 0.5722581971425634, + "grad_norm": 1.064070701599121, + "learning_rate": 4.078172748264642e-06, + "loss": 0.7317, + "step": 11135 + }, + { + "epoch": 0.5723095898859081, + "grad_norm": 1.026812195777893, + "learning_rate": 4.077354773668174e-06, + "loss": 0.6828, + "step": 11136 + }, + { + "epoch": 0.5723609826292527, + "grad_norm": 1.7307419776916504, + "learning_rate": 4.076536824634398e-06, + "loss": 0.6693, + "step": 11137 + }, + { + "epoch": 0.5724123753725974, + "grad_norm": 1.0935274362564087, + "learning_rate": 4.075718901185976e-06, + "loss": 0.7039, + "step": 11138 + }, + { + "epoch": 0.572463768115942, + "grad_norm": 0.7822026014328003, + "learning_rate": 4.07490100334557e-06, + "loss": 0.67, + "step": 11139 + }, + { + "epoch": 0.5725151608592867, + "grad_norm": 0.9213756918907166, + "learning_rate": 4.074083131135838e-06, + "loss": 0.6692, + "step": 11140 + }, + { + "epoch": 0.5725665536026313, + "grad_norm": 1.1129777431488037, + "learning_rate": 4.073265284579441e-06, + "loss": 0.7597, + "step": 11141 + }, + { + "epoch": 0.572617946345976, + "grad_norm": 0.7165144681930542, + "learning_rate": 4.072447463699042e-06, + "loss": 0.6394, + "step": 11142 + }, + { + "epoch": 0.5726693390893206, + "grad_norm": 1.121860384941101, + "learning_rate": 4.071629668517292e-06, + "loss": 0.6887, + "step": 11143 + }, + { + "epoch": 0.5727207318326653, + "grad_norm": 1.1139198541641235, + "learning_rate": 4.070811899056854e-06, + "loss": 0.7311, + "step": 11144 + }, + { + "epoch": 0.5727721245760099, + "grad_norm": 1.0638325214385986, + "learning_rate": 4.0699941553403845e-06, + "loss": 0.7539, + "step": 11145 + }, + { + "epoch": 0.5728235173193545, + "grad_norm": 1.109394907951355, + "learning_rate": 4.069176437390538e-06, + "loss": 0.7114, + "step": 11146 + }, + { + "epoch": 0.5728749100626992, + "grad_norm": 1.0663273334503174, + "learning_rate": 4.068358745229971e-06, + "loss": 0.733, + "step": 11147 + }, + { + "epoch": 0.5729263028060438, + "grad_norm": 0.9138264060020447, + "learning_rate": 4.067541078881341e-06, + "loss": 0.6559, + "step": 11148 + }, + { + "epoch": 0.5729776955493884, + "grad_norm": 1.0690398216247559, + "learning_rate": 4.066723438367297e-06, + "loss": 0.7053, + "step": 11149 + }, + { + "epoch": 0.573029088292733, + "grad_norm": 1.0747218132019043, + "learning_rate": 4.0659058237104954e-06, + "loss": 0.7706, + "step": 11150 + }, + { + "epoch": 0.5730804810360777, + "grad_norm": 1.125722885131836, + "learning_rate": 4.065088234933591e-06, + "loss": 0.7398, + "step": 11151 + }, + { + "epoch": 0.5731318737794223, + "grad_norm": 1.0590214729309082, + "learning_rate": 4.064270672059231e-06, + "loss": 0.7327, + "step": 11152 + }, + { + "epoch": 0.573183266522767, + "grad_norm": 0.7506423592567444, + "learning_rate": 4.0634531351100695e-06, + "loss": 0.6388, + "step": 11153 + }, + { + "epoch": 0.5732346592661116, + "grad_norm": 1.1002992391586304, + "learning_rate": 4.062635624108756e-06, + "loss": 0.7147, + "step": 11154 + }, + { + "epoch": 0.5732860520094563, + "grad_norm": 1.1006497144699097, + "learning_rate": 4.061818139077944e-06, + "loss": 0.7417, + "step": 11155 + }, + { + "epoch": 0.5733374447528009, + "grad_norm": 0.7600101828575134, + "learning_rate": 4.061000680040278e-06, + "loss": 0.6612, + "step": 11156 + }, + { + "epoch": 0.5733888374961456, + "grad_norm": 0.9178047776222229, + "learning_rate": 4.0601832470184065e-06, + "loss": 0.6719, + "step": 11157 + }, + { + "epoch": 0.5734402302394902, + "grad_norm": 1.0835356712341309, + "learning_rate": 4.059365840034981e-06, + "loss": 0.7696, + "step": 11158 + }, + { + "epoch": 0.5734916229828348, + "grad_norm": 1.0631808042526245, + "learning_rate": 4.058548459112645e-06, + "loss": 0.6997, + "step": 11159 + }, + { + "epoch": 0.5735430157261795, + "grad_norm": 0.7505224347114563, + "learning_rate": 4.057731104274046e-06, + "loss": 0.6873, + "step": 11160 + }, + { + "epoch": 0.5735944084695241, + "grad_norm": 1.0952993631362915, + "learning_rate": 4.056913775541832e-06, + "loss": 0.7137, + "step": 11161 + }, + { + "epoch": 0.5736458012128688, + "grad_norm": 1.0359461307525635, + "learning_rate": 4.056096472938643e-06, + "loss": 0.6479, + "step": 11162 + }, + { + "epoch": 0.5736971939562134, + "grad_norm": 1.0359959602355957, + "learning_rate": 4.0552791964871256e-06, + "loss": 0.75, + "step": 11163 + }, + { + "epoch": 0.573748586699558, + "grad_norm": 1.011143684387207, + "learning_rate": 4.054461946209926e-06, + "loss": 0.6856, + "step": 11164 + }, + { + "epoch": 0.5737999794429026, + "grad_norm": 1.0200068950653076, + "learning_rate": 4.053644722129681e-06, + "loss": 0.6883, + "step": 11165 + }, + { + "epoch": 0.5738513721862473, + "grad_norm": 1.0730551481246948, + "learning_rate": 4.052827524269034e-06, + "loss": 0.7292, + "step": 11166 + }, + { + "epoch": 0.5739027649295919, + "grad_norm": 1.065383791923523, + "learning_rate": 4.05201035265063e-06, + "loss": 0.7855, + "step": 11167 + }, + { + "epoch": 0.5739541576729366, + "grad_norm": 1.1123597621917725, + "learning_rate": 4.051193207297106e-06, + "loss": 0.8085, + "step": 11168 + }, + { + "epoch": 0.5740055504162812, + "grad_norm": 1.0475029945373535, + "learning_rate": 4.050376088231103e-06, + "loss": 0.7276, + "step": 11169 + }, + { + "epoch": 0.5740569431596259, + "grad_norm": 1.0452054738998413, + "learning_rate": 4.049558995475259e-06, + "loss": 0.7432, + "step": 11170 + }, + { + "epoch": 0.5741083359029705, + "grad_norm": 1.0660079717636108, + "learning_rate": 4.048741929052215e-06, + "loss": 0.724, + "step": 11171 + }, + { + "epoch": 0.5741597286463151, + "grad_norm": 1.050893783569336, + "learning_rate": 4.047924888984605e-06, + "loss": 0.7273, + "step": 11172 + }, + { + "epoch": 0.5742111213896598, + "grad_norm": 1.1354444026947021, + "learning_rate": 4.047107875295067e-06, + "loss": 0.7051, + "step": 11173 + }, + { + "epoch": 0.5742625141330044, + "grad_norm": 0.8164092898368835, + "learning_rate": 4.04629088800624e-06, + "loss": 0.6564, + "step": 11174 + }, + { + "epoch": 0.5743139068763491, + "grad_norm": 0.7610651850700378, + "learning_rate": 4.045473927140754e-06, + "loss": 0.6924, + "step": 11175 + }, + { + "epoch": 0.5743652996196937, + "grad_norm": 1.0491775274276733, + "learning_rate": 4.044656992721246e-06, + "loss": 0.705, + "step": 11176 + }, + { + "epoch": 0.5744166923630384, + "grad_norm": 1.1353886127471924, + "learning_rate": 4.043840084770353e-06, + "loss": 0.7558, + "step": 11177 + }, + { + "epoch": 0.574468085106383, + "grad_norm": 1.1617685556411743, + "learning_rate": 4.043023203310703e-06, + "loss": 0.7115, + "step": 11178 + }, + { + "epoch": 0.5745194778497276, + "grad_norm": 1.1784075498580933, + "learning_rate": 4.042206348364931e-06, + "loss": 0.6914, + "step": 11179 + }, + { + "epoch": 0.5745708705930722, + "grad_norm": 1.2941726446151733, + "learning_rate": 4.0413895199556714e-06, + "loss": 0.8257, + "step": 11180 + }, + { + "epoch": 0.5746222633364169, + "grad_norm": 1.01173734664917, + "learning_rate": 4.040572718105549e-06, + "loss": 0.6937, + "step": 11181 + }, + { + "epoch": 0.5746736560797615, + "grad_norm": 1.060337781906128, + "learning_rate": 4.039755942837198e-06, + "loss": 0.7409, + "step": 11182 + }, + { + "epoch": 0.5747250488231062, + "grad_norm": 1.071387529373169, + "learning_rate": 4.038939194173249e-06, + "loss": 0.7677, + "step": 11183 + }, + { + "epoch": 0.5747764415664508, + "grad_norm": 1.0330414772033691, + "learning_rate": 4.038122472136327e-06, + "loss": 0.6949, + "step": 11184 + }, + { + "epoch": 0.5748278343097954, + "grad_norm": 1.0479745864868164, + "learning_rate": 4.037305776749062e-06, + "loss": 0.7026, + "step": 11185 + }, + { + "epoch": 0.5748792270531401, + "grad_norm": 0.7281084656715393, + "learning_rate": 4.0364891080340805e-06, + "loss": 0.6631, + "step": 11186 + }, + { + "epoch": 0.5749306197964847, + "grad_norm": 1.0737628936767578, + "learning_rate": 4.035672466014011e-06, + "loss": 0.688, + "step": 11187 + }, + { + "epoch": 0.5749820125398294, + "grad_norm": 1.040325403213501, + "learning_rate": 4.034855850711476e-06, + "loss": 0.6704, + "step": 11188 + }, + { + "epoch": 0.575033405283174, + "grad_norm": 1.0865850448608398, + "learning_rate": 4.034039262149104e-06, + "loss": 0.7103, + "step": 11189 + }, + { + "epoch": 0.5750847980265187, + "grad_norm": 0.7549672722816467, + "learning_rate": 4.033222700349519e-06, + "loss": 0.6513, + "step": 11190 + }, + { + "epoch": 0.5751361907698633, + "grad_norm": 1.0655280351638794, + "learning_rate": 4.032406165335342e-06, + "loss": 0.7231, + "step": 11191 + }, + { + "epoch": 0.575187583513208, + "grad_norm": 1.0084712505340576, + "learning_rate": 4.031589657129196e-06, + "loss": 0.7265, + "step": 11192 + }, + { + "epoch": 0.5752389762565526, + "grad_norm": 1.0789988040924072, + "learning_rate": 4.030773175753707e-06, + "loss": 0.7168, + "step": 11193 + }, + { + "epoch": 0.5752903689998972, + "grad_norm": 1.0806828737258911, + "learning_rate": 4.029956721231492e-06, + "loss": 0.7114, + "step": 11194 + }, + { + "epoch": 0.5753417617432418, + "grad_norm": 1.0372344255447388, + "learning_rate": 4.029140293585172e-06, + "loss": 0.6991, + "step": 11195 + }, + { + "epoch": 0.5753931544865865, + "grad_norm": 1.200129747390747, + "learning_rate": 4.028323892837369e-06, + "loss": 0.6665, + "step": 11196 + }, + { + "epoch": 0.5754445472299311, + "grad_norm": 1.0972546339035034, + "learning_rate": 4.0275075190107e-06, + "loss": 0.7224, + "step": 11197 + }, + { + "epoch": 0.5754959399732757, + "grad_norm": 0.730038046836853, + "learning_rate": 4.026691172127786e-06, + "loss": 0.5976, + "step": 11198 + }, + { + "epoch": 0.5755473327166204, + "grad_norm": 1.0375398397445679, + "learning_rate": 4.025874852211241e-06, + "loss": 0.6598, + "step": 11199 + }, + { + "epoch": 0.575598725459965, + "grad_norm": 1.113016963005066, + "learning_rate": 4.025058559283687e-06, + "loss": 0.7246, + "step": 11200 + }, + { + "epoch": 0.5756501182033097, + "grad_norm": 1.0992497205734253, + "learning_rate": 4.024242293367735e-06, + "loss": 0.71, + "step": 11201 + }, + { + "epoch": 0.5757015109466543, + "grad_norm": 1.1115483045578003, + "learning_rate": 4.023426054486002e-06, + "loss": 0.8117, + "step": 11202 + }, + { + "epoch": 0.575752903689999, + "grad_norm": 1.040340542793274, + "learning_rate": 4.022609842661105e-06, + "loss": 0.7448, + "step": 11203 + }, + { + "epoch": 0.5758042964333436, + "grad_norm": 0.8044306039810181, + "learning_rate": 4.021793657915654e-06, + "loss": 0.6441, + "step": 11204 + }, + { + "epoch": 0.5758556891766883, + "grad_norm": 1.0453929901123047, + "learning_rate": 4.020977500272264e-06, + "loss": 0.7334, + "step": 11205 + }, + { + "epoch": 0.5759070819200329, + "grad_norm": 1.1692211627960205, + "learning_rate": 4.020161369753548e-06, + "loss": 0.7911, + "step": 11206 + }, + { + "epoch": 0.5759584746633776, + "grad_norm": 1.1164125204086304, + "learning_rate": 4.0193452663821155e-06, + "loss": 0.7096, + "step": 11207 + }, + { + "epoch": 0.5760098674067222, + "grad_norm": 1.1373391151428223, + "learning_rate": 4.01852919018058e-06, + "loss": 0.7258, + "step": 11208 + }, + { + "epoch": 0.5760612601500668, + "grad_norm": 1.2386666536331177, + "learning_rate": 4.017713141171551e-06, + "loss": 0.6911, + "step": 11209 + }, + { + "epoch": 0.5761126528934114, + "grad_norm": 1.1416493654251099, + "learning_rate": 4.016897119377635e-06, + "loss": 0.7764, + "step": 11210 + }, + { + "epoch": 0.576164045636756, + "grad_norm": 0.7335790395736694, + "learning_rate": 4.016081124821443e-06, + "loss": 0.7018, + "step": 11211 + }, + { + "epoch": 0.5762154383801007, + "grad_norm": 0.7119911909103394, + "learning_rate": 4.0152651575255865e-06, + "loss": 0.6922, + "step": 11212 + }, + { + "epoch": 0.5762668311234453, + "grad_norm": 1.0537574291229248, + "learning_rate": 4.014449217512665e-06, + "loss": 0.7002, + "step": 11213 + }, + { + "epoch": 0.57631822386679, + "grad_norm": 1.0979411602020264, + "learning_rate": 4.013633304805289e-06, + "loss": 0.7471, + "step": 11214 + }, + { + "epoch": 0.5763696166101346, + "grad_norm": 1.0876742601394653, + "learning_rate": 4.012817419426063e-06, + "loss": 0.6873, + "step": 11215 + }, + { + "epoch": 0.5764210093534793, + "grad_norm": 1.0729069709777832, + "learning_rate": 4.0120015613975955e-06, + "loss": 0.7417, + "step": 11216 + }, + { + "epoch": 0.5764724020968239, + "grad_norm": 1.066092610359192, + "learning_rate": 4.011185730742485e-06, + "loss": 0.7239, + "step": 11217 + }, + { + "epoch": 0.5765237948401686, + "grad_norm": 1.0434346199035645, + "learning_rate": 4.010369927483338e-06, + "loss": 0.7042, + "step": 11218 + }, + { + "epoch": 0.5765751875835132, + "grad_norm": 1.0563926696777344, + "learning_rate": 4.0095541516427565e-06, + "loss": 0.6929, + "step": 11219 + }, + { + "epoch": 0.5766265803268579, + "grad_norm": 0.6743776798248291, + "learning_rate": 4.008738403243341e-06, + "loss": 0.6228, + "step": 11220 + }, + { + "epoch": 0.5766779730702025, + "grad_norm": 1.0235823392868042, + "learning_rate": 4.007922682307693e-06, + "loss": 0.7155, + "step": 11221 + }, + { + "epoch": 0.5767293658135472, + "grad_norm": 1.1041605472564697, + "learning_rate": 4.007106988858417e-06, + "loss": 0.7318, + "step": 11222 + }, + { + "epoch": 0.5767807585568918, + "grad_norm": 0.7637249827384949, + "learning_rate": 4.006291322918106e-06, + "loss": 0.6186, + "step": 11223 + }, + { + "epoch": 0.5768321513002365, + "grad_norm": 0.9446680545806885, + "learning_rate": 4.005475684509362e-06, + "loss": 0.6688, + "step": 11224 + }, + { + "epoch": 0.576883544043581, + "grad_norm": 1.0663249492645264, + "learning_rate": 4.004660073654785e-06, + "loss": 0.7168, + "step": 11225 + }, + { + "epoch": 0.5769349367869256, + "grad_norm": 1.0669889450073242, + "learning_rate": 4.003844490376967e-06, + "loss": 0.7059, + "step": 11226 + }, + { + "epoch": 0.5769863295302703, + "grad_norm": 1.1404662132263184, + "learning_rate": 4.003028934698507e-06, + "loss": 0.7279, + "step": 11227 + }, + { + "epoch": 0.5770377222736149, + "grad_norm": 1.03303062915802, + "learning_rate": 4.002213406642003e-06, + "loss": 0.6643, + "step": 11228 + }, + { + "epoch": 0.5770891150169596, + "grad_norm": 1.05778169631958, + "learning_rate": 4.001397906230047e-06, + "loss": 0.7051, + "step": 11229 + }, + { + "epoch": 0.5771405077603042, + "grad_norm": 1.0519039630889893, + "learning_rate": 4.0005824334852325e-06, + "loss": 0.7692, + "step": 11230 + }, + { + "epoch": 0.5771919005036489, + "grad_norm": 1.0377106666564941, + "learning_rate": 3.999766988430156e-06, + "loss": 0.6821, + "step": 11231 + }, + { + "epoch": 0.5772432932469935, + "grad_norm": 1.1369349956512451, + "learning_rate": 3.99895157108741e-06, + "loss": 0.7297, + "step": 11232 + }, + { + "epoch": 0.5772946859903382, + "grad_norm": 1.1392085552215576, + "learning_rate": 3.998136181479583e-06, + "loss": 0.7466, + "step": 11233 + }, + { + "epoch": 0.5773460787336828, + "grad_norm": 1.1218342781066895, + "learning_rate": 3.997320819629268e-06, + "loss": 0.7568, + "step": 11234 + }, + { + "epoch": 0.5773974714770275, + "grad_norm": 1.0153709650039673, + "learning_rate": 3.996505485559057e-06, + "loss": 0.7426, + "step": 11235 + }, + { + "epoch": 0.5774488642203721, + "grad_norm": 1.050020456314087, + "learning_rate": 3.995690179291536e-06, + "loss": 0.7661, + "step": 11236 + }, + { + "epoch": 0.5775002569637168, + "grad_norm": 1.0119903087615967, + "learning_rate": 3.994874900849295e-06, + "loss": 0.6648, + "step": 11237 + }, + { + "epoch": 0.5775516497070614, + "grad_norm": 0.7563510537147522, + "learning_rate": 3.994059650254924e-06, + "loss": 0.6803, + "step": 11238 + }, + { + "epoch": 0.5776030424504061, + "grad_norm": 1.0323697328567505, + "learning_rate": 3.993244427531008e-06, + "loss": 0.6764, + "step": 11239 + }, + { + "epoch": 0.5776544351937506, + "grad_norm": 1.083554744720459, + "learning_rate": 3.992429232700134e-06, + "loss": 0.704, + "step": 11240 + }, + { + "epoch": 0.5777058279370952, + "grad_norm": 0.754530668258667, + "learning_rate": 3.99161406578489e-06, + "loss": 0.6466, + "step": 11241 + }, + { + "epoch": 0.5777572206804399, + "grad_norm": 1.0307697057724, + "learning_rate": 3.990798926807857e-06, + "loss": 0.7134, + "step": 11242 + }, + { + "epoch": 0.5778086134237845, + "grad_norm": 1.0322731733322144, + "learning_rate": 3.989983815791622e-06, + "loss": 0.6779, + "step": 11243 + }, + { + "epoch": 0.5778600061671292, + "grad_norm": 1.0287203788757324, + "learning_rate": 3.989168732758768e-06, + "loss": 0.668, + "step": 11244 + }, + { + "epoch": 0.5779113989104738, + "grad_norm": 1.1255407333374023, + "learning_rate": 3.988353677731876e-06, + "loss": 0.7193, + "step": 11245 + }, + { + "epoch": 0.5779627916538185, + "grad_norm": 1.1046271324157715, + "learning_rate": 3.987538650733527e-06, + "loss": 0.6597, + "step": 11246 + }, + { + "epoch": 0.5780141843971631, + "grad_norm": 1.130760908126831, + "learning_rate": 3.986723651786305e-06, + "loss": 0.6805, + "step": 11247 + }, + { + "epoch": 0.5780655771405078, + "grad_norm": 1.0813477039337158, + "learning_rate": 3.98590868091279e-06, + "loss": 0.8068, + "step": 11248 + }, + { + "epoch": 0.5781169698838524, + "grad_norm": 1.0859423875808716, + "learning_rate": 3.98509373813556e-06, + "loss": 0.7072, + "step": 11249 + }, + { + "epoch": 0.5781683626271971, + "grad_norm": 1.067419409751892, + "learning_rate": 3.984278823477193e-06, + "loss": 0.7368, + "step": 11250 + }, + { + "epoch": 0.5782197553705417, + "grad_norm": 1.018715739250183, + "learning_rate": 3.983463936960272e-06, + "loss": 0.7221, + "step": 11251 + }, + { + "epoch": 0.5782711481138864, + "grad_norm": 1.115850567817688, + "learning_rate": 3.982649078607367e-06, + "loss": 0.7565, + "step": 11252 + }, + { + "epoch": 0.578322540857231, + "grad_norm": 1.0523897409439087, + "learning_rate": 3.981834248441058e-06, + "loss": 0.7242, + "step": 11253 + }, + { + "epoch": 0.5783739336005757, + "grad_norm": 1.1480294466018677, + "learning_rate": 3.981019446483922e-06, + "loss": 0.7345, + "step": 11254 + }, + { + "epoch": 0.5784253263439202, + "grad_norm": 1.0524932146072388, + "learning_rate": 3.98020467275853e-06, + "loss": 0.7078, + "step": 11255 + }, + { + "epoch": 0.5784767190872648, + "grad_norm": 1.0533568859100342, + "learning_rate": 3.979389927287458e-06, + "loss": 0.6979, + "step": 11256 + }, + { + "epoch": 0.5785281118306095, + "grad_norm": 1.436596155166626, + "learning_rate": 3.978575210093281e-06, + "loss": 0.7406, + "step": 11257 + }, + { + "epoch": 0.5785795045739541, + "grad_norm": 1.0759296417236328, + "learning_rate": 3.977760521198568e-06, + "loss": 0.6905, + "step": 11258 + }, + { + "epoch": 0.5786308973172988, + "grad_norm": 1.1142526865005493, + "learning_rate": 3.9769458606258946e-06, + "loss": 0.7035, + "step": 11259 + }, + { + "epoch": 0.5786822900606434, + "grad_norm": 0.9966619610786438, + "learning_rate": 3.976131228397828e-06, + "loss": 0.7216, + "step": 11260 + }, + { + "epoch": 0.5787336828039881, + "grad_norm": 1.0635515451431274, + "learning_rate": 3.97531662453694e-06, + "loss": 0.6814, + "step": 11261 + }, + { + "epoch": 0.5787850755473327, + "grad_norm": 0.7547945380210876, + "learning_rate": 3.974502049065799e-06, + "loss": 0.6742, + "step": 11262 + }, + { + "epoch": 0.5788364682906774, + "grad_norm": 1.2300665378570557, + "learning_rate": 3.973687502006974e-06, + "loss": 0.7288, + "step": 11263 + }, + { + "epoch": 0.578887861034022, + "grad_norm": 1.023860216140747, + "learning_rate": 3.9728729833830355e-06, + "loss": 0.6907, + "step": 11264 + }, + { + "epoch": 0.5789392537773667, + "grad_norm": 1.06327486038208, + "learning_rate": 3.972058493216546e-06, + "loss": 0.7026, + "step": 11265 + }, + { + "epoch": 0.5789906465207113, + "grad_norm": 1.0746352672576904, + "learning_rate": 3.971244031530074e-06, + "loss": 0.6994, + "step": 11266 + }, + { + "epoch": 0.579042039264056, + "grad_norm": 1.1093498468399048, + "learning_rate": 3.970429598346186e-06, + "loss": 0.7512, + "step": 11267 + }, + { + "epoch": 0.5790934320074006, + "grad_norm": 1.1092314720153809, + "learning_rate": 3.969615193687443e-06, + "loss": 0.7514, + "step": 11268 + }, + { + "epoch": 0.5791448247507452, + "grad_norm": 1.2824609279632568, + "learning_rate": 3.96880081757641e-06, + "loss": 0.7228, + "step": 11269 + }, + { + "epoch": 0.5791962174940898, + "grad_norm": 1.0995031595230103, + "learning_rate": 3.967986470035653e-06, + "loss": 0.729, + "step": 11270 + }, + { + "epoch": 0.5792476102374344, + "grad_norm": 1.0830724239349365, + "learning_rate": 3.967172151087731e-06, + "loss": 0.7386, + "step": 11271 + }, + { + "epoch": 0.5792990029807791, + "grad_norm": 1.0968003273010254, + "learning_rate": 3.966357860755206e-06, + "loss": 0.7012, + "step": 11272 + }, + { + "epoch": 0.5793503957241237, + "grad_norm": 1.0464897155761719, + "learning_rate": 3.9655435990606415e-06, + "loss": 0.6683, + "step": 11273 + }, + { + "epoch": 0.5794017884674684, + "grad_norm": 1.1040109395980835, + "learning_rate": 3.964729366026593e-06, + "loss": 0.7153, + "step": 11274 + }, + { + "epoch": 0.579453181210813, + "grad_norm": 0.9912078976631165, + "learning_rate": 3.96391516167562e-06, + "loss": 0.7157, + "step": 11275 + }, + { + "epoch": 0.5795045739541577, + "grad_norm": 1.0945379734039307, + "learning_rate": 3.963100986030286e-06, + "loss": 0.7679, + "step": 11276 + }, + { + "epoch": 0.5795559666975023, + "grad_norm": 1.0576988458633423, + "learning_rate": 3.962286839113142e-06, + "loss": 0.6607, + "step": 11277 + }, + { + "epoch": 0.579607359440847, + "grad_norm": 0.7103452086448669, + "learning_rate": 3.961472720946748e-06, + "loss": 0.6558, + "step": 11278 + }, + { + "epoch": 0.5796587521841916, + "grad_norm": 1.052274465560913, + "learning_rate": 3.960658631553658e-06, + "loss": 0.74, + "step": 11279 + }, + { + "epoch": 0.5797101449275363, + "grad_norm": 0.7780666351318359, + "learning_rate": 3.959844570956429e-06, + "loss": 0.6468, + "step": 11280 + }, + { + "epoch": 0.5797615376708809, + "grad_norm": 0.701003909111023, + "learning_rate": 3.959030539177614e-06, + "loss": 0.6764, + "step": 11281 + }, + { + "epoch": 0.5798129304142255, + "grad_norm": 1.0648292303085327, + "learning_rate": 3.958216536239767e-06, + "loss": 0.7013, + "step": 11282 + }, + { + "epoch": 0.5798643231575702, + "grad_norm": 1.0383782386779785, + "learning_rate": 3.9574025621654435e-06, + "loss": 0.7399, + "step": 11283 + }, + { + "epoch": 0.5799157159009148, + "grad_norm": 1.099942684173584, + "learning_rate": 3.9565886169771895e-06, + "loss": 0.7641, + "step": 11284 + }, + { + "epoch": 0.5799671086442594, + "grad_norm": 1.0414830446243286, + "learning_rate": 3.955774700697559e-06, + "loss": 0.73, + "step": 11285 + }, + { + "epoch": 0.580018501387604, + "grad_norm": 0.825836718082428, + "learning_rate": 3.954960813349104e-06, + "loss": 0.6899, + "step": 11286 + }, + { + "epoch": 0.5800698941309487, + "grad_norm": 0.7473108768463135, + "learning_rate": 3.954146954954371e-06, + "loss": 0.6595, + "step": 11287 + }, + { + "epoch": 0.5801212868742933, + "grad_norm": 1.0817381143569946, + "learning_rate": 3.953333125535909e-06, + "loss": 0.7535, + "step": 11288 + }, + { + "epoch": 0.580172679617638, + "grad_norm": 0.88969886302948, + "learning_rate": 3.952519325116268e-06, + "loss": 0.6164, + "step": 11289 + }, + { + "epoch": 0.5802240723609826, + "grad_norm": 1.030044436454773, + "learning_rate": 3.951705553717994e-06, + "loss": 0.6778, + "step": 11290 + }, + { + "epoch": 0.5802754651043273, + "grad_norm": 1.0894672870635986, + "learning_rate": 3.950891811363632e-06, + "loss": 0.7515, + "step": 11291 + }, + { + "epoch": 0.5803268578476719, + "grad_norm": 1.0215415954589844, + "learning_rate": 3.95007809807573e-06, + "loss": 0.6758, + "step": 11292 + }, + { + "epoch": 0.5803782505910166, + "grad_norm": 1.221496820449829, + "learning_rate": 3.94926441387683e-06, + "loss": 0.7268, + "step": 11293 + }, + { + "epoch": 0.5804296433343612, + "grad_norm": 1.122450590133667, + "learning_rate": 3.948450758789477e-06, + "loss": 0.698, + "step": 11294 + }, + { + "epoch": 0.5804810360777058, + "grad_norm": 1.0512635707855225, + "learning_rate": 3.9476371328362135e-06, + "loss": 0.708, + "step": 11295 + }, + { + "epoch": 0.5805324288210505, + "grad_norm": 1.0841056108474731, + "learning_rate": 3.946823536039584e-06, + "loss": 0.7221, + "step": 11296 + }, + { + "epoch": 0.5805838215643951, + "grad_norm": 1.1028002500534058, + "learning_rate": 3.946009968422127e-06, + "loss": 0.7422, + "step": 11297 + }, + { + "epoch": 0.5806352143077398, + "grad_norm": 1.0233488082885742, + "learning_rate": 3.945196430006384e-06, + "loss": 0.6852, + "step": 11298 + }, + { + "epoch": 0.5806866070510844, + "grad_norm": 1.0731375217437744, + "learning_rate": 3.944382920814895e-06, + "loss": 0.6896, + "step": 11299 + }, + { + "epoch": 0.580737999794429, + "grad_norm": 1.1066044569015503, + "learning_rate": 3.943569440870199e-06, + "loss": 0.7257, + "step": 11300 + }, + { + "epoch": 0.5807893925377736, + "grad_norm": 1.0877400636672974, + "learning_rate": 3.942755990194834e-06, + "loss": 0.7956, + "step": 11301 + }, + { + "epoch": 0.5808407852811183, + "grad_norm": 1.018005132675171, + "learning_rate": 3.9419425688113396e-06, + "loss": 0.6921, + "step": 11302 + }, + { + "epoch": 0.5808921780244629, + "grad_norm": 1.1270915269851685, + "learning_rate": 3.9411291767422476e-06, + "loss": 0.7263, + "step": 11303 + }, + { + "epoch": 0.5809435707678076, + "grad_norm": 1.0828816890716553, + "learning_rate": 3.940315814010097e-06, + "loss": 0.7462, + "step": 11304 + }, + { + "epoch": 0.5809949635111522, + "grad_norm": 0.9924828410148621, + "learning_rate": 3.939502480637424e-06, + "loss": 0.7071, + "step": 11305 + }, + { + "epoch": 0.5810463562544969, + "grad_norm": 1.0831063985824585, + "learning_rate": 3.938689176646759e-06, + "loss": 0.7123, + "step": 11306 + }, + { + "epoch": 0.5810977489978415, + "grad_norm": 1.037266731262207, + "learning_rate": 3.9378759020606375e-06, + "loss": 0.6558, + "step": 11307 + }, + { + "epoch": 0.5811491417411861, + "grad_norm": 1.030300498008728, + "learning_rate": 3.93706265690159e-06, + "loss": 0.678, + "step": 11308 + }, + { + "epoch": 0.5812005344845308, + "grad_norm": 0.8015631437301636, + "learning_rate": 3.936249441192153e-06, + "loss": 0.6612, + "step": 11309 + }, + { + "epoch": 0.5812519272278754, + "grad_norm": 1.0393797159194946, + "learning_rate": 3.935436254954853e-06, + "loss": 0.7019, + "step": 11310 + }, + { + "epoch": 0.5813033199712201, + "grad_norm": 1.111364483833313, + "learning_rate": 3.93462309821222e-06, + "loss": 0.7222, + "step": 11311 + }, + { + "epoch": 0.5813547127145647, + "grad_norm": 1.0797027349472046, + "learning_rate": 3.9338099709867865e-06, + "loss": 0.6692, + "step": 11312 + }, + { + "epoch": 0.5814061054579094, + "grad_norm": 1.0732990503311157, + "learning_rate": 3.9329968733010764e-06, + "loss": 0.6956, + "step": 11313 + }, + { + "epoch": 0.581457498201254, + "grad_norm": 1.0442534685134888, + "learning_rate": 3.93218380517762e-06, + "loss": 0.7168, + "step": 11314 + }, + { + "epoch": 0.5815088909445987, + "grad_norm": 1.111409306526184, + "learning_rate": 3.931370766638946e-06, + "loss": 0.7161, + "step": 11315 + }, + { + "epoch": 0.5815602836879432, + "grad_norm": 0.7506732940673828, + "learning_rate": 3.930557757707576e-06, + "loss": 0.682, + "step": 11316 + }, + { + "epoch": 0.5816116764312879, + "grad_norm": 1.1169236898422241, + "learning_rate": 3.929744778406037e-06, + "loss": 0.7605, + "step": 11317 + }, + { + "epoch": 0.5816630691746325, + "grad_norm": 1.052832841873169, + "learning_rate": 3.928931828756857e-06, + "loss": 0.6902, + "step": 11318 + }, + { + "epoch": 0.5817144619179772, + "grad_norm": 1.0512350797653198, + "learning_rate": 3.928118908782552e-06, + "loss": 0.6963, + "step": 11319 + }, + { + "epoch": 0.5817658546613218, + "grad_norm": 1.026297688484192, + "learning_rate": 3.927306018505649e-06, + "loss": 0.7146, + "step": 11320 + }, + { + "epoch": 0.5818172474046664, + "grad_norm": 1.0498464107513428, + "learning_rate": 3.926493157948672e-06, + "loss": 0.7284, + "step": 11321 + }, + { + "epoch": 0.5818686401480111, + "grad_norm": 1.1579846143722534, + "learning_rate": 3.925680327134137e-06, + "loss": 0.7335, + "step": 11322 + }, + { + "epoch": 0.5819200328913557, + "grad_norm": 1.041756510734558, + "learning_rate": 3.924867526084567e-06, + "loss": 0.6957, + "step": 11323 + }, + { + "epoch": 0.5819714256347004, + "grad_norm": 1.0770649909973145, + "learning_rate": 3.92405475482248e-06, + "loss": 0.7097, + "step": 11324 + }, + { + "epoch": 0.582022818378045, + "grad_norm": 1.1626355648040771, + "learning_rate": 3.9232420133704e-06, + "loss": 0.7354, + "step": 11325 + }, + { + "epoch": 0.5820742111213897, + "grad_norm": 1.0778135061264038, + "learning_rate": 3.9224293017508365e-06, + "loss": 0.719, + "step": 11326 + }, + { + "epoch": 0.5821256038647343, + "grad_norm": 0.6938640475273132, + "learning_rate": 3.921616619986311e-06, + "loss": 0.651, + "step": 11327 + }, + { + "epoch": 0.582176996608079, + "grad_norm": 1.1208105087280273, + "learning_rate": 3.920803968099341e-06, + "loss": 0.7323, + "step": 11328 + }, + { + "epoch": 0.5822283893514236, + "grad_norm": 1.0829557180404663, + "learning_rate": 3.919991346112436e-06, + "loss": 0.7376, + "step": 11329 + }, + { + "epoch": 0.5822797820947683, + "grad_norm": 1.105506420135498, + "learning_rate": 3.919178754048114e-06, + "loss": 0.6508, + "step": 11330 + }, + { + "epoch": 0.5823311748381128, + "grad_norm": 1.0941522121429443, + "learning_rate": 3.9183661919288905e-06, + "loss": 0.6793, + "step": 11331 + }, + { + "epoch": 0.5823825675814575, + "grad_norm": 1.1453266143798828, + "learning_rate": 3.917553659777276e-06, + "loss": 0.7256, + "step": 11332 + }, + { + "epoch": 0.5824339603248021, + "grad_norm": 1.1139830350875854, + "learning_rate": 3.916741157615781e-06, + "loss": 0.6947, + "step": 11333 + }, + { + "epoch": 0.5824853530681467, + "grad_norm": 1.1017076969146729, + "learning_rate": 3.915928685466921e-06, + "loss": 0.7248, + "step": 11334 + }, + { + "epoch": 0.5825367458114914, + "grad_norm": 1.1475648880004883, + "learning_rate": 3.915116243353201e-06, + "loss": 0.7383, + "step": 11335 + }, + { + "epoch": 0.582588138554836, + "grad_norm": 1.0920730829238892, + "learning_rate": 3.914303831297132e-06, + "loss": 0.7193, + "step": 11336 + }, + { + "epoch": 0.5826395312981807, + "grad_norm": 1.0276113748550415, + "learning_rate": 3.913491449321227e-06, + "loss": 0.674, + "step": 11337 + }, + { + "epoch": 0.5826909240415253, + "grad_norm": 1.2802693843841553, + "learning_rate": 3.912679097447987e-06, + "loss": 0.719, + "step": 11338 + }, + { + "epoch": 0.58274231678487, + "grad_norm": 1.037237524986267, + "learning_rate": 3.911866775699923e-06, + "loss": 0.7321, + "step": 11339 + }, + { + "epoch": 0.5827937095282146, + "grad_norm": 1.059210181236267, + "learning_rate": 3.911054484099539e-06, + "loss": 0.749, + "step": 11340 + }, + { + "epoch": 0.5828451022715593, + "grad_norm": 1.0539554357528687, + "learning_rate": 3.9102422226693436e-06, + "loss": 0.7352, + "step": 11341 + }, + { + "epoch": 0.5828964950149039, + "grad_norm": 1.1592546701431274, + "learning_rate": 3.909429991431838e-06, + "loss": 0.7065, + "step": 11342 + }, + { + "epoch": 0.5829478877582486, + "grad_norm": 1.0351492166519165, + "learning_rate": 3.9086177904095266e-06, + "loss": 0.6888, + "step": 11343 + }, + { + "epoch": 0.5829992805015932, + "grad_norm": 1.046500563621521, + "learning_rate": 3.907805619624914e-06, + "loss": 0.6884, + "step": 11344 + }, + { + "epoch": 0.5830506732449379, + "grad_norm": 1.0641207695007324, + "learning_rate": 3.9069934791004985e-06, + "loss": 0.7248, + "step": 11345 + }, + { + "epoch": 0.5831020659882824, + "grad_norm": 1.0540226697921753, + "learning_rate": 3.906181368858783e-06, + "loss": 0.6541, + "step": 11346 + }, + { + "epoch": 0.583153458731627, + "grad_norm": 1.0949785709381104, + "learning_rate": 3.90536928892227e-06, + "loss": 0.7033, + "step": 11347 + }, + { + "epoch": 0.5832048514749717, + "grad_norm": 1.098457932472229, + "learning_rate": 3.904557239313456e-06, + "loss": 0.7132, + "step": 11348 + }, + { + "epoch": 0.5832562442183163, + "grad_norm": 1.0412571430206299, + "learning_rate": 3.903745220054838e-06, + "loss": 0.6922, + "step": 11349 + }, + { + "epoch": 0.583307636961661, + "grad_norm": 1.0224560499191284, + "learning_rate": 3.902933231168919e-06, + "loss": 0.6823, + "step": 11350 + }, + { + "epoch": 0.5833590297050056, + "grad_norm": 1.0984859466552734, + "learning_rate": 3.902121272678191e-06, + "loss": 0.7004, + "step": 11351 + }, + { + "epoch": 0.5834104224483503, + "grad_norm": 1.108841896057129, + "learning_rate": 3.901309344605152e-06, + "loss": 0.7373, + "step": 11352 + }, + { + "epoch": 0.5834618151916949, + "grad_norm": 1.081255555152893, + "learning_rate": 3.9004974469722986e-06, + "loss": 0.7206, + "step": 11353 + }, + { + "epoch": 0.5835132079350396, + "grad_norm": 1.0958716869354248, + "learning_rate": 3.899685579802122e-06, + "loss": 0.7595, + "step": 11354 + }, + { + "epoch": 0.5835646006783842, + "grad_norm": 1.0395506620407104, + "learning_rate": 3.898873743117117e-06, + "loss": 0.7104, + "step": 11355 + }, + { + "epoch": 0.5836159934217289, + "grad_norm": 1.0136529207229614, + "learning_rate": 3.898061936939776e-06, + "loss": 0.6765, + "step": 11356 + }, + { + "epoch": 0.5836673861650735, + "grad_norm": 1.0542958974838257, + "learning_rate": 3.897250161292595e-06, + "loss": 0.7047, + "step": 11357 + }, + { + "epoch": 0.5837187789084182, + "grad_norm": 1.0979572534561157, + "learning_rate": 3.896438416198058e-06, + "loss": 0.7381, + "step": 11358 + }, + { + "epoch": 0.5837701716517628, + "grad_norm": 0.7045717239379883, + "learning_rate": 3.895626701678658e-06, + "loss": 0.6959, + "step": 11359 + }, + { + "epoch": 0.5838215643951075, + "grad_norm": 0.851304292678833, + "learning_rate": 3.894815017756887e-06, + "loss": 0.634, + "step": 11360 + }, + { + "epoch": 0.583872957138452, + "grad_norm": 1.0458976030349731, + "learning_rate": 3.89400336445523e-06, + "loss": 0.7246, + "step": 11361 + }, + { + "epoch": 0.5839243498817966, + "grad_norm": 0.8145836591720581, + "learning_rate": 3.893191741796174e-06, + "loss": 0.6559, + "step": 11362 + }, + { + "epoch": 0.5839757426251413, + "grad_norm": 1.1119391918182373, + "learning_rate": 3.89238014980221e-06, + "loss": 0.7032, + "step": 11363 + }, + { + "epoch": 0.5840271353684859, + "grad_norm": 1.0739480257034302, + "learning_rate": 3.8915685884958185e-06, + "loss": 0.7189, + "step": 11364 + }, + { + "epoch": 0.5840785281118306, + "grad_norm": 1.0773121118545532, + "learning_rate": 3.890757057899488e-06, + "loss": 0.6732, + "step": 11365 + }, + { + "epoch": 0.5841299208551752, + "grad_norm": 1.0478408336639404, + "learning_rate": 3.889945558035703e-06, + "loss": 0.7045, + "step": 11366 + }, + { + "epoch": 0.5841813135985199, + "grad_norm": 1.1085213422775269, + "learning_rate": 3.889134088926945e-06, + "loss": 0.7146, + "step": 11367 + }, + { + "epoch": 0.5842327063418645, + "grad_norm": 0.8725783228874207, + "learning_rate": 3.8883226505956966e-06, + "loss": 0.6623, + "step": 11368 + }, + { + "epoch": 0.5842840990852092, + "grad_norm": 1.0414930582046509, + "learning_rate": 3.887511243064442e-06, + "loss": 0.7137, + "step": 11369 + }, + { + "epoch": 0.5843354918285538, + "grad_norm": 1.217572569847107, + "learning_rate": 3.886699866355658e-06, + "loss": 0.7737, + "step": 11370 + }, + { + "epoch": 0.5843868845718985, + "grad_norm": 0.7633144855499268, + "learning_rate": 3.8858885204918265e-06, + "loss": 0.6156, + "step": 11371 + }, + { + "epoch": 0.5844382773152431, + "grad_norm": 1.065104365348816, + "learning_rate": 3.885077205495426e-06, + "loss": 0.7214, + "step": 11372 + }, + { + "epoch": 0.5844896700585878, + "grad_norm": 0.9881397485733032, + "learning_rate": 3.884265921388935e-06, + "loss": 0.7561, + "step": 11373 + }, + { + "epoch": 0.5845410628019324, + "grad_norm": 1.1618558168411255, + "learning_rate": 3.883454668194832e-06, + "loss": 0.6889, + "step": 11374 + }, + { + "epoch": 0.5845924555452771, + "grad_norm": 0.7622618079185486, + "learning_rate": 3.882643445935591e-06, + "loss": 0.7173, + "step": 11375 + }, + { + "epoch": 0.5846438482886216, + "grad_norm": 1.0533428192138672, + "learning_rate": 3.881832254633693e-06, + "loss": 0.7157, + "step": 11376 + }, + { + "epoch": 0.5846952410319662, + "grad_norm": 1.059116244316101, + "learning_rate": 3.881021094311604e-06, + "loss": 0.716, + "step": 11377 + }, + { + "epoch": 0.5847466337753109, + "grad_norm": 1.0572422742843628, + "learning_rate": 3.880209964991804e-06, + "loss": 0.7526, + "step": 11378 + }, + { + "epoch": 0.5847980265186555, + "grad_norm": 1.0340025424957275, + "learning_rate": 3.879398866696767e-06, + "loss": 0.7371, + "step": 11379 + }, + { + "epoch": 0.5848494192620002, + "grad_norm": 0.9992474317550659, + "learning_rate": 3.878587799448962e-06, + "loss": 0.7007, + "step": 11380 + }, + { + "epoch": 0.5849008120053448, + "grad_norm": 0.7635375261306763, + "learning_rate": 3.87777676327086e-06, + "loss": 0.6406, + "step": 11381 + }, + { + "epoch": 0.5849522047486895, + "grad_norm": 1.1275649070739746, + "learning_rate": 3.876965758184934e-06, + "loss": 0.7617, + "step": 11382 + }, + { + "epoch": 0.5850035974920341, + "grad_norm": 1.1199650764465332, + "learning_rate": 3.876154784213651e-06, + "loss": 0.7407, + "step": 11383 + }, + { + "epoch": 0.5850549902353788, + "grad_norm": 1.0286909341812134, + "learning_rate": 3.875343841379481e-06, + "loss": 0.7101, + "step": 11384 + }, + { + "epoch": 0.5851063829787234, + "grad_norm": 1.0594675540924072, + "learning_rate": 3.874532929704895e-06, + "loss": 0.759, + "step": 11385 + }, + { + "epoch": 0.5851577757220681, + "grad_norm": 1.0797507762908936, + "learning_rate": 3.873722049212354e-06, + "loss": 0.687, + "step": 11386 + }, + { + "epoch": 0.5852091684654127, + "grad_norm": 1.0735011100769043, + "learning_rate": 3.8729111999243275e-06, + "loss": 0.7044, + "step": 11387 + }, + { + "epoch": 0.5852605612087574, + "grad_norm": 0.8172444105148315, + "learning_rate": 3.872100381863281e-06, + "loss": 0.6631, + "step": 11388 + }, + { + "epoch": 0.585311953952102, + "grad_norm": 1.151502251625061, + "learning_rate": 3.871289595051679e-06, + "loss": 0.7113, + "step": 11389 + }, + { + "epoch": 0.5853633466954467, + "grad_norm": 1.1006399393081665, + "learning_rate": 3.870478839511984e-06, + "loss": 0.6957, + "step": 11390 + }, + { + "epoch": 0.5854147394387912, + "grad_norm": 0.6983252167701721, + "learning_rate": 3.869668115266658e-06, + "loss": 0.5999, + "step": 11391 + }, + { + "epoch": 0.5854661321821358, + "grad_norm": 1.2201707363128662, + "learning_rate": 3.868857422338165e-06, + "loss": 0.7259, + "step": 11392 + }, + { + "epoch": 0.5855175249254805, + "grad_norm": 1.1849849224090576, + "learning_rate": 3.868046760748964e-06, + "loss": 0.7413, + "step": 11393 + }, + { + "epoch": 0.5855689176688251, + "grad_norm": 0.8120681643486023, + "learning_rate": 3.867236130521516e-06, + "loss": 0.6511, + "step": 11394 + }, + { + "epoch": 0.5856203104121698, + "grad_norm": 1.1112028360366821, + "learning_rate": 3.866425531678282e-06, + "loss": 0.7398, + "step": 11395 + }, + { + "epoch": 0.5856717031555144, + "grad_norm": 0.6971831917762756, + "learning_rate": 3.865614964241717e-06, + "loss": 0.6495, + "step": 11396 + }, + { + "epoch": 0.5857230958988591, + "grad_norm": 1.069751262664795, + "learning_rate": 3.86480442823428e-06, + "loss": 0.6733, + "step": 11397 + }, + { + "epoch": 0.5857744886422037, + "grad_norm": 1.0719739198684692, + "learning_rate": 3.863993923678429e-06, + "loss": 0.6913, + "step": 11398 + }, + { + "epoch": 0.5858258813855484, + "grad_norm": 1.015916109085083, + "learning_rate": 3.863183450596617e-06, + "loss": 0.699, + "step": 11399 + }, + { + "epoch": 0.585877274128893, + "grad_norm": 9.213147163391113, + "learning_rate": 3.8623730090113e-06, + "loss": 0.7258, + "step": 11400 + }, + { + "epoch": 0.5859286668722377, + "grad_norm": 1.0615557432174683, + "learning_rate": 3.861562598944933e-06, + "loss": 0.7186, + "step": 11401 + }, + { + "epoch": 0.5859800596155823, + "grad_norm": 0.7782182097434998, + "learning_rate": 3.860752220419968e-06, + "loss": 0.6361, + "step": 11402 + }, + { + "epoch": 0.586031452358927, + "grad_norm": 1.0788471698760986, + "learning_rate": 3.859941873458857e-06, + "loss": 0.6974, + "step": 11403 + }, + { + "epoch": 0.5860828451022716, + "grad_norm": 0.745480477809906, + "learning_rate": 3.859131558084052e-06, + "loss": 0.6714, + "step": 11404 + }, + { + "epoch": 0.5861342378456162, + "grad_norm": 1.0484389066696167, + "learning_rate": 3.858321274318006e-06, + "loss": 0.7332, + "step": 11405 + }, + { + "epoch": 0.5861856305889609, + "grad_norm": 0.6571010947227478, + "learning_rate": 3.857511022183163e-06, + "loss": 0.6702, + "step": 11406 + }, + { + "epoch": 0.5862370233323054, + "grad_norm": 1.0439367294311523, + "learning_rate": 3.856700801701975e-06, + "loss": 0.7635, + "step": 11407 + }, + { + "epoch": 0.5862884160756501, + "grad_norm": 1.0609468221664429, + "learning_rate": 3.855890612896892e-06, + "loss": 0.6463, + "step": 11408 + }, + { + "epoch": 0.5863398088189947, + "grad_norm": 1.0257848501205444, + "learning_rate": 3.855080455790357e-06, + "loss": 0.6744, + "step": 11409 + }, + { + "epoch": 0.5863912015623394, + "grad_norm": 1.0989704132080078, + "learning_rate": 3.854270330404817e-06, + "loss": 0.7123, + "step": 11410 + }, + { + "epoch": 0.586442594305684, + "grad_norm": 1.060628890991211, + "learning_rate": 3.853460236762719e-06, + "loss": 0.7386, + "step": 11411 + }, + { + "epoch": 0.5864939870490287, + "grad_norm": 1.069916844367981, + "learning_rate": 3.8526501748865075e-06, + "loss": 0.7188, + "step": 11412 + }, + { + "epoch": 0.5865453797923733, + "grad_norm": 1.0351778268814087, + "learning_rate": 3.8518401447986225e-06, + "loss": 0.7271, + "step": 11413 + }, + { + "epoch": 0.586596772535718, + "grad_norm": 1.0779837369918823, + "learning_rate": 3.8510301465215105e-06, + "loss": 0.6725, + "step": 11414 + }, + { + "epoch": 0.5866481652790626, + "grad_norm": 1.1485346555709839, + "learning_rate": 3.850220180077611e-06, + "loss": 0.7224, + "step": 11415 + }, + { + "epoch": 0.5866995580224073, + "grad_norm": 1.1570924520492554, + "learning_rate": 3.8494102454893645e-06, + "loss": 0.7447, + "step": 11416 + }, + { + "epoch": 0.5867509507657519, + "grad_norm": 1.137811303138733, + "learning_rate": 3.848600342779215e-06, + "loss": 0.7356, + "step": 11417 + }, + { + "epoch": 0.5868023435090965, + "grad_norm": 1.0702612400054932, + "learning_rate": 3.847790471969596e-06, + "loss": 0.7196, + "step": 11418 + }, + { + "epoch": 0.5868537362524412, + "grad_norm": 1.0681898593902588, + "learning_rate": 3.8469806330829475e-06, + "loss": 0.7633, + "step": 11419 + }, + { + "epoch": 0.5869051289957858, + "grad_norm": 1.1845144033432007, + "learning_rate": 3.846170826141708e-06, + "loss": 0.7261, + "step": 11420 + }, + { + "epoch": 0.5869565217391305, + "grad_norm": 1.036198616027832, + "learning_rate": 3.8453610511683155e-06, + "loss": 0.6733, + "step": 11421 + }, + { + "epoch": 0.587007914482475, + "grad_norm": 1.0969637632369995, + "learning_rate": 3.844551308185202e-06, + "loss": 0.7468, + "step": 11422 + }, + { + "epoch": 0.5870593072258197, + "grad_norm": 0.743741512298584, + "learning_rate": 3.843741597214802e-06, + "loss": 0.6569, + "step": 11423 + }, + { + "epoch": 0.5871106999691643, + "grad_norm": 1.019382357597351, + "learning_rate": 3.842931918279552e-06, + "loss": 0.6991, + "step": 11424 + }, + { + "epoch": 0.587162092712509, + "grad_norm": 1.1030614376068115, + "learning_rate": 3.842122271401883e-06, + "loss": 0.7385, + "step": 11425 + }, + { + "epoch": 0.5872134854558536, + "grad_norm": 0.7521125674247742, + "learning_rate": 3.841312656604228e-06, + "loss": 0.6453, + "step": 11426 + }, + { + "epoch": 0.5872648781991983, + "grad_norm": 1.0179952383041382, + "learning_rate": 3.840503073909019e-06, + "loss": 0.7372, + "step": 11427 + }, + { + "epoch": 0.5873162709425429, + "grad_norm": 1.0493487119674683, + "learning_rate": 3.839693523338684e-06, + "loss": 0.7276, + "step": 11428 + }, + { + "epoch": 0.5873676636858876, + "grad_norm": 0.7874628305435181, + "learning_rate": 3.838884004915652e-06, + "loss": 0.6575, + "step": 11429 + }, + { + "epoch": 0.5874190564292322, + "grad_norm": 1.09451162815094, + "learning_rate": 3.838074518662355e-06, + "loss": 0.7229, + "step": 11430 + }, + { + "epoch": 0.5874704491725768, + "grad_norm": 0.8572195172309875, + "learning_rate": 3.8372650646012155e-06, + "loss": 0.6875, + "step": 11431 + }, + { + "epoch": 0.5875218419159215, + "grad_norm": 1.0479536056518555, + "learning_rate": 3.836455642754663e-06, + "loss": 0.6967, + "step": 11432 + }, + { + "epoch": 0.5875732346592661, + "grad_norm": 1.066697359085083, + "learning_rate": 3.835646253145123e-06, + "loss": 0.6812, + "step": 11433 + }, + { + "epoch": 0.5876246274026108, + "grad_norm": 1.0161327123641968, + "learning_rate": 3.8348368957950215e-06, + "loss": 0.6988, + "step": 11434 + }, + { + "epoch": 0.5876760201459554, + "grad_norm": 1.0635770559310913, + "learning_rate": 3.8340275707267804e-06, + "loss": 0.6866, + "step": 11435 + }, + { + "epoch": 0.5877274128893001, + "grad_norm": 1.1258184909820557, + "learning_rate": 3.833218277962823e-06, + "loss": 0.7857, + "step": 11436 + }, + { + "epoch": 0.5877788056326446, + "grad_norm": 1.1673272848129272, + "learning_rate": 3.832409017525575e-06, + "loss": 0.7742, + "step": 11437 + }, + { + "epoch": 0.5878301983759893, + "grad_norm": 0.7034832835197449, + "learning_rate": 3.8315997894374526e-06, + "loss": 0.6734, + "step": 11438 + }, + { + "epoch": 0.5878815911193339, + "grad_norm": 1.0350385904312134, + "learning_rate": 3.8307905937208774e-06, + "loss": 0.704, + "step": 11439 + }, + { + "epoch": 0.5879329838626786, + "grad_norm": 0.7118763327598572, + "learning_rate": 3.829981430398273e-06, + "loss": 0.6371, + "step": 11440 + }, + { + "epoch": 0.5879843766060232, + "grad_norm": 1.0899955034255981, + "learning_rate": 3.8291722994920526e-06, + "loss": 0.6795, + "step": 11441 + }, + { + "epoch": 0.5880357693493679, + "grad_norm": 1.0256341695785522, + "learning_rate": 3.828363201024635e-06, + "loss": 0.7315, + "step": 11442 + }, + { + "epoch": 0.5880871620927125, + "grad_norm": 1.04414963722229, + "learning_rate": 3.8275541350184405e-06, + "loss": 0.7078, + "step": 11443 + }, + { + "epoch": 0.5881385548360571, + "grad_norm": 1.0545594692230225, + "learning_rate": 3.82674510149588e-06, + "loss": 0.7147, + "step": 11444 + }, + { + "epoch": 0.5881899475794018, + "grad_norm": 1.1301032304763794, + "learning_rate": 3.8259361004793725e-06, + "loss": 0.7732, + "step": 11445 + }, + { + "epoch": 0.5882413403227464, + "grad_norm": 1.1334842443466187, + "learning_rate": 3.825127131991332e-06, + "loss": 0.7776, + "step": 11446 + }, + { + "epoch": 0.5882927330660911, + "grad_norm": 0.7404409050941467, + "learning_rate": 3.824318196054167e-06, + "loss": 0.636, + "step": 11447 + }, + { + "epoch": 0.5883441258094357, + "grad_norm": 1.0624581575393677, + "learning_rate": 3.823509292690295e-06, + "loss": 0.7105, + "step": 11448 + }, + { + "epoch": 0.5883955185527804, + "grad_norm": 0.9810363054275513, + "learning_rate": 3.8227004219221245e-06, + "loss": 0.6799, + "step": 11449 + }, + { + "epoch": 0.588446911296125, + "grad_norm": 1.235578179359436, + "learning_rate": 3.821891583772069e-06, + "loss": 0.6709, + "step": 11450 + }, + { + "epoch": 0.5884983040394697, + "grad_norm": 1.0692559480667114, + "learning_rate": 3.821082778262533e-06, + "loss": 0.7205, + "step": 11451 + }, + { + "epoch": 0.5885496967828142, + "grad_norm": 1.0192731618881226, + "learning_rate": 3.82027400541593e-06, + "loss": 0.6985, + "step": 11452 + }, + { + "epoch": 0.5886010895261589, + "grad_norm": 0.9839527010917664, + "learning_rate": 3.819465265254666e-06, + "loss": 0.6554, + "step": 11453 + }, + { + "epoch": 0.5886524822695035, + "grad_norm": 0.7507407069206238, + "learning_rate": 3.818656557801146e-06, + "loss": 0.6579, + "step": 11454 + }, + { + "epoch": 0.5887038750128482, + "grad_norm": 0.7295409440994263, + "learning_rate": 3.81784788307778e-06, + "loss": 0.6973, + "step": 11455 + }, + { + "epoch": 0.5887552677561928, + "grad_norm": 1.1134618520736694, + "learning_rate": 3.81703924110697e-06, + "loss": 0.6967, + "step": 11456 + }, + { + "epoch": 0.5888066604995374, + "grad_norm": 1.0342113971710205, + "learning_rate": 3.81623063191112e-06, + "loss": 0.6944, + "step": 11457 + }, + { + "epoch": 0.5888580532428821, + "grad_norm": 1.019368290901184, + "learning_rate": 3.815422055512633e-06, + "loss": 0.6835, + "step": 11458 + }, + { + "epoch": 0.5889094459862267, + "grad_norm": 1.0310980081558228, + "learning_rate": 3.8146135119339155e-06, + "loss": 0.7262, + "step": 11459 + }, + { + "epoch": 0.5889608387295714, + "grad_norm": 1.0068244934082031, + "learning_rate": 3.813805001197364e-06, + "loss": 0.6786, + "step": 11460 + }, + { + "epoch": 0.589012231472916, + "grad_norm": 0.7800037264823914, + "learning_rate": 3.8129965233253796e-06, + "loss": 0.6307, + "step": 11461 + }, + { + "epoch": 0.5890636242162607, + "grad_norm": 1.088172435760498, + "learning_rate": 3.8121880783403652e-06, + "loss": 0.7996, + "step": 11462 + }, + { + "epoch": 0.5891150169596053, + "grad_norm": 1.0754464864730835, + "learning_rate": 3.8113796662647166e-06, + "loss": 0.715, + "step": 11463 + }, + { + "epoch": 0.58916640970295, + "grad_norm": 1.0561606884002686, + "learning_rate": 3.8105712871208315e-06, + "loss": 0.7602, + "step": 11464 + }, + { + "epoch": 0.5892178024462946, + "grad_norm": 1.1313804388046265, + "learning_rate": 3.8097629409311075e-06, + "loss": 0.7305, + "step": 11465 + }, + { + "epoch": 0.5892691951896393, + "grad_norm": 1.0491067171096802, + "learning_rate": 3.8089546277179423e-06, + "loss": 0.6884, + "step": 11466 + }, + { + "epoch": 0.5893205879329838, + "grad_norm": 1.03443443775177, + "learning_rate": 3.8081463475037276e-06, + "loss": 0.6774, + "step": 11467 + }, + { + "epoch": 0.5893719806763285, + "grad_norm": 0.9868887066841125, + "learning_rate": 3.807338100310859e-06, + "loss": 0.6921, + "step": 11468 + }, + { + "epoch": 0.5894233734196731, + "grad_norm": 1.26797354221344, + "learning_rate": 3.806529886161732e-06, + "loss": 0.701, + "step": 11469 + }, + { + "epoch": 0.5894747661630177, + "grad_norm": 0.9926590323448181, + "learning_rate": 3.8057217050787348e-06, + "loss": 0.6712, + "step": 11470 + }, + { + "epoch": 0.5895261589063624, + "grad_norm": 1.0562177896499634, + "learning_rate": 3.8049135570842604e-06, + "loss": 0.7092, + "step": 11471 + }, + { + "epoch": 0.589577551649707, + "grad_norm": 1.1049439907073975, + "learning_rate": 3.8041054422007017e-06, + "loss": 0.749, + "step": 11472 + }, + { + "epoch": 0.5896289443930517, + "grad_norm": 0.768600583076477, + "learning_rate": 3.8032973604504443e-06, + "loss": 0.6591, + "step": 11473 + }, + { + "epoch": 0.5896803371363963, + "grad_norm": 0.7454254031181335, + "learning_rate": 3.802489311855878e-06, + "loss": 0.6671, + "step": 11474 + }, + { + "epoch": 0.589731729879741, + "grad_norm": 1.1571478843688965, + "learning_rate": 3.8016812964393923e-06, + "loss": 0.6873, + "step": 11475 + }, + { + "epoch": 0.5897831226230856, + "grad_norm": 1.0735392570495605, + "learning_rate": 3.8008733142233718e-06, + "loss": 0.7022, + "step": 11476 + }, + { + "epoch": 0.5898345153664303, + "grad_norm": 1.075069546699524, + "learning_rate": 3.800065365230203e-06, + "loss": 0.6875, + "step": 11477 + }, + { + "epoch": 0.5898859081097749, + "grad_norm": 0.6997358798980713, + "learning_rate": 3.7992574494822734e-06, + "loss": 0.6649, + "step": 11478 + }, + { + "epoch": 0.5899373008531196, + "grad_norm": 0.6858736872673035, + "learning_rate": 3.798449567001963e-06, + "loss": 0.6237, + "step": 11479 + }, + { + "epoch": 0.5899886935964642, + "grad_norm": 1.1006945371627808, + "learning_rate": 3.7976417178116552e-06, + "loss": 0.7519, + "step": 11480 + }, + { + "epoch": 0.5900400863398089, + "grad_norm": 1.0428390502929688, + "learning_rate": 3.7968339019337347e-06, + "loss": 0.7508, + "step": 11481 + }, + { + "epoch": 0.5900914790831535, + "grad_norm": 1.025068759918213, + "learning_rate": 3.7960261193905836e-06, + "loss": 0.746, + "step": 11482 + }, + { + "epoch": 0.590142871826498, + "grad_norm": 1.090218186378479, + "learning_rate": 3.795218370204578e-06, + "loss": 0.7187, + "step": 11483 + }, + { + "epoch": 0.5901942645698427, + "grad_norm": 0.8351790308952332, + "learning_rate": 3.7944106543980995e-06, + "loss": 0.6172, + "step": 11484 + }, + { + "epoch": 0.5902456573131873, + "grad_norm": 1.1013123989105225, + "learning_rate": 3.7936029719935276e-06, + "loss": 0.7106, + "step": 11485 + }, + { + "epoch": 0.590297050056532, + "grad_norm": 1.015156626701355, + "learning_rate": 3.792795323013238e-06, + "loss": 0.6869, + "step": 11486 + }, + { + "epoch": 0.5903484427998766, + "grad_norm": 1.13678777217865, + "learning_rate": 3.791987707479608e-06, + "loss": 0.8274, + "step": 11487 + }, + { + "epoch": 0.5903998355432213, + "grad_norm": 0.851416826248169, + "learning_rate": 3.7911801254150152e-06, + "loss": 0.6657, + "step": 11488 + }, + { + "epoch": 0.5904512282865659, + "grad_norm": 1.0114825963974, + "learning_rate": 3.790372576841831e-06, + "loss": 0.661, + "step": 11489 + }, + { + "epoch": 0.5905026210299106, + "grad_norm": 1.0497239828109741, + "learning_rate": 3.789565061782431e-06, + "loss": 0.6994, + "step": 11490 + }, + { + "epoch": 0.5905540137732552, + "grad_norm": 1.1517434120178223, + "learning_rate": 3.78875758025919e-06, + "loss": 0.7113, + "step": 11491 + }, + { + "epoch": 0.5906054065165999, + "grad_norm": 1.1280349493026733, + "learning_rate": 3.7879501322944756e-06, + "loss": 0.6752, + "step": 11492 + }, + { + "epoch": 0.5906567992599445, + "grad_norm": 1.0300226211547852, + "learning_rate": 3.787142717910661e-06, + "loss": 0.7306, + "step": 11493 + }, + { + "epoch": 0.5907081920032892, + "grad_norm": 1.1014257669448853, + "learning_rate": 3.7863353371301176e-06, + "loss": 0.6951, + "step": 11494 + }, + { + "epoch": 0.5907595847466338, + "grad_norm": 1.0422186851501465, + "learning_rate": 3.785527989975213e-06, + "loss": 0.7229, + "step": 11495 + }, + { + "epoch": 0.5908109774899785, + "grad_norm": 1.0603100061416626, + "learning_rate": 3.784720676468315e-06, + "loss": 0.7344, + "step": 11496 + }, + { + "epoch": 0.5908623702333231, + "grad_norm": 1.0279526710510254, + "learning_rate": 3.783913396631793e-06, + "loss": 0.6351, + "step": 11497 + }, + { + "epoch": 0.5909137629766676, + "grad_norm": 1.1161231994628906, + "learning_rate": 3.783106150488014e-06, + "loss": 0.7396, + "step": 11498 + }, + { + "epoch": 0.5909651557200123, + "grad_norm": 1.1192265748977661, + "learning_rate": 3.7822989380593393e-06, + "loss": 0.6818, + "step": 11499 + }, + { + "epoch": 0.5910165484633569, + "grad_norm": 1.1414432525634766, + "learning_rate": 3.781491759368136e-06, + "loss": 0.7216, + "step": 11500 + }, + { + "epoch": 0.5910679412067016, + "grad_norm": 1.0672099590301514, + "learning_rate": 3.7806846144367704e-06, + "loss": 0.6644, + "step": 11501 + }, + { + "epoch": 0.5911193339500462, + "grad_norm": 1.0559134483337402, + "learning_rate": 3.779877503287599e-06, + "loss": 0.702, + "step": 11502 + }, + { + "epoch": 0.5911707266933909, + "grad_norm": 1.0466864109039307, + "learning_rate": 3.779070425942988e-06, + "loss": 0.6968, + "step": 11503 + }, + { + "epoch": 0.5912221194367355, + "grad_norm": 1.0700088739395142, + "learning_rate": 3.778263382425297e-06, + "loss": 0.7692, + "step": 11504 + }, + { + "epoch": 0.5912735121800802, + "grad_norm": 1.0586183071136475, + "learning_rate": 3.7774563727568857e-06, + "loss": 0.7382, + "step": 11505 + }, + { + "epoch": 0.5913249049234248, + "grad_norm": 1.0176730155944824, + "learning_rate": 3.7766493969601136e-06, + "loss": 0.6501, + "step": 11506 + }, + { + "epoch": 0.5913762976667695, + "grad_norm": 0.8501995205879211, + "learning_rate": 3.7758424550573385e-06, + "loss": 0.6794, + "step": 11507 + }, + { + "epoch": 0.5914276904101141, + "grad_norm": 1.127898931503296, + "learning_rate": 3.7750355470709167e-06, + "loss": 0.7355, + "step": 11508 + }, + { + "epoch": 0.5914790831534588, + "grad_norm": 1.0252556800842285, + "learning_rate": 3.7742286730232036e-06, + "loss": 0.6954, + "step": 11509 + }, + { + "epoch": 0.5915304758968034, + "grad_norm": 1.053673267364502, + "learning_rate": 3.7734218329365586e-06, + "loss": 0.7503, + "step": 11510 + }, + { + "epoch": 0.5915818686401481, + "grad_norm": 0.7045313715934753, + "learning_rate": 3.77261502683333e-06, + "loss": 0.6904, + "step": 11511 + }, + { + "epoch": 0.5916332613834927, + "grad_norm": 1.056077003479004, + "learning_rate": 3.7718082547358744e-06, + "loss": 0.7455, + "step": 11512 + }, + { + "epoch": 0.5916846541268372, + "grad_norm": 1.1707674264907837, + "learning_rate": 3.7710015166665433e-06, + "loss": 0.7597, + "step": 11513 + }, + { + "epoch": 0.5917360468701819, + "grad_norm": 1.1338014602661133, + "learning_rate": 3.7701948126476907e-06, + "loss": 0.6744, + "step": 11514 + }, + { + "epoch": 0.5917874396135265, + "grad_norm": 1.035415768623352, + "learning_rate": 3.769388142701662e-06, + "loss": 0.7209, + "step": 11515 + }, + { + "epoch": 0.5918388323568712, + "grad_norm": 0.7342637777328491, + "learning_rate": 3.76858150685081e-06, + "loss": 0.6311, + "step": 11516 + }, + { + "epoch": 0.5918902251002158, + "grad_norm": 1.0672574043273926, + "learning_rate": 3.767774905117484e-06, + "loss": 0.7405, + "step": 11517 + }, + { + "epoch": 0.5919416178435605, + "grad_norm": 1.0254216194152832, + "learning_rate": 3.766968337524029e-06, + "loss": 0.6809, + "step": 11518 + }, + { + "epoch": 0.5919930105869051, + "grad_norm": 1.1160180568695068, + "learning_rate": 3.766161804092793e-06, + "loss": 0.7406, + "step": 11519 + }, + { + "epoch": 0.5920444033302498, + "grad_norm": 0.7081332206726074, + "learning_rate": 3.7653553048461233e-06, + "loss": 0.6537, + "step": 11520 + }, + { + "epoch": 0.5920957960735944, + "grad_norm": 1.1758373975753784, + "learning_rate": 3.7645488398063613e-06, + "loss": 0.7305, + "step": 11521 + }, + { + "epoch": 0.5921471888169391, + "grad_norm": 1.0893099308013916, + "learning_rate": 3.7637424089958526e-06, + "loss": 0.7492, + "step": 11522 + }, + { + "epoch": 0.5921985815602837, + "grad_norm": 0.9970418810844421, + "learning_rate": 3.7629360124369417e-06, + "loss": 0.6971, + "step": 11523 + }, + { + "epoch": 0.5922499743036284, + "grad_norm": 1.1113557815551758, + "learning_rate": 3.7621296501519676e-06, + "loss": 0.6493, + "step": 11524 + }, + { + "epoch": 0.592301367046973, + "grad_norm": 1.0411341190338135, + "learning_rate": 3.7613233221632715e-06, + "loss": 0.6843, + "step": 11525 + }, + { + "epoch": 0.5923527597903177, + "grad_norm": 1.0228313207626343, + "learning_rate": 3.760517028493196e-06, + "loss": 0.6722, + "step": 11526 + }, + { + "epoch": 0.5924041525336623, + "grad_norm": 0.7046017646789551, + "learning_rate": 3.7597107691640777e-06, + "loss": 0.618, + "step": 11527 + }, + { + "epoch": 0.5924555452770068, + "grad_norm": 1.0765703916549683, + "learning_rate": 3.7589045441982554e-06, + "loss": 0.7196, + "step": 11528 + }, + { + "epoch": 0.5925069380203515, + "grad_norm": 1.1152563095092773, + "learning_rate": 3.7580983536180667e-06, + "loss": 0.7696, + "step": 11529 + }, + { + "epoch": 0.5925583307636961, + "grad_norm": 1.0075186491012573, + "learning_rate": 3.7572921974458493e-06, + "loss": 0.7453, + "step": 11530 + }, + { + "epoch": 0.5926097235070408, + "grad_norm": 0.7157771587371826, + "learning_rate": 3.7564860757039347e-06, + "loss": 0.665, + "step": 11531 + }, + { + "epoch": 0.5926611162503854, + "grad_norm": 1.517165184020996, + "learning_rate": 3.7556799884146596e-06, + "loss": 0.7078, + "step": 11532 + }, + { + "epoch": 0.5927125089937301, + "grad_norm": 1.034657597541809, + "learning_rate": 3.754873935600359e-06, + "loss": 0.6777, + "step": 11533 + }, + { + "epoch": 0.5927639017370747, + "grad_norm": 0.7848665714263916, + "learning_rate": 3.754067917283361e-06, + "loss": 0.679, + "step": 11534 + }, + { + "epoch": 0.5928152944804194, + "grad_norm": 1.0560595989227295, + "learning_rate": 3.753261933485999e-06, + "loss": 0.6842, + "step": 11535 + }, + { + "epoch": 0.592866687223764, + "grad_norm": 1.1652435064315796, + "learning_rate": 3.7524559842306056e-06, + "loss": 0.732, + "step": 11536 + }, + { + "epoch": 0.5929180799671087, + "grad_norm": 0.7382861375808716, + "learning_rate": 3.7516500695395075e-06, + "loss": 0.6362, + "step": 11537 + }, + { + "epoch": 0.5929694727104533, + "grad_norm": 0.7808324694633484, + "learning_rate": 3.750844189435034e-06, + "loss": 0.6772, + "step": 11538 + }, + { + "epoch": 0.593020865453798, + "grad_norm": 0.7170704007148743, + "learning_rate": 3.7500383439395153e-06, + "loss": 0.6522, + "step": 11539 + }, + { + "epoch": 0.5930722581971426, + "grad_norm": 1.1628870964050293, + "learning_rate": 3.749232533075273e-06, + "loss": 0.7285, + "step": 11540 + }, + { + "epoch": 0.5931236509404872, + "grad_norm": 1.0524749755859375, + "learning_rate": 3.748426756864637e-06, + "loss": 0.7032, + "step": 11541 + }, + { + "epoch": 0.5931750436838319, + "grad_norm": 1.0761662721633911, + "learning_rate": 3.7476210153299295e-06, + "loss": 0.7632, + "step": 11542 + }, + { + "epoch": 0.5932264364271764, + "grad_norm": 1.0588785409927368, + "learning_rate": 3.7468153084934783e-06, + "loss": 0.7053, + "step": 11543 + }, + { + "epoch": 0.5932778291705211, + "grad_norm": 1.0721025466918945, + "learning_rate": 3.746009636377601e-06, + "loss": 0.7278, + "step": 11544 + }, + { + "epoch": 0.5933292219138657, + "grad_norm": 1.1676280498504639, + "learning_rate": 3.7452039990046214e-06, + "loss": 0.6952, + "step": 11545 + }, + { + "epoch": 0.5933806146572104, + "grad_norm": 1.1545284986495972, + "learning_rate": 3.7443983963968622e-06, + "loss": 0.7053, + "step": 11546 + }, + { + "epoch": 0.593432007400555, + "grad_norm": 1.070684790611267, + "learning_rate": 3.74359282857664e-06, + "loss": 0.7544, + "step": 11547 + }, + { + "epoch": 0.5934834001438997, + "grad_norm": 1.0640026330947876, + "learning_rate": 3.7427872955662764e-06, + "loss": 0.7137, + "step": 11548 + }, + { + "epoch": 0.5935347928872443, + "grad_norm": 1.206650972366333, + "learning_rate": 3.7419817973880904e-06, + "loss": 0.785, + "step": 11549 + }, + { + "epoch": 0.593586185630589, + "grad_norm": 1.0431939363479614, + "learning_rate": 3.741176334064396e-06, + "loss": 0.7568, + "step": 11550 + }, + { + "epoch": 0.5936375783739336, + "grad_norm": 1.1005574464797974, + "learning_rate": 3.74037090561751e-06, + "loss": 0.6928, + "step": 11551 + }, + { + "epoch": 0.5936889711172783, + "grad_norm": 1.129004716873169, + "learning_rate": 3.7395655120697493e-06, + "loss": 0.7232, + "step": 11552 + }, + { + "epoch": 0.5937403638606229, + "grad_norm": 1.1927634477615356, + "learning_rate": 3.7387601534434254e-06, + "loss": 0.7031, + "step": 11553 + }, + { + "epoch": 0.5937917566039675, + "grad_norm": 1.035874605178833, + "learning_rate": 3.7379548297608524e-06, + "loss": 0.712, + "step": 11554 + }, + { + "epoch": 0.5938431493473122, + "grad_norm": 1.0102505683898926, + "learning_rate": 3.737149541044345e-06, + "loss": 0.6867, + "step": 11555 + }, + { + "epoch": 0.5938945420906568, + "grad_norm": 1.0925077199935913, + "learning_rate": 3.7363442873162105e-06, + "loss": 0.7103, + "step": 11556 + }, + { + "epoch": 0.5939459348340015, + "grad_norm": 1.0355783700942993, + "learning_rate": 3.7355390685987607e-06, + "loss": 0.7611, + "step": 11557 + }, + { + "epoch": 0.593997327577346, + "grad_norm": 1.0587716102600098, + "learning_rate": 3.7347338849143065e-06, + "loss": 0.6764, + "step": 11558 + }, + { + "epoch": 0.5940487203206907, + "grad_norm": 0.7768186926841736, + "learning_rate": 3.733928736285155e-06, + "loss": 0.6648, + "step": 11559 + }, + { + "epoch": 0.5941001130640353, + "grad_norm": 1.1297720670700073, + "learning_rate": 3.7331236227336125e-06, + "loss": 0.7545, + "step": 11560 + }, + { + "epoch": 0.59415150580738, + "grad_norm": 0.7062411308288574, + "learning_rate": 3.7323185442819863e-06, + "loss": 0.6308, + "step": 11561 + }, + { + "epoch": 0.5942028985507246, + "grad_norm": 0.7260512113571167, + "learning_rate": 3.731513500952584e-06, + "loss": 0.6289, + "step": 11562 + }, + { + "epoch": 0.5942542912940693, + "grad_norm": 0.683921754360199, + "learning_rate": 3.7307084927677058e-06, + "loss": 0.7028, + "step": 11563 + }, + { + "epoch": 0.5943056840374139, + "grad_norm": 0.7255199551582336, + "learning_rate": 3.729903519749657e-06, + "loss": 0.6375, + "step": 11564 + }, + { + "epoch": 0.5943570767807586, + "grad_norm": 1.007773518562317, + "learning_rate": 3.7290985819207424e-06, + "loss": 0.7204, + "step": 11565 + }, + { + "epoch": 0.5944084695241032, + "grad_norm": 1.0374869108200073, + "learning_rate": 3.7282936793032594e-06, + "loss": 0.7309, + "step": 11566 + }, + { + "epoch": 0.5944598622674478, + "grad_norm": 1.0316556692123413, + "learning_rate": 3.7274888119195107e-06, + "loss": 0.7544, + "step": 11567 + }, + { + "epoch": 0.5945112550107925, + "grad_norm": 1.0869574546813965, + "learning_rate": 3.7266839797917965e-06, + "loss": 0.7279, + "step": 11568 + }, + { + "epoch": 0.5945626477541371, + "grad_norm": 1.0463942289352417, + "learning_rate": 3.7258791829424134e-06, + "loss": 0.7092, + "step": 11569 + }, + { + "epoch": 0.5946140404974818, + "grad_norm": 1.1058204174041748, + "learning_rate": 3.7250744213936594e-06, + "loss": 0.7276, + "step": 11570 + }, + { + "epoch": 0.5946654332408264, + "grad_norm": 0.7811251878738403, + "learning_rate": 3.7242696951678353e-06, + "loss": 0.6373, + "step": 11571 + }, + { + "epoch": 0.5947168259841711, + "grad_norm": 1.1674333810806274, + "learning_rate": 3.723465004287231e-06, + "loss": 0.8053, + "step": 11572 + }, + { + "epoch": 0.5947682187275157, + "grad_norm": 1.0823485851287842, + "learning_rate": 3.722660348774143e-06, + "loss": 0.7337, + "step": 11573 + }, + { + "epoch": 0.5948196114708603, + "grad_norm": 1.0887356996536255, + "learning_rate": 3.7218557286508663e-06, + "loss": 0.6892, + "step": 11574 + }, + { + "epoch": 0.5948710042142049, + "grad_norm": 1.0993797779083252, + "learning_rate": 3.7210511439396934e-06, + "loss": 0.6934, + "step": 11575 + }, + { + "epoch": 0.5949223969575496, + "grad_norm": 1.061801791191101, + "learning_rate": 3.7202465946629145e-06, + "loss": 0.6734, + "step": 11576 + }, + { + "epoch": 0.5949737897008942, + "grad_norm": 1.0608551502227783, + "learning_rate": 3.719442080842821e-06, + "loss": 0.6932, + "step": 11577 + }, + { + "epoch": 0.5950251824442389, + "grad_norm": 1.026380181312561, + "learning_rate": 3.7186376025017036e-06, + "loss": 0.6585, + "step": 11578 + }, + { + "epoch": 0.5950765751875835, + "grad_norm": 1.1202996969223022, + "learning_rate": 3.71783315966185e-06, + "loss": 0.6924, + "step": 11579 + }, + { + "epoch": 0.5951279679309281, + "grad_norm": 0.9567104578018188, + "learning_rate": 3.7170287523455474e-06, + "loss": 0.6711, + "step": 11580 + }, + { + "epoch": 0.5951793606742728, + "grad_norm": 0.7997528910636902, + "learning_rate": 3.7162243805750863e-06, + "loss": 0.6678, + "step": 11581 + }, + { + "epoch": 0.5952307534176174, + "grad_norm": 1.0786402225494385, + "learning_rate": 3.7154200443727473e-06, + "loss": 0.7708, + "step": 11582 + }, + { + "epoch": 0.5952821461609621, + "grad_norm": 1.1045929193496704, + "learning_rate": 3.714615743760818e-06, + "loss": 0.6729, + "step": 11583 + }, + { + "epoch": 0.5953335389043067, + "grad_norm": 0.9946085810661316, + "learning_rate": 3.713811478761584e-06, + "loss": 0.7403, + "step": 11584 + }, + { + "epoch": 0.5953849316476514, + "grad_norm": 1.2164396047592163, + "learning_rate": 3.7130072493973247e-06, + "loss": 0.7642, + "step": 11585 + }, + { + "epoch": 0.595436324390996, + "grad_norm": 1.07233464717865, + "learning_rate": 3.712203055690322e-06, + "loss": 0.6574, + "step": 11586 + }, + { + "epoch": 0.5954877171343407, + "grad_norm": 1.118683099746704, + "learning_rate": 3.71139889766286e-06, + "loss": 0.7201, + "step": 11587 + }, + { + "epoch": 0.5955391098776853, + "grad_norm": 0.7063745260238647, + "learning_rate": 3.7105947753372164e-06, + "loss": 0.6554, + "step": 11588 + }, + { + "epoch": 0.5955905026210299, + "grad_norm": 1.0506211519241333, + "learning_rate": 3.7097906887356694e-06, + "loss": 0.6734, + "step": 11589 + }, + { + "epoch": 0.5956418953643745, + "grad_norm": 1.1040539741516113, + "learning_rate": 3.7089866378804985e-06, + "loss": 0.7552, + "step": 11590 + }, + { + "epoch": 0.5956932881077192, + "grad_norm": 1.1247520446777344, + "learning_rate": 3.7081826227939826e-06, + "loss": 0.7786, + "step": 11591 + }, + { + "epoch": 0.5957446808510638, + "grad_norm": 1.1174639463424683, + "learning_rate": 3.707378643498393e-06, + "loss": 0.7758, + "step": 11592 + }, + { + "epoch": 0.5957960735944084, + "grad_norm": 1.0261585712432861, + "learning_rate": 3.7065747000160067e-06, + "loss": 0.693, + "step": 11593 + }, + { + "epoch": 0.5958474663377531, + "grad_norm": 1.1189981698989868, + "learning_rate": 3.7057707923690998e-06, + "loss": 0.7434, + "step": 11594 + }, + { + "epoch": 0.5958988590810977, + "grad_norm": 0.8146452903747559, + "learning_rate": 3.704966920579941e-06, + "loss": 0.6789, + "step": 11595 + }, + { + "epoch": 0.5959502518244424, + "grad_norm": 1.0878275632858276, + "learning_rate": 3.7041630846708047e-06, + "loss": 0.6598, + "step": 11596 + }, + { + "epoch": 0.596001644567787, + "grad_norm": 1.0307964086532593, + "learning_rate": 3.7033592846639634e-06, + "loss": 0.6947, + "step": 11597 + }, + { + "epoch": 0.5960530373111317, + "grad_norm": 1.0266512632369995, + "learning_rate": 3.7025555205816833e-06, + "loss": 0.6963, + "step": 11598 + }, + { + "epoch": 0.5961044300544763, + "grad_norm": 1.0802282094955444, + "learning_rate": 3.7017517924462363e-06, + "loss": 0.6882, + "step": 11599 + }, + { + "epoch": 0.596155822797821, + "grad_norm": 0.7667773962020874, + "learning_rate": 3.7009481002798914e-06, + "loss": 0.6452, + "step": 11600 + }, + { + "epoch": 0.5962072155411656, + "grad_norm": 1.1536296606063843, + "learning_rate": 3.7001444441049116e-06, + "loss": 0.6935, + "step": 11601 + }, + { + "epoch": 0.5962586082845103, + "grad_norm": 0.7323923707008362, + "learning_rate": 3.6993408239435645e-06, + "loss": 0.6385, + "step": 11602 + }, + { + "epoch": 0.5963100010278549, + "grad_norm": 1.0601736307144165, + "learning_rate": 3.6985372398181185e-06, + "loss": 0.6817, + "step": 11603 + }, + { + "epoch": 0.5963613937711995, + "grad_norm": 1.1324458122253418, + "learning_rate": 3.6977336917508334e-06, + "loss": 0.6983, + "step": 11604 + }, + { + "epoch": 0.5964127865145441, + "grad_norm": 0.690304696559906, + "learning_rate": 3.6969301797639725e-06, + "loss": 0.6299, + "step": 11605 + }, + { + "epoch": 0.5964641792578887, + "grad_norm": 1.0298535823822021, + "learning_rate": 3.6961267038797988e-06, + "loss": 0.717, + "step": 11606 + }, + { + "epoch": 0.5965155720012334, + "grad_norm": 0.8577505946159363, + "learning_rate": 3.695323264120575e-06, + "loss": 0.634, + "step": 11607 + }, + { + "epoch": 0.596566964744578, + "grad_norm": 1.0696932077407837, + "learning_rate": 3.6945198605085573e-06, + "loss": 0.7, + "step": 11608 + }, + { + "epoch": 0.5966183574879227, + "grad_norm": 1.0264880657196045, + "learning_rate": 3.6937164930660096e-06, + "loss": 0.7179, + "step": 11609 + }, + { + "epoch": 0.5966697502312673, + "grad_norm": 1.0531730651855469, + "learning_rate": 3.692913161815186e-06, + "loss": 0.7303, + "step": 11610 + }, + { + "epoch": 0.596721142974612, + "grad_norm": 1.0644382238388062, + "learning_rate": 3.692109866778344e-06, + "loss": 0.6674, + "step": 11611 + }, + { + "epoch": 0.5967725357179566, + "grad_norm": 0.6842978000640869, + "learning_rate": 3.6913066079777404e-06, + "loss": 0.6799, + "step": 11612 + }, + { + "epoch": 0.5968239284613013, + "grad_norm": 1.1380321979522705, + "learning_rate": 3.6905033854356315e-06, + "loss": 0.7257, + "step": 11613 + }, + { + "epoch": 0.5968753212046459, + "grad_norm": 1.0504555702209473, + "learning_rate": 3.689700199174268e-06, + "loss": 0.6603, + "step": 11614 + }, + { + "epoch": 0.5969267139479906, + "grad_norm": 1.145670771598816, + "learning_rate": 3.688897049215905e-06, + "loss": 0.7004, + "step": 11615 + }, + { + "epoch": 0.5969781066913352, + "grad_norm": 1.1740987300872803, + "learning_rate": 3.688093935582795e-06, + "loss": 0.7634, + "step": 11616 + }, + { + "epoch": 0.5970294994346799, + "grad_norm": 1.0503276586532593, + "learning_rate": 3.6872908582971878e-06, + "loss": 0.7693, + "step": 11617 + }, + { + "epoch": 0.5970808921780245, + "grad_norm": 1.0400291681289673, + "learning_rate": 3.6864878173813333e-06, + "loss": 0.7464, + "step": 11618 + }, + { + "epoch": 0.597132284921369, + "grad_norm": 1.0749807357788086, + "learning_rate": 3.6856848128574816e-06, + "loss": 0.7078, + "step": 11619 + }, + { + "epoch": 0.5971836776647137, + "grad_norm": 1.1221987009048462, + "learning_rate": 3.6848818447478783e-06, + "loss": 0.6786, + "step": 11620 + }, + { + "epoch": 0.5972350704080583, + "grad_norm": 1.0897870063781738, + "learning_rate": 3.6840789130747728e-06, + "loss": 0.7569, + "step": 11621 + }, + { + "epoch": 0.597286463151403, + "grad_norm": 0.7247182130813599, + "learning_rate": 3.68327601786041e-06, + "loss": 0.6727, + "step": 11622 + }, + { + "epoch": 0.5973378558947476, + "grad_norm": 1.014936923980713, + "learning_rate": 3.6824731591270368e-06, + "loss": 0.6759, + "step": 11623 + }, + { + "epoch": 0.5973892486380923, + "grad_norm": 0.7325012683868408, + "learning_rate": 3.681670336896893e-06, + "loss": 0.6639, + "step": 11624 + }, + { + "epoch": 0.5974406413814369, + "grad_norm": 1.0550287961959839, + "learning_rate": 3.680867551192224e-06, + "loss": 0.7376, + "step": 11625 + }, + { + "epoch": 0.5974920341247816, + "grad_norm": 1.0291054248809814, + "learning_rate": 3.680064802035273e-06, + "loss": 0.7328, + "step": 11626 + }, + { + "epoch": 0.5975434268681262, + "grad_norm": 1.0198391675949097, + "learning_rate": 3.679262089448278e-06, + "loss": 0.6988, + "step": 11627 + }, + { + "epoch": 0.5975948196114709, + "grad_norm": 1.2681646347045898, + "learning_rate": 3.678459413453479e-06, + "loss": 0.7767, + "step": 11628 + }, + { + "epoch": 0.5976462123548155, + "grad_norm": 1.1136642694473267, + "learning_rate": 3.6776567740731185e-06, + "loss": 0.7552, + "step": 11629 + }, + { + "epoch": 0.5976976050981602, + "grad_norm": 1.073568344116211, + "learning_rate": 3.676854171329429e-06, + "loss": 0.752, + "step": 11630 + }, + { + "epoch": 0.5977489978415048, + "grad_norm": 0.7971687316894531, + "learning_rate": 3.6760516052446514e-06, + "loss": 0.6518, + "step": 11631 + }, + { + "epoch": 0.5978003905848495, + "grad_norm": 1.1023242473602295, + "learning_rate": 3.6752490758410218e-06, + "loss": 0.7159, + "step": 11632 + }, + { + "epoch": 0.5978517833281941, + "grad_norm": 0.7493652701377869, + "learning_rate": 3.674446583140772e-06, + "loss": 0.6232, + "step": 11633 + }, + { + "epoch": 0.5979031760715386, + "grad_norm": 1.0546517372131348, + "learning_rate": 3.6736441271661366e-06, + "loss": 0.6756, + "step": 11634 + }, + { + "epoch": 0.5979545688148833, + "grad_norm": 0.663945198059082, + "learning_rate": 3.6728417079393506e-06, + "loss": 0.6618, + "step": 11635 + }, + { + "epoch": 0.5980059615582279, + "grad_norm": 1.0837557315826416, + "learning_rate": 3.6720393254826423e-06, + "loss": 0.7142, + "step": 11636 + }, + { + "epoch": 0.5980573543015726, + "grad_norm": 1.0990701913833618, + "learning_rate": 3.6712369798182445e-06, + "loss": 0.688, + "step": 11637 + }, + { + "epoch": 0.5981087470449172, + "grad_norm": 0.9982719421386719, + "learning_rate": 3.6704346709683863e-06, + "loss": 0.7082, + "step": 11638 + }, + { + "epoch": 0.5981601397882619, + "grad_norm": 1.1350024938583374, + "learning_rate": 3.6696323989552973e-06, + "loss": 0.6982, + "step": 11639 + }, + { + "epoch": 0.5982115325316065, + "grad_norm": 1.0660784244537354, + "learning_rate": 3.6688301638012036e-06, + "loss": 0.7206, + "step": 11640 + }, + { + "epoch": 0.5982629252749512, + "grad_norm": 1.0749998092651367, + "learning_rate": 3.6680279655283325e-06, + "loss": 0.7334, + "step": 11641 + }, + { + "epoch": 0.5983143180182958, + "grad_norm": 1.0760926008224487, + "learning_rate": 3.667225804158912e-06, + "loss": 0.6986, + "step": 11642 + }, + { + "epoch": 0.5983657107616405, + "grad_norm": 1.033233880996704, + "learning_rate": 3.6664236797151624e-06, + "loss": 0.7147, + "step": 11643 + }, + { + "epoch": 0.5984171035049851, + "grad_norm": 1.0752875804901123, + "learning_rate": 3.6656215922193096e-06, + "loss": 0.7152, + "step": 11644 + }, + { + "epoch": 0.5984684962483298, + "grad_norm": 1.0163453817367554, + "learning_rate": 3.6648195416935774e-06, + "loss": 0.6696, + "step": 11645 + }, + { + "epoch": 0.5985198889916744, + "grad_norm": 1.0013858079910278, + "learning_rate": 3.6640175281601833e-06, + "loss": 0.6845, + "step": 11646 + }, + { + "epoch": 0.5985712817350191, + "grad_norm": 0.7177518606185913, + "learning_rate": 3.663215551641351e-06, + "loss": 0.6888, + "step": 11647 + }, + { + "epoch": 0.5986226744783637, + "grad_norm": 1.096608281135559, + "learning_rate": 3.6624136121592995e-06, + "loss": 0.768, + "step": 11648 + }, + { + "epoch": 0.5986740672217082, + "grad_norm": 1.0215293169021606, + "learning_rate": 3.6616117097362464e-06, + "loss": 0.6327, + "step": 11649 + }, + { + "epoch": 0.5987254599650529, + "grad_norm": 1.0305129289627075, + "learning_rate": 3.6608098443944098e-06, + "loss": 0.7101, + "step": 11650 + }, + { + "epoch": 0.5987768527083975, + "grad_norm": 1.0206955671310425, + "learning_rate": 3.6600080161560073e-06, + "loss": 0.7401, + "step": 11651 + }, + { + "epoch": 0.5988282454517422, + "grad_norm": 1.0805392265319824, + "learning_rate": 3.6592062250432513e-06, + "loss": 0.7816, + "step": 11652 + }, + { + "epoch": 0.5988796381950868, + "grad_norm": 1.2910736799240112, + "learning_rate": 3.658404471078357e-06, + "loss": 0.7484, + "step": 11653 + }, + { + "epoch": 0.5989310309384315, + "grad_norm": 1.0983330011367798, + "learning_rate": 3.657602754283539e-06, + "loss": 0.6854, + "step": 11654 + }, + { + "epoch": 0.5989824236817761, + "grad_norm": 1.1698567867279053, + "learning_rate": 3.6568010746810113e-06, + "loss": 0.7388, + "step": 11655 + }, + { + "epoch": 0.5990338164251208, + "grad_norm": 1.0521223545074463, + "learning_rate": 3.6559994322929805e-06, + "loss": 0.6431, + "step": 11656 + }, + { + "epoch": 0.5990852091684654, + "grad_norm": 1.1070897579193115, + "learning_rate": 3.6551978271416587e-06, + "loss": 0.729, + "step": 11657 + }, + { + "epoch": 0.5991366019118101, + "grad_norm": 1.0167614221572876, + "learning_rate": 3.6543962592492566e-06, + "loss": 0.7097, + "step": 11658 + }, + { + "epoch": 0.5991879946551547, + "grad_norm": 0.8062676191329956, + "learning_rate": 3.65359472863798e-06, + "loss": 0.7016, + "step": 11659 + }, + { + "epoch": 0.5992393873984994, + "grad_norm": 0.7227058410644531, + "learning_rate": 3.6527932353300385e-06, + "loss": 0.6897, + "step": 11660 + }, + { + "epoch": 0.599290780141844, + "grad_norm": 1.0217188596725464, + "learning_rate": 3.6519917793476366e-06, + "loss": 0.7292, + "step": 11661 + }, + { + "epoch": 0.5993421728851887, + "grad_norm": 0.752909243106842, + "learning_rate": 3.651190360712978e-06, + "loss": 0.6529, + "step": 11662 + }, + { + "epoch": 0.5993935656285333, + "grad_norm": 1.0698862075805664, + "learning_rate": 3.6503889794482695e-06, + "loss": 0.7409, + "step": 11663 + }, + { + "epoch": 0.599444958371878, + "grad_norm": 1.3373955488204956, + "learning_rate": 3.6495876355757142e-06, + "loss": 0.7123, + "step": 11664 + }, + { + "epoch": 0.5994963511152225, + "grad_norm": 1.2184749841690063, + "learning_rate": 3.6487863291175114e-06, + "loss": 0.7078, + "step": 11665 + }, + { + "epoch": 0.5995477438585671, + "grad_norm": 1.2269978523254395, + "learning_rate": 3.6479850600958626e-06, + "loss": 0.7708, + "step": 11666 + }, + { + "epoch": 0.5995991366019118, + "grad_norm": 0.8566799163818359, + "learning_rate": 3.6471838285329686e-06, + "loss": 0.6976, + "step": 11667 + }, + { + "epoch": 0.5996505293452564, + "grad_norm": 1.0815638303756714, + "learning_rate": 3.64638263445103e-06, + "loss": 0.697, + "step": 11668 + }, + { + "epoch": 0.5997019220886011, + "grad_norm": 1.1381852626800537, + "learning_rate": 3.6455814778722406e-06, + "loss": 0.7504, + "step": 11669 + }, + { + "epoch": 0.5997533148319457, + "grad_norm": 0.9927752017974854, + "learning_rate": 3.6447803588187987e-06, + "loss": 0.6789, + "step": 11670 + }, + { + "epoch": 0.5998047075752904, + "grad_norm": 1.0346016883850098, + "learning_rate": 3.643979277312901e-06, + "loss": 0.6972, + "step": 11671 + }, + { + "epoch": 0.599856100318635, + "grad_norm": 1.1680797338485718, + "learning_rate": 3.643178233376742e-06, + "loss": 0.7197, + "step": 11672 + }, + { + "epoch": 0.5999074930619797, + "grad_norm": 0.6752480268478394, + "learning_rate": 3.6423772270325135e-06, + "loss": 0.6182, + "step": 11673 + }, + { + "epoch": 0.5999588858053243, + "grad_norm": 1.0828133821487427, + "learning_rate": 3.6415762583024124e-06, + "loss": 0.7292, + "step": 11674 + }, + { + "epoch": 0.600010278548669, + "grad_norm": 1.0699411630630493, + "learning_rate": 3.6407753272086245e-06, + "loss": 0.6896, + "step": 11675 + }, + { + "epoch": 0.6000616712920136, + "grad_norm": 1.1065906286239624, + "learning_rate": 3.6399744337733434e-06, + "loss": 0.7443, + "step": 11676 + }, + { + "epoch": 0.6001130640353582, + "grad_norm": 1.0475575923919678, + "learning_rate": 3.639173578018761e-06, + "loss": 0.7223, + "step": 11677 + }, + { + "epoch": 0.6001644567787029, + "grad_norm": 0.7609853148460388, + "learning_rate": 3.6383727599670598e-06, + "loss": 0.6446, + "step": 11678 + }, + { + "epoch": 0.6002158495220475, + "grad_norm": 1.1061346530914307, + "learning_rate": 3.637571979640431e-06, + "loss": 0.7572, + "step": 11679 + }, + { + "epoch": 0.6002672422653921, + "grad_norm": 1.0751458406448364, + "learning_rate": 3.6367712370610607e-06, + "loss": 0.7741, + "step": 11680 + }, + { + "epoch": 0.6003186350087367, + "grad_norm": 0.8085489869117737, + "learning_rate": 3.6359705322511337e-06, + "loss": 0.6636, + "step": 11681 + }, + { + "epoch": 0.6003700277520814, + "grad_norm": 1.0430597066879272, + "learning_rate": 3.635169865232833e-06, + "loss": 0.6927, + "step": 11682 + }, + { + "epoch": 0.600421420495426, + "grad_norm": 1.0076080560684204, + "learning_rate": 3.634369236028344e-06, + "loss": 0.7009, + "step": 11683 + }, + { + "epoch": 0.6004728132387707, + "grad_norm": 1.0606179237365723, + "learning_rate": 3.6335686446598497e-06, + "loss": 0.7447, + "step": 11684 + }, + { + "epoch": 0.6005242059821153, + "grad_norm": 0.7435125112533569, + "learning_rate": 3.6327680911495265e-06, + "loss": 0.678, + "step": 11685 + }, + { + "epoch": 0.60057559872546, + "grad_norm": 1.2992113828659058, + "learning_rate": 3.631967575519558e-06, + "loss": 0.7244, + "step": 11686 + }, + { + "epoch": 0.6006269914688046, + "grad_norm": 0.755803644657135, + "learning_rate": 3.6311670977921245e-06, + "loss": 0.6691, + "step": 11687 + }, + { + "epoch": 0.6006783842121493, + "grad_norm": 1.100875735282898, + "learning_rate": 3.6303666579893994e-06, + "loss": 0.7124, + "step": 11688 + }, + { + "epoch": 0.6007297769554939, + "grad_norm": 1.0880601406097412, + "learning_rate": 3.629566256133562e-06, + "loss": 0.753, + "step": 11689 + }, + { + "epoch": 0.6007811696988385, + "grad_norm": 1.1492772102355957, + "learning_rate": 3.62876589224679e-06, + "loss": 0.7784, + "step": 11690 + }, + { + "epoch": 0.6008325624421832, + "grad_norm": 1.1275568008422852, + "learning_rate": 3.627965566351255e-06, + "loss": 0.7312, + "step": 11691 + }, + { + "epoch": 0.6008839551855278, + "grad_norm": 10.821277618408203, + "learning_rate": 3.627165278469132e-06, + "loss": 0.8398, + "step": 11692 + }, + { + "epoch": 0.6009353479288725, + "grad_norm": 1.0418554544448853, + "learning_rate": 3.626365028622596e-06, + "loss": 0.7503, + "step": 11693 + }, + { + "epoch": 0.6009867406722171, + "grad_norm": 1.113433837890625, + "learning_rate": 3.625564816833814e-06, + "loss": 0.7725, + "step": 11694 + }, + { + "epoch": 0.6010381334155617, + "grad_norm": 0.708872377872467, + "learning_rate": 3.6247646431249596e-06, + "loss": 0.6314, + "step": 11695 + }, + { + "epoch": 0.6010895261589063, + "grad_norm": 0.7126109004020691, + "learning_rate": 3.623964507518203e-06, + "loss": 0.6458, + "step": 11696 + }, + { + "epoch": 0.601140918902251, + "grad_norm": 1.0847748517990112, + "learning_rate": 3.6231644100357094e-06, + "loss": 0.7663, + "step": 11697 + }, + { + "epoch": 0.6011923116455956, + "grad_norm": 1.0719671249389648, + "learning_rate": 3.6223643506996483e-06, + "loss": 0.7239, + "step": 11698 + }, + { + "epoch": 0.6012437043889403, + "grad_norm": 1.075773000717163, + "learning_rate": 3.6215643295321856e-06, + "loss": 0.7318, + "step": 11699 + }, + { + "epoch": 0.6012950971322849, + "grad_norm": 1.1973028182983398, + "learning_rate": 3.6207643465554886e-06, + "loss": 0.7249, + "step": 11700 + }, + { + "epoch": 0.6013464898756296, + "grad_norm": 1.0512226819992065, + "learning_rate": 3.619964401791718e-06, + "loss": 0.7533, + "step": 11701 + }, + { + "epoch": 0.6013978826189742, + "grad_norm": 0.6967232823371887, + "learning_rate": 3.619164495263039e-06, + "loss": 0.6646, + "step": 11702 + }, + { + "epoch": 0.6014492753623188, + "grad_norm": 1.0241248607635498, + "learning_rate": 3.6183646269916157e-06, + "loss": 0.6661, + "step": 11703 + }, + { + "epoch": 0.6015006681056635, + "grad_norm": 1.0468987226486206, + "learning_rate": 3.617564796999605e-06, + "loss": 0.7527, + "step": 11704 + }, + { + "epoch": 0.6015520608490081, + "grad_norm": 1.119964838027954, + "learning_rate": 3.6167650053091686e-06, + "loss": 0.7496, + "step": 11705 + }, + { + "epoch": 0.6016034535923528, + "grad_norm": 1.0813889503479004, + "learning_rate": 3.615965251942467e-06, + "loss": 0.7477, + "step": 11706 + }, + { + "epoch": 0.6016548463356974, + "grad_norm": 1.116546869277954, + "learning_rate": 3.6151655369216553e-06, + "loss": 0.7315, + "step": 11707 + }, + { + "epoch": 0.6017062390790421, + "grad_norm": 0.7476264834403992, + "learning_rate": 3.6143658602688918e-06, + "loss": 0.6453, + "step": 11708 + }, + { + "epoch": 0.6017576318223867, + "grad_norm": 0.758554995059967, + "learning_rate": 3.613566222006333e-06, + "loss": 0.6899, + "step": 11709 + }, + { + "epoch": 0.6018090245657313, + "grad_norm": 1.0207343101501465, + "learning_rate": 3.612766622156132e-06, + "loss": 0.6966, + "step": 11710 + }, + { + "epoch": 0.6018604173090759, + "grad_norm": 1.0654312372207642, + "learning_rate": 3.611967060740444e-06, + "loss": 0.6971, + "step": 11711 + }, + { + "epoch": 0.6019118100524206, + "grad_norm": 1.036113977432251, + "learning_rate": 3.61116753778142e-06, + "loss": 0.6866, + "step": 11712 + }, + { + "epoch": 0.6019632027957652, + "grad_norm": 1.0024757385253906, + "learning_rate": 3.610368053301212e-06, + "loss": 0.7112, + "step": 11713 + }, + { + "epoch": 0.6020145955391099, + "grad_norm": 1.0704784393310547, + "learning_rate": 3.6095686073219706e-06, + "loss": 0.7163, + "step": 11714 + }, + { + "epoch": 0.6020659882824545, + "grad_norm": 0.7803016901016235, + "learning_rate": 3.6087691998658445e-06, + "loss": 0.6724, + "step": 11715 + }, + { + "epoch": 0.6021173810257991, + "grad_norm": 1.097983479499817, + "learning_rate": 3.607969830954985e-06, + "loss": 0.7446, + "step": 11716 + }, + { + "epoch": 0.6021687737691438, + "grad_norm": 0.7536942958831787, + "learning_rate": 3.607170500611534e-06, + "loss": 0.657, + "step": 11717 + }, + { + "epoch": 0.6022201665124884, + "grad_norm": 1.1865179538726807, + "learning_rate": 3.606371208857642e-06, + "loss": 0.7218, + "step": 11718 + }, + { + "epoch": 0.6022715592558331, + "grad_norm": 1.0578848123550415, + "learning_rate": 3.605571955715454e-06, + "loss": 0.7401, + "step": 11719 + }, + { + "epoch": 0.6023229519991777, + "grad_norm": 1.1631569862365723, + "learning_rate": 3.604772741207111e-06, + "loss": 0.7259, + "step": 11720 + }, + { + "epoch": 0.6023743447425224, + "grad_norm": 1.0604852437973022, + "learning_rate": 3.603973565354757e-06, + "loss": 0.7317, + "step": 11721 + }, + { + "epoch": 0.602425737485867, + "grad_norm": 1.0480883121490479, + "learning_rate": 3.6031744281805354e-06, + "loss": 0.7332, + "step": 11722 + }, + { + "epoch": 0.6024771302292117, + "grad_norm": 0.7200015783309937, + "learning_rate": 3.6023753297065853e-06, + "loss": 0.6605, + "step": 11723 + }, + { + "epoch": 0.6025285229725563, + "grad_norm": 1.068132758140564, + "learning_rate": 3.601576269955047e-06, + "loss": 0.7307, + "step": 11724 + }, + { + "epoch": 0.6025799157159009, + "grad_norm": 1.0491328239440918, + "learning_rate": 3.600777248948062e-06, + "loss": 0.6706, + "step": 11725 + }, + { + "epoch": 0.6026313084592455, + "grad_norm": 1.086289882659912, + "learning_rate": 3.599978266707762e-06, + "loss": 0.7746, + "step": 11726 + }, + { + "epoch": 0.6026827012025902, + "grad_norm": 1.0240601301193237, + "learning_rate": 3.599179323256288e-06, + "loss": 0.7034, + "step": 11727 + }, + { + "epoch": 0.6027340939459348, + "grad_norm": 0.831072211265564, + "learning_rate": 3.598380418615775e-06, + "loss": 0.6563, + "step": 11728 + }, + { + "epoch": 0.6027854866892794, + "grad_norm": 0.9224907755851746, + "learning_rate": 3.5975815528083545e-06, + "loss": 0.6713, + "step": 11729 + }, + { + "epoch": 0.6028368794326241, + "grad_norm": 1.0543758869171143, + "learning_rate": 3.5967827258561626e-06, + "loss": 0.7365, + "step": 11730 + }, + { + "epoch": 0.6028882721759687, + "grad_norm": 1.0721358060836792, + "learning_rate": 3.59598393778133e-06, + "loss": 0.7286, + "step": 11731 + }, + { + "epoch": 0.6029396649193134, + "grad_norm": 1.0560622215270996, + "learning_rate": 3.5951851886059898e-06, + "loss": 0.721, + "step": 11732 + }, + { + "epoch": 0.602991057662658, + "grad_norm": 1.0434192419052124, + "learning_rate": 3.5943864783522693e-06, + "loss": 0.6894, + "step": 11733 + }, + { + "epoch": 0.6030424504060027, + "grad_norm": 0.717589795589447, + "learning_rate": 3.5935878070422993e-06, + "loss": 0.7028, + "step": 11734 + }, + { + "epoch": 0.6030938431493473, + "grad_norm": 1.0498300790786743, + "learning_rate": 3.592789174698209e-06, + "loss": 0.759, + "step": 11735 + }, + { + "epoch": 0.603145235892692, + "grad_norm": 1.0579242706298828, + "learning_rate": 3.5919905813421214e-06, + "loss": 0.6718, + "step": 11736 + }, + { + "epoch": 0.6031966286360366, + "grad_norm": 1.1115270853042603, + "learning_rate": 3.5911920269961642e-06, + "loss": 0.7154, + "step": 11737 + }, + { + "epoch": 0.6032480213793813, + "grad_norm": 1.057466983795166, + "learning_rate": 3.590393511682464e-06, + "loss": 0.7389, + "step": 11738 + }, + { + "epoch": 0.6032994141227259, + "grad_norm": 1.1278526782989502, + "learning_rate": 3.5895950354231414e-06, + "loss": 0.7101, + "step": 11739 + }, + { + "epoch": 0.6033508068660706, + "grad_norm": 1.0959759950637817, + "learning_rate": 3.588796598240319e-06, + "loss": 0.7177, + "step": 11740 + }, + { + "epoch": 0.6034021996094151, + "grad_norm": 0.7061886191368103, + "learning_rate": 3.5879982001561205e-06, + "loss": 0.701, + "step": 11741 + }, + { + "epoch": 0.6034535923527597, + "grad_norm": 1.0735312700271606, + "learning_rate": 3.5871998411926635e-06, + "loss": 0.7551, + "step": 11742 + }, + { + "epoch": 0.6035049850961044, + "grad_norm": 1.0347939729690552, + "learning_rate": 3.5864015213720694e-06, + "loss": 0.7363, + "step": 11743 + }, + { + "epoch": 0.603556377839449, + "grad_norm": 1.0885738134384155, + "learning_rate": 3.5856032407164572e-06, + "loss": 0.6831, + "step": 11744 + }, + { + "epoch": 0.6036077705827937, + "grad_norm": 1.2064160108566284, + "learning_rate": 3.58480499924794e-06, + "loss": 0.6967, + "step": 11745 + }, + { + "epoch": 0.6036591633261383, + "grad_norm": 1.189035415649414, + "learning_rate": 3.584006796988636e-06, + "loss": 0.7264, + "step": 11746 + }, + { + "epoch": 0.603710556069483, + "grad_norm": 0.7170475125312805, + "learning_rate": 3.583208633960661e-06, + "loss": 0.628, + "step": 11747 + }, + { + "epoch": 0.6037619488128276, + "grad_norm": 1.103580355644226, + "learning_rate": 3.5824105101861296e-06, + "loss": 0.7175, + "step": 11748 + }, + { + "epoch": 0.6038133415561723, + "grad_norm": 0.9946369528770447, + "learning_rate": 3.581612425687151e-06, + "loss": 0.7539, + "step": 11749 + }, + { + "epoch": 0.6038647342995169, + "grad_norm": 1.0530636310577393, + "learning_rate": 3.5808143804858388e-06, + "loss": 0.7205, + "step": 11750 + }, + { + "epoch": 0.6039161270428616, + "grad_norm": 1.059380054473877, + "learning_rate": 3.5800163746043037e-06, + "loss": 0.7356, + "step": 11751 + }, + { + "epoch": 0.6039675197862062, + "grad_norm": 1.0513274669647217, + "learning_rate": 3.5792184080646547e-06, + "loss": 0.6743, + "step": 11752 + }, + { + "epoch": 0.6040189125295509, + "grad_norm": 1.059709906578064, + "learning_rate": 3.578420480889e-06, + "loss": 0.7441, + "step": 11753 + }, + { + "epoch": 0.6040703052728955, + "grad_norm": 1.5214269161224365, + "learning_rate": 3.57762259309945e-06, + "loss": 0.6808, + "step": 11754 + }, + { + "epoch": 0.6041216980162402, + "grad_norm": 0.7365110516548157, + "learning_rate": 3.576824744718106e-06, + "loss": 0.6476, + "step": 11755 + }, + { + "epoch": 0.6041730907595847, + "grad_norm": 0.7900581955909729, + "learning_rate": 3.576026935767074e-06, + "loss": 0.6679, + "step": 11756 + }, + { + "epoch": 0.6042244835029293, + "grad_norm": 0.8507570028305054, + "learning_rate": 3.5752291662684622e-06, + "loss": 0.6142, + "step": 11757 + }, + { + "epoch": 0.604275876246274, + "grad_norm": 1.0490598678588867, + "learning_rate": 3.5744314362443687e-06, + "loss": 0.677, + "step": 11758 + }, + { + "epoch": 0.6043272689896186, + "grad_norm": 1.072921633720398, + "learning_rate": 3.573633745716897e-06, + "loss": 0.7443, + "step": 11759 + }, + { + "epoch": 0.6043786617329633, + "grad_norm": 1.035670518875122, + "learning_rate": 3.5728360947081485e-06, + "loss": 0.7021, + "step": 11760 + }, + { + "epoch": 0.6044300544763079, + "grad_norm": 1.1715338230133057, + "learning_rate": 3.5720384832402217e-06, + "loss": 0.7311, + "step": 11761 + }, + { + "epoch": 0.6044814472196526, + "grad_norm": 1.0863670110702515, + "learning_rate": 3.5712409113352163e-06, + "loss": 0.6934, + "step": 11762 + }, + { + "epoch": 0.6045328399629972, + "grad_norm": 1.0841480493545532, + "learning_rate": 3.570443379015229e-06, + "loss": 0.7053, + "step": 11763 + }, + { + "epoch": 0.6045842327063419, + "grad_norm": 0.7957704663276672, + "learning_rate": 3.5696458863023563e-06, + "loss": 0.6817, + "step": 11764 + }, + { + "epoch": 0.6046356254496865, + "grad_norm": 1.0549801588058472, + "learning_rate": 3.5688484332186923e-06, + "loss": 0.682, + "step": 11765 + }, + { + "epoch": 0.6046870181930312, + "grad_norm": 1.0382803678512573, + "learning_rate": 3.5680510197863325e-06, + "loss": 0.7133, + "step": 11766 + }, + { + "epoch": 0.6047384109363758, + "grad_norm": 1.0233252048492432, + "learning_rate": 3.567253646027372e-06, + "loss": 0.731, + "step": 11767 + }, + { + "epoch": 0.6047898036797205, + "grad_norm": 1.0693570375442505, + "learning_rate": 3.5664563119638986e-06, + "loss": 0.7064, + "step": 11768 + }, + { + "epoch": 0.6048411964230651, + "grad_norm": 1.039784550666809, + "learning_rate": 3.565659017618004e-06, + "loss": 0.7181, + "step": 11769 + }, + { + "epoch": 0.6048925891664098, + "grad_norm": 1.1097475290298462, + "learning_rate": 3.564861763011781e-06, + "loss": 0.6816, + "step": 11770 + }, + { + "epoch": 0.6049439819097543, + "grad_norm": 1.183729648590088, + "learning_rate": 3.564064548167314e-06, + "loss": 0.737, + "step": 11771 + }, + { + "epoch": 0.6049953746530989, + "grad_norm": 1.2708473205566406, + "learning_rate": 3.5632673731066935e-06, + "loss": 0.7365, + "step": 11772 + }, + { + "epoch": 0.6050467673964436, + "grad_norm": 1.0473058223724365, + "learning_rate": 3.5624702378520047e-06, + "loss": 0.7275, + "step": 11773 + }, + { + "epoch": 0.6050981601397882, + "grad_norm": 1.0184966325759888, + "learning_rate": 3.5616731424253337e-06, + "loss": 0.753, + "step": 11774 + }, + { + "epoch": 0.6051495528831329, + "grad_norm": 1.0237458944320679, + "learning_rate": 3.5608760868487634e-06, + "loss": 0.691, + "step": 11775 + }, + { + "epoch": 0.6052009456264775, + "grad_norm": 1.064508318901062, + "learning_rate": 3.5600790711443776e-06, + "loss": 0.6798, + "step": 11776 + }, + { + "epoch": 0.6052523383698222, + "grad_norm": 1.0603259801864624, + "learning_rate": 3.559282095334261e-06, + "loss": 0.7544, + "step": 11777 + }, + { + "epoch": 0.6053037311131668, + "grad_norm": 1.0520302057266235, + "learning_rate": 3.55848515944049e-06, + "loss": 0.7453, + "step": 11778 + }, + { + "epoch": 0.6053551238565115, + "grad_norm": 1.138112187385559, + "learning_rate": 3.5576882634851467e-06, + "loss": 0.7626, + "step": 11779 + }, + { + "epoch": 0.6054065165998561, + "grad_norm": 1.0735533237457275, + "learning_rate": 3.5568914074903114e-06, + "loss": 0.6623, + "step": 11780 + }, + { + "epoch": 0.6054579093432008, + "grad_norm": 1.0294348001480103, + "learning_rate": 3.5560945914780575e-06, + "loss": 0.7908, + "step": 11781 + }, + { + "epoch": 0.6055093020865454, + "grad_norm": 1.0777082443237305, + "learning_rate": 3.5552978154704644e-06, + "loss": 0.7921, + "step": 11782 + }, + { + "epoch": 0.6055606948298901, + "grad_norm": 0.7403438091278076, + "learning_rate": 3.5545010794896073e-06, + "loss": 0.6818, + "step": 11783 + }, + { + "epoch": 0.6056120875732347, + "grad_norm": 1.1131402254104614, + "learning_rate": 3.5537043835575597e-06, + "loss": 0.67, + "step": 11784 + }, + { + "epoch": 0.6056634803165794, + "grad_norm": 0.7971148490905762, + "learning_rate": 3.5529077276963946e-06, + "loss": 0.6476, + "step": 11785 + }, + { + "epoch": 0.6057148730599239, + "grad_norm": 1.073693037033081, + "learning_rate": 3.552111111928187e-06, + "loss": 0.711, + "step": 11786 + }, + { + "epoch": 0.6057662658032685, + "grad_norm": 1.1514228582382202, + "learning_rate": 3.5513145362750036e-06, + "loss": 0.753, + "step": 11787 + }, + { + "epoch": 0.6058176585466132, + "grad_norm": 1.116470456123352, + "learning_rate": 3.5505180007589146e-06, + "loss": 0.6996, + "step": 11788 + }, + { + "epoch": 0.6058690512899578, + "grad_norm": 0.7149177193641663, + "learning_rate": 3.549721505401994e-06, + "loss": 0.6794, + "step": 11789 + }, + { + "epoch": 0.6059204440333025, + "grad_norm": 1.0795890092849731, + "learning_rate": 3.5489250502263023e-06, + "loss": 0.6836, + "step": 11790 + }, + { + "epoch": 0.6059718367766471, + "grad_norm": 1.0409691333770752, + "learning_rate": 3.5481286352539097e-06, + "loss": 0.6564, + "step": 11791 + }, + { + "epoch": 0.6060232295199918, + "grad_norm": 1.0496233701705933, + "learning_rate": 3.5473322605068806e-06, + "loss": 0.6415, + "step": 11792 + }, + { + "epoch": 0.6060746222633364, + "grad_norm": 0.6695075035095215, + "learning_rate": 3.5465359260072814e-06, + "loss": 0.6733, + "step": 11793 + }, + { + "epoch": 0.6061260150066811, + "grad_norm": 1.0904182195663452, + "learning_rate": 3.545739631777172e-06, + "loss": 0.7471, + "step": 11794 + }, + { + "epoch": 0.6061774077500257, + "grad_norm": 1.098577618598938, + "learning_rate": 3.5449433778386167e-06, + "loss": 0.7332, + "step": 11795 + }, + { + "epoch": 0.6062288004933704, + "grad_norm": 1.1283539533615112, + "learning_rate": 3.5441471642136773e-06, + "loss": 0.7676, + "step": 11796 + }, + { + "epoch": 0.606280193236715, + "grad_norm": 1.0964044332504272, + "learning_rate": 3.54335099092441e-06, + "loss": 0.7322, + "step": 11797 + }, + { + "epoch": 0.6063315859800597, + "grad_norm": 1.1049939393997192, + "learning_rate": 3.5425548579928757e-06, + "loss": 0.7244, + "step": 11798 + }, + { + "epoch": 0.6063829787234043, + "grad_norm": 1.0632071495056152, + "learning_rate": 3.5417587654411334e-06, + "loss": 0.6854, + "step": 11799 + }, + { + "epoch": 0.606434371466749, + "grad_norm": 0.7904536128044128, + "learning_rate": 3.540962713291237e-06, + "loss": 0.6806, + "step": 11800 + }, + { + "epoch": 0.6064857642100935, + "grad_norm": 1.1116286516189575, + "learning_rate": 3.5401667015652414e-06, + "loss": 0.7135, + "step": 11801 + }, + { + "epoch": 0.6065371569534381, + "grad_norm": 1.1878255605697632, + "learning_rate": 3.539370730285204e-06, + "loss": 0.7358, + "step": 11802 + }, + { + "epoch": 0.6065885496967828, + "grad_norm": 1.1322591304779053, + "learning_rate": 3.5385747994731747e-06, + "loss": 0.7162, + "step": 11803 + }, + { + "epoch": 0.6066399424401274, + "grad_norm": 1.041743516921997, + "learning_rate": 3.537778909151206e-06, + "loss": 0.7333, + "step": 11804 + }, + { + "epoch": 0.6066913351834721, + "grad_norm": 1.0568022727966309, + "learning_rate": 3.5369830593413524e-06, + "loss": 0.6746, + "step": 11805 + }, + { + "epoch": 0.6067427279268167, + "grad_norm": 1.1037957668304443, + "learning_rate": 3.5361872500656584e-06, + "loss": 0.6883, + "step": 11806 + }, + { + "epoch": 0.6067941206701614, + "grad_norm": 1.118453860282898, + "learning_rate": 3.5353914813461756e-06, + "loss": 0.7183, + "step": 11807 + }, + { + "epoch": 0.606845513413506, + "grad_norm": 1.151315689086914, + "learning_rate": 3.5345957532049495e-06, + "loss": 0.7435, + "step": 11808 + }, + { + "epoch": 0.6068969061568507, + "grad_norm": 1.054097056388855, + "learning_rate": 3.5338000656640303e-06, + "loss": 0.6719, + "step": 11809 + }, + { + "epoch": 0.6069482989001953, + "grad_norm": 1.0745588541030884, + "learning_rate": 3.5330044187454593e-06, + "loss": 0.7456, + "step": 11810 + }, + { + "epoch": 0.60699969164354, + "grad_norm": 1.0319892168045044, + "learning_rate": 3.532208812471281e-06, + "loss": 0.7353, + "step": 11811 + }, + { + "epoch": 0.6070510843868846, + "grad_norm": 1.139896273612976, + "learning_rate": 3.531413246863541e-06, + "loss": 0.7729, + "step": 11812 + }, + { + "epoch": 0.6071024771302292, + "grad_norm": 1.1132032871246338, + "learning_rate": 3.530617721944279e-06, + "loss": 0.6698, + "step": 11813 + }, + { + "epoch": 0.6071538698735739, + "grad_norm": 1.096381425857544, + "learning_rate": 3.529822237735535e-06, + "loss": 0.7583, + "step": 11814 + }, + { + "epoch": 0.6072052626169185, + "grad_norm": 1.1011208295822144, + "learning_rate": 3.5290267942593513e-06, + "loss": 0.7839, + "step": 11815 + }, + { + "epoch": 0.6072566553602631, + "grad_norm": 0.7371395826339722, + "learning_rate": 3.528231391537763e-06, + "loss": 0.6777, + "step": 11816 + }, + { + "epoch": 0.6073080481036077, + "grad_norm": 1.111451268196106, + "learning_rate": 3.52743602959281e-06, + "loss": 0.7193, + "step": 11817 + }, + { + "epoch": 0.6073594408469524, + "grad_norm": 1.1175260543823242, + "learning_rate": 3.526640708446529e-06, + "loss": 0.6724, + "step": 11818 + }, + { + "epoch": 0.607410833590297, + "grad_norm": 1.0813630819320679, + "learning_rate": 3.525845428120952e-06, + "loss": 0.7462, + "step": 11819 + }, + { + "epoch": 0.6074622263336417, + "grad_norm": 1.0229310989379883, + "learning_rate": 3.525050188638115e-06, + "loss": 0.6461, + "step": 11820 + }, + { + "epoch": 0.6075136190769863, + "grad_norm": 1.0155729055404663, + "learning_rate": 3.524254990020053e-06, + "loss": 0.7088, + "step": 11821 + }, + { + "epoch": 0.607565011820331, + "grad_norm": 1.0471463203430176, + "learning_rate": 3.523459832288792e-06, + "loss": 0.701, + "step": 11822 + }, + { + "epoch": 0.6076164045636756, + "grad_norm": 1.0348927974700928, + "learning_rate": 3.5226647154663664e-06, + "loss": 0.6631, + "step": 11823 + }, + { + "epoch": 0.6076677973070203, + "grad_norm": 1.0795766115188599, + "learning_rate": 3.5218696395748044e-06, + "loss": 0.6771, + "step": 11824 + }, + { + "epoch": 0.6077191900503649, + "grad_norm": 1.0964086055755615, + "learning_rate": 3.5210746046361364e-06, + "loss": 0.7025, + "step": 11825 + }, + { + "epoch": 0.6077705827937095, + "grad_norm": 1.0548222064971924, + "learning_rate": 3.520279610672387e-06, + "loss": 0.7316, + "step": 11826 + }, + { + "epoch": 0.6078219755370542, + "grad_norm": 1.1473653316497803, + "learning_rate": 3.5194846577055823e-06, + "loss": 0.749, + "step": 11827 + }, + { + "epoch": 0.6078733682803988, + "grad_norm": 1.0289922952651978, + "learning_rate": 3.518689745757751e-06, + "loss": 0.6661, + "step": 11828 + }, + { + "epoch": 0.6079247610237435, + "grad_norm": 1.1346315145492554, + "learning_rate": 3.517894874850911e-06, + "loss": 0.7115, + "step": 11829 + }, + { + "epoch": 0.6079761537670881, + "grad_norm": 1.0271775722503662, + "learning_rate": 3.517100045007088e-06, + "loss": 0.6493, + "step": 11830 + }, + { + "epoch": 0.6080275465104328, + "grad_norm": 1.0954210758209229, + "learning_rate": 3.5163052562483057e-06, + "loss": 0.6552, + "step": 11831 + }, + { + "epoch": 0.6080789392537773, + "grad_norm": 1.1188998222351074, + "learning_rate": 3.5155105085965798e-06, + "loss": 0.7238, + "step": 11832 + }, + { + "epoch": 0.608130331997122, + "grad_norm": 0.8290671110153198, + "learning_rate": 3.5147158020739315e-06, + "loss": 0.6309, + "step": 11833 + }, + { + "epoch": 0.6081817247404666, + "grad_norm": 1.0628269910812378, + "learning_rate": 3.5139211367023796e-06, + "loss": 0.7209, + "step": 11834 + }, + { + "epoch": 0.6082331174838113, + "grad_norm": 0.6959296464920044, + "learning_rate": 3.513126512503939e-06, + "loss": 0.6147, + "step": 11835 + }, + { + "epoch": 0.6082845102271559, + "grad_norm": 1.1849548816680908, + "learning_rate": 3.5123319295006277e-06, + "loss": 0.7394, + "step": 11836 + }, + { + "epoch": 0.6083359029705006, + "grad_norm": 1.1849671602249146, + "learning_rate": 3.511537387714461e-06, + "loss": 0.6911, + "step": 11837 + }, + { + "epoch": 0.6083872957138452, + "grad_norm": 1.048609733581543, + "learning_rate": 3.5107428871674486e-06, + "loss": 0.7603, + "step": 11838 + }, + { + "epoch": 0.6084386884571898, + "grad_norm": 1.0889729261398315, + "learning_rate": 3.5099484278816047e-06, + "loss": 0.7244, + "step": 11839 + }, + { + "epoch": 0.6084900812005345, + "grad_norm": 1.1150254011154175, + "learning_rate": 3.5091540098789412e-06, + "loss": 0.717, + "step": 11840 + }, + { + "epoch": 0.6085414739438791, + "grad_norm": 1.092042326927185, + "learning_rate": 3.5083596331814693e-06, + "loss": 0.6727, + "step": 11841 + }, + { + "epoch": 0.6085928666872238, + "grad_norm": 1.0360329151153564, + "learning_rate": 3.5075652978111953e-06, + "loss": 0.7372, + "step": 11842 + }, + { + "epoch": 0.6086442594305684, + "grad_norm": 1.1318461894989014, + "learning_rate": 3.506771003790127e-06, + "loss": 0.7393, + "step": 11843 + }, + { + "epoch": 0.6086956521739131, + "grad_norm": 1.1159861087799072, + "learning_rate": 3.5059767511402738e-06, + "loss": 0.6954, + "step": 11844 + }, + { + "epoch": 0.6087470449172577, + "grad_norm": 1.0765126943588257, + "learning_rate": 3.505182539883638e-06, + "loss": 0.7222, + "step": 11845 + }, + { + "epoch": 0.6087984376606024, + "grad_norm": 0.9926835298538208, + "learning_rate": 3.504388370042225e-06, + "loss": 0.7, + "step": 11846 + }, + { + "epoch": 0.6088498304039469, + "grad_norm": 0.8482490181922913, + "learning_rate": 3.5035942416380407e-06, + "loss": 0.6781, + "step": 11847 + }, + { + "epoch": 0.6089012231472916, + "grad_norm": 0.7162907719612122, + "learning_rate": 3.5028001546930822e-06, + "loss": 0.6442, + "step": 11848 + }, + { + "epoch": 0.6089526158906362, + "grad_norm": 1.0388542413711548, + "learning_rate": 3.5020061092293534e-06, + "loss": 0.7451, + "step": 11849 + }, + { + "epoch": 0.6090040086339809, + "grad_norm": 1.0878304243087769, + "learning_rate": 3.501212105268855e-06, + "loss": 0.7127, + "step": 11850 + }, + { + "epoch": 0.6090554013773255, + "grad_norm": 0.9801203608512878, + "learning_rate": 3.5004181428335816e-06, + "loss": 0.7169, + "step": 11851 + }, + { + "epoch": 0.6091067941206701, + "grad_norm": 1.1127053499221802, + "learning_rate": 3.499624221945534e-06, + "loss": 0.6478, + "step": 11852 + }, + { + "epoch": 0.6091581868640148, + "grad_norm": 0.7497096061706543, + "learning_rate": 3.498830342626708e-06, + "loss": 0.7188, + "step": 11853 + }, + { + "epoch": 0.6092095796073594, + "grad_norm": 1.3945552110671997, + "learning_rate": 3.498036504899097e-06, + "loss": 0.729, + "step": 11854 + }, + { + "epoch": 0.6092609723507041, + "grad_norm": 1.056204080581665, + "learning_rate": 3.4972427087846973e-06, + "loss": 0.6881, + "step": 11855 + }, + { + "epoch": 0.6093123650940487, + "grad_norm": 0.8180471062660217, + "learning_rate": 3.496448954305499e-06, + "loss": 0.6779, + "step": 11856 + }, + { + "epoch": 0.6093637578373934, + "grad_norm": 0.7603835463523865, + "learning_rate": 3.4956552414834987e-06, + "loss": 0.5966, + "step": 11857 + }, + { + "epoch": 0.609415150580738, + "grad_norm": 0.7971979379653931, + "learning_rate": 3.4948615703406812e-06, + "loss": 0.6861, + "step": 11858 + }, + { + "epoch": 0.6094665433240827, + "grad_norm": 1.0230458974838257, + "learning_rate": 3.4940679408990385e-06, + "loss": 0.7339, + "step": 11859 + }, + { + "epoch": 0.6095179360674273, + "grad_norm": 1.0044119358062744, + "learning_rate": 3.49327435318056e-06, + "loss": 0.7153, + "step": 11860 + }, + { + "epoch": 0.609569328810772, + "grad_norm": 1.0402259826660156, + "learning_rate": 3.4924808072072295e-06, + "loss": 0.7348, + "step": 11861 + }, + { + "epoch": 0.6096207215541165, + "grad_norm": 1.5505672693252563, + "learning_rate": 3.4916873030010344e-06, + "loss": 0.7458, + "step": 11862 + }, + { + "epoch": 0.6096721142974612, + "grad_norm": 1.008429765701294, + "learning_rate": 3.4908938405839615e-06, + "loss": 0.7018, + "step": 11863 + }, + { + "epoch": 0.6097235070408058, + "grad_norm": 1.075774073600769, + "learning_rate": 3.490100419977992e-06, + "loss": 0.6922, + "step": 11864 + }, + { + "epoch": 0.6097748997841504, + "grad_norm": 1.0135674476623535, + "learning_rate": 3.4893070412051076e-06, + "loss": 0.7143, + "step": 11865 + }, + { + "epoch": 0.6098262925274951, + "grad_norm": 1.0693053007125854, + "learning_rate": 3.488513704287293e-06, + "loss": 0.7557, + "step": 11866 + }, + { + "epoch": 0.6098776852708397, + "grad_norm": 1.1149888038635254, + "learning_rate": 3.4877204092465238e-06, + "loss": 0.6834, + "step": 11867 + }, + { + "epoch": 0.6099290780141844, + "grad_norm": 0.7114417552947998, + "learning_rate": 3.4869271561047816e-06, + "loss": 0.621, + "step": 11868 + }, + { + "epoch": 0.609980470757529, + "grad_norm": 1.1114683151245117, + "learning_rate": 3.4861339448840464e-06, + "loss": 0.6935, + "step": 11869 + }, + { + "epoch": 0.6100318635008737, + "grad_norm": 0.7656902074813843, + "learning_rate": 3.4853407756062898e-06, + "loss": 0.6617, + "step": 11870 + }, + { + "epoch": 0.6100832562442183, + "grad_norm": 1.194212555885315, + "learning_rate": 3.484547648293489e-06, + "loss": 0.6801, + "step": 11871 + }, + { + "epoch": 0.610134648987563, + "grad_norm": 1.124890923500061, + "learning_rate": 3.4837545629676205e-06, + "loss": 0.7283, + "step": 11872 + }, + { + "epoch": 0.6101860417309076, + "grad_norm": 1.166640043258667, + "learning_rate": 3.482961519650657e-06, + "loss": 0.7608, + "step": 11873 + }, + { + "epoch": 0.6102374344742523, + "grad_norm": 1.0783902406692505, + "learning_rate": 3.482168518364567e-06, + "loss": 0.7195, + "step": 11874 + }, + { + "epoch": 0.6102888272175969, + "grad_norm": 1.0856562852859497, + "learning_rate": 3.481375559131324e-06, + "loss": 0.6978, + "step": 11875 + }, + { + "epoch": 0.6103402199609416, + "grad_norm": 1.1609246730804443, + "learning_rate": 3.4805826419728984e-06, + "loss": 0.7283, + "step": 11876 + }, + { + "epoch": 0.6103916127042861, + "grad_norm": 0.6993660926818848, + "learning_rate": 3.4797897669112567e-06, + "loss": 0.6119, + "step": 11877 + }, + { + "epoch": 0.6104430054476307, + "grad_norm": 1.0403070449829102, + "learning_rate": 3.478996933968366e-06, + "loss": 0.6947, + "step": 11878 + }, + { + "epoch": 0.6104943981909754, + "grad_norm": 1.0169188976287842, + "learning_rate": 3.4782041431661966e-06, + "loss": 0.7209, + "step": 11879 + }, + { + "epoch": 0.61054579093432, + "grad_norm": 0.7749274373054504, + "learning_rate": 3.477411394526708e-06, + "loss": 0.638, + "step": 11880 + }, + { + "epoch": 0.6105971836776647, + "grad_norm": 1.0414111614227295, + "learning_rate": 3.476618688071866e-06, + "loss": 0.7039, + "step": 11881 + }, + { + "epoch": 0.6106485764210093, + "grad_norm": 1.0281057357788086, + "learning_rate": 3.4758260238236357e-06, + "loss": 0.7346, + "step": 11882 + }, + { + "epoch": 0.610699969164354, + "grad_norm": 1.0106642246246338, + "learning_rate": 3.4750334018039746e-06, + "loss": 0.727, + "step": 11883 + }, + { + "epoch": 0.6107513619076986, + "grad_norm": 1.0776649713516235, + "learning_rate": 3.4742408220348446e-06, + "loss": 0.7592, + "step": 11884 + }, + { + "epoch": 0.6108027546510433, + "grad_norm": 0.6897673606872559, + "learning_rate": 3.4734482845382055e-06, + "loss": 0.651, + "step": 11885 + }, + { + "epoch": 0.6108541473943879, + "grad_norm": 1.1204874515533447, + "learning_rate": 3.472655789336015e-06, + "loss": 0.6911, + "step": 11886 + }, + { + "epoch": 0.6109055401377326, + "grad_norm": 0.741254985332489, + "learning_rate": 3.4718633364502287e-06, + "loss": 0.6269, + "step": 11887 + }, + { + "epoch": 0.6109569328810772, + "grad_norm": 1.0893442630767822, + "learning_rate": 3.471070925902803e-06, + "loss": 0.678, + "step": 11888 + }, + { + "epoch": 0.6110083256244219, + "grad_norm": 1.04536771774292, + "learning_rate": 3.4702785577156946e-06, + "loss": 0.6889, + "step": 11889 + }, + { + "epoch": 0.6110597183677665, + "grad_norm": 1.1782145500183105, + "learning_rate": 3.4694862319108523e-06, + "loss": 0.7742, + "step": 11890 + }, + { + "epoch": 0.6111111111111112, + "grad_norm": 1.0272762775421143, + "learning_rate": 3.468693948510231e-06, + "loss": 0.6625, + "step": 11891 + }, + { + "epoch": 0.6111625038544557, + "grad_norm": 1.095741629600525, + "learning_rate": 3.4679017075357825e-06, + "loss": 0.7479, + "step": 11892 + }, + { + "epoch": 0.6112138965978003, + "grad_norm": 0.8137418627738953, + "learning_rate": 3.4671095090094536e-06, + "loss": 0.6191, + "step": 11893 + }, + { + "epoch": 0.611265289341145, + "grad_norm": 1.1190258264541626, + "learning_rate": 3.466317352953194e-06, + "loss": 0.6869, + "step": 11894 + }, + { + "epoch": 0.6113166820844896, + "grad_norm": 1.0588442087173462, + "learning_rate": 3.4655252393889527e-06, + "loss": 0.682, + "step": 11895 + }, + { + "epoch": 0.6113680748278343, + "grad_norm": 1.09615957736969, + "learning_rate": 3.464733168338673e-06, + "loss": 0.7663, + "step": 11896 + }, + { + "epoch": 0.6114194675711789, + "grad_norm": 0.9907885193824768, + "learning_rate": 3.4639411398243026e-06, + "loss": 0.7413, + "step": 11897 + }, + { + "epoch": 0.6114708603145236, + "grad_norm": 0.7235219478607178, + "learning_rate": 3.463149153867785e-06, + "loss": 0.7098, + "step": 11898 + }, + { + "epoch": 0.6115222530578682, + "grad_norm": 1.0669646263122559, + "learning_rate": 3.4623572104910615e-06, + "loss": 0.7292, + "step": 11899 + }, + { + "epoch": 0.6115736458012129, + "grad_norm": 1.0527647733688354, + "learning_rate": 3.4615653097160735e-06, + "loss": 0.7174, + "step": 11900 + }, + { + "epoch": 0.6116250385445575, + "grad_norm": 1.3574820756912231, + "learning_rate": 3.4607734515647617e-06, + "loss": 0.7152, + "step": 11901 + }, + { + "epoch": 0.6116764312879022, + "grad_norm": 1.0775631666183472, + "learning_rate": 3.459981636059068e-06, + "loss": 0.7438, + "step": 11902 + }, + { + "epoch": 0.6117278240312468, + "grad_norm": 1.0844933986663818, + "learning_rate": 3.4591898632209257e-06, + "loss": 0.6977, + "step": 11903 + }, + { + "epoch": 0.6117792167745915, + "grad_norm": 1.079079508781433, + "learning_rate": 3.4583981330722736e-06, + "loss": 0.7233, + "step": 11904 + }, + { + "epoch": 0.6118306095179361, + "grad_norm": 1.0844032764434814, + "learning_rate": 3.4576064456350487e-06, + "loss": 0.7563, + "step": 11905 + }, + { + "epoch": 0.6118820022612808, + "grad_norm": 0.9849695563316345, + "learning_rate": 3.456814800931183e-06, + "loss": 0.6763, + "step": 11906 + }, + { + "epoch": 0.6119333950046254, + "grad_norm": 0.9981567859649658, + "learning_rate": 3.456023198982611e-06, + "loss": 0.7119, + "step": 11907 + }, + { + "epoch": 0.6119847877479699, + "grad_norm": 1.0896896123886108, + "learning_rate": 3.4552316398112666e-06, + "loss": 0.7291, + "step": 11908 + }, + { + "epoch": 0.6120361804913146, + "grad_norm": 1.0749777555465698, + "learning_rate": 3.4544401234390768e-06, + "loss": 0.7472, + "step": 11909 + }, + { + "epoch": 0.6120875732346592, + "grad_norm": 0.913811206817627, + "learning_rate": 3.453648649887972e-06, + "loss": 0.6738, + "step": 11910 + }, + { + "epoch": 0.6121389659780039, + "grad_norm": 1.0979852676391602, + "learning_rate": 3.4528572191798847e-06, + "loss": 0.6912, + "step": 11911 + }, + { + "epoch": 0.6121903587213485, + "grad_norm": 1.0542471408843994, + "learning_rate": 3.452065831336737e-06, + "loss": 0.714, + "step": 11912 + }, + { + "epoch": 0.6122417514646932, + "grad_norm": 1.0307493209838867, + "learning_rate": 3.4512744863804566e-06, + "loss": 0.7737, + "step": 11913 + }, + { + "epoch": 0.6122931442080378, + "grad_norm": 1.0496041774749756, + "learning_rate": 3.45048318433297e-06, + "loss": 0.6995, + "step": 11914 + }, + { + "epoch": 0.6123445369513825, + "grad_norm": 1.135891318321228, + "learning_rate": 3.4496919252161997e-06, + "loss": 0.7203, + "step": 11915 + }, + { + "epoch": 0.6123959296947271, + "grad_norm": 1.1007752418518066, + "learning_rate": 3.448900709052069e-06, + "loss": 0.7755, + "step": 11916 + }, + { + "epoch": 0.6124473224380718, + "grad_norm": 1.062880277633667, + "learning_rate": 3.4481095358624973e-06, + "loss": 0.709, + "step": 11917 + }, + { + "epoch": 0.6124987151814164, + "grad_norm": 1.0238465070724487, + "learning_rate": 3.4473184056694078e-06, + "loss": 0.7173, + "step": 11918 + }, + { + "epoch": 0.6125501079247611, + "grad_norm": 0.724390983581543, + "learning_rate": 3.4465273184947157e-06, + "loss": 0.6394, + "step": 11919 + }, + { + "epoch": 0.6126015006681057, + "grad_norm": 0.7711619138717651, + "learning_rate": 3.4457362743603418e-06, + "loss": 0.6519, + "step": 11920 + }, + { + "epoch": 0.6126528934114503, + "grad_norm": 1.081173300743103, + "learning_rate": 3.4449452732882026e-06, + "loss": 0.7083, + "step": 11921 + }, + { + "epoch": 0.612704286154795, + "grad_norm": 1.058774471282959, + "learning_rate": 3.4441543153002117e-06, + "loss": 0.6996, + "step": 11922 + }, + { + "epoch": 0.6127556788981395, + "grad_norm": 1.0600477457046509, + "learning_rate": 3.4433634004182837e-06, + "loss": 0.7427, + "step": 11923 + }, + { + "epoch": 0.6128070716414842, + "grad_norm": 0.7493856549263, + "learning_rate": 3.442572528664334e-06, + "loss": 0.6378, + "step": 11924 + }, + { + "epoch": 0.6128584643848288, + "grad_norm": 1.1099209785461426, + "learning_rate": 3.4417817000602705e-06, + "loss": 0.7347, + "step": 11925 + }, + { + "epoch": 0.6129098571281735, + "grad_norm": 1.0539991855621338, + "learning_rate": 3.440990914628005e-06, + "loss": 0.6716, + "step": 11926 + }, + { + "epoch": 0.6129612498715181, + "grad_norm": 1.1441667079925537, + "learning_rate": 3.4402001723894495e-06, + "loss": 0.7727, + "step": 11927 + }, + { + "epoch": 0.6130126426148628, + "grad_norm": 1.2216852903366089, + "learning_rate": 3.4394094733665086e-06, + "loss": 0.7134, + "step": 11928 + }, + { + "epoch": 0.6130640353582074, + "grad_norm": 1.1003215312957764, + "learning_rate": 3.4386188175810912e-06, + "loss": 0.7187, + "step": 11929 + }, + { + "epoch": 0.6131154281015521, + "grad_norm": 1.1060786247253418, + "learning_rate": 3.4378282050551055e-06, + "loss": 0.7638, + "step": 11930 + }, + { + "epoch": 0.6131668208448967, + "grad_norm": 1.0837609767913818, + "learning_rate": 3.4370376358104514e-06, + "loss": 0.7587, + "step": 11931 + }, + { + "epoch": 0.6132182135882414, + "grad_norm": 0.7459571361541748, + "learning_rate": 3.436247109869034e-06, + "loss": 0.6936, + "step": 11932 + }, + { + "epoch": 0.613269606331586, + "grad_norm": 0.6606773138046265, + "learning_rate": 3.4354566272527558e-06, + "loss": 0.6272, + "step": 11933 + }, + { + "epoch": 0.6133209990749307, + "grad_norm": 0.7025207877159119, + "learning_rate": 3.4346661879835206e-06, + "loss": 0.6604, + "step": 11934 + }, + { + "epoch": 0.6133723918182753, + "grad_norm": 1.0718263387680054, + "learning_rate": 3.4338757920832232e-06, + "loss": 0.7346, + "step": 11935 + }, + { + "epoch": 0.61342378456162, + "grad_norm": 0.7326095104217529, + "learning_rate": 3.4330854395737645e-06, + "loss": 0.6343, + "step": 11936 + }, + { + "epoch": 0.6134751773049646, + "grad_norm": 1.03853440284729, + "learning_rate": 3.4322951304770435e-06, + "loss": 0.7051, + "step": 11937 + }, + { + "epoch": 0.6135265700483091, + "grad_norm": 0.8832135200500488, + "learning_rate": 3.4315048648149536e-06, + "loss": 0.6675, + "step": 11938 + }, + { + "epoch": 0.6135779627916538, + "grad_norm": 1.0592994689941406, + "learning_rate": 3.4307146426093907e-06, + "loss": 0.7129, + "step": 11939 + }, + { + "epoch": 0.6136293555349984, + "grad_norm": 1.0465888977050781, + "learning_rate": 3.4299244638822516e-06, + "loss": 0.7254, + "step": 11940 + }, + { + "epoch": 0.6136807482783431, + "grad_norm": 1.0536340475082397, + "learning_rate": 3.429134328655424e-06, + "loss": 0.6841, + "step": 11941 + }, + { + "epoch": 0.6137321410216877, + "grad_norm": 1.1462889909744263, + "learning_rate": 3.4283442369508015e-06, + "loss": 0.7386, + "step": 11942 + }, + { + "epoch": 0.6137835337650324, + "grad_norm": 0.9829745888710022, + "learning_rate": 3.427554188790277e-06, + "loss": 0.6611, + "step": 11943 + }, + { + "epoch": 0.613834926508377, + "grad_norm": 1.0320862531661987, + "learning_rate": 3.4267641841957345e-06, + "loss": 0.6321, + "step": 11944 + }, + { + "epoch": 0.6138863192517217, + "grad_norm": 1.1200461387634277, + "learning_rate": 3.4259742231890638e-06, + "loss": 0.7314, + "step": 11945 + }, + { + "epoch": 0.6139377119950663, + "grad_norm": 0.9819492101669312, + "learning_rate": 3.4251843057921524e-06, + "loss": 0.7122, + "step": 11946 + }, + { + "epoch": 0.613989104738411, + "grad_norm": 1.084065318107605, + "learning_rate": 3.4243944320268847e-06, + "loss": 0.7351, + "step": 11947 + }, + { + "epoch": 0.6140404974817556, + "grad_norm": 1.089953064918518, + "learning_rate": 3.4236046019151448e-06, + "loss": 0.708, + "step": 11948 + }, + { + "epoch": 0.6140918902251002, + "grad_norm": 0.8278733491897583, + "learning_rate": 3.422814815478816e-06, + "loss": 0.6769, + "step": 11949 + }, + { + "epoch": 0.6141432829684449, + "grad_norm": 0.7419480085372925, + "learning_rate": 3.422025072739782e-06, + "loss": 0.7056, + "step": 11950 + }, + { + "epoch": 0.6141946757117895, + "grad_norm": 1.2013943195343018, + "learning_rate": 3.42123537371992e-06, + "loss": 0.7135, + "step": 11951 + }, + { + "epoch": 0.6142460684551342, + "grad_norm": 1.0517261028289795, + "learning_rate": 3.42044571844111e-06, + "loss": 0.686, + "step": 11952 + }, + { + "epoch": 0.6142974611984787, + "grad_norm": 1.1152492761611938, + "learning_rate": 3.419656106925232e-06, + "loss": 0.7829, + "step": 11953 + }, + { + "epoch": 0.6143488539418234, + "grad_norm": 0.9811345338821411, + "learning_rate": 3.418866539194161e-06, + "loss": 0.6442, + "step": 11954 + }, + { + "epoch": 0.614400246685168, + "grad_norm": 1.0819642543792725, + "learning_rate": 3.4180770152697725e-06, + "loss": 0.7238, + "step": 11955 + }, + { + "epoch": 0.6144516394285127, + "grad_norm": 1.0472996234893799, + "learning_rate": 3.4172875351739425e-06, + "loss": 0.6851, + "step": 11956 + }, + { + "epoch": 0.6145030321718573, + "grad_norm": 1.075195074081421, + "learning_rate": 3.4164980989285424e-06, + "loss": 0.6773, + "step": 11957 + }, + { + "epoch": 0.614554424915202, + "grad_norm": 1.0791219472885132, + "learning_rate": 3.4157087065554463e-06, + "loss": 0.6986, + "step": 11958 + }, + { + "epoch": 0.6146058176585466, + "grad_norm": 0.7175921201705933, + "learning_rate": 3.414919358076525e-06, + "loss": 0.6752, + "step": 11959 + }, + { + "epoch": 0.6146572104018913, + "grad_norm": 1.1144776344299316, + "learning_rate": 3.414130053513645e-06, + "loss": 0.6737, + "step": 11960 + }, + { + "epoch": 0.6147086031452359, + "grad_norm": 1.0300164222717285, + "learning_rate": 3.413340792888677e-06, + "loss": 0.7013, + "step": 11961 + }, + { + "epoch": 0.6147599958885805, + "grad_norm": 1.0904394388198853, + "learning_rate": 3.4125515762234895e-06, + "loss": 0.6695, + "step": 11962 + }, + { + "epoch": 0.6148113886319252, + "grad_norm": 0.7407886981964111, + "learning_rate": 3.4117624035399456e-06, + "loss": 0.6589, + "step": 11963 + }, + { + "epoch": 0.6148627813752698, + "grad_norm": 1.0961823463439941, + "learning_rate": 3.410973274859911e-06, + "loss": 0.6625, + "step": 11964 + }, + { + "epoch": 0.6149141741186145, + "grad_norm": 0.8541749119758606, + "learning_rate": 3.4101841902052492e-06, + "loss": 0.6938, + "step": 11965 + }, + { + "epoch": 0.6149655668619591, + "grad_norm": 1.0181026458740234, + "learning_rate": 3.409395149597823e-06, + "loss": 0.7096, + "step": 11966 + }, + { + "epoch": 0.6150169596053038, + "grad_norm": 1.060071349143982, + "learning_rate": 3.4086061530594944e-06, + "loss": 0.7042, + "step": 11967 + }, + { + "epoch": 0.6150683523486483, + "grad_norm": 1.0849549770355225, + "learning_rate": 3.40781720061212e-06, + "loss": 0.7028, + "step": 11968 + }, + { + "epoch": 0.615119745091993, + "grad_norm": 0.9998595118522644, + "learning_rate": 3.407028292277562e-06, + "loss": 0.7085, + "step": 11969 + }, + { + "epoch": 0.6151711378353376, + "grad_norm": 1.1476051807403564, + "learning_rate": 3.4062394280776754e-06, + "loss": 0.7082, + "step": 11970 + }, + { + "epoch": 0.6152225305786823, + "grad_norm": 0.8414899706840515, + "learning_rate": 3.405450608034317e-06, + "loss": 0.6441, + "step": 11971 + }, + { + "epoch": 0.6152739233220269, + "grad_norm": 1.0804591178894043, + "learning_rate": 3.4046618321693437e-06, + "loss": 0.6955, + "step": 11972 + }, + { + "epoch": 0.6153253160653716, + "grad_norm": 1.0082899332046509, + "learning_rate": 3.4038731005046054e-06, + "loss": 0.7377, + "step": 11973 + }, + { + "epoch": 0.6153767088087162, + "grad_norm": 1.03804349899292, + "learning_rate": 3.4030844130619573e-06, + "loss": 0.7115, + "step": 11974 + }, + { + "epoch": 0.6154281015520608, + "grad_norm": 0.9946032762527466, + "learning_rate": 3.4022957698632517e-06, + "loss": 0.6828, + "step": 11975 + }, + { + "epoch": 0.6154794942954055, + "grad_norm": 1.109576940536499, + "learning_rate": 3.4015071709303347e-06, + "loss": 0.6204, + "step": 11976 + }, + { + "epoch": 0.6155308870387501, + "grad_norm": 1.0410388708114624, + "learning_rate": 3.4007186162850576e-06, + "loss": 0.7003, + "step": 11977 + }, + { + "epoch": 0.6155822797820948, + "grad_norm": 1.045224666595459, + "learning_rate": 3.3999301059492682e-06, + "loss": 0.6632, + "step": 11978 + }, + { + "epoch": 0.6156336725254394, + "grad_norm": 1.0935750007629395, + "learning_rate": 3.399141639944813e-06, + "loss": 0.7261, + "step": 11979 + }, + { + "epoch": 0.6156850652687841, + "grad_norm": 1.105319619178772, + "learning_rate": 3.3983532182935353e-06, + "loss": 0.7271, + "step": 11980 + }, + { + "epoch": 0.6157364580121287, + "grad_norm": 1.0905101299285889, + "learning_rate": 3.39756484101728e-06, + "loss": 0.7401, + "step": 11981 + }, + { + "epoch": 0.6157878507554734, + "grad_norm": 1.0350977182388306, + "learning_rate": 3.3967765081378933e-06, + "loss": 0.6524, + "step": 11982 + }, + { + "epoch": 0.6158392434988179, + "grad_norm": 1.127240777015686, + "learning_rate": 3.395988219677211e-06, + "loss": 0.7135, + "step": 11983 + }, + { + "epoch": 0.6158906362421626, + "grad_norm": 1.0493940114974976, + "learning_rate": 3.3951999756570754e-06, + "loss": 0.7156, + "step": 11984 + }, + { + "epoch": 0.6159420289855072, + "grad_norm": 1.0714149475097656, + "learning_rate": 3.394411776099328e-06, + "loss": 0.7152, + "step": 11985 + }, + { + "epoch": 0.6159934217288519, + "grad_norm": 1.0448074340820312, + "learning_rate": 3.3936236210258028e-06, + "loss": 0.7025, + "step": 11986 + }, + { + "epoch": 0.6160448144721965, + "grad_norm": 1.0660523176193237, + "learning_rate": 3.392835510458338e-06, + "loss": 0.7097, + "step": 11987 + }, + { + "epoch": 0.6160962072155411, + "grad_norm": 1.0485416650772095, + "learning_rate": 3.392047444418769e-06, + "loss": 0.6797, + "step": 11988 + }, + { + "epoch": 0.6161475999588858, + "grad_norm": 0.7331197261810303, + "learning_rate": 3.3912594229289296e-06, + "loss": 0.6641, + "step": 11989 + }, + { + "epoch": 0.6161989927022304, + "grad_norm": 0.6869722604751587, + "learning_rate": 3.3904714460106527e-06, + "loss": 0.6761, + "step": 11990 + }, + { + "epoch": 0.6162503854455751, + "grad_norm": 1.1129176616668701, + "learning_rate": 3.3896835136857707e-06, + "loss": 0.6979, + "step": 11991 + }, + { + "epoch": 0.6163017781889197, + "grad_norm": 1.070527195930481, + "learning_rate": 3.388895625976112e-06, + "loss": 0.7043, + "step": 11992 + }, + { + "epoch": 0.6163531709322644, + "grad_norm": 1.0062566995620728, + "learning_rate": 3.388107782903507e-06, + "loss": 0.6904, + "step": 11993 + }, + { + "epoch": 0.616404563675609, + "grad_norm": 1.1211024522781372, + "learning_rate": 3.387319984489785e-06, + "loss": 0.725, + "step": 11994 + }, + { + "epoch": 0.6164559564189537, + "grad_norm": 0.7740526795387268, + "learning_rate": 3.3865322307567692e-06, + "loss": 0.6344, + "step": 11995 + }, + { + "epoch": 0.6165073491622983, + "grad_norm": 1.1901206970214844, + "learning_rate": 3.3857445217262876e-06, + "loss": 0.7121, + "step": 11996 + }, + { + "epoch": 0.616558741905643, + "grad_norm": 1.119513750076294, + "learning_rate": 3.384956857420163e-06, + "loss": 0.7488, + "step": 11997 + }, + { + "epoch": 0.6166101346489876, + "grad_norm": 1.0623103380203247, + "learning_rate": 3.38416923786022e-06, + "loss": 0.6765, + "step": 11998 + }, + { + "epoch": 0.6166615273923322, + "grad_norm": 1.1121455430984497, + "learning_rate": 3.3833816630682786e-06, + "loss": 0.7258, + "step": 11999 + }, + { + "epoch": 0.6167129201356768, + "grad_norm": 1.2464754581451416, + "learning_rate": 3.3825941330661594e-06, + "loss": 0.6804, + "step": 12000 + }, + { + "epoch": 0.6167643128790214, + "grad_norm": 1.045380711555481, + "learning_rate": 3.381806647875685e-06, + "loss": 0.7242, + "step": 12001 + }, + { + "epoch": 0.6168157056223661, + "grad_norm": 1.005313754081726, + "learning_rate": 3.3810192075186677e-06, + "loss": 0.6833, + "step": 12002 + }, + { + "epoch": 0.6168670983657107, + "grad_norm": 0.6825047731399536, + "learning_rate": 3.3802318120169276e-06, + "loss": 0.6907, + "step": 12003 + }, + { + "epoch": 0.6169184911090554, + "grad_norm": 1.12043297290802, + "learning_rate": 3.3794444613922813e-06, + "loss": 0.7719, + "step": 12004 + }, + { + "epoch": 0.6169698838524, + "grad_norm": 1.041176199913025, + "learning_rate": 3.378657155666539e-06, + "loss": 0.7143, + "step": 12005 + }, + { + "epoch": 0.6170212765957447, + "grad_norm": 1.0308610200881958, + "learning_rate": 3.377869894861516e-06, + "loss": 0.7262, + "step": 12006 + }, + { + "epoch": 0.6170726693390893, + "grad_norm": 0.8717643022537231, + "learning_rate": 3.3770826789990257e-06, + "loss": 0.6585, + "step": 12007 + }, + { + "epoch": 0.617124062082434, + "grad_norm": 0.9989317655563354, + "learning_rate": 3.3762955081008757e-06, + "loss": 0.6829, + "step": 12008 + }, + { + "epoch": 0.6171754548257786, + "grad_norm": 1.0629189014434814, + "learning_rate": 3.3755083821888767e-06, + "loss": 0.6978, + "step": 12009 + }, + { + "epoch": 0.6172268475691233, + "grad_norm": 0.8257383704185486, + "learning_rate": 3.3747213012848374e-06, + "loss": 0.6725, + "step": 12010 + }, + { + "epoch": 0.6172782403124679, + "grad_norm": 0.7306297421455383, + "learning_rate": 3.3739342654105633e-06, + "loss": 0.6548, + "step": 12011 + }, + { + "epoch": 0.6173296330558126, + "grad_norm": 1.1134037971496582, + "learning_rate": 3.37314727458786e-06, + "loss": 0.7049, + "step": 12012 + }, + { + "epoch": 0.6173810257991572, + "grad_norm": 1.0994467735290527, + "learning_rate": 3.372360328838532e-06, + "loss": 0.727, + "step": 12013 + }, + { + "epoch": 0.6174324185425017, + "grad_norm": 1.106757402420044, + "learning_rate": 3.3715734281843836e-06, + "loss": 0.702, + "step": 12014 + }, + { + "epoch": 0.6174838112858464, + "grad_norm": 1.1218456029891968, + "learning_rate": 3.3707865726472145e-06, + "loss": 0.7051, + "step": 12015 + }, + { + "epoch": 0.617535204029191, + "grad_norm": 1.109925389289856, + "learning_rate": 3.3699997622488258e-06, + "loss": 0.7753, + "step": 12016 + }, + { + "epoch": 0.6175865967725357, + "grad_norm": 1.0417683124542236, + "learning_rate": 3.3692129970110177e-06, + "loss": 0.6741, + "step": 12017 + }, + { + "epoch": 0.6176379895158803, + "grad_norm": 1.0980241298675537, + "learning_rate": 3.3684262769555877e-06, + "loss": 0.7299, + "step": 12018 + }, + { + "epoch": 0.617689382259225, + "grad_norm": 1.0709458589553833, + "learning_rate": 3.3676396021043316e-06, + "loss": 0.7048, + "step": 12019 + }, + { + "epoch": 0.6177407750025696, + "grad_norm": 0.9785841703414917, + "learning_rate": 3.3668529724790467e-06, + "loss": 0.7338, + "step": 12020 + }, + { + "epoch": 0.6177921677459143, + "grad_norm": 1.0190116167068481, + "learning_rate": 3.3660663881015253e-06, + "loss": 0.7098, + "step": 12021 + }, + { + "epoch": 0.6178435604892589, + "grad_norm": 1.043182373046875, + "learning_rate": 3.365279848993561e-06, + "loss": 0.6559, + "step": 12022 + }, + { + "epoch": 0.6178949532326036, + "grad_norm": 1.0624303817749023, + "learning_rate": 3.364493355176949e-06, + "loss": 0.7556, + "step": 12023 + }, + { + "epoch": 0.6179463459759482, + "grad_norm": 0.7292889952659607, + "learning_rate": 3.363706906673474e-06, + "loss": 0.6333, + "step": 12024 + }, + { + "epoch": 0.6179977387192929, + "grad_norm": 1.083287239074707, + "learning_rate": 3.3629205035049286e-06, + "loss": 0.7256, + "step": 12025 + }, + { + "epoch": 0.6180491314626375, + "grad_norm": 1.2116307020187378, + "learning_rate": 3.3621341456931e-06, + "loss": 0.7038, + "step": 12026 + }, + { + "epoch": 0.6181005242059822, + "grad_norm": 1.0136851072311401, + "learning_rate": 3.361347833259777e-06, + "loss": 0.6772, + "step": 12027 + }, + { + "epoch": 0.6181519169493268, + "grad_norm": 1.057914137840271, + "learning_rate": 3.360561566226741e-06, + "loss": 0.6631, + "step": 12028 + }, + { + "epoch": 0.6182033096926713, + "grad_norm": 0.9898151159286499, + "learning_rate": 3.359775344615779e-06, + "loss": 0.6984, + "step": 12029 + }, + { + "epoch": 0.618254702436016, + "grad_norm": 1.161797285079956, + "learning_rate": 3.3589891684486742e-06, + "loss": 0.7316, + "step": 12030 + }, + { + "epoch": 0.6183060951793606, + "grad_norm": 1.1593981981277466, + "learning_rate": 3.3582030377472065e-06, + "loss": 0.736, + "step": 12031 + }, + { + "epoch": 0.6183574879227053, + "grad_norm": 1.067274808883667, + "learning_rate": 3.3574169525331567e-06, + "loss": 0.7279, + "step": 12032 + }, + { + "epoch": 0.6184088806660499, + "grad_norm": 1.1198688745498657, + "learning_rate": 3.356630912828308e-06, + "loss": 0.7116, + "step": 12033 + }, + { + "epoch": 0.6184602734093946, + "grad_norm": 1.110133171081543, + "learning_rate": 3.355844918654432e-06, + "loss": 0.7018, + "step": 12034 + }, + { + "epoch": 0.6185116661527392, + "grad_norm": 0.7323580980300903, + "learning_rate": 3.355058970033308e-06, + "loss": 0.7122, + "step": 12035 + }, + { + "epoch": 0.6185630588960839, + "grad_norm": 1.077885627746582, + "learning_rate": 3.354273066986714e-06, + "loss": 0.7443, + "step": 12036 + }, + { + "epoch": 0.6186144516394285, + "grad_norm": 0.8660556077957153, + "learning_rate": 3.3534872095364203e-06, + "loss": 0.6444, + "step": 12037 + }, + { + "epoch": 0.6186658443827732, + "grad_norm": 1.056745171546936, + "learning_rate": 3.3527013977042007e-06, + "loss": 0.7077, + "step": 12038 + }, + { + "epoch": 0.6187172371261178, + "grad_norm": 1.0295047760009766, + "learning_rate": 3.3519156315118282e-06, + "loss": 0.6066, + "step": 12039 + }, + { + "epoch": 0.6187686298694625, + "grad_norm": 0.7480875253677368, + "learning_rate": 3.3511299109810715e-06, + "loss": 0.669, + "step": 12040 + }, + { + "epoch": 0.6188200226128071, + "grad_norm": 1.0890185832977295, + "learning_rate": 3.3503442361337002e-06, + "loss": 0.6982, + "step": 12041 + }, + { + "epoch": 0.6188714153561518, + "grad_norm": 0.8537014722824097, + "learning_rate": 3.349558606991483e-06, + "loss": 0.7009, + "step": 12042 + }, + { + "epoch": 0.6189228080994964, + "grad_norm": 1.0447927713394165, + "learning_rate": 3.348773023576188e-06, + "loss": 0.6654, + "step": 12043 + }, + { + "epoch": 0.6189742008428409, + "grad_norm": 1.093884825706482, + "learning_rate": 3.3479874859095755e-06, + "loss": 0.7128, + "step": 12044 + }, + { + "epoch": 0.6190255935861856, + "grad_norm": 1.9971200227737427, + "learning_rate": 3.3472019940134125e-06, + "loss": 0.7152, + "step": 12045 + }, + { + "epoch": 0.6190769863295302, + "grad_norm": 1.0972340106964111, + "learning_rate": 3.346416547909464e-06, + "loss": 0.7606, + "step": 12046 + }, + { + "epoch": 0.6191283790728749, + "grad_norm": 0.7798712253570557, + "learning_rate": 3.345631147619487e-06, + "loss": 0.6707, + "step": 12047 + }, + { + "epoch": 0.6191797718162195, + "grad_norm": 1.1579269170761108, + "learning_rate": 3.3448457931652436e-06, + "loss": 0.7189, + "step": 12048 + }, + { + "epoch": 0.6192311645595642, + "grad_norm": 1.030864953994751, + "learning_rate": 3.344060484568493e-06, + "loss": 0.6889, + "step": 12049 + }, + { + "epoch": 0.6192825573029088, + "grad_norm": 0.9841315746307373, + "learning_rate": 3.343275221850993e-06, + "loss": 0.6264, + "step": 12050 + }, + { + "epoch": 0.6193339500462535, + "grad_norm": 1.1842811107635498, + "learning_rate": 3.3424900050344986e-06, + "loss": 0.7136, + "step": 12051 + }, + { + "epoch": 0.6193853427895981, + "grad_norm": 1.1328599452972412, + "learning_rate": 3.341704834140769e-06, + "loss": 0.6743, + "step": 12052 + }, + { + "epoch": 0.6194367355329428, + "grad_norm": 1.0348427295684814, + "learning_rate": 3.3409197091915526e-06, + "loss": 0.6707, + "step": 12053 + }, + { + "epoch": 0.6194881282762874, + "grad_norm": 0.7747828960418701, + "learning_rate": 3.3401346302086048e-06, + "loss": 0.6614, + "step": 12054 + }, + { + "epoch": 0.6195395210196321, + "grad_norm": 1.0832515954971313, + "learning_rate": 3.3393495972136782e-06, + "loss": 0.7285, + "step": 12055 + }, + { + "epoch": 0.6195909137629767, + "grad_norm": 0.8003397583961487, + "learning_rate": 3.3385646102285183e-06, + "loss": 0.6467, + "step": 12056 + }, + { + "epoch": 0.6196423065063213, + "grad_norm": 0.9832813143730164, + "learning_rate": 3.337779669274878e-06, + "loss": 0.6464, + "step": 12057 + }, + { + "epoch": 0.619693699249666, + "grad_norm": 0.9928159713745117, + "learning_rate": 3.3369947743745036e-06, + "loss": 0.6648, + "step": 12058 + }, + { + "epoch": 0.6197450919930105, + "grad_norm": 0.6698116064071655, + "learning_rate": 3.336209925549141e-06, + "loss": 0.6391, + "step": 12059 + }, + { + "epoch": 0.6197964847363552, + "grad_norm": 1.108699083328247, + "learning_rate": 3.3354251228205354e-06, + "loss": 0.7148, + "step": 12060 + }, + { + "epoch": 0.6198478774796998, + "grad_norm": 0.6748021245002747, + "learning_rate": 3.334640366210431e-06, + "loss": 0.6486, + "step": 12061 + }, + { + "epoch": 0.6198992702230445, + "grad_norm": 1.0672430992126465, + "learning_rate": 3.3338556557405695e-06, + "loss": 0.6815, + "step": 12062 + }, + { + "epoch": 0.6199506629663891, + "grad_norm": 1.105315923690796, + "learning_rate": 3.333070991432691e-06, + "loss": 0.6892, + "step": 12063 + }, + { + "epoch": 0.6200020557097338, + "grad_norm": 1.027273416519165, + "learning_rate": 3.332286373308536e-06, + "loss": 0.6656, + "step": 12064 + }, + { + "epoch": 0.6200534484530784, + "grad_norm": 1.1047329902648926, + "learning_rate": 3.331501801389846e-06, + "loss": 0.7114, + "step": 12065 + }, + { + "epoch": 0.6201048411964231, + "grad_norm": 0.7975478768348694, + "learning_rate": 3.330717275698353e-06, + "loss": 0.6423, + "step": 12066 + }, + { + "epoch": 0.6201562339397677, + "grad_norm": 1.1321862936019897, + "learning_rate": 3.329932796255796e-06, + "loss": 0.7698, + "step": 12067 + }, + { + "epoch": 0.6202076266831124, + "grad_norm": 0.7159425616264343, + "learning_rate": 3.3291483630839094e-06, + "loss": 0.5928, + "step": 12068 + }, + { + "epoch": 0.620259019426457, + "grad_norm": 1.0901782512664795, + "learning_rate": 3.328363976204427e-06, + "loss": 0.6935, + "step": 12069 + }, + { + "epoch": 0.6203104121698017, + "grad_norm": 1.0330448150634766, + "learning_rate": 3.32757963563908e-06, + "loss": 0.7303, + "step": 12070 + }, + { + "epoch": 0.6203618049131463, + "grad_norm": 0.7264391183853149, + "learning_rate": 3.3267953414095997e-06, + "loss": 0.6417, + "step": 12071 + }, + { + "epoch": 0.620413197656491, + "grad_norm": 1.085404634475708, + "learning_rate": 3.3260110935377155e-06, + "loss": 0.7207, + "step": 12072 + }, + { + "epoch": 0.6204645903998356, + "grad_norm": 1.0804234743118286, + "learning_rate": 3.3252268920451546e-06, + "loss": 0.6962, + "step": 12073 + }, + { + "epoch": 0.6205159831431801, + "grad_norm": 1.1416406631469727, + "learning_rate": 3.324442736953646e-06, + "loss": 0.7151, + "step": 12074 + }, + { + "epoch": 0.6205673758865248, + "grad_norm": 1.110981822013855, + "learning_rate": 3.3236586282849164e-06, + "loss": 0.7512, + "step": 12075 + }, + { + "epoch": 0.6206187686298694, + "grad_norm": 1.0306501388549805, + "learning_rate": 3.3228745660606864e-06, + "loss": 0.7072, + "step": 12076 + }, + { + "epoch": 0.6206701613732141, + "grad_norm": 1.0540844202041626, + "learning_rate": 3.3220905503026806e-06, + "loss": 0.7193, + "step": 12077 + }, + { + "epoch": 0.6207215541165587, + "grad_norm": 1.218295931816101, + "learning_rate": 3.321306581032624e-06, + "loss": 0.7634, + "step": 12078 + }, + { + "epoch": 0.6207729468599034, + "grad_norm": 1.0965325832366943, + "learning_rate": 3.3205226582722318e-06, + "loss": 0.7499, + "step": 12079 + }, + { + "epoch": 0.620824339603248, + "grad_norm": 1.129643440246582, + "learning_rate": 3.3197387820432262e-06, + "loss": 0.7261, + "step": 12080 + }, + { + "epoch": 0.6208757323465927, + "grad_norm": 0.7016200423240662, + "learning_rate": 3.3189549523673264e-06, + "loss": 0.611, + "step": 12081 + }, + { + "epoch": 0.6209271250899373, + "grad_norm": 1.1036901473999023, + "learning_rate": 3.3181711692662468e-06, + "loss": 0.7349, + "step": 12082 + }, + { + "epoch": 0.620978517833282, + "grad_norm": 1.016593098640442, + "learning_rate": 3.3173874327617028e-06, + "loss": 0.6979, + "step": 12083 + }, + { + "epoch": 0.6210299105766266, + "grad_norm": 1.2975292205810547, + "learning_rate": 3.3166037428754115e-06, + "loss": 0.6955, + "step": 12084 + }, + { + "epoch": 0.6210813033199712, + "grad_norm": 0.8700991868972778, + "learning_rate": 3.315820099629081e-06, + "loss": 0.636, + "step": 12085 + }, + { + "epoch": 0.6211326960633159, + "grad_norm": 1.1041383743286133, + "learning_rate": 3.3150365030444264e-06, + "loss": 0.7621, + "step": 12086 + }, + { + "epoch": 0.6211840888066605, + "grad_norm": 1.0860810279846191, + "learning_rate": 3.3142529531431584e-06, + "loss": 0.737, + "step": 12087 + }, + { + "epoch": 0.6212354815500052, + "grad_norm": 1.0357568264007568, + "learning_rate": 3.313469449946982e-06, + "loss": 0.6792, + "step": 12088 + }, + { + "epoch": 0.6212868742933498, + "grad_norm": 0.7414577603340149, + "learning_rate": 3.312685993477608e-06, + "loss": 0.6814, + "step": 12089 + }, + { + "epoch": 0.6213382670366944, + "grad_norm": 0.7697691321372986, + "learning_rate": 3.3119025837567415e-06, + "loss": 0.6882, + "step": 12090 + }, + { + "epoch": 0.621389659780039, + "grad_norm": 0.8091595768928528, + "learning_rate": 3.3111192208060893e-06, + "loss": 0.685, + "step": 12091 + }, + { + "epoch": 0.6214410525233837, + "grad_norm": 1.2382551431655884, + "learning_rate": 3.3103359046473527e-06, + "loss": 0.6555, + "step": 12092 + }, + { + "epoch": 0.6214924452667283, + "grad_norm": 1.0569231510162354, + "learning_rate": 3.309552635302235e-06, + "loss": 0.712, + "step": 12093 + }, + { + "epoch": 0.621543838010073, + "grad_norm": 1.1205781698226929, + "learning_rate": 3.3087694127924404e-06, + "loss": 0.7353, + "step": 12094 + }, + { + "epoch": 0.6215952307534176, + "grad_norm": 1.0179264545440674, + "learning_rate": 3.3079862371396637e-06, + "loss": 0.6864, + "step": 12095 + }, + { + "epoch": 0.6216466234967623, + "grad_norm": 0.8273069858551025, + "learning_rate": 3.307203108365606e-06, + "loss": 0.6534, + "step": 12096 + }, + { + "epoch": 0.6216980162401069, + "grad_norm": 0.9565629363059998, + "learning_rate": 3.3064200264919667e-06, + "loss": 0.6998, + "step": 12097 + }, + { + "epoch": 0.6217494089834515, + "grad_norm": 1.1001129150390625, + "learning_rate": 3.3056369915404364e-06, + "loss": 0.7407, + "step": 12098 + }, + { + "epoch": 0.6218008017267962, + "grad_norm": 1.0582191944122314, + "learning_rate": 3.304854003532714e-06, + "loss": 0.7166, + "step": 12099 + }, + { + "epoch": 0.6218521944701408, + "grad_norm": 1.1117619276046753, + "learning_rate": 3.304071062490492e-06, + "loss": 0.7509, + "step": 12100 + }, + { + "epoch": 0.6219035872134855, + "grad_norm": 1.1450592279434204, + "learning_rate": 3.3032881684354614e-06, + "loss": 0.7301, + "step": 12101 + }, + { + "epoch": 0.6219549799568301, + "grad_norm": 1.064035415649414, + "learning_rate": 3.302505321389313e-06, + "loss": 0.7209, + "step": 12102 + }, + { + "epoch": 0.6220063727001748, + "grad_norm": 1.0156742334365845, + "learning_rate": 3.30172252137374e-06, + "loss": 0.7243, + "step": 12103 + }, + { + "epoch": 0.6220577654435194, + "grad_norm": 1.143764853477478, + "learning_rate": 3.300939768410426e-06, + "loss": 0.7546, + "step": 12104 + }, + { + "epoch": 0.622109158186864, + "grad_norm": 0.804700493812561, + "learning_rate": 3.3001570625210583e-06, + "loss": 0.6649, + "step": 12105 + }, + { + "epoch": 0.6221605509302086, + "grad_norm": 1.1368941068649292, + "learning_rate": 3.299374403727324e-06, + "loss": 0.7285, + "step": 12106 + }, + { + "epoch": 0.6222119436735533, + "grad_norm": 1.0081709623336792, + "learning_rate": 3.298591792050909e-06, + "loss": 0.6979, + "step": 12107 + }, + { + "epoch": 0.6222633364168979, + "grad_norm": 1.1481094360351562, + "learning_rate": 3.2978092275134922e-06, + "loss": 0.6985, + "step": 12108 + }, + { + "epoch": 0.6223147291602426, + "grad_norm": 1.1093038320541382, + "learning_rate": 3.297026710136757e-06, + "loss": 0.7126, + "step": 12109 + }, + { + "epoch": 0.6223661219035872, + "grad_norm": 1.1402454376220703, + "learning_rate": 3.2962442399423842e-06, + "loss": 0.6547, + "step": 12110 + }, + { + "epoch": 0.6224175146469318, + "grad_norm": 1.1249759197235107, + "learning_rate": 3.2954618169520523e-06, + "loss": 0.698, + "step": 12111 + }, + { + "epoch": 0.6224689073902765, + "grad_norm": 1.1534699201583862, + "learning_rate": 3.2946794411874406e-06, + "loss": 0.7443, + "step": 12112 + }, + { + "epoch": 0.6225203001336211, + "grad_norm": 1.1092865467071533, + "learning_rate": 3.2938971126702233e-06, + "loss": 0.7318, + "step": 12113 + }, + { + "epoch": 0.6225716928769658, + "grad_norm": 1.082006812095642, + "learning_rate": 3.293114831422076e-06, + "loss": 0.67, + "step": 12114 + }, + { + "epoch": 0.6226230856203104, + "grad_norm": 0.75110924243927, + "learning_rate": 3.2923325974646723e-06, + "loss": 0.7106, + "step": 12115 + }, + { + "epoch": 0.6226744783636551, + "grad_norm": 0.6790202856063843, + "learning_rate": 3.2915504108196885e-06, + "loss": 0.6756, + "step": 12116 + }, + { + "epoch": 0.6227258711069997, + "grad_norm": 0.7299683094024658, + "learning_rate": 3.2907682715087887e-06, + "loss": 0.7017, + "step": 12117 + }, + { + "epoch": 0.6227772638503444, + "grad_norm": 1.1198533773422241, + "learning_rate": 3.2899861795536476e-06, + "loss": 0.7039, + "step": 12118 + }, + { + "epoch": 0.622828656593689, + "grad_norm": 1.049757957458496, + "learning_rate": 3.2892041349759316e-06, + "loss": 0.7259, + "step": 12119 + }, + { + "epoch": 0.6228800493370336, + "grad_norm": 1.1526069641113281, + "learning_rate": 3.2884221377973124e-06, + "loss": 0.7179, + "step": 12120 + }, + { + "epoch": 0.6229314420803782, + "grad_norm": 1.0349680185317993, + "learning_rate": 3.2876401880394495e-06, + "loss": 0.7414, + "step": 12121 + }, + { + "epoch": 0.6229828348237229, + "grad_norm": 1.0531561374664307, + "learning_rate": 3.2868582857240107e-06, + "loss": 0.7109, + "step": 12122 + }, + { + "epoch": 0.6230342275670675, + "grad_norm": 1.0895947217941284, + "learning_rate": 3.28607643087266e-06, + "loss": 0.7451, + "step": 12123 + }, + { + "epoch": 0.6230856203104121, + "grad_norm": 0.6915774345397949, + "learning_rate": 3.2852946235070583e-06, + "loss": 0.657, + "step": 12124 + }, + { + "epoch": 0.6231370130537568, + "grad_norm": 1.0454248189926147, + "learning_rate": 3.2845128636488656e-06, + "loss": 0.6756, + "step": 12125 + }, + { + "epoch": 0.6231884057971014, + "grad_norm": 1.129713773727417, + "learning_rate": 3.283731151319744e-06, + "loss": 0.7639, + "step": 12126 + }, + { + "epoch": 0.6232397985404461, + "grad_norm": 0.7431758642196655, + "learning_rate": 3.282949486541348e-06, + "loss": 0.7185, + "step": 12127 + }, + { + "epoch": 0.6232911912837907, + "grad_norm": 1.1020402908325195, + "learning_rate": 3.282167869335335e-06, + "loss": 0.7462, + "step": 12128 + }, + { + "epoch": 0.6233425840271354, + "grad_norm": 1.0747895240783691, + "learning_rate": 3.281386299723364e-06, + "loss": 0.6964, + "step": 12129 + }, + { + "epoch": 0.62339397677048, + "grad_norm": 0.7203353643417358, + "learning_rate": 3.2806047777270845e-06, + "loss": 0.6667, + "step": 12130 + }, + { + "epoch": 0.6234453695138247, + "grad_norm": 1.1273399591445923, + "learning_rate": 3.2798233033681504e-06, + "loss": 0.7722, + "step": 12131 + }, + { + "epoch": 0.6234967622571693, + "grad_norm": 1.0584933757781982, + "learning_rate": 3.279041876668215e-06, + "loss": 0.7034, + "step": 12132 + }, + { + "epoch": 0.623548155000514, + "grad_norm": 1.0835890769958496, + "learning_rate": 3.2782604976489274e-06, + "loss": 0.7218, + "step": 12133 + }, + { + "epoch": 0.6235995477438586, + "grad_norm": 1.0417078733444214, + "learning_rate": 3.277479166331935e-06, + "loss": 0.7785, + "step": 12134 + }, + { + "epoch": 0.6236509404872032, + "grad_norm": 1.014622449874878, + "learning_rate": 3.2766978827388864e-06, + "loss": 0.6622, + "step": 12135 + }, + { + "epoch": 0.6237023332305478, + "grad_norm": 0.6786527633666992, + "learning_rate": 3.2759166468914305e-06, + "loss": 0.6627, + "step": 12136 + }, + { + "epoch": 0.6237537259738924, + "grad_norm": 1.1211113929748535, + "learning_rate": 3.275135458811207e-06, + "loss": 0.7416, + "step": 12137 + }, + { + "epoch": 0.6238051187172371, + "grad_norm": 1.0647112131118774, + "learning_rate": 3.274354318519862e-06, + "loss": 0.7309, + "step": 12138 + }, + { + "epoch": 0.6238565114605817, + "grad_norm": 1.0913984775543213, + "learning_rate": 3.2735732260390397e-06, + "loss": 0.7387, + "step": 12139 + }, + { + "epoch": 0.6239079042039264, + "grad_norm": 0.7015288472175598, + "learning_rate": 3.272792181390376e-06, + "loss": 0.6744, + "step": 12140 + }, + { + "epoch": 0.623959296947271, + "grad_norm": 1.1160334348678589, + "learning_rate": 3.272011184595514e-06, + "loss": 0.7125, + "step": 12141 + }, + { + "epoch": 0.6240106896906157, + "grad_norm": 1.0897620916366577, + "learning_rate": 3.2712302356760917e-06, + "loss": 0.7758, + "step": 12142 + }, + { + "epoch": 0.6240620824339603, + "grad_norm": 1.0814694166183472, + "learning_rate": 3.2704493346537448e-06, + "loss": 0.7007, + "step": 12143 + }, + { + "epoch": 0.624113475177305, + "grad_norm": 1.0665570497512817, + "learning_rate": 3.269668481550109e-06, + "loss": 0.7179, + "step": 12144 + }, + { + "epoch": 0.6241648679206496, + "grad_norm": 0.8287755250930786, + "learning_rate": 3.2688876763868204e-06, + "loss": 0.6901, + "step": 12145 + }, + { + "epoch": 0.6242162606639943, + "grad_norm": 0.6976672410964966, + "learning_rate": 3.268106919185509e-06, + "loss": 0.6237, + "step": 12146 + }, + { + "epoch": 0.6242676534073389, + "grad_norm": 1.080417513847351, + "learning_rate": 3.267326209967808e-06, + "loss": 0.7476, + "step": 12147 + }, + { + "epoch": 0.6243190461506836, + "grad_norm": 1.1457500457763672, + "learning_rate": 3.266545548755349e-06, + "loss": 0.7331, + "step": 12148 + }, + { + "epoch": 0.6243704388940282, + "grad_norm": 1.162287712097168, + "learning_rate": 3.2657649355697572e-06, + "loss": 0.7774, + "step": 12149 + }, + { + "epoch": 0.6244218316373727, + "grad_norm": 1.0370932817459106, + "learning_rate": 3.2649843704326627e-06, + "loss": 0.6921, + "step": 12150 + }, + { + "epoch": 0.6244732243807174, + "grad_norm": 1.1899876594543457, + "learning_rate": 3.2642038533656916e-06, + "loss": 0.7344, + "step": 12151 + }, + { + "epoch": 0.624524617124062, + "grad_norm": 0.7286758422851562, + "learning_rate": 3.2634233843904687e-06, + "loss": 0.6255, + "step": 12152 + }, + { + "epoch": 0.6245760098674067, + "grad_norm": 0.7241337895393372, + "learning_rate": 3.262642963528617e-06, + "loss": 0.6693, + "step": 12153 + }, + { + "epoch": 0.6246274026107513, + "grad_norm": 1.0441992282867432, + "learning_rate": 3.2618625908017587e-06, + "loss": 0.6926, + "step": 12154 + }, + { + "epoch": 0.624678795354096, + "grad_norm": 1.066538691520691, + "learning_rate": 3.2610822662315172e-06, + "loss": 0.7694, + "step": 12155 + }, + { + "epoch": 0.6247301880974406, + "grad_norm": 1.091275930404663, + "learning_rate": 3.260301989839508e-06, + "loss": 0.7237, + "step": 12156 + }, + { + "epoch": 0.6247815808407853, + "grad_norm": 1.1216894388198853, + "learning_rate": 3.2595217616473516e-06, + "loss": 0.7191, + "step": 12157 + }, + { + "epoch": 0.6248329735841299, + "grad_norm": 1.0999263525009155, + "learning_rate": 3.2587415816766664e-06, + "loss": 0.7281, + "step": 12158 + }, + { + "epoch": 0.6248843663274746, + "grad_norm": 1.1139849424362183, + "learning_rate": 3.257961449949064e-06, + "loss": 0.7284, + "step": 12159 + }, + { + "epoch": 0.6249357590708192, + "grad_norm": 1.034637212753296, + "learning_rate": 3.2571813664861616e-06, + "loss": 0.7186, + "step": 12160 + }, + { + "epoch": 0.6249871518141639, + "grad_norm": 1.14315927028656, + "learning_rate": 3.256401331309572e-06, + "loss": 0.6675, + "step": 12161 + }, + { + "epoch": 0.6250385445575085, + "grad_norm": 1.2068579196929932, + "learning_rate": 3.2556213444409054e-06, + "loss": 0.7083, + "step": 12162 + }, + { + "epoch": 0.6250899373008532, + "grad_norm": 1.15444815158844, + "learning_rate": 3.254841405901773e-06, + "loss": 0.7337, + "step": 12163 + }, + { + "epoch": 0.6251413300441978, + "grad_norm": 0.7700082063674927, + "learning_rate": 3.254061515713784e-06, + "loss": 0.6976, + "step": 12164 + }, + { + "epoch": 0.6251927227875425, + "grad_norm": 0.7679399251937866, + "learning_rate": 3.253281673898544e-06, + "loss": 0.6459, + "step": 12165 + }, + { + "epoch": 0.625244115530887, + "grad_norm": 1.101738691329956, + "learning_rate": 3.2525018804776608e-06, + "loss": 0.7133, + "step": 12166 + }, + { + "epoch": 0.6252955082742316, + "grad_norm": 1.0523805618286133, + "learning_rate": 3.2517221354727385e-06, + "loss": 0.7092, + "step": 12167 + }, + { + "epoch": 0.6253469010175763, + "grad_norm": 1.1280841827392578, + "learning_rate": 3.2509424389053834e-06, + "loss": 0.7322, + "step": 12168 + }, + { + "epoch": 0.6253982937609209, + "grad_norm": 0.9063828587532043, + "learning_rate": 3.2501627907971933e-06, + "loss": 0.624, + "step": 12169 + }, + { + "epoch": 0.6254496865042656, + "grad_norm": 1.14544677734375, + "learning_rate": 3.2493831911697715e-06, + "loss": 0.6969, + "step": 12170 + }, + { + "epoch": 0.6255010792476102, + "grad_norm": 0.7130489945411682, + "learning_rate": 3.248603640044719e-06, + "loss": 0.6683, + "step": 12171 + }, + { + "epoch": 0.6255524719909549, + "grad_norm": 1.0864167213439941, + "learning_rate": 3.24782413744363e-06, + "loss": 0.7146, + "step": 12172 + }, + { + "epoch": 0.6256038647342995, + "grad_norm": 0.744078516960144, + "learning_rate": 3.2470446833881033e-06, + "loss": 0.6721, + "step": 12173 + }, + { + "epoch": 0.6256552574776442, + "grad_norm": 0.6829371452331543, + "learning_rate": 3.246265277899735e-06, + "loss": 0.672, + "step": 12174 + }, + { + "epoch": 0.6257066502209888, + "grad_norm": 1.051866054534912, + "learning_rate": 3.245485921000118e-06, + "loss": 0.7608, + "step": 12175 + }, + { + "epoch": 0.6257580429643335, + "grad_norm": 0.7587919235229492, + "learning_rate": 3.2447066127108456e-06, + "loss": 0.6816, + "step": 12176 + }, + { + "epoch": 0.6258094357076781, + "grad_norm": 1.0298322439193726, + "learning_rate": 3.2439273530535113e-06, + "loss": 0.678, + "step": 12177 + }, + { + "epoch": 0.6258608284510228, + "grad_norm": 1.1157838106155396, + "learning_rate": 3.243148142049701e-06, + "loss": 0.7231, + "step": 12178 + }, + { + "epoch": 0.6259122211943674, + "grad_norm": 1.1767150163650513, + "learning_rate": 3.242368979721006e-06, + "loss": 0.7381, + "step": 12179 + }, + { + "epoch": 0.625963613937712, + "grad_norm": 1.0673679113388062, + "learning_rate": 3.2415898660890154e-06, + "loss": 0.7178, + "step": 12180 + }, + { + "epoch": 0.6260150066810566, + "grad_norm": 1.012874722480774, + "learning_rate": 3.2408108011753105e-06, + "loss": 0.7231, + "step": 12181 + }, + { + "epoch": 0.6260663994244012, + "grad_norm": 1.1434369087219238, + "learning_rate": 3.2400317850014784e-06, + "loss": 0.7232, + "step": 12182 + }, + { + "epoch": 0.6261177921677459, + "grad_norm": 1.0482686758041382, + "learning_rate": 3.2392528175891037e-06, + "loss": 0.6872, + "step": 12183 + }, + { + "epoch": 0.6261691849110905, + "grad_norm": 1.1044414043426514, + "learning_rate": 3.238473898959767e-06, + "loss": 0.6891, + "step": 12184 + }, + { + "epoch": 0.6262205776544352, + "grad_norm": 1.0434261560440063, + "learning_rate": 3.2376950291350484e-06, + "loss": 0.7156, + "step": 12185 + }, + { + "epoch": 0.6262719703977798, + "grad_norm": 1.1340068578720093, + "learning_rate": 3.2369162081365284e-06, + "loss": 0.7372, + "step": 12186 + }, + { + "epoch": 0.6263233631411245, + "grad_norm": 1.112946629524231, + "learning_rate": 3.236137435985787e-06, + "loss": 0.7676, + "step": 12187 + }, + { + "epoch": 0.6263747558844691, + "grad_norm": 1.0489273071289062, + "learning_rate": 3.2353587127043953e-06, + "loss": 0.6939, + "step": 12188 + }, + { + "epoch": 0.6264261486278138, + "grad_norm": 1.125565528869629, + "learning_rate": 3.2345800383139313e-06, + "loss": 0.6963, + "step": 12189 + }, + { + "epoch": 0.6264775413711584, + "grad_norm": 1.1287851333618164, + "learning_rate": 3.2338014128359714e-06, + "loss": 0.6467, + "step": 12190 + }, + { + "epoch": 0.626528934114503, + "grad_norm": 1.1763123273849487, + "learning_rate": 3.2330228362920833e-06, + "loss": 0.7595, + "step": 12191 + }, + { + "epoch": 0.6265803268578477, + "grad_norm": 1.073868751525879, + "learning_rate": 3.2322443087038407e-06, + "loss": 0.7075, + "step": 12192 + }, + { + "epoch": 0.6266317196011923, + "grad_norm": 1.093248963356018, + "learning_rate": 3.2314658300928147e-06, + "loss": 0.7005, + "step": 12193 + }, + { + "epoch": 0.626683112344537, + "grad_norm": 0.8614458441734314, + "learning_rate": 3.2306874004805704e-06, + "loss": 0.673, + "step": 12194 + }, + { + "epoch": 0.6267345050878816, + "grad_norm": 1.0557905435562134, + "learning_rate": 3.2299090198886773e-06, + "loss": 0.7496, + "step": 12195 + }, + { + "epoch": 0.6267858978312262, + "grad_norm": 1.11032235622406, + "learning_rate": 3.229130688338702e-06, + "loss": 0.7063, + "step": 12196 + }, + { + "epoch": 0.6268372905745708, + "grad_norm": 1.1229689121246338, + "learning_rate": 3.2283524058522065e-06, + "loss": 0.7, + "step": 12197 + }, + { + "epoch": 0.6268886833179155, + "grad_norm": 1.124016523361206, + "learning_rate": 3.227574172450754e-06, + "loss": 0.7599, + "step": 12198 + }, + { + "epoch": 0.6269400760612601, + "grad_norm": 0.7309842705726624, + "learning_rate": 3.226795988155906e-06, + "loss": 0.6996, + "step": 12199 + }, + { + "epoch": 0.6269914688046048, + "grad_norm": 0.7533501386642456, + "learning_rate": 3.2260178529892266e-06, + "loss": 0.64, + "step": 12200 + }, + { + "epoch": 0.6270428615479494, + "grad_norm": 1.1044063568115234, + "learning_rate": 3.225239766972269e-06, + "loss": 0.6933, + "step": 12201 + }, + { + "epoch": 0.6270942542912941, + "grad_norm": 0.7195678949356079, + "learning_rate": 3.2244617301265947e-06, + "loss": 0.6747, + "step": 12202 + }, + { + "epoch": 0.6271456470346387, + "grad_norm": 1.1011396646499634, + "learning_rate": 3.223683742473758e-06, + "loss": 0.6973, + "step": 12203 + }, + { + "epoch": 0.6271970397779834, + "grad_norm": 1.1169767379760742, + "learning_rate": 3.2229058040353145e-06, + "loss": 0.7128, + "step": 12204 + }, + { + "epoch": 0.627248432521328, + "grad_norm": 0.6582478284835815, + "learning_rate": 3.222127914832817e-06, + "loss": 0.6523, + "step": 12205 + }, + { + "epoch": 0.6272998252646727, + "grad_norm": 1.0497746467590332, + "learning_rate": 3.2213500748878207e-06, + "loss": 0.6578, + "step": 12206 + }, + { + "epoch": 0.6273512180080173, + "grad_norm": 1.0629608631134033, + "learning_rate": 3.2205722842218714e-06, + "loss": 0.6894, + "step": 12207 + }, + { + "epoch": 0.6274026107513619, + "grad_norm": 1.060852289199829, + "learning_rate": 3.219794542856521e-06, + "loss": 0.7176, + "step": 12208 + }, + { + "epoch": 0.6274540034947066, + "grad_norm": 1.0877114534378052, + "learning_rate": 3.219016850813319e-06, + "loss": 0.7233, + "step": 12209 + }, + { + "epoch": 0.6275053962380512, + "grad_norm": 0.7119256854057312, + "learning_rate": 3.2182392081138085e-06, + "loss": 0.6441, + "step": 12210 + }, + { + "epoch": 0.6275567889813958, + "grad_norm": 1.1235225200653076, + "learning_rate": 3.2174616147795367e-06, + "loss": 0.7321, + "step": 12211 + }, + { + "epoch": 0.6276081817247404, + "grad_norm": 1.0369467735290527, + "learning_rate": 3.216684070832049e-06, + "loss": 0.7062, + "step": 12212 + }, + { + "epoch": 0.6276595744680851, + "grad_norm": 0.6881214380264282, + "learning_rate": 3.2159065762928842e-06, + "loss": 0.6475, + "step": 12213 + }, + { + "epoch": 0.6277109672114297, + "grad_norm": 1.074841856956482, + "learning_rate": 3.215129131183586e-06, + "loss": 0.7229, + "step": 12214 + }, + { + "epoch": 0.6277623599547744, + "grad_norm": 0.9904302954673767, + "learning_rate": 3.2143517355256958e-06, + "loss": 0.7517, + "step": 12215 + }, + { + "epoch": 0.627813752698119, + "grad_norm": 0.7632718682289124, + "learning_rate": 3.2135743893407497e-06, + "loss": 0.6763, + "step": 12216 + }, + { + "epoch": 0.6278651454414637, + "grad_norm": 1.0803008079528809, + "learning_rate": 3.2127970926502837e-06, + "loss": 0.6965, + "step": 12217 + }, + { + "epoch": 0.6279165381848083, + "grad_norm": 0.7880908846855164, + "learning_rate": 3.2120198454758354e-06, + "loss": 0.7054, + "step": 12218 + }, + { + "epoch": 0.627967930928153, + "grad_norm": 0.8806077241897583, + "learning_rate": 3.21124264783894e-06, + "loss": 0.6431, + "step": 12219 + }, + { + "epoch": 0.6280193236714976, + "grad_norm": 1.0449053049087524, + "learning_rate": 3.210465499761127e-06, + "loss": 0.6591, + "step": 12220 + }, + { + "epoch": 0.6280707164148422, + "grad_norm": 1.0450180768966675, + "learning_rate": 3.2096884012639305e-06, + "loss": 0.6619, + "step": 12221 + }, + { + "epoch": 0.6281221091581869, + "grad_norm": 1.065653681755066, + "learning_rate": 3.2089113523688824e-06, + "loss": 0.7304, + "step": 12222 + }, + { + "epoch": 0.6281735019015315, + "grad_norm": 0.7441000938415527, + "learning_rate": 3.2081343530975073e-06, + "loss": 0.6419, + "step": 12223 + }, + { + "epoch": 0.6282248946448762, + "grad_norm": 1.1724079847335815, + "learning_rate": 3.2073574034713346e-06, + "loss": 0.765, + "step": 12224 + }, + { + "epoch": 0.6282762873882208, + "grad_norm": 1.0313328504562378, + "learning_rate": 3.2065805035118913e-06, + "loss": 0.7139, + "step": 12225 + }, + { + "epoch": 0.6283276801315654, + "grad_norm": 0.7392985820770264, + "learning_rate": 3.2058036532407e-06, + "loss": 0.6963, + "step": 12226 + }, + { + "epoch": 0.62837907287491, + "grad_norm": 1.040402889251709, + "learning_rate": 3.2050268526792855e-06, + "loss": 0.7546, + "step": 12227 + }, + { + "epoch": 0.6284304656182547, + "grad_norm": 0.7218077778816223, + "learning_rate": 3.204250101849171e-06, + "loss": 0.6574, + "step": 12228 + }, + { + "epoch": 0.6284818583615993, + "grad_norm": 1.0274429321289062, + "learning_rate": 3.203473400771875e-06, + "loss": 0.7419, + "step": 12229 + }, + { + "epoch": 0.628533251104944, + "grad_norm": 1.0963010787963867, + "learning_rate": 3.202696749468916e-06, + "loss": 0.6768, + "step": 12230 + }, + { + "epoch": 0.6285846438482886, + "grad_norm": 1.097629189491272, + "learning_rate": 3.2019201479618132e-06, + "loss": 0.7057, + "step": 12231 + }, + { + "epoch": 0.6286360365916333, + "grad_norm": 1.1337676048278809, + "learning_rate": 3.201143596272085e-06, + "loss": 0.7498, + "step": 12232 + }, + { + "epoch": 0.6286874293349779, + "grad_norm": 1.0891634225845337, + "learning_rate": 3.2003670944212424e-06, + "loss": 0.7187, + "step": 12233 + }, + { + "epoch": 0.6287388220783225, + "grad_norm": 1.0536798238754272, + "learning_rate": 3.199590642430801e-06, + "loss": 0.7157, + "step": 12234 + }, + { + "epoch": 0.6287902148216672, + "grad_norm": 1.0377498865127563, + "learning_rate": 3.1988142403222738e-06, + "loss": 0.7957, + "step": 12235 + }, + { + "epoch": 0.6288416075650118, + "grad_norm": 0.9967600107192993, + "learning_rate": 3.1980378881171704e-06, + "loss": 0.7272, + "step": 12236 + }, + { + "epoch": 0.6288930003083565, + "grad_norm": 1.0806214809417725, + "learning_rate": 3.1972615858370004e-06, + "loss": 0.7157, + "step": 12237 + }, + { + "epoch": 0.6289443930517011, + "grad_norm": 1.0752514600753784, + "learning_rate": 3.1964853335032742e-06, + "loss": 0.666, + "step": 12238 + }, + { + "epoch": 0.6289957857950458, + "grad_norm": 0.7343825697898865, + "learning_rate": 3.1957091311374954e-06, + "loss": 0.6329, + "step": 12239 + }, + { + "epoch": 0.6290471785383904, + "grad_norm": 1.0848947763442993, + "learning_rate": 3.1949329787611697e-06, + "loss": 0.7663, + "step": 12240 + }, + { + "epoch": 0.629098571281735, + "grad_norm": 1.051859974861145, + "learning_rate": 3.1941568763958055e-06, + "loss": 0.7091, + "step": 12241 + }, + { + "epoch": 0.6291499640250796, + "grad_norm": 1.113353967666626, + "learning_rate": 3.1933808240628987e-06, + "loss": 0.7169, + "step": 12242 + }, + { + "epoch": 0.6292013567684243, + "grad_norm": 1.1560386419296265, + "learning_rate": 3.1926048217839546e-06, + "loss": 0.7588, + "step": 12243 + }, + { + "epoch": 0.6292527495117689, + "grad_norm": 0.9234907031059265, + "learning_rate": 3.191828869580472e-06, + "loss": 0.6523, + "step": 12244 + }, + { + "epoch": 0.6293041422551136, + "grad_norm": 1.1048601865768433, + "learning_rate": 3.1910529674739505e-06, + "loss": 0.7119, + "step": 12245 + }, + { + "epoch": 0.6293555349984582, + "grad_norm": 1.0674775838851929, + "learning_rate": 3.1902771154858846e-06, + "loss": 0.7261, + "step": 12246 + }, + { + "epoch": 0.6294069277418028, + "grad_norm": 1.0577160120010376, + "learning_rate": 3.1895013136377727e-06, + "loss": 0.7175, + "step": 12247 + }, + { + "epoch": 0.6294583204851475, + "grad_norm": 1.0778430700302124, + "learning_rate": 3.188725561951109e-06, + "loss": 0.6769, + "step": 12248 + }, + { + "epoch": 0.6295097132284921, + "grad_norm": 1.151700496673584, + "learning_rate": 3.1879498604473834e-06, + "loss": 0.7271, + "step": 12249 + }, + { + "epoch": 0.6295611059718368, + "grad_norm": 1.0111408233642578, + "learning_rate": 3.1871742091480894e-06, + "loss": 0.7257, + "step": 12250 + }, + { + "epoch": 0.6296124987151814, + "grad_norm": 1.1598451137542725, + "learning_rate": 3.1863986080747195e-06, + "loss": 0.7573, + "step": 12251 + }, + { + "epoch": 0.6296638914585261, + "grad_norm": 1.0505614280700684, + "learning_rate": 3.185623057248757e-06, + "loss": 0.6984, + "step": 12252 + }, + { + "epoch": 0.6297152842018707, + "grad_norm": 1.0693230628967285, + "learning_rate": 3.1848475566916927e-06, + "loss": 0.7161, + "step": 12253 + }, + { + "epoch": 0.6297666769452154, + "grad_norm": 1.1077351570129395, + "learning_rate": 3.184072106425013e-06, + "loss": 0.7347, + "step": 12254 + }, + { + "epoch": 0.62981806968856, + "grad_norm": 0.740250825881958, + "learning_rate": 3.1832967064702005e-06, + "loss": 0.6568, + "step": 12255 + }, + { + "epoch": 0.6298694624319047, + "grad_norm": 1.1873940229415894, + "learning_rate": 3.1825213568487384e-06, + "loss": 0.7406, + "step": 12256 + }, + { + "epoch": 0.6299208551752492, + "grad_norm": 1.1760528087615967, + "learning_rate": 3.181746057582111e-06, + "loss": 0.7124, + "step": 12257 + }, + { + "epoch": 0.6299722479185939, + "grad_norm": 1.2016164064407349, + "learning_rate": 3.1809708086917956e-06, + "loss": 0.7082, + "step": 12258 + }, + { + "epoch": 0.6300236406619385, + "grad_norm": 1.0883525609970093, + "learning_rate": 3.1801956101992726e-06, + "loss": 0.7478, + "step": 12259 + }, + { + "epoch": 0.6300750334052831, + "grad_norm": 1.0923559665679932, + "learning_rate": 3.179420462126019e-06, + "loss": 0.7054, + "step": 12260 + }, + { + "epoch": 0.6301264261486278, + "grad_norm": 1.0236154794692993, + "learning_rate": 3.1786453644935133e-06, + "loss": 0.7072, + "step": 12261 + }, + { + "epoch": 0.6301778188919724, + "grad_norm": 1.0006366968154907, + "learning_rate": 3.177870317323226e-06, + "loss": 0.7259, + "step": 12262 + }, + { + "epoch": 0.6302292116353171, + "grad_norm": 0.9935770034790039, + "learning_rate": 3.177095320636633e-06, + "loss": 0.6984, + "step": 12263 + }, + { + "epoch": 0.6302806043786617, + "grad_norm": 1.1000913381576538, + "learning_rate": 3.1763203744552073e-06, + "loss": 0.7252, + "step": 12264 + }, + { + "epoch": 0.6303319971220064, + "grad_norm": 1.060997724533081, + "learning_rate": 3.1755454788004164e-06, + "loss": 0.7047, + "step": 12265 + }, + { + "epoch": 0.630383389865351, + "grad_norm": 1.0514204502105713, + "learning_rate": 3.174770633693733e-06, + "loss": 0.7194, + "step": 12266 + }, + { + "epoch": 0.6304347826086957, + "grad_norm": 1.1313573122024536, + "learning_rate": 3.173995839156622e-06, + "loss": 0.7213, + "step": 12267 + }, + { + "epoch": 0.6304861753520403, + "grad_norm": 0.7685599327087402, + "learning_rate": 3.1732210952105507e-06, + "loss": 0.6586, + "step": 12268 + }, + { + "epoch": 0.630537568095385, + "grad_norm": 1.104049563407898, + "learning_rate": 3.172446401876984e-06, + "loss": 0.7338, + "step": 12269 + }, + { + "epoch": 0.6305889608387296, + "grad_norm": 0.7544822096824646, + "learning_rate": 3.1716717591773875e-06, + "loss": 0.678, + "step": 12270 + }, + { + "epoch": 0.6306403535820743, + "grad_norm": 1.0510345697402954, + "learning_rate": 3.17089716713322e-06, + "loss": 0.6371, + "step": 12271 + }, + { + "epoch": 0.6306917463254188, + "grad_norm": 1.0860135555267334, + "learning_rate": 3.1701226257659433e-06, + "loss": 0.7147, + "step": 12272 + }, + { + "epoch": 0.6307431390687634, + "grad_norm": 0.7083735466003418, + "learning_rate": 3.169348135097018e-06, + "loss": 0.6583, + "step": 12273 + }, + { + "epoch": 0.6307945318121081, + "grad_norm": 1.051897644996643, + "learning_rate": 3.1685736951479017e-06, + "loss": 0.7145, + "step": 12274 + }, + { + "epoch": 0.6308459245554527, + "grad_norm": 0.7621341943740845, + "learning_rate": 3.1677993059400493e-06, + "loss": 0.6613, + "step": 12275 + }, + { + "epoch": 0.6308973172987974, + "grad_norm": 1.1170969009399414, + "learning_rate": 3.1670249674949167e-06, + "loss": 0.7488, + "step": 12276 + }, + { + "epoch": 0.630948710042142, + "grad_norm": 1.0387990474700928, + "learning_rate": 3.1662506798339586e-06, + "loss": 0.7166, + "step": 12277 + }, + { + "epoch": 0.6310001027854867, + "grad_norm": 1.0262210369110107, + "learning_rate": 3.1654764429786266e-06, + "loss": 0.7043, + "step": 12278 + }, + { + "epoch": 0.6310514955288313, + "grad_norm": 1.1153687238693237, + "learning_rate": 3.1647022569503715e-06, + "loss": 0.712, + "step": 12279 + }, + { + "epoch": 0.631102888272176, + "grad_norm": 1.086102843284607, + "learning_rate": 3.163928121770645e-06, + "loss": 0.6968, + "step": 12280 + }, + { + "epoch": 0.6311542810155206, + "grad_norm": 0.7293244004249573, + "learning_rate": 3.1631540374608915e-06, + "loss": 0.6465, + "step": 12281 + }, + { + "epoch": 0.6312056737588653, + "grad_norm": 1.1921792030334473, + "learning_rate": 3.1623800040425595e-06, + "loss": 0.7396, + "step": 12282 + }, + { + "epoch": 0.6312570665022099, + "grad_norm": 1.1262049674987793, + "learning_rate": 3.161606021537096e-06, + "loss": 0.7765, + "step": 12283 + }, + { + "epoch": 0.6313084592455546, + "grad_norm": 1.0663120746612549, + "learning_rate": 3.1608320899659404e-06, + "loss": 0.679, + "step": 12284 + }, + { + "epoch": 0.6313598519888992, + "grad_norm": 1.103186845779419, + "learning_rate": 3.160058209350538e-06, + "loss": 0.7311, + "step": 12285 + }, + { + "epoch": 0.6314112447322439, + "grad_norm": 1.039108395576477, + "learning_rate": 3.159284379712332e-06, + "loss": 0.6872, + "step": 12286 + }, + { + "epoch": 0.6314626374755884, + "grad_norm": 1.0961426496505737, + "learning_rate": 3.1585106010727573e-06, + "loss": 0.7149, + "step": 12287 + }, + { + "epoch": 0.631514030218933, + "grad_norm": 1.1371374130249023, + "learning_rate": 3.157736873453254e-06, + "loss": 0.6906, + "step": 12288 + }, + { + "epoch": 0.6315654229622777, + "grad_norm": 1.0854449272155762, + "learning_rate": 3.1569631968752624e-06, + "loss": 0.7419, + "step": 12289 + }, + { + "epoch": 0.6316168157056223, + "grad_norm": 0.7221952676773071, + "learning_rate": 3.156189571360213e-06, + "loss": 0.6479, + "step": 12290 + }, + { + "epoch": 0.631668208448967, + "grad_norm": 1.178221344947815, + "learning_rate": 3.155415996929541e-06, + "loss": 0.7276, + "step": 12291 + }, + { + "epoch": 0.6317196011923116, + "grad_norm": 1.1213458776474, + "learning_rate": 3.15464247360468e-06, + "loss": 0.7578, + "step": 12292 + }, + { + "epoch": 0.6317709939356563, + "grad_norm": 1.0248063802719116, + "learning_rate": 3.153869001407062e-06, + "loss": 0.6945, + "step": 12293 + }, + { + "epoch": 0.6318223866790009, + "grad_norm": 1.091252088546753, + "learning_rate": 3.153095580358114e-06, + "loss": 0.7276, + "step": 12294 + }, + { + "epoch": 0.6318737794223456, + "grad_norm": 1.045844554901123, + "learning_rate": 3.1523222104792662e-06, + "loss": 0.7373, + "step": 12295 + }, + { + "epoch": 0.6319251721656902, + "grad_norm": 0.9801787734031677, + "learning_rate": 3.151548891791945e-06, + "loss": 0.6685, + "step": 12296 + }, + { + "epoch": 0.6319765649090349, + "grad_norm": 0.9949115514755249, + "learning_rate": 3.1507756243175756e-06, + "loss": 0.6437, + "step": 12297 + }, + { + "epoch": 0.6320279576523795, + "grad_norm": 1.0982470512390137, + "learning_rate": 3.150002408077582e-06, + "loss": 0.7014, + "step": 12298 + }, + { + "epoch": 0.6320793503957242, + "grad_norm": 1.1212944984436035, + "learning_rate": 3.149229243093389e-06, + "loss": 0.735, + "step": 12299 + }, + { + "epoch": 0.6321307431390688, + "grad_norm": 1.0576363801956177, + "learning_rate": 3.1484561293864147e-06, + "loss": 0.7263, + "step": 12300 + }, + { + "epoch": 0.6321821358824135, + "grad_norm": 1.025613784790039, + "learning_rate": 3.14768306697808e-06, + "loss": 0.7059, + "step": 12301 + }, + { + "epoch": 0.632233528625758, + "grad_norm": 0.7626006007194519, + "learning_rate": 3.146910055889805e-06, + "loss": 0.6293, + "step": 12302 + }, + { + "epoch": 0.6322849213691026, + "grad_norm": 1.1101508140563965, + "learning_rate": 3.146137096143004e-06, + "loss": 0.6968, + "step": 12303 + }, + { + "epoch": 0.6323363141124473, + "grad_norm": 1.0245519876480103, + "learning_rate": 3.1453641877590925e-06, + "loss": 0.6852, + "step": 12304 + }, + { + "epoch": 0.6323877068557919, + "grad_norm": 1.0685632228851318, + "learning_rate": 3.144591330759487e-06, + "loss": 0.77, + "step": 12305 + }, + { + "epoch": 0.6324390995991366, + "grad_norm": 1.1563650369644165, + "learning_rate": 3.1438185251655984e-06, + "loss": 0.713, + "step": 12306 + }, + { + "epoch": 0.6324904923424812, + "grad_norm": 0.8291066884994507, + "learning_rate": 3.1430457709988382e-06, + "loss": 0.6822, + "step": 12307 + }, + { + "epoch": 0.6325418850858259, + "grad_norm": 1.0921175479888916, + "learning_rate": 3.142273068280616e-06, + "loss": 0.7633, + "step": 12308 + }, + { + "epoch": 0.6325932778291705, + "grad_norm": 1.0751779079437256, + "learning_rate": 3.1415004170323436e-06, + "loss": 0.7016, + "step": 12309 + }, + { + "epoch": 0.6326446705725152, + "grad_norm": 1.0517675876617432, + "learning_rate": 3.1407278172754224e-06, + "loss": 0.6748, + "step": 12310 + }, + { + "epoch": 0.6326960633158598, + "grad_norm": 0.849648654460907, + "learning_rate": 3.139955269031261e-06, + "loss": 0.6555, + "step": 12311 + }, + { + "epoch": 0.6327474560592045, + "grad_norm": 1.071668028831482, + "learning_rate": 3.1391827723212653e-06, + "loss": 0.7138, + "step": 12312 + }, + { + "epoch": 0.6327988488025491, + "grad_norm": 1.1235990524291992, + "learning_rate": 3.138410327166833e-06, + "loss": 0.7671, + "step": 12313 + }, + { + "epoch": 0.6328502415458938, + "grad_norm": 1.0338060855865479, + "learning_rate": 3.137637933589369e-06, + "loss": 0.7044, + "step": 12314 + }, + { + "epoch": 0.6329016342892384, + "grad_norm": 1.044932246208191, + "learning_rate": 3.136865591610273e-06, + "loss": 0.6394, + "step": 12315 + }, + { + "epoch": 0.632953027032583, + "grad_norm": 1.049924373626709, + "learning_rate": 3.136093301250942e-06, + "loss": 0.74, + "step": 12316 + }, + { + "epoch": 0.6330044197759276, + "grad_norm": 1.0535130500793457, + "learning_rate": 3.1353210625327747e-06, + "loss": 0.6791, + "step": 12317 + }, + { + "epoch": 0.6330558125192722, + "grad_norm": 1.1084398031234741, + "learning_rate": 3.134548875477165e-06, + "loss": 0.7722, + "step": 12318 + }, + { + "epoch": 0.6331072052626169, + "grad_norm": 1.0418457984924316, + "learning_rate": 3.1337767401055076e-06, + "loss": 0.6999, + "step": 12319 + }, + { + "epoch": 0.6331585980059615, + "grad_norm": 1.0596643686294556, + "learning_rate": 3.133004656439195e-06, + "loss": 0.6816, + "step": 12320 + }, + { + "epoch": 0.6332099907493062, + "grad_norm": 1.0481611490249634, + "learning_rate": 3.1322326244996203e-06, + "loss": 0.6726, + "step": 12321 + }, + { + "epoch": 0.6332613834926508, + "grad_norm": 1.1281737089157104, + "learning_rate": 3.13146064430817e-06, + "loss": 0.684, + "step": 12322 + }, + { + "epoch": 0.6333127762359955, + "grad_norm": 1.089752435684204, + "learning_rate": 3.1306887158862347e-06, + "loss": 0.6844, + "step": 12323 + }, + { + "epoch": 0.6333641689793401, + "grad_norm": 1.1281038522720337, + "learning_rate": 3.1299168392552005e-06, + "loss": 0.6907, + "step": 12324 + }, + { + "epoch": 0.6334155617226848, + "grad_norm": 1.0633339881896973, + "learning_rate": 3.1291450144364556e-06, + "loss": 0.7193, + "step": 12325 + }, + { + "epoch": 0.6334669544660294, + "grad_norm": 1.1600886583328247, + "learning_rate": 3.128373241451379e-06, + "loss": 0.6896, + "step": 12326 + }, + { + "epoch": 0.633518347209374, + "grad_norm": 1.0765910148620605, + "learning_rate": 3.127601520321357e-06, + "loss": 0.743, + "step": 12327 + }, + { + "epoch": 0.6335697399527187, + "grad_norm": 0.7219448089599609, + "learning_rate": 3.126829851067771e-06, + "loss": 0.6429, + "step": 12328 + }, + { + "epoch": 0.6336211326960633, + "grad_norm": 1.0843355655670166, + "learning_rate": 3.126058233711998e-06, + "loss": 0.7596, + "step": 12329 + }, + { + "epoch": 0.633672525439408, + "grad_norm": 1.1019511222839355, + "learning_rate": 3.1252866682754177e-06, + "loss": 0.7249, + "step": 12330 + }, + { + "epoch": 0.6337239181827526, + "grad_norm": 1.0878782272338867, + "learning_rate": 3.1245151547794105e-06, + "loss": 0.7444, + "step": 12331 + }, + { + "epoch": 0.6337753109260973, + "grad_norm": 1.104278802871704, + "learning_rate": 3.1237436932453463e-06, + "loss": 0.7305, + "step": 12332 + }, + { + "epoch": 0.6338267036694418, + "grad_norm": 1.122657299041748, + "learning_rate": 3.1229722836946013e-06, + "loss": 0.704, + "step": 12333 + }, + { + "epoch": 0.6338780964127865, + "grad_norm": 0.7560040950775146, + "learning_rate": 3.122200926148551e-06, + "loss": 0.7078, + "step": 12334 + }, + { + "epoch": 0.6339294891561311, + "grad_norm": 1.0697526931762695, + "learning_rate": 3.121429620628561e-06, + "loss": 0.6981, + "step": 12335 + }, + { + "epoch": 0.6339808818994758, + "grad_norm": 1.084586262702942, + "learning_rate": 3.120658367156004e-06, + "loss": 0.7386, + "step": 12336 + }, + { + "epoch": 0.6340322746428204, + "grad_norm": 1.1103061437606812, + "learning_rate": 3.119887165752249e-06, + "loss": 0.7629, + "step": 12337 + }, + { + "epoch": 0.6340836673861651, + "grad_norm": 1.0343531370162964, + "learning_rate": 3.119116016438661e-06, + "loss": 0.7086, + "step": 12338 + }, + { + "epoch": 0.6341350601295097, + "grad_norm": 1.0980199575424194, + "learning_rate": 3.1183449192366054e-06, + "loss": 0.6996, + "step": 12339 + }, + { + "epoch": 0.6341864528728544, + "grad_norm": 1.092248558998108, + "learning_rate": 3.117573874167448e-06, + "loss": 0.7276, + "step": 12340 + }, + { + "epoch": 0.634237845616199, + "grad_norm": 1.055367112159729, + "learning_rate": 3.116802881252552e-06, + "loss": 0.7339, + "step": 12341 + }, + { + "epoch": 0.6342892383595437, + "grad_norm": 1.0575984716415405, + "learning_rate": 3.116031940513275e-06, + "loss": 0.7191, + "step": 12342 + }, + { + "epoch": 0.6343406311028883, + "grad_norm": 1.083843469619751, + "learning_rate": 3.115261051970978e-06, + "loss": 0.7353, + "step": 12343 + }, + { + "epoch": 0.6343920238462329, + "grad_norm": 1.0618653297424316, + "learning_rate": 3.1144902156470215e-06, + "loss": 0.6522, + "step": 12344 + }, + { + "epoch": 0.6344434165895776, + "grad_norm": 1.0892795324325562, + "learning_rate": 3.113719431562757e-06, + "loss": 0.6793, + "step": 12345 + }, + { + "epoch": 0.6344948093329222, + "grad_norm": 1.1061733961105347, + "learning_rate": 3.112948699739544e-06, + "loss": 0.7193, + "step": 12346 + }, + { + "epoch": 0.6345462020762669, + "grad_norm": 0.6837403178215027, + "learning_rate": 3.1121780201987363e-06, + "loss": 0.6627, + "step": 12347 + }, + { + "epoch": 0.6345975948196114, + "grad_norm": 1.0572859048843384, + "learning_rate": 3.1114073929616828e-06, + "loss": 0.7062, + "step": 12348 + }, + { + "epoch": 0.6346489875629561, + "grad_norm": 0.7688980102539062, + "learning_rate": 3.110636818049738e-06, + "loss": 0.6447, + "step": 12349 + }, + { + "epoch": 0.6347003803063007, + "grad_norm": 1.0971711874008179, + "learning_rate": 3.1098662954842516e-06, + "loss": 0.7202, + "step": 12350 + }, + { + "epoch": 0.6347517730496454, + "grad_norm": 1.0794202089309692, + "learning_rate": 3.1090958252865678e-06, + "loss": 0.6876, + "step": 12351 + }, + { + "epoch": 0.63480316579299, + "grad_norm": 0.7854575514793396, + "learning_rate": 3.1083254074780356e-06, + "loss": 0.6513, + "step": 12352 + }, + { + "epoch": 0.6348545585363347, + "grad_norm": 1.058131456375122, + "learning_rate": 3.107555042079999e-06, + "loss": 0.744, + "step": 12353 + }, + { + "epoch": 0.6349059512796793, + "grad_norm": 1.0140466690063477, + "learning_rate": 3.106784729113806e-06, + "loss": 0.6823, + "step": 12354 + }, + { + "epoch": 0.634957344023024, + "grad_norm": 1.037102460861206, + "learning_rate": 3.1060144686007924e-06, + "loss": 0.6974, + "step": 12355 + }, + { + "epoch": 0.6350087367663686, + "grad_norm": 1.0821844339370728, + "learning_rate": 3.1052442605623024e-06, + "loss": 0.7417, + "step": 12356 + }, + { + "epoch": 0.6350601295097132, + "grad_norm": 0.7502889037132263, + "learning_rate": 3.1044741050196754e-06, + "loss": 0.6611, + "step": 12357 + }, + { + "epoch": 0.6351115222530579, + "grad_norm": 1.0920366048812866, + "learning_rate": 3.1037040019942477e-06, + "loss": 0.7342, + "step": 12358 + }, + { + "epoch": 0.6351629149964025, + "grad_norm": 1.055985450744629, + "learning_rate": 3.1029339515073566e-06, + "loss": 0.7065, + "step": 12359 + }, + { + "epoch": 0.6352143077397472, + "grad_norm": 1.0918570756912231, + "learning_rate": 3.1021639535803384e-06, + "loss": 0.6966, + "step": 12360 + }, + { + "epoch": 0.6352657004830918, + "grad_norm": 1.1720868349075317, + "learning_rate": 3.101394008234524e-06, + "loss": 0.7501, + "step": 12361 + }, + { + "epoch": 0.6353170932264365, + "grad_norm": 1.07143235206604, + "learning_rate": 3.100624115491246e-06, + "loss": 0.7181, + "step": 12362 + }, + { + "epoch": 0.635368485969781, + "grad_norm": 1.1465545892715454, + "learning_rate": 3.0998542753718373e-06, + "loss": 0.7531, + "step": 12363 + }, + { + "epoch": 0.6354198787131257, + "grad_norm": 1.1917341947555542, + "learning_rate": 3.099084487897624e-06, + "loss": 0.7211, + "step": 12364 + }, + { + "epoch": 0.6354712714564703, + "grad_norm": 0.9880145788192749, + "learning_rate": 3.0983147530899342e-06, + "loss": 0.6872, + "step": 12365 + }, + { + "epoch": 0.635522664199815, + "grad_norm": 1.0699357986450195, + "learning_rate": 3.097545070970096e-06, + "loss": 0.7496, + "step": 12366 + }, + { + "epoch": 0.6355740569431596, + "grad_norm": 1.0874959230422974, + "learning_rate": 3.096775441559432e-06, + "loss": 0.7314, + "step": 12367 + }, + { + "epoch": 0.6356254496865043, + "grad_norm": 1.0823090076446533, + "learning_rate": 3.0960058648792674e-06, + "loss": 0.7366, + "step": 12368 + }, + { + "epoch": 0.6356768424298489, + "grad_norm": 1.1026524305343628, + "learning_rate": 3.095236340950922e-06, + "loss": 0.7375, + "step": 12369 + }, + { + "epoch": 0.6357282351731935, + "grad_norm": 1.057992935180664, + "learning_rate": 3.094466869795718e-06, + "loss": 0.7029, + "step": 12370 + }, + { + "epoch": 0.6357796279165382, + "grad_norm": 1.031686782836914, + "learning_rate": 3.093697451434973e-06, + "loss": 0.6818, + "step": 12371 + }, + { + "epoch": 0.6358310206598828, + "grad_norm": 1.0793780088424683, + "learning_rate": 3.0929280858900035e-06, + "loss": 0.7, + "step": 12372 + }, + { + "epoch": 0.6358824134032275, + "grad_norm": 1.1831921339035034, + "learning_rate": 3.092158773182129e-06, + "loss": 0.6919, + "step": 12373 + }, + { + "epoch": 0.6359338061465721, + "grad_norm": 1.093214988708496, + "learning_rate": 3.0913895133326605e-06, + "loss": 0.704, + "step": 12374 + }, + { + "epoch": 0.6359851988899168, + "grad_norm": 1.0415292978286743, + "learning_rate": 3.0906203063629116e-06, + "loss": 0.7256, + "step": 12375 + }, + { + "epoch": 0.6360365916332614, + "grad_norm": 1.0699998140335083, + "learning_rate": 3.0898511522941967e-06, + "loss": 0.6724, + "step": 12376 + }, + { + "epoch": 0.6360879843766061, + "grad_norm": 1.642176628112793, + "learning_rate": 3.0890820511478216e-06, + "loss": 0.6798, + "step": 12377 + }, + { + "epoch": 0.6361393771199506, + "grad_norm": 0.8243094086647034, + "learning_rate": 3.088313002945097e-06, + "loss": 0.6767, + "step": 12378 + }, + { + "epoch": 0.6361907698632953, + "grad_norm": 1.0829601287841797, + "learning_rate": 3.087544007707331e-06, + "loss": 0.7699, + "step": 12379 + }, + { + "epoch": 0.6362421626066399, + "grad_norm": 1.046200156211853, + "learning_rate": 3.086775065455827e-06, + "loss": 0.7258, + "step": 12380 + }, + { + "epoch": 0.6362935553499846, + "grad_norm": 1.100565791130066, + "learning_rate": 3.086006176211891e-06, + "loss": 0.7918, + "step": 12381 + }, + { + "epoch": 0.6363449480933292, + "grad_norm": 1.0411874055862427, + "learning_rate": 3.085237339996827e-06, + "loss": 0.6958, + "step": 12382 + }, + { + "epoch": 0.6363963408366738, + "grad_norm": 1.1154569387435913, + "learning_rate": 3.0844685568319326e-06, + "loss": 0.7152, + "step": 12383 + }, + { + "epoch": 0.6364477335800185, + "grad_norm": 1.148619532585144, + "learning_rate": 3.08369982673851e-06, + "loss": 0.6937, + "step": 12384 + }, + { + "epoch": 0.6364991263233631, + "grad_norm": 1.121852993965149, + "learning_rate": 3.0829311497378567e-06, + "loss": 0.8027, + "step": 12385 + }, + { + "epoch": 0.6365505190667078, + "grad_norm": 1.1024125814437866, + "learning_rate": 3.0821625258512717e-06, + "loss": 0.6982, + "step": 12386 + }, + { + "epoch": 0.6366019118100524, + "grad_norm": 1.1549736261367798, + "learning_rate": 3.0813939551000473e-06, + "loss": 0.7525, + "step": 12387 + }, + { + "epoch": 0.6366533045533971, + "grad_norm": 0.7429879307746887, + "learning_rate": 3.080625437505478e-06, + "loss": 0.6756, + "step": 12388 + }, + { + "epoch": 0.6367046972967417, + "grad_norm": 1.0803486108779907, + "learning_rate": 3.0798569730888584e-06, + "loss": 0.6759, + "step": 12389 + }, + { + "epoch": 0.6367560900400864, + "grad_norm": 0.711780309677124, + "learning_rate": 3.0790885618714773e-06, + "loss": 0.6577, + "step": 12390 + }, + { + "epoch": 0.636807482783431, + "grad_norm": 1.0022251605987549, + "learning_rate": 3.0783202038746246e-06, + "loss": 0.6691, + "step": 12391 + }, + { + "epoch": 0.6368588755267757, + "grad_norm": 1.1240925788879395, + "learning_rate": 3.0775518991195905e-06, + "loss": 0.7406, + "step": 12392 + }, + { + "epoch": 0.6369102682701202, + "grad_norm": 1.0533243417739868, + "learning_rate": 3.076783647627659e-06, + "loss": 0.725, + "step": 12393 + }, + { + "epoch": 0.6369616610134649, + "grad_norm": 1.2586220502853394, + "learning_rate": 3.0760154494201155e-06, + "loss": 0.6959, + "step": 12394 + }, + { + "epoch": 0.6370130537568095, + "grad_norm": 0.7132500410079956, + "learning_rate": 3.075247304518245e-06, + "loss": 0.6668, + "step": 12395 + }, + { + "epoch": 0.6370644465001541, + "grad_norm": 1.1156556606292725, + "learning_rate": 3.0744792129433282e-06, + "loss": 0.7514, + "step": 12396 + }, + { + "epoch": 0.6371158392434988, + "grad_norm": 1.101128101348877, + "learning_rate": 3.0737111747166458e-06, + "loss": 0.7287, + "step": 12397 + }, + { + "epoch": 0.6371672319868434, + "grad_norm": 1.0426234006881714, + "learning_rate": 3.072943189859478e-06, + "loss": 0.6906, + "step": 12398 + }, + { + "epoch": 0.6372186247301881, + "grad_norm": 1.116647481918335, + "learning_rate": 3.072175258393101e-06, + "loss": 0.713, + "step": 12399 + }, + { + "epoch": 0.6372700174735327, + "grad_norm": 1.0076799392700195, + "learning_rate": 3.0714073803387925e-06, + "loss": 0.7029, + "step": 12400 + }, + { + "epoch": 0.6373214102168774, + "grad_norm": 1.0805941820144653, + "learning_rate": 3.070639555717826e-06, + "loss": 0.7455, + "step": 12401 + }, + { + "epoch": 0.637372802960222, + "grad_norm": 0.8590505719184875, + "learning_rate": 3.069871784551478e-06, + "loss": 0.6603, + "step": 12402 + }, + { + "epoch": 0.6374241957035667, + "grad_norm": 0.7081646919250488, + "learning_rate": 3.069104066861015e-06, + "loss": 0.6401, + "step": 12403 + }, + { + "epoch": 0.6374755884469113, + "grad_norm": 1.0418975353240967, + "learning_rate": 3.06833640266771e-06, + "loss": 0.7051, + "step": 12404 + }, + { + "epoch": 0.637526981190256, + "grad_norm": 1.1633358001708984, + "learning_rate": 3.0675687919928345e-06, + "loss": 0.7013, + "step": 12405 + }, + { + "epoch": 0.6375783739336006, + "grad_norm": 1.263550877571106, + "learning_rate": 3.0668012348576505e-06, + "loss": 0.6992, + "step": 12406 + }, + { + "epoch": 0.6376297666769453, + "grad_norm": 1.0553500652313232, + "learning_rate": 3.0660337312834266e-06, + "loss": 0.7252, + "step": 12407 + }, + { + "epoch": 0.6376811594202898, + "grad_norm": 1.0750170946121216, + "learning_rate": 3.065266281291428e-06, + "loss": 0.7128, + "step": 12408 + }, + { + "epoch": 0.6377325521636344, + "grad_norm": 1.058333158493042, + "learning_rate": 3.0644988849029158e-06, + "loss": 0.7188, + "step": 12409 + }, + { + "epoch": 0.6377839449069791, + "grad_norm": 1.1271239519119263, + "learning_rate": 3.063731542139152e-06, + "loss": 0.6908, + "step": 12410 + }, + { + "epoch": 0.6378353376503237, + "grad_norm": 1.1228219270706177, + "learning_rate": 3.0629642530213986e-06, + "loss": 0.7446, + "step": 12411 + }, + { + "epoch": 0.6378867303936684, + "grad_norm": 1.0554416179656982, + "learning_rate": 3.0621970175709105e-06, + "loss": 0.7341, + "step": 12412 + }, + { + "epoch": 0.637938123137013, + "grad_norm": 0.7691937685012817, + "learning_rate": 3.0614298358089456e-06, + "loss": 0.6275, + "step": 12413 + }, + { + "epoch": 0.6379895158803577, + "grad_norm": 1.3259146213531494, + "learning_rate": 3.060662707756762e-06, + "loss": 0.6843, + "step": 12414 + }, + { + "epoch": 0.6380409086237023, + "grad_norm": 0.6934236884117126, + "learning_rate": 3.05989563343561e-06, + "loss": 0.6542, + "step": 12415 + }, + { + "epoch": 0.638092301367047, + "grad_norm": 1.0731662511825562, + "learning_rate": 3.059128612866743e-06, + "loss": 0.7318, + "step": 12416 + }, + { + "epoch": 0.6381436941103916, + "grad_norm": 0.8232048153877258, + "learning_rate": 3.0583616460714127e-06, + "loss": 0.6152, + "step": 12417 + }, + { + "epoch": 0.6381950868537363, + "grad_norm": 0.7083306908607483, + "learning_rate": 3.05759473307087e-06, + "loss": 0.6502, + "step": 12418 + }, + { + "epoch": 0.6382464795970809, + "grad_norm": 1.112864375114441, + "learning_rate": 3.0568278738863614e-06, + "loss": 0.7173, + "step": 12419 + }, + { + "epoch": 0.6382978723404256, + "grad_norm": 1.088829517364502, + "learning_rate": 3.0560610685391323e-06, + "loss": 0.7119, + "step": 12420 + }, + { + "epoch": 0.6383492650837702, + "grad_norm": 1.0680503845214844, + "learning_rate": 3.05529431705043e-06, + "loss": 0.691, + "step": 12421 + }, + { + "epoch": 0.6384006578271149, + "grad_norm": 1.0511410236358643, + "learning_rate": 3.0545276194414952e-06, + "loss": 0.6856, + "step": 12422 + }, + { + "epoch": 0.6384520505704595, + "grad_norm": 1.0105831623077393, + "learning_rate": 3.0537609757335723e-06, + "loss": 0.6536, + "step": 12423 + }, + { + "epoch": 0.638503443313804, + "grad_norm": 0.7362377047538757, + "learning_rate": 3.052994385947903e-06, + "loss": 0.668, + "step": 12424 + }, + { + "epoch": 0.6385548360571487, + "grad_norm": 1.0641356706619263, + "learning_rate": 3.052227850105722e-06, + "loss": 0.6692, + "step": 12425 + }, + { + "epoch": 0.6386062288004933, + "grad_norm": 1.0279804468154907, + "learning_rate": 3.0514613682282703e-06, + "loss": 0.7333, + "step": 12426 + }, + { + "epoch": 0.638657621543838, + "grad_norm": 1.1566846370697021, + "learning_rate": 3.0506949403367834e-06, + "loss": 0.7814, + "step": 12427 + }, + { + "epoch": 0.6387090142871826, + "grad_norm": 0.8539129495620728, + "learning_rate": 3.0499285664524946e-06, + "loss": 0.6488, + "step": 12428 + }, + { + "epoch": 0.6387604070305273, + "grad_norm": 1.1233267784118652, + "learning_rate": 3.0491622465966373e-06, + "loss": 0.7209, + "step": 12429 + }, + { + "epoch": 0.6388117997738719, + "grad_norm": 1.0695856809616089, + "learning_rate": 3.0483959807904445e-06, + "loss": 0.7237, + "step": 12430 + }, + { + "epoch": 0.6388631925172166, + "grad_norm": 1.0778601169586182, + "learning_rate": 3.047629769055144e-06, + "loss": 0.679, + "step": 12431 + }, + { + "epoch": 0.6389145852605612, + "grad_norm": 0.7820619940757751, + "learning_rate": 3.0468636114119666e-06, + "loss": 0.6504, + "step": 12432 + }, + { + "epoch": 0.6389659780039059, + "grad_norm": 1.0276966094970703, + "learning_rate": 3.0460975078821377e-06, + "loss": 0.7486, + "step": 12433 + }, + { + "epoch": 0.6390173707472505, + "grad_norm": 0.6749736070632935, + "learning_rate": 3.0453314584868854e-06, + "loss": 0.6514, + "step": 12434 + }, + { + "epoch": 0.6390687634905952, + "grad_norm": 1.045594334602356, + "learning_rate": 3.044565463247431e-06, + "loss": 0.6868, + "step": 12435 + }, + { + "epoch": 0.6391201562339398, + "grad_norm": 1.1094048023223877, + "learning_rate": 3.043799522184997e-06, + "loss": 0.7656, + "step": 12436 + }, + { + "epoch": 0.6391715489772845, + "grad_norm": 0.7875816822052002, + "learning_rate": 3.043033635320808e-06, + "loss": 0.6507, + "step": 12437 + }, + { + "epoch": 0.6392229417206291, + "grad_norm": 0.7932091355323792, + "learning_rate": 3.0422678026760784e-06, + "loss": 0.648, + "step": 12438 + }, + { + "epoch": 0.6392743344639736, + "grad_norm": 0.8026290535926819, + "learning_rate": 3.0415020242720294e-06, + "loss": 0.6833, + "step": 12439 + }, + { + "epoch": 0.6393257272073183, + "grad_norm": 1.044281005859375, + "learning_rate": 3.040736300129877e-06, + "loss": 0.7642, + "step": 12440 + }, + { + "epoch": 0.6393771199506629, + "grad_norm": 1.0879682302474976, + "learning_rate": 3.0399706302708363e-06, + "loss": 0.7336, + "step": 12441 + }, + { + "epoch": 0.6394285126940076, + "grad_norm": 1.1501655578613281, + "learning_rate": 3.0392050147161213e-06, + "loss": 0.6896, + "step": 12442 + }, + { + "epoch": 0.6394799054373522, + "grad_norm": 0.7306084036827087, + "learning_rate": 3.0384394534869455e-06, + "loss": 0.6706, + "step": 12443 + }, + { + "epoch": 0.6395312981806969, + "grad_norm": 1.0372000932693481, + "learning_rate": 3.0376739466045146e-06, + "loss": 0.6945, + "step": 12444 + }, + { + "epoch": 0.6395826909240415, + "grad_norm": 0.7529664635658264, + "learning_rate": 3.036908494090042e-06, + "loss": 0.6171, + "step": 12445 + }, + { + "epoch": 0.6396340836673862, + "grad_norm": 1.092300534248352, + "learning_rate": 3.0361430959647343e-06, + "loss": 0.6768, + "step": 12446 + }, + { + "epoch": 0.6396854764107308, + "grad_norm": 1.0681698322296143, + "learning_rate": 3.0353777522497963e-06, + "loss": 0.7008, + "step": 12447 + }, + { + "epoch": 0.6397368691540755, + "grad_norm": 1.0579118728637695, + "learning_rate": 3.0346124629664326e-06, + "loss": 0.7294, + "step": 12448 + }, + { + "epoch": 0.6397882618974201, + "grad_norm": 0.7365854978561401, + "learning_rate": 3.0338472281358466e-06, + "loss": 0.6854, + "step": 12449 + }, + { + "epoch": 0.6398396546407648, + "grad_norm": 1.1051244735717773, + "learning_rate": 3.0330820477792424e-06, + "loss": 0.7384, + "step": 12450 + }, + { + "epoch": 0.6398910473841094, + "grad_norm": 0.9895092844963074, + "learning_rate": 3.032316921917815e-06, + "loss": 0.7111, + "step": 12451 + }, + { + "epoch": 0.639942440127454, + "grad_norm": 1.0925488471984863, + "learning_rate": 3.0315518505727666e-06, + "loss": 0.7365, + "step": 12452 + }, + { + "epoch": 0.6399938328707987, + "grad_norm": 1.0687555074691772, + "learning_rate": 3.0307868337652945e-06, + "loss": 0.724, + "step": 12453 + }, + { + "epoch": 0.6400452256141432, + "grad_norm": 1.127245306968689, + "learning_rate": 3.0300218715165915e-06, + "loss": 0.6994, + "step": 12454 + }, + { + "epoch": 0.6400966183574879, + "grad_norm": 1.0680683851242065, + "learning_rate": 3.029256963847852e-06, + "loss": 0.7035, + "step": 12455 + }, + { + "epoch": 0.6401480111008325, + "grad_norm": 0.766851544380188, + "learning_rate": 3.028492110780272e-06, + "loss": 0.656, + "step": 12456 + }, + { + "epoch": 0.6401994038441772, + "grad_norm": 1.0638370513916016, + "learning_rate": 3.027727312335037e-06, + "loss": 0.7782, + "step": 12457 + }, + { + "epoch": 0.6402507965875218, + "grad_norm": 1.026733636856079, + "learning_rate": 3.02696256853334e-06, + "loss": 0.679, + "step": 12458 + }, + { + "epoch": 0.6403021893308665, + "grad_norm": 1.1674253940582275, + "learning_rate": 3.026197879396368e-06, + "loss": 0.73, + "step": 12459 + }, + { + "epoch": 0.6403535820742111, + "grad_norm": 1.1570357084274292, + "learning_rate": 3.025433244945307e-06, + "loss": 0.7426, + "step": 12460 + }, + { + "epoch": 0.6404049748175558, + "grad_norm": 1.0358844995498657, + "learning_rate": 3.0246686652013425e-06, + "loss": 0.7259, + "step": 12461 + }, + { + "epoch": 0.6404563675609004, + "grad_norm": 1.0972083806991577, + "learning_rate": 3.0239041401856583e-06, + "loss": 0.6965, + "step": 12462 + }, + { + "epoch": 0.640507760304245, + "grad_norm": 1.1043134927749634, + "learning_rate": 3.023139669919435e-06, + "loss": 0.6854, + "step": 12463 + }, + { + "epoch": 0.6405591530475897, + "grad_norm": 0.754643976688385, + "learning_rate": 3.022375254423852e-06, + "loss": 0.6492, + "step": 12464 + }, + { + "epoch": 0.6406105457909343, + "grad_norm": 1.0815545320510864, + "learning_rate": 3.0216108937200907e-06, + "loss": 0.688, + "step": 12465 + }, + { + "epoch": 0.640661938534279, + "grad_norm": 1.1285542249679565, + "learning_rate": 3.0208465878293287e-06, + "loss": 0.7192, + "step": 12466 + }, + { + "epoch": 0.6407133312776236, + "grad_norm": 0.7049675583839417, + "learning_rate": 3.0200823367727384e-06, + "loss": 0.6679, + "step": 12467 + }, + { + "epoch": 0.6407647240209683, + "grad_norm": 0.7776630520820618, + "learning_rate": 3.0193181405714954e-06, + "loss": 0.6275, + "step": 12468 + }, + { + "epoch": 0.6408161167643128, + "grad_norm": 1.077939748764038, + "learning_rate": 3.018553999246775e-06, + "loss": 0.7349, + "step": 12469 + }, + { + "epoch": 0.6408675095076575, + "grad_norm": 1.1270390748977661, + "learning_rate": 3.0177899128197453e-06, + "loss": 0.7272, + "step": 12470 + }, + { + "epoch": 0.6409189022510021, + "grad_norm": 1.1545878648757935, + "learning_rate": 3.017025881311576e-06, + "loss": 0.6845, + "step": 12471 + }, + { + "epoch": 0.6409702949943468, + "grad_norm": 0.7495307922363281, + "learning_rate": 3.0162619047434384e-06, + "loss": 0.6457, + "step": 12472 + }, + { + "epoch": 0.6410216877376914, + "grad_norm": 1.1431505680084229, + "learning_rate": 3.015497983136495e-06, + "loss": 0.7267, + "step": 12473 + }, + { + "epoch": 0.6410730804810361, + "grad_norm": 1.0917288064956665, + "learning_rate": 3.014734116511913e-06, + "loss": 0.7772, + "step": 12474 + }, + { + "epoch": 0.6411244732243807, + "grad_norm": 1.1838593482971191, + "learning_rate": 3.0139703048908586e-06, + "loss": 0.7052, + "step": 12475 + }, + { + "epoch": 0.6411758659677254, + "grad_norm": 1.0653914213180542, + "learning_rate": 3.013206548294488e-06, + "loss": 0.7166, + "step": 12476 + }, + { + "epoch": 0.64122725871107, + "grad_norm": 1.0956439971923828, + "learning_rate": 3.012442846743966e-06, + "loss": 0.7143, + "step": 12477 + }, + { + "epoch": 0.6412786514544146, + "grad_norm": 1.0114266872406006, + "learning_rate": 3.0116792002604506e-06, + "loss": 0.679, + "step": 12478 + }, + { + "epoch": 0.6413300441977593, + "grad_norm": 0.7221886515617371, + "learning_rate": 3.010915608865101e-06, + "loss": 0.6654, + "step": 12479 + }, + { + "epoch": 0.6413814369411039, + "grad_norm": 1.06960129737854, + "learning_rate": 3.0101520725790688e-06, + "loss": 0.7487, + "step": 12480 + }, + { + "epoch": 0.6414328296844486, + "grad_norm": 0.7439162135124207, + "learning_rate": 3.009388591423512e-06, + "loss": 0.6432, + "step": 12481 + }, + { + "epoch": 0.6414842224277932, + "grad_norm": 1.0023384094238281, + "learning_rate": 3.008625165419583e-06, + "loss": 0.6473, + "step": 12482 + }, + { + "epoch": 0.6415356151711379, + "grad_norm": 1.0683302879333496, + "learning_rate": 3.0078617945884314e-06, + "loss": 0.6876, + "step": 12483 + }, + { + "epoch": 0.6415870079144824, + "grad_norm": 1.1591871976852417, + "learning_rate": 3.0070984789512093e-06, + "loss": 0.7096, + "step": 12484 + }, + { + "epoch": 0.6416384006578271, + "grad_norm": 1.1107230186462402, + "learning_rate": 3.0063352185290656e-06, + "loss": 0.7589, + "step": 12485 + }, + { + "epoch": 0.6416897934011717, + "grad_norm": 1.0828583240509033, + "learning_rate": 3.0055720133431437e-06, + "loss": 0.6317, + "step": 12486 + }, + { + "epoch": 0.6417411861445164, + "grad_norm": 0.9825509786605835, + "learning_rate": 3.004808863414591e-06, + "loss": 0.6697, + "step": 12487 + }, + { + "epoch": 0.641792578887861, + "grad_norm": 1.1393312215805054, + "learning_rate": 3.004045768764553e-06, + "loss": 0.7427, + "step": 12488 + }, + { + "epoch": 0.6418439716312057, + "grad_norm": 1.051935076713562, + "learning_rate": 3.0032827294141674e-06, + "loss": 0.7303, + "step": 12489 + }, + { + "epoch": 0.6418953643745503, + "grad_norm": 1.01982581615448, + "learning_rate": 3.002519745384578e-06, + "loss": 0.7243, + "step": 12490 + }, + { + "epoch": 0.641946757117895, + "grad_norm": 0.7550032734870911, + "learning_rate": 3.001756816696924e-06, + "loss": 0.6956, + "step": 12491 + }, + { + "epoch": 0.6419981498612396, + "grad_norm": 0.8016767501831055, + "learning_rate": 3.000993943372341e-06, + "loss": 0.6447, + "step": 12492 + }, + { + "epoch": 0.6420495426045842, + "grad_norm": 1.0741279125213623, + "learning_rate": 3.000231125431967e-06, + "loss": 0.6926, + "step": 12493 + }, + { + "epoch": 0.6421009353479289, + "grad_norm": 1.014305830001831, + "learning_rate": 2.9994683628969364e-06, + "loss": 0.7456, + "step": 12494 + }, + { + "epoch": 0.6421523280912735, + "grad_norm": 0.7242282629013062, + "learning_rate": 2.998705655788383e-06, + "loss": 0.6611, + "step": 12495 + }, + { + "epoch": 0.6422037208346182, + "grad_norm": 0.9902424812316895, + "learning_rate": 2.9979430041274347e-06, + "loss": 0.6154, + "step": 12496 + }, + { + "epoch": 0.6422551135779628, + "grad_norm": 1.0947375297546387, + "learning_rate": 2.9971804079352244e-06, + "loss": 0.7472, + "step": 12497 + }, + { + "epoch": 0.6423065063213075, + "grad_norm": 0.7447814345359802, + "learning_rate": 2.996417867232882e-06, + "loss": 0.6592, + "step": 12498 + }, + { + "epoch": 0.642357899064652, + "grad_norm": 1.1309776306152344, + "learning_rate": 2.99565538204153e-06, + "loss": 0.7527, + "step": 12499 + }, + { + "epoch": 0.6424092918079967, + "grad_norm": 1.1105777025222778, + "learning_rate": 2.9948929523822958e-06, + "loss": 0.7291, + "step": 12500 + }, + { + "epoch": 0.6424606845513413, + "grad_norm": 1.088119387626648, + "learning_rate": 2.9941305782763043e-06, + "loss": 0.7514, + "step": 12501 + }, + { + "epoch": 0.642512077294686, + "grad_norm": 0.7007126808166504, + "learning_rate": 2.9933682597446755e-06, + "loss": 0.6344, + "step": 12502 + }, + { + "epoch": 0.6425634700380306, + "grad_norm": 1.0505273342132568, + "learning_rate": 2.992605996808532e-06, + "loss": 0.704, + "step": 12503 + }, + { + "epoch": 0.6426148627813753, + "grad_norm": 1.147643804550171, + "learning_rate": 2.991843789488994e-06, + "loss": 0.7022, + "step": 12504 + }, + { + "epoch": 0.6426662555247199, + "grad_norm": 1.2377270460128784, + "learning_rate": 2.9910816378071748e-06, + "loss": 0.7445, + "step": 12505 + }, + { + "epoch": 0.6427176482680645, + "grad_norm": 1.0696444511413574, + "learning_rate": 2.990319541784194e-06, + "loss": 0.7159, + "step": 12506 + }, + { + "epoch": 0.6427690410114092, + "grad_norm": 1.0263750553131104, + "learning_rate": 2.989557501441167e-06, + "loss": 0.7014, + "step": 12507 + }, + { + "epoch": 0.6428204337547538, + "grad_norm": 1.7530046701431274, + "learning_rate": 2.9887955167992026e-06, + "loss": 0.755, + "step": 12508 + }, + { + "epoch": 0.6428718264980985, + "grad_norm": 0.7299738526344299, + "learning_rate": 2.9880335878794154e-06, + "loss": 0.6656, + "step": 12509 + }, + { + "epoch": 0.6429232192414431, + "grad_norm": 1.14931321144104, + "learning_rate": 2.987271714702914e-06, + "loss": 0.7172, + "step": 12510 + }, + { + "epoch": 0.6429746119847878, + "grad_norm": 1.04103422164917, + "learning_rate": 2.9865098972908084e-06, + "loss": 0.7124, + "step": 12511 + }, + { + "epoch": 0.6430260047281324, + "grad_norm": 3.011867046356201, + "learning_rate": 2.9857481356642037e-06, + "loss": 0.7174, + "step": 12512 + }, + { + "epoch": 0.6430773974714771, + "grad_norm": 1.0644625425338745, + "learning_rate": 2.9849864298442056e-06, + "loss": 0.7024, + "step": 12513 + }, + { + "epoch": 0.6431287902148217, + "grad_norm": 1.0423341989517212, + "learning_rate": 2.98422477985192e-06, + "loss": 0.6921, + "step": 12514 + }, + { + "epoch": 0.6431801829581663, + "grad_norm": 1.0194381475448608, + "learning_rate": 2.983463185708445e-06, + "loss": 0.7109, + "step": 12515 + }, + { + "epoch": 0.6432315757015109, + "grad_norm": 1.1233144998550415, + "learning_rate": 2.9827016474348836e-06, + "loss": 0.7522, + "step": 12516 + }, + { + "epoch": 0.6432829684448556, + "grad_norm": 1.178581953048706, + "learning_rate": 2.981940165052337e-06, + "loss": 0.7534, + "step": 12517 + }, + { + "epoch": 0.6433343611882002, + "grad_norm": 1.147438406944275, + "learning_rate": 2.981178738581898e-06, + "loss": 0.6313, + "step": 12518 + }, + { + "epoch": 0.6433857539315448, + "grad_norm": 1.0982946157455444, + "learning_rate": 2.9804173680446647e-06, + "loss": 0.7372, + "step": 12519 + }, + { + "epoch": 0.6434371466748895, + "grad_norm": 1.0677376985549927, + "learning_rate": 2.9796560534617335e-06, + "loss": 0.6836, + "step": 12520 + }, + { + "epoch": 0.6434885394182341, + "grad_norm": 0.813614547252655, + "learning_rate": 2.9788947948541945e-06, + "loss": 0.6607, + "step": 12521 + }, + { + "epoch": 0.6435399321615788, + "grad_norm": 1.112439513206482, + "learning_rate": 2.9781335922431398e-06, + "loss": 0.6763, + "step": 12522 + }, + { + "epoch": 0.6435913249049234, + "grad_norm": 1.152753233909607, + "learning_rate": 2.977372445649661e-06, + "loss": 0.6748, + "step": 12523 + }, + { + "epoch": 0.6436427176482681, + "grad_norm": 1.1023285388946533, + "learning_rate": 2.9766113550948434e-06, + "loss": 0.6853, + "step": 12524 + }, + { + "epoch": 0.6436941103916127, + "grad_norm": 1.0749114751815796, + "learning_rate": 2.9758503205997747e-06, + "loss": 0.7356, + "step": 12525 + }, + { + "epoch": 0.6437455031349574, + "grad_norm": 1.0768135786056519, + "learning_rate": 2.975089342185541e-06, + "loss": 0.7214, + "step": 12526 + }, + { + "epoch": 0.643796895878302, + "grad_norm": 1.0707271099090576, + "learning_rate": 2.974328419873227e-06, + "loss": 0.7271, + "step": 12527 + }, + { + "epoch": 0.6438482886216467, + "grad_norm": 0.9974445104598999, + "learning_rate": 2.9735675536839115e-06, + "loss": 0.6618, + "step": 12528 + }, + { + "epoch": 0.6438996813649913, + "grad_norm": 1.0249018669128418, + "learning_rate": 2.972806743638676e-06, + "loss": 0.6411, + "step": 12529 + }, + { + "epoch": 0.6439510741083359, + "grad_norm": 1.0546780824661255, + "learning_rate": 2.972045989758602e-06, + "loss": 0.6922, + "step": 12530 + }, + { + "epoch": 0.6440024668516805, + "grad_norm": 1.1408554315567017, + "learning_rate": 2.9712852920647627e-06, + "loss": 0.7116, + "step": 12531 + }, + { + "epoch": 0.6440538595950251, + "grad_norm": 0.7988154888153076, + "learning_rate": 2.970524650578236e-06, + "loss": 0.639, + "step": 12532 + }, + { + "epoch": 0.6441052523383698, + "grad_norm": 1.0956555604934692, + "learning_rate": 2.9697640653200975e-06, + "loss": 0.7277, + "step": 12533 + }, + { + "epoch": 0.6441566450817144, + "grad_norm": 1.0431976318359375, + "learning_rate": 2.9690035363114167e-06, + "loss": 0.7722, + "step": 12534 + }, + { + "epoch": 0.6442080378250591, + "grad_norm": 1.1353570222854614, + "learning_rate": 2.9682430635732675e-06, + "loss": 0.7007, + "step": 12535 + }, + { + "epoch": 0.6442594305684037, + "grad_norm": 1.110840916633606, + "learning_rate": 2.9674826471267193e-06, + "loss": 0.732, + "step": 12536 + }, + { + "epoch": 0.6443108233117484, + "grad_norm": 1.161787748336792, + "learning_rate": 2.966722286992838e-06, + "loss": 0.7008, + "step": 12537 + }, + { + "epoch": 0.644362216055093, + "grad_norm": 1.0624558925628662, + "learning_rate": 2.965961983192691e-06, + "loss": 0.6562, + "step": 12538 + }, + { + "epoch": 0.6444136087984377, + "grad_norm": 1.1931118965148926, + "learning_rate": 2.9652017357473457e-06, + "loss": 0.7242, + "step": 12539 + }, + { + "epoch": 0.6444650015417823, + "grad_norm": 1.0274269580841064, + "learning_rate": 2.964441544677861e-06, + "loss": 0.6672, + "step": 12540 + }, + { + "epoch": 0.644516394285127, + "grad_norm": 1.0350313186645508, + "learning_rate": 2.963681410005301e-06, + "loss": 0.7218, + "step": 12541 + }, + { + "epoch": 0.6445677870284716, + "grad_norm": 1.0855530500411987, + "learning_rate": 2.9629213317507253e-06, + "loss": 0.7169, + "step": 12542 + }, + { + "epoch": 0.6446191797718163, + "grad_norm": 1.0388375520706177, + "learning_rate": 2.962161309935194e-06, + "loss": 0.7569, + "step": 12543 + }, + { + "epoch": 0.6446705725151609, + "grad_norm": 1.0432623624801636, + "learning_rate": 2.961401344579763e-06, + "loss": 0.6506, + "step": 12544 + }, + { + "epoch": 0.6447219652585054, + "grad_norm": 1.0363191366195679, + "learning_rate": 2.960641435705487e-06, + "loss": 0.7355, + "step": 12545 + }, + { + "epoch": 0.6447733580018501, + "grad_norm": 1.0218799114227295, + "learning_rate": 2.959881583333423e-06, + "loss": 0.6851, + "step": 12546 + }, + { + "epoch": 0.6448247507451947, + "grad_norm": 0.9656689763069153, + "learning_rate": 2.9591217874846203e-06, + "loss": 0.6606, + "step": 12547 + }, + { + "epoch": 0.6448761434885394, + "grad_norm": 0.7550825476646423, + "learning_rate": 2.958362048180129e-06, + "loss": 0.6459, + "step": 12548 + }, + { + "epoch": 0.644927536231884, + "grad_norm": 1.096889615058899, + "learning_rate": 2.9576023654410038e-06, + "loss": 0.6929, + "step": 12549 + }, + { + "epoch": 0.6449789289752287, + "grad_norm": 1.1097761392593384, + "learning_rate": 2.9568427392882846e-06, + "loss": 0.7067, + "step": 12550 + }, + { + "epoch": 0.6450303217185733, + "grad_norm": 1.0830873250961304, + "learning_rate": 2.9560831697430226e-06, + "loss": 0.6463, + "step": 12551 + }, + { + "epoch": 0.645081714461918, + "grad_norm": 1.0513182878494263, + "learning_rate": 2.955323656826262e-06, + "loss": 0.66, + "step": 12552 + }, + { + "epoch": 0.6451331072052626, + "grad_norm": 1.1426093578338623, + "learning_rate": 2.9545642005590445e-06, + "loss": 0.7581, + "step": 12553 + }, + { + "epoch": 0.6451844999486073, + "grad_norm": 1.0884987115859985, + "learning_rate": 2.9538048009624116e-06, + "loss": 0.6804, + "step": 12554 + }, + { + "epoch": 0.6452358926919519, + "grad_norm": 1.0375171899795532, + "learning_rate": 2.9530454580574054e-06, + "loss": 0.7424, + "step": 12555 + }, + { + "epoch": 0.6452872854352966, + "grad_norm": 1.057641863822937, + "learning_rate": 2.9522861718650608e-06, + "loss": 0.744, + "step": 12556 + }, + { + "epoch": 0.6453386781786412, + "grad_norm": 1.0870685577392578, + "learning_rate": 2.9515269424064154e-06, + "loss": 0.7206, + "step": 12557 + }, + { + "epoch": 0.6453900709219859, + "grad_norm": 0.7162367105484009, + "learning_rate": 2.950767769702505e-06, + "loss": 0.6276, + "step": 12558 + }, + { + "epoch": 0.6454414636653305, + "grad_norm": 1.1099445819854736, + "learning_rate": 2.950008653774365e-06, + "loss": 0.733, + "step": 12559 + }, + { + "epoch": 0.645492856408675, + "grad_norm": 1.1124504804611206, + "learning_rate": 2.949249594643023e-06, + "loss": 0.7223, + "step": 12560 + }, + { + "epoch": 0.6455442491520197, + "grad_norm": 1.0755809545516968, + "learning_rate": 2.9484905923295127e-06, + "loss": 0.6696, + "step": 12561 + }, + { + "epoch": 0.6455956418953643, + "grad_norm": 1.26218843460083, + "learning_rate": 2.947731646854862e-06, + "loss": 0.7385, + "step": 12562 + }, + { + "epoch": 0.645647034638709, + "grad_norm": 1.0858018398284912, + "learning_rate": 2.9469727582400982e-06, + "loss": 0.6856, + "step": 12563 + }, + { + "epoch": 0.6456984273820536, + "grad_norm": 1.1529483795166016, + "learning_rate": 2.9462139265062464e-06, + "loss": 0.7054, + "step": 12564 + }, + { + "epoch": 0.6457498201253983, + "grad_norm": 1.126605749130249, + "learning_rate": 2.945455151674333e-06, + "loss": 0.6943, + "step": 12565 + }, + { + "epoch": 0.6458012128687429, + "grad_norm": 1.055630087852478, + "learning_rate": 2.9446964337653773e-06, + "loss": 0.7636, + "step": 12566 + }, + { + "epoch": 0.6458526056120876, + "grad_norm": 1.1092755794525146, + "learning_rate": 2.943937772800401e-06, + "loss": 0.6737, + "step": 12567 + }, + { + "epoch": 0.6459039983554322, + "grad_norm": 0.8738458156585693, + "learning_rate": 2.9431791688004265e-06, + "loss": 0.6607, + "step": 12568 + }, + { + "epoch": 0.6459553910987769, + "grad_norm": 1.0223608016967773, + "learning_rate": 2.942420621786467e-06, + "loss": 0.6911, + "step": 12569 + }, + { + "epoch": 0.6460067838421215, + "grad_norm": 1.0747071504592896, + "learning_rate": 2.9416621317795415e-06, + "loss": 0.7405, + "step": 12570 + }, + { + "epoch": 0.6460581765854662, + "grad_norm": 1.1457273960113525, + "learning_rate": 2.9409036988006644e-06, + "loss": 0.7817, + "step": 12571 + }, + { + "epoch": 0.6461095693288108, + "grad_norm": 1.0779527425765991, + "learning_rate": 2.940145322870848e-06, + "loss": 0.6641, + "step": 12572 + }, + { + "epoch": 0.6461609620721555, + "grad_norm": 0.7959700226783752, + "learning_rate": 2.939387004011105e-06, + "loss": 0.6562, + "step": 12573 + }, + { + "epoch": 0.6462123548155001, + "grad_norm": 1.1567611694335938, + "learning_rate": 2.9386287422424433e-06, + "loss": 0.7105, + "step": 12574 + }, + { + "epoch": 0.6462637475588446, + "grad_norm": 1.070190668106079, + "learning_rate": 2.9378705375858737e-06, + "loss": 0.747, + "step": 12575 + }, + { + "epoch": 0.6463151403021893, + "grad_norm": 1.1524614095687866, + "learning_rate": 2.9371123900623997e-06, + "loss": 0.6661, + "step": 12576 + }, + { + "epoch": 0.6463665330455339, + "grad_norm": 1.08661949634552, + "learning_rate": 2.9363542996930295e-06, + "loss": 0.6878, + "step": 12577 + }, + { + "epoch": 0.6464179257888786, + "grad_norm": 1.0304526090621948, + "learning_rate": 2.935596266498767e-06, + "loss": 0.6876, + "step": 12578 + }, + { + "epoch": 0.6464693185322232, + "grad_norm": 1.019436240196228, + "learning_rate": 2.9348382905006113e-06, + "loss": 0.6979, + "step": 12579 + }, + { + "epoch": 0.6465207112755679, + "grad_norm": 1.0169875621795654, + "learning_rate": 2.934080371719563e-06, + "loss": 0.6683, + "step": 12580 + }, + { + "epoch": 0.6465721040189125, + "grad_norm": 1.217195987701416, + "learning_rate": 2.933322510176625e-06, + "loss": 0.7245, + "step": 12581 + }, + { + "epoch": 0.6466234967622572, + "grad_norm": 1.0641175508499146, + "learning_rate": 2.9325647058927887e-06, + "loss": 0.7536, + "step": 12582 + }, + { + "epoch": 0.6466748895056018, + "grad_norm": 1.08638334274292, + "learning_rate": 2.931806958889053e-06, + "loss": 0.7252, + "step": 12583 + }, + { + "epoch": 0.6467262822489465, + "grad_norm": 1.148267149925232, + "learning_rate": 2.9310492691864128e-06, + "loss": 0.7402, + "step": 12584 + }, + { + "epoch": 0.6467776749922911, + "grad_norm": 1.1270256042480469, + "learning_rate": 2.930291636805858e-06, + "loss": 0.7209, + "step": 12585 + }, + { + "epoch": 0.6468290677356358, + "grad_norm": 0.8185450434684753, + "learning_rate": 2.929534061768381e-06, + "loss": 0.6661, + "step": 12586 + }, + { + "epoch": 0.6468804604789804, + "grad_norm": 1.0447026491165161, + "learning_rate": 2.9287765440949705e-06, + "loss": 0.6838, + "step": 12587 + }, + { + "epoch": 0.646931853222325, + "grad_norm": 1.0965949296951294, + "learning_rate": 2.928019083806617e-06, + "loss": 0.7017, + "step": 12588 + }, + { + "epoch": 0.6469832459656697, + "grad_norm": 1.1301207542419434, + "learning_rate": 2.9272616809243016e-06, + "loss": 0.7186, + "step": 12589 + }, + { + "epoch": 0.6470346387090143, + "grad_norm": 1.1941970586776733, + "learning_rate": 2.9265043354690115e-06, + "loss": 0.7307, + "step": 12590 + }, + { + "epoch": 0.6470860314523589, + "grad_norm": 1.1365361213684082, + "learning_rate": 2.9257470474617315e-06, + "loss": 0.764, + "step": 12591 + }, + { + "epoch": 0.6471374241957035, + "grad_norm": 0.9992626905441284, + "learning_rate": 2.9249898169234394e-06, + "loss": 0.6982, + "step": 12592 + }, + { + "epoch": 0.6471888169390482, + "grad_norm": 1.0347284078598022, + "learning_rate": 2.9242326438751157e-06, + "loss": 0.6901, + "step": 12593 + }, + { + "epoch": 0.6472402096823928, + "grad_norm": 0.7505594491958618, + "learning_rate": 2.92347552833774e-06, + "loss": 0.7026, + "step": 12594 + }, + { + "epoch": 0.6472916024257375, + "grad_norm": 1.029219150543213, + "learning_rate": 2.922718470332288e-06, + "loss": 0.7281, + "step": 12595 + }, + { + "epoch": 0.6473429951690821, + "grad_norm": 1.0583431720733643, + "learning_rate": 2.921961469879734e-06, + "loss": 0.6953, + "step": 12596 + }, + { + "epoch": 0.6473943879124268, + "grad_norm": 1.0568609237670898, + "learning_rate": 2.9212045270010547e-06, + "loss": 0.7541, + "step": 12597 + }, + { + "epoch": 0.6474457806557714, + "grad_norm": 1.0321409702301025, + "learning_rate": 2.9204476417172166e-06, + "loss": 0.7146, + "step": 12598 + }, + { + "epoch": 0.647497173399116, + "grad_norm": 1.0842598676681519, + "learning_rate": 2.9196908140491932e-06, + "loss": 0.734, + "step": 12599 + }, + { + "epoch": 0.6475485661424607, + "grad_norm": 1.086569905281067, + "learning_rate": 2.918934044017954e-06, + "loss": 0.6725, + "step": 12600 + }, + { + "epoch": 0.6475999588858053, + "grad_norm": 0.9834355711936951, + "learning_rate": 2.9181773316444615e-06, + "loss": 0.6445, + "step": 12601 + }, + { + "epoch": 0.64765135162915, + "grad_norm": 1.111777901649475, + "learning_rate": 2.917420676949686e-06, + "loss": 0.7441, + "step": 12602 + }, + { + "epoch": 0.6477027443724946, + "grad_norm": 1.1209532022476196, + "learning_rate": 2.9166640799545877e-06, + "loss": 0.7463, + "step": 12603 + }, + { + "epoch": 0.6477541371158393, + "grad_norm": 0.7377808094024658, + "learning_rate": 2.915907540680134e-06, + "loss": 0.6779, + "step": 12604 + }, + { + "epoch": 0.6478055298591839, + "grad_norm": 1.082658052444458, + "learning_rate": 2.9151510591472775e-06, + "loss": 0.6818, + "step": 12605 + }, + { + "epoch": 0.6478569226025285, + "grad_norm": 1.0856460332870483, + "learning_rate": 2.9143946353769836e-06, + "loss": 0.7181, + "step": 12606 + }, + { + "epoch": 0.6479083153458731, + "grad_norm": 1.0511581897735596, + "learning_rate": 2.9136382693902075e-06, + "loss": 0.7147, + "step": 12607 + }, + { + "epoch": 0.6479597080892178, + "grad_norm": 0.964617133140564, + "learning_rate": 2.9128819612079053e-06, + "loss": 0.6952, + "step": 12608 + }, + { + "epoch": 0.6480111008325624, + "grad_norm": 0.8149796724319458, + "learning_rate": 2.912125710851029e-06, + "loss": 0.6626, + "step": 12609 + }, + { + "epoch": 0.6480624935759071, + "grad_norm": 1.1061389446258545, + "learning_rate": 2.911369518340535e-06, + "loss": 0.6908, + "step": 12610 + }, + { + "epoch": 0.6481138863192517, + "grad_norm": 1.0958303213119507, + "learning_rate": 2.910613383697372e-06, + "loss": 0.7164, + "step": 12611 + }, + { + "epoch": 0.6481652790625964, + "grad_norm": 1.093487024307251, + "learning_rate": 2.9098573069424883e-06, + "loss": 0.7427, + "step": 12612 + }, + { + "epoch": 0.648216671805941, + "grad_norm": 1.2232463359832764, + "learning_rate": 2.9091012880968373e-06, + "loss": 0.7975, + "step": 12613 + }, + { + "epoch": 0.6482680645492856, + "grad_norm": 1.111070990562439, + "learning_rate": 2.908345327181356e-06, + "loss": 0.7467, + "step": 12614 + }, + { + "epoch": 0.6483194572926303, + "grad_norm": 1.0774874687194824, + "learning_rate": 2.907589424216997e-06, + "loss": 0.6995, + "step": 12615 + }, + { + "epoch": 0.6483708500359749, + "grad_norm": 1.1049230098724365, + "learning_rate": 2.9068335792246994e-06, + "loss": 0.7411, + "step": 12616 + }, + { + "epoch": 0.6484222427793196, + "grad_norm": 0.788194477558136, + "learning_rate": 2.9060777922254055e-06, + "loss": 0.6591, + "step": 12617 + }, + { + "epoch": 0.6484736355226642, + "grad_norm": 1.0172597169876099, + "learning_rate": 2.905322063240053e-06, + "loss": 0.6648, + "step": 12618 + }, + { + "epoch": 0.6485250282660089, + "grad_norm": 1.1682467460632324, + "learning_rate": 2.904566392289584e-06, + "loss": 0.7104, + "step": 12619 + }, + { + "epoch": 0.6485764210093535, + "grad_norm": 0.7147257328033447, + "learning_rate": 2.903810779394933e-06, + "loss": 0.6399, + "step": 12620 + }, + { + "epoch": 0.6486278137526981, + "grad_norm": 1.100781798362732, + "learning_rate": 2.9030552245770323e-06, + "loss": 0.7132, + "step": 12621 + }, + { + "epoch": 0.6486792064960427, + "grad_norm": 1.0393102169036865, + "learning_rate": 2.90229972785682e-06, + "loss": 0.7223, + "step": 12622 + }, + { + "epoch": 0.6487305992393874, + "grad_norm": 1.0879110097885132, + "learning_rate": 2.901544289255226e-06, + "loss": 0.6582, + "step": 12623 + }, + { + "epoch": 0.648781991982732, + "grad_norm": 0.9889287352561951, + "learning_rate": 2.9007889087931797e-06, + "loss": 0.6635, + "step": 12624 + }, + { + "epoch": 0.6488333847260767, + "grad_norm": 0.9613938927650452, + "learning_rate": 2.900033586491607e-06, + "loss": 0.6914, + "step": 12625 + }, + { + "epoch": 0.6488847774694213, + "grad_norm": 1.1242367029190063, + "learning_rate": 2.899278322371443e-06, + "loss": 0.7101, + "step": 12626 + }, + { + "epoch": 0.648936170212766, + "grad_norm": 1.062567114830017, + "learning_rate": 2.898523116453602e-06, + "loss": 0.7377, + "step": 12627 + }, + { + "epoch": 0.6489875629561106, + "grad_norm": 1.0578736066818237, + "learning_rate": 2.897767968759016e-06, + "loss": 0.6649, + "step": 12628 + }, + { + "epoch": 0.6490389556994552, + "grad_norm": 0.8728432059288025, + "learning_rate": 2.897012879308604e-06, + "loss": 0.6708, + "step": 12629 + }, + { + "epoch": 0.6490903484427999, + "grad_norm": 1.1333264112472534, + "learning_rate": 2.896257848123285e-06, + "loss": 0.7075, + "step": 12630 + }, + { + "epoch": 0.6491417411861445, + "grad_norm": 1.0440527200698853, + "learning_rate": 2.895502875223981e-06, + "loss": 0.6932, + "step": 12631 + }, + { + "epoch": 0.6491931339294892, + "grad_norm": 1.0709819793701172, + "learning_rate": 2.8947479606316074e-06, + "loss": 0.7166, + "step": 12632 + }, + { + "epoch": 0.6492445266728338, + "grad_norm": 1.0816556215286255, + "learning_rate": 2.8939931043670805e-06, + "loss": 0.727, + "step": 12633 + }, + { + "epoch": 0.6492959194161785, + "grad_norm": 1.0810855627059937, + "learning_rate": 2.8932383064513114e-06, + "loss": 0.7162, + "step": 12634 + }, + { + "epoch": 0.6493473121595231, + "grad_norm": 1.0960279703140259, + "learning_rate": 2.8924835669052166e-06, + "loss": 0.675, + "step": 12635 + }, + { + "epoch": 0.6493987049028677, + "grad_norm": 1.1212399005889893, + "learning_rate": 2.891728885749705e-06, + "loss": 0.7252, + "step": 12636 + }, + { + "epoch": 0.6494500976462123, + "grad_norm": 1.1157004833221436, + "learning_rate": 2.890974263005686e-06, + "loss": 0.7112, + "step": 12637 + }, + { + "epoch": 0.649501490389557, + "grad_norm": 1.114310622215271, + "learning_rate": 2.8902196986940647e-06, + "loss": 0.6579, + "step": 12638 + }, + { + "epoch": 0.6495528831329016, + "grad_norm": 0.6990789771080017, + "learning_rate": 2.889465192835751e-06, + "loss": 0.6431, + "step": 12639 + }, + { + "epoch": 0.6496042758762463, + "grad_norm": 1.1571526527404785, + "learning_rate": 2.8887107454516467e-06, + "loss": 0.7152, + "step": 12640 + }, + { + "epoch": 0.6496556686195909, + "grad_norm": 0.8259235620498657, + "learning_rate": 2.887956356562655e-06, + "loss": 0.6688, + "step": 12641 + }, + { + "epoch": 0.6497070613629355, + "grad_norm": 1.102955937385559, + "learning_rate": 2.887202026189677e-06, + "loss": 0.8067, + "step": 12642 + }, + { + "epoch": 0.6497584541062802, + "grad_norm": 1.1036666631698608, + "learning_rate": 2.8864477543536094e-06, + "loss": 0.7321, + "step": 12643 + }, + { + "epoch": 0.6498098468496248, + "grad_norm": 1.0310508012771606, + "learning_rate": 2.885693541075355e-06, + "loss": 0.7342, + "step": 12644 + }, + { + "epoch": 0.6498612395929695, + "grad_norm": 1.0737802982330322, + "learning_rate": 2.884939386375807e-06, + "loss": 0.7256, + "step": 12645 + }, + { + "epoch": 0.6499126323363141, + "grad_norm": 1.4719418287277222, + "learning_rate": 2.8841852902758605e-06, + "loss": 0.7017, + "step": 12646 + }, + { + "epoch": 0.6499640250796588, + "grad_norm": 1.1220132112503052, + "learning_rate": 2.883431252796406e-06, + "loss": 0.7073, + "step": 12647 + }, + { + "epoch": 0.6500154178230034, + "grad_norm": 1.083936333656311, + "learning_rate": 2.8826772739583386e-06, + "loss": 0.7129, + "step": 12648 + }, + { + "epoch": 0.6500668105663481, + "grad_norm": 1.048462986946106, + "learning_rate": 2.881923353782547e-06, + "loss": 0.6763, + "step": 12649 + }, + { + "epoch": 0.6501182033096927, + "grad_norm": 0.7035284638404846, + "learning_rate": 2.881169492289918e-06, + "loss": 0.675, + "step": 12650 + }, + { + "epoch": 0.6501695960530373, + "grad_norm": 0.8668980598449707, + "learning_rate": 2.880415689501337e-06, + "loss": 0.6864, + "step": 12651 + }, + { + "epoch": 0.6502209887963819, + "grad_norm": 1.21080482006073, + "learning_rate": 2.8796619454376916e-06, + "loss": 0.7665, + "step": 12652 + }, + { + "epoch": 0.6502723815397266, + "grad_norm": 0.7108464241027832, + "learning_rate": 2.8789082601198636e-06, + "loss": 0.6643, + "step": 12653 + }, + { + "epoch": 0.6503237742830712, + "grad_norm": 1.0314874649047852, + "learning_rate": 2.8781546335687325e-06, + "loss": 0.6364, + "step": 12654 + }, + { + "epoch": 0.6503751670264158, + "grad_norm": 1.1164394617080688, + "learning_rate": 2.877401065805184e-06, + "loss": 0.7524, + "step": 12655 + }, + { + "epoch": 0.6504265597697605, + "grad_norm": 1.0630953311920166, + "learning_rate": 2.8766475568500897e-06, + "loss": 0.6656, + "step": 12656 + }, + { + "epoch": 0.6504779525131051, + "grad_norm": 1.0845907926559448, + "learning_rate": 2.8758941067243295e-06, + "loss": 0.6963, + "step": 12657 + }, + { + "epoch": 0.6505293452564498, + "grad_norm": 1.0170427560806274, + "learning_rate": 2.8751407154487786e-06, + "loss": 0.6536, + "step": 12658 + }, + { + "epoch": 0.6505807379997944, + "grad_norm": 0.7387202978134155, + "learning_rate": 2.8743873830443104e-06, + "loss": 0.6766, + "step": 12659 + }, + { + "epoch": 0.6506321307431391, + "grad_norm": 1.1213816404342651, + "learning_rate": 2.8736341095317925e-06, + "loss": 0.6631, + "step": 12660 + }, + { + "epoch": 0.6506835234864837, + "grad_norm": 1.0957870483398438, + "learning_rate": 2.8728808949321013e-06, + "loss": 0.6956, + "step": 12661 + }, + { + "epoch": 0.6507349162298284, + "grad_norm": 0.9973905682563782, + "learning_rate": 2.8721277392661025e-06, + "loss": 0.6875, + "step": 12662 + }, + { + "epoch": 0.650786308973173, + "grad_norm": 1.0251758098602295, + "learning_rate": 2.8713746425546608e-06, + "loss": 0.6966, + "step": 12663 + }, + { + "epoch": 0.6508377017165177, + "grad_norm": 1.064182996749878, + "learning_rate": 2.870621604818647e-06, + "loss": 0.6799, + "step": 12664 + }, + { + "epoch": 0.6508890944598623, + "grad_norm": 1.0819506645202637, + "learning_rate": 2.869868626078917e-06, + "loss": 0.6965, + "step": 12665 + }, + { + "epoch": 0.6509404872032069, + "grad_norm": 1.125706672668457, + "learning_rate": 2.869115706356339e-06, + "loss": 0.7097, + "step": 12666 + }, + { + "epoch": 0.6509918799465515, + "grad_norm": 1.1135793924331665, + "learning_rate": 2.8683628456717693e-06, + "loss": 0.7178, + "step": 12667 + }, + { + "epoch": 0.6510432726898961, + "grad_norm": 1.0797240734100342, + "learning_rate": 2.867610044046073e-06, + "loss": 0.7399, + "step": 12668 + }, + { + "epoch": 0.6510946654332408, + "grad_norm": 1.1318385601043701, + "learning_rate": 2.8668573015000976e-06, + "loss": 0.7007, + "step": 12669 + }, + { + "epoch": 0.6511460581765854, + "grad_norm": 0.7572476863861084, + "learning_rate": 2.866104618054706e-06, + "loss": 0.6308, + "step": 12670 + }, + { + "epoch": 0.6511974509199301, + "grad_norm": 1.0626306533813477, + "learning_rate": 2.8653519937307497e-06, + "loss": 0.702, + "step": 12671 + }, + { + "epoch": 0.6512488436632747, + "grad_norm": 1.0133082866668701, + "learning_rate": 2.8645994285490774e-06, + "loss": 0.7541, + "step": 12672 + }, + { + "epoch": 0.6513002364066194, + "grad_norm": 1.0453921556472778, + "learning_rate": 2.8638469225305456e-06, + "loss": 0.7273, + "step": 12673 + }, + { + "epoch": 0.651351629149964, + "grad_norm": 1.1614511013031006, + "learning_rate": 2.863094475695999e-06, + "loss": 0.7755, + "step": 12674 + }, + { + "epoch": 0.6514030218933087, + "grad_norm": 1.1173795461654663, + "learning_rate": 2.8623420880662866e-06, + "loss": 0.714, + "step": 12675 + }, + { + "epoch": 0.6514544146366533, + "grad_norm": 1.1158134937286377, + "learning_rate": 2.8615897596622524e-06, + "loss": 0.6737, + "step": 12676 + }, + { + "epoch": 0.651505807379998, + "grad_norm": 1.0734282732009888, + "learning_rate": 2.8608374905047436e-06, + "loss": 0.7225, + "step": 12677 + }, + { + "epoch": 0.6515572001233426, + "grad_norm": 1.099980354309082, + "learning_rate": 2.8600852806145973e-06, + "loss": 0.7148, + "step": 12678 + }, + { + "epoch": 0.6516085928666873, + "grad_norm": 1.0808162689208984, + "learning_rate": 2.859333130012658e-06, + "loss": 0.6887, + "step": 12679 + }, + { + "epoch": 0.6516599856100319, + "grad_norm": 1.070190191268921, + "learning_rate": 2.858581038719764e-06, + "loss": 0.7285, + "step": 12680 + }, + { + "epoch": 0.6517113783533766, + "grad_norm": 1.080062985420227, + "learning_rate": 2.857829006756751e-06, + "loss": 0.7193, + "step": 12681 + }, + { + "epoch": 0.6517627710967211, + "grad_norm": 0.8259374499320984, + "learning_rate": 2.8570770341444577e-06, + "loss": 0.6061, + "step": 12682 + }, + { + "epoch": 0.6518141638400657, + "grad_norm": 1.0425503253936768, + "learning_rate": 2.856325120903714e-06, + "loss": 0.7194, + "step": 12683 + }, + { + "epoch": 0.6518655565834104, + "grad_norm": 1.0406701564788818, + "learning_rate": 2.8555732670553595e-06, + "loss": 0.6595, + "step": 12684 + }, + { + "epoch": 0.651916949326755, + "grad_norm": 1.2220526933670044, + "learning_rate": 2.8548214726202155e-06, + "loss": 0.7472, + "step": 12685 + }, + { + "epoch": 0.6519683420700997, + "grad_norm": 1.0990723371505737, + "learning_rate": 2.8540697376191185e-06, + "loss": 0.732, + "step": 12686 + }, + { + "epoch": 0.6520197348134443, + "grad_norm": 0.6881179809570312, + "learning_rate": 2.853318062072893e-06, + "loss": 0.6395, + "step": 12687 + }, + { + "epoch": 0.652071127556789, + "grad_norm": 1.0097739696502686, + "learning_rate": 2.852566446002365e-06, + "loss": 0.6971, + "step": 12688 + }, + { + "epoch": 0.6521225203001336, + "grad_norm": 1.1167685985565186, + "learning_rate": 2.851814889428357e-06, + "loss": 0.6819, + "step": 12689 + }, + { + "epoch": 0.6521739130434783, + "grad_norm": 1.043837070465088, + "learning_rate": 2.8510633923716957e-06, + "loss": 0.7354, + "step": 12690 + }, + { + "epoch": 0.6522253057868229, + "grad_norm": 1.015903115272522, + "learning_rate": 2.850311954853199e-06, + "loss": 0.7729, + "step": 12691 + }, + { + "epoch": 0.6522766985301676, + "grad_norm": 0.6884838938713074, + "learning_rate": 2.8495605768936874e-06, + "loss": 0.6754, + "step": 12692 + }, + { + "epoch": 0.6523280912735122, + "grad_norm": 1.109500527381897, + "learning_rate": 2.848809258513977e-06, + "loss": 0.704, + "step": 12693 + }, + { + "epoch": 0.6523794840168569, + "grad_norm": 0.7294967770576477, + "learning_rate": 2.8480579997348833e-06, + "loss": 0.6779, + "step": 12694 + }, + { + "epoch": 0.6524308767602015, + "grad_norm": 1.0306261777877808, + "learning_rate": 2.847306800577224e-06, + "loss": 0.659, + "step": 12695 + }, + { + "epoch": 0.6524822695035462, + "grad_norm": 0.763022243976593, + "learning_rate": 2.8465556610618097e-06, + "loss": 0.6796, + "step": 12696 + }, + { + "epoch": 0.6525336622468907, + "grad_norm": 1.0255707502365112, + "learning_rate": 2.845804581209451e-06, + "loss": 0.7457, + "step": 12697 + }, + { + "epoch": 0.6525850549902353, + "grad_norm": 1.1152048110961914, + "learning_rate": 2.845053561040956e-06, + "loss": 0.7014, + "step": 12698 + }, + { + "epoch": 0.65263644773358, + "grad_norm": 1.005722999572754, + "learning_rate": 2.8443026005771345e-06, + "loss": 0.6791, + "step": 12699 + }, + { + "epoch": 0.6526878404769246, + "grad_norm": 1.0054409503936768, + "learning_rate": 2.843551699838793e-06, + "loss": 0.6602, + "step": 12700 + }, + { + "epoch": 0.6527392332202693, + "grad_norm": 1.078627586364746, + "learning_rate": 2.842800858846735e-06, + "loss": 0.755, + "step": 12701 + }, + { + "epoch": 0.6527906259636139, + "grad_norm": 1.286373257637024, + "learning_rate": 2.8420500776217602e-06, + "loss": 0.6785, + "step": 12702 + }, + { + "epoch": 0.6528420187069586, + "grad_norm": 1.0865894556045532, + "learning_rate": 2.8412993561846748e-06, + "loss": 0.7048, + "step": 12703 + }, + { + "epoch": 0.6528934114503032, + "grad_norm": 1.0086597204208374, + "learning_rate": 2.8405486945562753e-06, + "loss": 0.6798, + "step": 12704 + }, + { + "epoch": 0.6529448041936479, + "grad_norm": 1.008651852607727, + "learning_rate": 2.8397980927573586e-06, + "loss": 0.7053, + "step": 12705 + }, + { + "epoch": 0.6529961969369925, + "grad_norm": 1.0774078369140625, + "learning_rate": 2.839047550808726e-06, + "loss": 0.6775, + "step": 12706 + }, + { + "epoch": 0.6530475896803372, + "grad_norm": 1.0694278478622437, + "learning_rate": 2.838297068731164e-06, + "loss": 0.7183, + "step": 12707 + }, + { + "epoch": 0.6530989824236818, + "grad_norm": 1.178061842918396, + "learning_rate": 2.8375466465454714e-06, + "loss": 0.8103, + "step": 12708 + }, + { + "epoch": 0.6531503751670265, + "grad_norm": 1.1364883184432983, + "learning_rate": 2.8367962842724373e-06, + "loss": 0.7279, + "step": 12709 + }, + { + "epoch": 0.6532017679103711, + "grad_norm": 0.8000428080558777, + "learning_rate": 2.8360459819328512e-06, + "loss": 0.6941, + "step": 12710 + }, + { + "epoch": 0.6532531606537157, + "grad_norm": 0.7019063234329224, + "learning_rate": 2.835295739547499e-06, + "loss": 0.6607, + "step": 12711 + }, + { + "epoch": 0.6533045533970603, + "grad_norm": 1.083438515663147, + "learning_rate": 2.8345455571371703e-06, + "loss": 0.6721, + "step": 12712 + }, + { + "epoch": 0.6533559461404049, + "grad_norm": 0.7948020696640015, + "learning_rate": 2.8337954347226483e-06, + "loss": 0.6524, + "step": 12713 + }, + { + "epoch": 0.6534073388837496, + "grad_norm": 1.0776002407073975, + "learning_rate": 2.833045372324713e-06, + "loss": 0.6914, + "step": 12714 + }, + { + "epoch": 0.6534587316270942, + "grad_norm": 1.107102394104004, + "learning_rate": 2.8322953699641497e-06, + "loss": 0.7202, + "step": 12715 + }, + { + "epoch": 0.6535101243704389, + "grad_norm": 1.0733683109283447, + "learning_rate": 2.831545427661737e-06, + "loss": 0.7222, + "step": 12716 + }, + { + "epoch": 0.6535615171137835, + "grad_norm": 1.1022833585739136, + "learning_rate": 2.830795545438251e-06, + "loss": 0.6851, + "step": 12717 + }, + { + "epoch": 0.6536129098571282, + "grad_norm": 1.0910674333572388, + "learning_rate": 2.8300457233144673e-06, + "loss": 0.6692, + "step": 12718 + }, + { + "epoch": 0.6536643026004728, + "grad_norm": 0.7024424076080322, + "learning_rate": 2.829295961311165e-06, + "loss": 0.6834, + "step": 12719 + }, + { + "epoch": 0.6537156953438175, + "grad_norm": 1.022907018661499, + "learning_rate": 2.82854625944911e-06, + "loss": 0.6712, + "step": 12720 + }, + { + "epoch": 0.6537670880871621, + "grad_norm": 1.0808684825897217, + "learning_rate": 2.827796617749079e-06, + "loss": 0.7098, + "step": 12721 + }, + { + "epoch": 0.6538184808305068, + "grad_norm": 0.7135031223297119, + "learning_rate": 2.8270470362318403e-06, + "loss": 0.6199, + "step": 12722 + }, + { + "epoch": 0.6538698735738514, + "grad_norm": 1.0814285278320312, + "learning_rate": 2.8262975149181583e-06, + "loss": 0.6846, + "step": 12723 + }, + { + "epoch": 0.653921266317196, + "grad_norm": 1.1053740978240967, + "learning_rate": 2.825548053828804e-06, + "loss": 0.704, + "step": 12724 + }, + { + "epoch": 0.6539726590605407, + "grad_norm": 0.8136286735534668, + "learning_rate": 2.8247986529845397e-06, + "loss": 0.6946, + "step": 12725 + }, + { + "epoch": 0.6540240518038853, + "grad_norm": 1.059436559677124, + "learning_rate": 2.8240493124061274e-06, + "loss": 0.6908, + "step": 12726 + }, + { + "epoch": 0.6540754445472299, + "grad_norm": 1.0118805170059204, + "learning_rate": 2.8233000321143277e-06, + "loss": 0.6703, + "step": 12727 + }, + { + "epoch": 0.6541268372905745, + "grad_norm": 1.0381046533584595, + "learning_rate": 2.8225508121299037e-06, + "loss": 0.7546, + "step": 12728 + }, + { + "epoch": 0.6541782300339192, + "grad_norm": 1.0622750520706177, + "learning_rate": 2.8218016524736103e-06, + "loss": 0.6797, + "step": 12729 + }, + { + "epoch": 0.6542296227772638, + "grad_norm": 0.7641800045967102, + "learning_rate": 2.821052553166205e-06, + "loss": 0.6752, + "step": 12730 + }, + { + "epoch": 0.6542810155206085, + "grad_norm": 1.0862160921096802, + "learning_rate": 2.8203035142284386e-06, + "loss": 0.7362, + "step": 12731 + }, + { + "epoch": 0.6543324082639531, + "grad_norm": 1.108565092086792, + "learning_rate": 2.8195545356810696e-06, + "loss": 0.7283, + "step": 12732 + }, + { + "epoch": 0.6543838010072978, + "grad_norm": 1.0464057922363281, + "learning_rate": 2.8188056175448454e-06, + "loss": 0.6863, + "step": 12733 + }, + { + "epoch": 0.6544351937506424, + "grad_norm": 1.1162748336791992, + "learning_rate": 2.818056759840515e-06, + "loss": 0.7499, + "step": 12734 + }, + { + "epoch": 0.654486586493987, + "grad_norm": 1.0928720235824585, + "learning_rate": 2.817307962588831e-06, + "loss": 0.7409, + "step": 12735 + }, + { + "epoch": 0.6545379792373317, + "grad_norm": 1.075702428817749, + "learning_rate": 2.8165592258105322e-06, + "loss": 0.6874, + "step": 12736 + }, + { + "epoch": 0.6545893719806763, + "grad_norm": 1.0422711372375488, + "learning_rate": 2.815810549526368e-06, + "loss": 0.7177, + "step": 12737 + }, + { + "epoch": 0.654640764724021, + "grad_norm": 1.2073267698287964, + "learning_rate": 2.8150619337570804e-06, + "loss": 0.7258, + "step": 12738 + }, + { + "epoch": 0.6546921574673656, + "grad_norm": 0.8831818103790283, + "learning_rate": 2.81431337852341e-06, + "loss": 0.6535, + "step": 12739 + }, + { + "epoch": 0.6547435502107103, + "grad_norm": 1.0626106262207031, + "learning_rate": 2.8135648838460943e-06, + "loss": 0.7188, + "step": 12740 + }, + { + "epoch": 0.6547949429540549, + "grad_norm": 0.7242793440818787, + "learning_rate": 2.812816449745874e-06, + "loss": 0.6719, + "step": 12741 + }, + { + "epoch": 0.6548463356973995, + "grad_norm": 1.0402781963348389, + "learning_rate": 2.812068076243485e-06, + "loss": 0.6684, + "step": 12742 + }, + { + "epoch": 0.6548977284407441, + "grad_norm": 1.0340954065322876, + "learning_rate": 2.8113197633596577e-06, + "loss": 0.7316, + "step": 12743 + }, + { + "epoch": 0.6549491211840888, + "grad_norm": 0.9980387091636658, + "learning_rate": 2.81057151111513e-06, + "loss": 0.6872, + "step": 12744 + }, + { + "epoch": 0.6550005139274334, + "grad_norm": 1.1070702075958252, + "learning_rate": 2.8098233195306303e-06, + "loss": 0.7241, + "step": 12745 + }, + { + "epoch": 0.6550519066707781, + "grad_norm": 1.0863522291183472, + "learning_rate": 2.809075188626888e-06, + "loss": 0.7213, + "step": 12746 + }, + { + "epoch": 0.6551032994141227, + "grad_norm": 1.1725856065750122, + "learning_rate": 2.8083271184246292e-06, + "loss": 0.6979, + "step": 12747 + }, + { + "epoch": 0.6551546921574674, + "grad_norm": 1.0646820068359375, + "learning_rate": 2.807579108944585e-06, + "loss": 0.7227, + "step": 12748 + }, + { + "epoch": 0.655206084900812, + "grad_norm": 1.0303170680999756, + "learning_rate": 2.806831160207472e-06, + "loss": 0.7246, + "step": 12749 + }, + { + "epoch": 0.6552574776441566, + "grad_norm": 1.069653034210205, + "learning_rate": 2.806083272234019e-06, + "loss": 0.7021, + "step": 12750 + }, + { + "epoch": 0.6553088703875013, + "grad_norm": 1.1190775632858276, + "learning_rate": 2.8053354450449455e-06, + "loss": 0.747, + "step": 12751 + }, + { + "epoch": 0.6553602631308459, + "grad_norm": 0.773638129234314, + "learning_rate": 2.8045876786609692e-06, + "loss": 0.6481, + "step": 12752 + }, + { + "epoch": 0.6554116558741906, + "grad_norm": 1.0609550476074219, + "learning_rate": 2.8038399731028067e-06, + "loss": 0.7028, + "step": 12753 + }, + { + "epoch": 0.6554630486175352, + "grad_norm": 1.0849156379699707, + "learning_rate": 2.803092328391177e-06, + "loss": 0.7044, + "step": 12754 + }, + { + "epoch": 0.6555144413608799, + "grad_norm": 0.9928613901138306, + "learning_rate": 2.802344744546793e-06, + "loss": 0.6482, + "step": 12755 + }, + { + "epoch": 0.6555658341042245, + "grad_norm": 1.0639619827270508, + "learning_rate": 2.8015972215903654e-06, + "loss": 0.7092, + "step": 12756 + }, + { + "epoch": 0.6556172268475691, + "grad_norm": 1.1189416646957397, + "learning_rate": 2.8008497595426103e-06, + "loss": 0.6428, + "step": 12757 + }, + { + "epoch": 0.6556686195909137, + "grad_norm": 1.0047690868377686, + "learning_rate": 2.8001023584242292e-06, + "loss": 0.7003, + "step": 12758 + }, + { + "epoch": 0.6557200123342584, + "grad_norm": 1.0537421703338623, + "learning_rate": 2.7993550182559347e-06, + "loss": 0.7732, + "step": 12759 + }, + { + "epoch": 0.655771405077603, + "grad_norm": 1.0244888067245483, + "learning_rate": 2.79860773905843e-06, + "loss": 0.6736, + "step": 12760 + }, + { + "epoch": 0.6558227978209477, + "grad_norm": 1.082221508026123, + "learning_rate": 2.7978605208524233e-06, + "loss": 0.7416, + "step": 12761 + }, + { + "epoch": 0.6558741905642923, + "grad_norm": 1.081788182258606, + "learning_rate": 2.7971133636586106e-06, + "loss": 0.6589, + "step": 12762 + }, + { + "epoch": 0.655925583307637, + "grad_norm": 1.1062562465667725, + "learning_rate": 2.7963662674976977e-06, + "loss": 0.7442, + "step": 12763 + }, + { + "epoch": 0.6559769760509816, + "grad_norm": 0.6969239711761475, + "learning_rate": 2.7956192323903812e-06, + "loss": 0.706, + "step": 12764 + }, + { + "epoch": 0.6560283687943262, + "grad_norm": 0.6793100237846375, + "learning_rate": 2.7948722583573573e-06, + "loss": 0.6496, + "step": 12765 + }, + { + "epoch": 0.6560797615376709, + "grad_norm": 1.0185930728912354, + "learning_rate": 2.794125345419325e-06, + "loss": 0.7091, + "step": 12766 + }, + { + "epoch": 0.6561311542810155, + "grad_norm": 0.7710469365119934, + "learning_rate": 2.793378493596976e-06, + "loss": 0.6798, + "step": 12767 + }, + { + "epoch": 0.6561825470243602, + "grad_norm": 1.1846907138824463, + "learning_rate": 2.7926317029110027e-06, + "loss": 0.7357, + "step": 12768 + }, + { + "epoch": 0.6562339397677048, + "grad_norm": 1.0201274156570435, + "learning_rate": 2.791884973382094e-06, + "loss": 0.6732, + "step": 12769 + }, + { + "epoch": 0.6562853325110495, + "grad_norm": 1.0298786163330078, + "learning_rate": 2.7911383050309444e-06, + "loss": 0.6943, + "step": 12770 + }, + { + "epoch": 0.6563367252543941, + "grad_norm": 1.082228422164917, + "learning_rate": 2.7903916978782332e-06, + "loss": 0.7015, + "step": 12771 + }, + { + "epoch": 0.6563881179977388, + "grad_norm": 1.0424376726150513, + "learning_rate": 2.7896451519446505e-06, + "loss": 0.7179, + "step": 12772 + }, + { + "epoch": 0.6564395107410833, + "grad_norm": 1.0396262407302856, + "learning_rate": 2.78889866725088e-06, + "loss": 0.7077, + "step": 12773 + }, + { + "epoch": 0.656490903484428, + "grad_norm": 1.1299337148666382, + "learning_rate": 2.7881522438176e-06, + "loss": 0.6985, + "step": 12774 + }, + { + "epoch": 0.6565422962277726, + "grad_norm": 1.1063848733901978, + "learning_rate": 2.787405881665496e-06, + "loss": 0.7178, + "step": 12775 + }, + { + "epoch": 0.6565936889711173, + "grad_norm": 1.081787347793579, + "learning_rate": 2.786659580815242e-06, + "loss": 0.7184, + "step": 12776 + }, + { + "epoch": 0.6566450817144619, + "grad_norm": 1.0519335269927979, + "learning_rate": 2.785913341287522e-06, + "loss": 0.7293, + "step": 12777 + }, + { + "epoch": 0.6566964744578065, + "grad_norm": 1.0808764696121216, + "learning_rate": 2.785167163103001e-06, + "loss": 0.6821, + "step": 12778 + }, + { + "epoch": 0.6567478672011512, + "grad_norm": 1.0175098180770874, + "learning_rate": 2.784421046282361e-06, + "loss": 0.6812, + "step": 12779 + }, + { + "epoch": 0.6567992599444958, + "grad_norm": 1.0509356260299683, + "learning_rate": 2.783674990846269e-06, + "loss": 0.7871, + "step": 12780 + }, + { + "epoch": 0.6568506526878405, + "grad_norm": 1.1779853105545044, + "learning_rate": 2.782928996815398e-06, + "loss": 0.6582, + "step": 12781 + }, + { + "epoch": 0.6569020454311851, + "grad_norm": 1.0136502981185913, + "learning_rate": 2.782183064210413e-06, + "loss": 0.6787, + "step": 12782 + }, + { + "epoch": 0.6569534381745298, + "grad_norm": 1.0517176389694214, + "learning_rate": 2.7814371930519844e-06, + "loss": 0.7277, + "step": 12783 + }, + { + "epoch": 0.6570048309178744, + "grad_norm": 0.7343508005142212, + "learning_rate": 2.780691383360777e-06, + "loss": 0.6633, + "step": 12784 + }, + { + "epoch": 0.6570562236612191, + "grad_norm": 1.0693659782409668, + "learning_rate": 2.7799456351574493e-06, + "loss": 0.6577, + "step": 12785 + }, + { + "epoch": 0.6571076164045637, + "grad_norm": 1.0741453170776367, + "learning_rate": 2.779199948462672e-06, + "loss": 0.7059, + "step": 12786 + }, + { + "epoch": 0.6571590091479084, + "grad_norm": 1.0592615604400635, + "learning_rate": 2.778454323297094e-06, + "loss": 0.7045, + "step": 12787 + }, + { + "epoch": 0.6572104018912529, + "grad_norm": 1.047134518623352, + "learning_rate": 2.777708759681382e-06, + "loss": 0.7427, + "step": 12788 + }, + { + "epoch": 0.6572617946345976, + "grad_norm": 1.1295169591903687, + "learning_rate": 2.776963257636188e-06, + "loss": 0.7152, + "step": 12789 + }, + { + "epoch": 0.6573131873779422, + "grad_norm": 0.6891986131668091, + "learning_rate": 2.7762178171821696e-06, + "loss": 0.6668, + "step": 12790 + }, + { + "epoch": 0.6573645801212868, + "grad_norm": 1.05350923538208, + "learning_rate": 2.7754724383399763e-06, + "loss": 0.6845, + "step": 12791 + }, + { + "epoch": 0.6574159728646315, + "grad_norm": 1.0742684602737427, + "learning_rate": 2.774727121130263e-06, + "loss": 0.7061, + "step": 12792 + }, + { + "epoch": 0.6574673656079761, + "grad_norm": 1.1062641143798828, + "learning_rate": 2.7739818655736783e-06, + "loss": 0.718, + "step": 12793 + }, + { + "epoch": 0.6575187583513208, + "grad_norm": 1.099281668663025, + "learning_rate": 2.773236671690868e-06, + "loss": 0.692, + "step": 12794 + }, + { + "epoch": 0.6575701510946654, + "grad_norm": 1.0931957960128784, + "learning_rate": 2.772491539502482e-06, + "loss": 0.7272, + "step": 12795 + }, + { + "epoch": 0.6576215438380101, + "grad_norm": 1.0395638942718506, + "learning_rate": 2.7717464690291633e-06, + "loss": 0.6752, + "step": 12796 + }, + { + "epoch": 0.6576729365813547, + "grad_norm": 1.0390788316726685, + "learning_rate": 2.7710014602915547e-06, + "loss": 0.7019, + "step": 12797 + }, + { + "epoch": 0.6577243293246994, + "grad_norm": 1.090287208557129, + "learning_rate": 2.7702565133102945e-06, + "loss": 0.6396, + "step": 12798 + }, + { + "epoch": 0.657775722068044, + "grad_norm": 1.0715219974517822, + "learning_rate": 2.769511628106029e-06, + "loss": 0.7007, + "step": 12799 + }, + { + "epoch": 0.6578271148113887, + "grad_norm": 1.0754685401916504, + "learning_rate": 2.768766804699388e-06, + "loss": 0.7201, + "step": 12800 + }, + { + "epoch": 0.6578785075547333, + "grad_norm": 1.155867576599121, + "learning_rate": 2.7680220431110126e-06, + "loss": 0.7857, + "step": 12801 + }, + { + "epoch": 0.657929900298078, + "grad_norm": 1.3431057929992676, + "learning_rate": 2.767277343361535e-06, + "loss": 0.7104, + "step": 12802 + }, + { + "epoch": 0.6579812930414225, + "grad_norm": 1.015091061592102, + "learning_rate": 2.7665327054715895e-06, + "loss": 0.7154, + "step": 12803 + }, + { + "epoch": 0.6580326857847671, + "grad_norm": 1.0379616022109985, + "learning_rate": 2.7657881294618027e-06, + "loss": 0.6882, + "step": 12804 + }, + { + "epoch": 0.6580840785281118, + "grad_norm": 1.0949854850769043, + "learning_rate": 2.7650436153528093e-06, + "loss": 0.7467, + "step": 12805 + }, + { + "epoch": 0.6581354712714564, + "grad_norm": 1.1441411972045898, + "learning_rate": 2.764299163165235e-06, + "loss": 0.6855, + "step": 12806 + }, + { + "epoch": 0.6581868640148011, + "grad_norm": 1.1006847620010376, + "learning_rate": 2.7635547729197015e-06, + "loss": 0.7071, + "step": 12807 + }, + { + "epoch": 0.6582382567581457, + "grad_norm": 0.6870553493499756, + "learning_rate": 2.7628104446368386e-06, + "loss": 0.6185, + "step": 12808 + }, + { + "epoch": 0.6582896495014904, + "grad_norm": 1.0310174226760864, + "learning_rate": 2.7620661783372653e-06, + "loss": 0.7299, + "step": 12809 + }, + { + "epoch": 0.658341042244835, + "grad_norm": 1.0141171216964722, + "learning_rate": 2.761321974041603e-06, + "loss": 0.6382, + "step": 12810 + }, + { + "epoch": 0.6583924349881797, + "grad_norm": 1.0433390140533447, + "learning_rate": 2.760577831770469e-06, + "loss": 0.7513, + "step": 12811 + }, + { + "epoch": 0.6584438277315243, + "grad_norm": 1.0822899341583252, + "learning_rate": 2.759833751544485e-06, + "loss": 0.6576, + "step": 12812 + }, + { + "epoch": 0.658495220474869, + "grad_norm": 1.085644006729126, + "learning_rate": 2.7590897333842594e-06, + "loss": 0.7532, + "step": 12813 + }, + { + "epoch": 0.6585466132182136, + "grad_norm": 1.0528186559677124, + "learning_rate": 2.7583457773104116e-06, + "loss": 0.6937, + "step": 12814 + }, + { + "epoch": 0.6585980059615583, + "grad_norm": 1.1516897678375244, + "learning_rate": 2.7576018833435513e-06, + "loss": 0.6773, + "step": 12815 + }, + { + "epoch": 0.6586493987049029, + "grad_norm": 1.0743348598480225, + "learning_rate": 2.7568580515042876e-06, + "loss": 0.6823, + "step": 12816 + }, + { + "epoch": 0.6587007914482476, + "grad_norm": 1.0742267370224, + "learning_rate": 2.756114281813232e-06, + "loss": 0.7369, + "step": 12817 + }, + { + "epoch": 0.6587521841915921, + "grad_norm": 1.1002205610275269, + "learning_rate": 2.75537057429099e-06, + "loss": 0.7683, + "step": 12818 + }, + { + "epoch": 0.6588035769349367, + "grad_norm": 1.3030164241790771, + "learning_rate": 2.7546269289581655e-06, + "loss": 0.7255, + "step": 12819 + }, + { + "epoch": 0.6588549696782814, + "grad_norm": 0.8345321416854858, + "learning_rate": 2.7538833458353613e-06, + "loss": 0.6662, + "step": 12820 + }, + { + "epoch": 0.658906362421626, + "grad_norm": 1.0949599742889404, + "learning_rate": 2.753139824943182e-06, + "loss": 0.7624, + "step": 12821 + }, + { + "epoch": 0.6589577551649707, + "grad_norm": 0.7916919589042664, + "learning_rate": 2.7523963663022267e-06, + "loss": 0.6432, + "step": 12822 + }, + { + "epoch": 0.6590091479083153, + "grad_norm": 1.0783405303955078, + "learning_rate": 2.7516529699330917e-06, + "loss": 0.7531, + "step": 12823 + }, + { + "epoch": 0.65906054065166, + "grad_norm": 1.1259506940841675, + "learning_rate": 2.7509096358563737e-06, + "loss": 0.7125, + "step": 12824 + }, + { + "epoch": 0.6591119333950046, + "grad_norm": 1.025465965270996, + "learning_rate": 2.7501663640926695e-06, + "loss": 0.7083, + "step": 12825 + }, + { + "epoch": 0.6591633261383493, + "grad_norm": 1.1218931674957275, + "learning_rate": 2.749423154662571e-06, + "loss": 0.6896, + "step": 12826 + }, + { + "epoch": 0.6592147188816939, + "grad_norm": 1.0682748556137085, + "learning_rate": 2.748680007586667e-06, + "loss": 0.6971, + "step": 12827 + }, + { + "epoch": 0.6592661116250386, + "grad_norm": 1.0668566226959229, + "learning_rate": 2.747936922885554e-06, + "loss": 0.7245, + "step": 12828 + }, + { + "epoch": 0.6593175043683832, + "grad_norm": 1.0837361812591553, + "learning_rate": 2.747193900579811e-06, + "loss": 0.6987, + "step": 12829 + }, + { + "epoch": 0.6593688971117279, + "grad_norm": 1.0189871788024902, + "learning_rate": 2.7464509406900297e-06, + "loss": 0.6969, + "step": 12830 + }, + { + "epoch": 0.6594202898550725, + "grad_norm": 1.057712435722351, + "learning_rate": 2.7457080432367934e-06, + "loss": 0.6866, + "step": 12831 + }, + { + "epoch": 0.6594716825984172, + "grad_norm": 0.6882720589637756, + "learning_rate": 2.744965208240684e-06, + "loss": 0.6211, + "step": 12832 + }, + { + "epoch": 0.6595230753417617, + "grad_norm": 0.7165482640266418, + "learning_rate": 2.744222435722281e-06, + "loss": 0.6807, + "step": 12833 + }, + { + "epoch": 0.6595744680851063, + "grad_norm": 0.7354871034622192, + "learning_rate": 2.743479725702167e-06, + "loss": 0.6405, + "step": 12834 + }, + { + "epoch": 0.659625860828451, + "grad_norm": 1.1313343048095703, + "learning_rate": 2.7427370782009184e-06, + "loss": 0.7235, + "step": 12835 + }, + { + "epoch": 0.6596772535717956, + "grad_norm": 1.085094928741455, + "learning_rate": 2.7419944932391072e-06, + "loss": 0.7108, + "step": 12836 + }, + { + "epoch": 0.6597286463151403, + "grad_norm": 1.1693204641342163, + "learning_rate": 2.741251970837313e-06, + "loss": 0.6974, + "step": 12837 + }, + { + "epoch": 0.6597800390584849, + "grad_norm": 1.0934221744537354, + "learning_rate": 2.7405095110161052e-06, + "loss": 0.7361, + "step": 12838 + }, + { + "epoch": 0.6598314318018296, + "grad_norm": 1.0419096946716309, + "learning_rate": 2.7397671137960547e-06, + "loss": 0.7079, + "step": 12839 + }, + { + "epoch": 0.6598828245451742, + "grad_norm": 1.1381500959396362, + "learning_rate": 2.7390247791977277e-06, + "loss": 0.7339, + "step": 12840 + }, + { + "epoch": 0.6599342172885189, + "grad_norm": 1.132917881011963, + "learning_rate": 2.738282507241698e-06, + "loss": 0.7557, + "step": 12841 + }, + { + "epoch": 0.6599856100318635, + "grad_norm": 1.0529555082321167, + "learning_rate": 2.7375402979485223e-06, + "loss": 0.7413, + "step": 12842 + }, + { + "epoch": 0.6600370027752082, + "grad_norm": 1.0734312534332275, + "learning_rate": 2.73679815133877e-06, + "loss": 0.645, + "step": 12843 + }, + { + "epoch": 0.6600883955185528, + "grad_norm": 1.0571225881576538, + "learning_rate": 2.7360560674330015e-06, + "loss": 0.6877, + "step": 12844 + }, + { + "epoch": 0.6601397882618975, + "grad_norm": 1.1606024503707886, + "learning_rate": 2.735314046251775e-06, + "loss": 0.7497, + "step": 12845 + }, + { + "epoch": 0.6601911810052421, + "grad_norm": 0.8661072254180908, + "learning_rate": 2.7345720878156513e-06, + "loss": 0.6594, + "step": 12846 + }, + { + "epoch": 0.6602425737485867, + "grad_norm": 1.015209674835205, + "learning_rate": 2.733830192145187e-06, + "loss": 0.6661, + "step": 12847 + }, + { + "epoch": 0.6602939664919314, + "grad_norm": 0.7820641398429871, + "learning_rate": 2.7330883592609356e-06, + "loss": 0.6775, + "step": 12848 + }, + { + "epoch": 0.6603453592352759, + "grad_norm": 1.1234233379364014, + "learning_rate": 2.73234658918345e-06, + "loss": 0.6843, + "step": 12849 + }, + { + "epoch": 0.6603967519786206, + "grad_norm": 0.7217654585838318, + "learning_rate": 2.731604881933285e-06, + "loss": 0.6066, + "step": 12850 + }, + { + "epoch": 0.6604481447219652, + "grad_norm": 1.137237548828125, + "learning_rate": 2.7308632375309845e-06, + "loss": 0.7596, + "step": 12851 + }, + { + "epoch": 0.6604995374653099, + "grad_norm": 1.1154121160507202, + "learning_rate": 2.730121655997101e-06, + "loss": 0.7482, + "step": 12852 + }, + { + "epoch": 0.6605509302086545, + "grad_norm": 1.032662034034729, + "learning_rate": 2.7293801373521776e-06, + "loss": 0.715, + "step": 12853 + }, + { + "epoch": 0.6606023229519992, + "grad_norm": 1.0052317380905151, + "learning_rate": 2.728638681616764e-06, + "loss": 0.6993, + "step": 12854 + }, + { + "epoch": 0.6606537156953438, + "grad_norm": 0.6921722292900085, + "learning_rate": 2.7278972888113954e-06, + "loss": 0.6759, + "step": 12855 + }, + { + "epoch": 0.6607051084386885, + "grad_norm": 1.146413803100586, + "learning_rate": 2.7271559589566187e-06, + "loss": 0.7389, + "step": 12856 + }, + { + "epoch": 0.6607565011820331, + "grad_norm": 0.8863875269889832, + "learning_rate": 2.7264146920729704e-06, + "loss": 0.6774, + "step": 12857 + }, + { + "epoch": 0.6608078939253778, + "grad_norm": 1.112107515335083, + "learning_rate": 2.7256734881809864e-06, + "loss": 0.6846, + "step": 12858 + }, + { + "epoch": 0.6608592866687224, + "grad_norm": 1.153350830078125, + "learning_rate": 2.7249323473012067e-06, + "loss": 0.7165, + "step": 12859 + }, + { + "epoch": 0.660910679412067, + "grad_norm": 1.064603328704834, + "learning_rate": 2.7241912694541635e-06, + "loss": 0.6703, + "step": 12860 + }, + { + "epoch": 0.6609620721554117, + "grad_norm": 1.0802053213119507, + "learning_rate": 2.723450254660388e-06, + "loss": 0.7535, + "step": 12861 + }, + { + "epoch": 0.6610134648987563, + "grad_norm": 1.0695346593856812, + "learning_rate": 2.7227093029404093e-06, + "loss": 0.7583, + "step": 12862 + }, + { + "epoch": 0.661064857642101, + "grad_norm": 1.1594820022583008, + "learning_rate": 2.7219684143147616e-06, + "loss": 0.7265, + "step": 12863 + }, + { + "epoch": 0.6611162503854455, + "grad_norm": 1.1278560161590576, + "learning_rate": 2.7212275888039647e-06, + "loss": 0.6458, + "step": 12864 + }, + { + "epoch": 0.6611676431287902, + "grad_norm": 1.0376242399215698, + "learning_rate": 2.720486826428549e-06, + "loss": 0.6832, + "step": 12865 + }, + { + "epoch": 0.6612190358721348, + "grad_norm": 1.1174986362457275, + "learning_rate": 2.7197461272090364e-06, + "loss": 0.7007, + "step": 12866 + }, + { + "epoch": 0.6612704286154795, + "grad_norm": 1.0603545904159546, + "learning_rate": 2.7190054911659467e-06, + "loss": 0.703, + "step": 12867 + }, + { + "epoch": 0.6613218213588241, + "grad_norm": 1.0593544244766235, + "learning_rate": 2.718264918319804e-06, + "loss": 0.7296, + "step": 12868 + }, + { + "epoch": 0.6613732141021688, + "grad_norm": 1.0537619590759277, + "learning_rate": 2.7175244086911224e-06, + "loss": 0.7316, + "step": 12869 + }, + { + "epoch": 0.6614246068455134, + "grad_norm": 0.8461965322494507, + "learning_rate": 2.716783962300423e-06, + "loss": 0.6542, + "step": 12870 + }, + { + "epoch": 0.661475999588858, + "grad_norm": 1.0707175731658936, + "learning_rate": 2.716043579168215e-06, + "loss": 0.7165, + "step": 12871 + }, + { + "epoch": 0.6615273923322027, + "grad_norm": 1.0807058811187744, + "learning_rate": 2.715303259315016e-06, + "loss": 0.6933, + "step": 12872 + }, + { + "epoch": 0.6615787850755473, + "grad_norm": 1.0899757146835327, + "learning_rate": 2.7145630027613345e-06, + "loss": 0.7436, + "step": 12873 + }, + { + "epoch": 0.661630177818892, + "grad_norm": 1.1173241138458252, + "learning_rate": 2.7138228095276814e-06, + "loss": 0.6769, + "step": 12874 + }, + { + "epoch": 0.6616815705622366, + "grad_norm": 0.8813436627388, + "learning_rate": 2.713082679634561e-06, + "loss": 0.6524, + "step": 12875 + }, + { + "epoch": 0.6617329633055813, + "grad_norm": 1.043320894241333, + "learning_rate": 2.712342613102485e-06, + "loss": 0.7338, + "step": 12876 + }, + { + "epoch": 0.6617843560489259, + "grad_norm": 1.0370912551879883, + "learning_rate": 2.7116026099519545e-06, + "loss": 0.7598, + "step": 12877 + }, + { + "epoch": 0.6618357487922706, + "grad_norm": 1.0835531949996948, + "learning_rate": 2.7108626702034692e-06, + "loss": 0.6604, + "step": 12878 + }, + { + "epoch": 0.6618871415356151, + "grad_norm": 1.0562360286712646, + "learning_rate": 2.7101227938775372e-06, + "loss": 0.7276, + "step": 12879 + }, + { + "epoch": 0.6619385342789598, + "grad_norm": 1.0672177076339722, + "learning_rate": 2.7093829809946487e-06, + "loss": 0.6926, + "step": 12880 + }, + { + "epoch": 0.6619899270223044, + "grad_norm": 1.1066380739212036, + "learning_rate": 2.7086432315753065e-06, + "loss": 0.7237, + "step": 12881 + }, + { + "epoch": 0.6620413197656491, + "grad_norm": 1.0359033346176147, + "learning_rate": 2.707903545640005e-06, + "loss": 0.6986, + "step": 12882 + }, + { + "epoch": 0.6620927125089937, + "grad_norm": 1.0642324686050415, + "learning_rate": 2.7071639232092372e-06, + "loss": 0.7203, + "step": 12883 + }, + { + "epoch": 0.6621441052523384, + "grad_norm": 1.1208524703979492, + "learning_rate": 2.706424364303494e-06, + "loss": 0.7454, + "step": 12884 + }, + { + "epoch": 0.662195497995683, + "grad_norm": 1.046903133392334, + "learning_rate": 2.705684868943267e-06, + "loss": 0.7152, + "step": 12885 + }, + { + "epoch": 0.6622468907390276, + "grad_norm": 1.1236436367034912, + "learning_rate": 2.7049454371490453e-06, + "loss": 0.7415, + "step": 12886 + }, + { + "epoch": 0.6622982834823723, + "grad_norm": 1.056667447090149, + "learning_rate": 2.704206068941312e-06, + "loss": 0.7266, + "step": 12887 + }, + { + "epoch": 0.6623496762257169, + "grad_norm": 1.0895304679870605, + "learning_rate": 2.7034667643405565e-06, + "loss": 0.7088, + "step": 12888 + }, + { + "epoch": 0.6624010689690616, + "grad_norm": 1.0453078746795654, + "learning_rate": 2.7027275233672595e-06, + "loss": 0.6953, + "step": 12889 + }, + { + "epoch": 0.6624524617124062, + "grad_norm": 0.7355624437332153, + "learning_rate": 2.7019883460419027e-06, + "loss": 0.6284, + "step": 12890 + }, + { + "epoch": 0.6625038544557509, + "grad_norm": 1.0479023456573486, + "learning_rate": 2.7012492323849637e-06, + "loss": 0.7041, + "step": 12891 + }, + { + "epoch": 0.6625552471990955, + "grad_norm": 0.8359283208847046, + "learning_rate": 2.7005101824169254e-06, + "loss": 0.6644, + "step": 12892 + }, + { + "epoch": 0.6626066399424402, + "grad_norm": 1.0445494651794434, + "learning_rate": 2.699771196158257e-06, + "loss": 0.749, + "step": 12893 + }, + { + "epoch": 0.6626580326857847, + "grad_norm": 1.1334178447723389, + "learning_rate": 2.699032273629439e-06, + "loss": 0.7641, + "step": 12894 + }, + { + "epoch": 0.6627094254291294, + "grad_norm": 1.087429404258728, + "learning_rate": 2.69829341485094e-06, + "loss": 0.6796, + "step": 12895 + }, + { + "epoch": 0.662760818172474, + "grad_norm": 1.2374082803726196, + "learning_rate": 2.6975546198432307e-06, + "loss": 0.6819, + "step": 12896 + }, + { + "epoch": 0.6628122109158187, + "grad_norm": 1.1020188331604004, + "learning_rate": 2.6968158886267826e-06, + "loss": 0.714, + "step": 12897 + }, + { + "epoch": 0.6628636036591633, + "grad_norm": 1.0726667642593384, + "learning_rate": 2.696077221222062e-06, + "loss": 0.7025, + "step": 12898 + }, + { + "epoch": 0.662914996402508, + "grad_norm": 1.064894437789917, + "learning_rate": 2.6953386176495333e-06, + "loss": 0.7028, + "step": 12899 + }, + { + "epoch": 0.6629663891458526, + "grad_norm": 1.1138255596160889, + "learning_rate": 2.6946000779296595e-06, + "loss": 0.6948, + "step": 12900 + }, + { + "epoch": 0.6630177818891972, + "grad_norm": 1.0681095123291016, + "learning_rate": 2.6938616020829055e-06, + "loss": 0.7504, + "step": 12901 + }, + { + "epoch": 0.6630691746325419, + "grad_norm": 1.101899266242981, + "learning_rate": 2.6931231901297296e-06, + "loss": 0.6759, + "step": 12902 + }, + { + "epoch": 0.6631205673758865, + "grad_norm": 1.0485601425170898, + "learning_rate": 2.6923848420905894e-06, + "loss": 0.7164, + "step": 12903 + }, + { + "epoch": 0.6631719601192312, + "grad_norm": 1.1175611019134521, + "learning_rate": 2.6916465579859413e-06, + "loss": 0.6848, + "step": 12904 + }, + { + "epoch": 0.6632233528625758, + "grad_norm": 1.0747559070587158, + "learning_rate": 2.6909083378362443e-06, + "loss": 0.7115, + "step": 12905 + }, + { + "epoch": 0.6632747456059205, + "grad_norm": 1.0026954412460327, + "learning_rate": 2.690170181661945e-06, + "loss": 0.7054, + "step": 12906 + }, + { + "epoch": 0.6633261383492651, + "grad_norm": 1.0330870151519775, + "learning_rate": 2.6894320894835e-06, + "loss": 0.6807, + "step": 12907 + }, + { + "epoch": 0.6633775310926098, + "grad_norm": 0.8364273905754089, + "learning_rate": 2.688694061321355e-06, + "loss": 0.6631, + "step": 12908 + }, + { + "epoch": 0.6634289238359543, + "grad_norm": 0.7108138799667358, + "learning_rate": 2.6879560971959596e-06, + "loss": 0.6312, + "step": 12909 + }, + { + "epoch": 0.663480316579299, + "grad_norm": 0.9977409243583679, + "learning_rate": 2.68721819712776e-06, + "loss": 0.7048, + "step": 12910 + }, + { + "epoch": 0.6635317093226436, + "grad_norm": 1.1724025011062622, + "learning_rate": 2.6864803611372005e-06, + "loss": 0.7372, + "step": 12911 + }, + { + "epoch": 0.6635831020659883, + "grad_norm": 0.702157735824585, + "learning_rate": 2.685742589244722e-06, + "loss": 0.6002, + "step": 12912 + }, + { + "epoch": 0.6636344948093329, + "grad_norm": 0.8037451505661011, + "learning_rate": 2.685004881470765e-06, + "loss": 0.636, + "step": 12913 + }, + { + "epoch": 0.6636858875526775, + "grad_norm": 1.1100704669952393, + "learning_rate": 2.6842672378357724e-06, + "loss": 0.7237, + "step": 12914 + }, + { + "epoch": 0.6637372802960222, + "grad_norm": 1.0537354946136475, + "learning_rate": 2.683529658360175e-06, + "loss": 0.664, + "step": 12915 + }, + { + "epoch": 0.6637886730393668, + "grad_norm": 1.122029423713684, + "learning_rate": 2.6827921430644134e-06, + "loss": 0.7384, + "step": 12916 + }, + { + "epoch": 0.6638400657827115, + "grad_norm": 0.7626706957817078, + "learning_rate": 2.682054691968916e-06, + "loss": 0.6549, + "step": 12917 + }, + { + "epoch": 0.6638914585260561, + "grad_norm": 1.077371597290039, + "learning_rate": 2.68131730509412e-06, + "loss": 0.7114, + "step": 12918 + }, + { + "epoch": 0.6639428512694008, + "grad_norm": 1.0130869150161743, + "learning_rate": 2.680579982460453e-06, + "loss": 0.7152, + "step": 12919 + }, + { + "epoch": 0.6639942440127454, + "grad_norm": 0.9791831374168396, + "learning_rate": 2.6798427240883406e-06, + "loss": 0.6747, + "step": 12920 + }, + { + "epoch": 0.6640456367560901, + "grad_norm": 1.0818803310394287, + "learning_rate": 2.679105529998216e-06, + "loss": 0.7692, + "step": 12921 + }, + { + "epoch": 0.6640970294994347, + "grad_norm": 1.022172212600708, + "learning_rate": 2.6783684002104947e-06, + "loss": 0.7294, + "step": 12922 + }, + { + "epoch": 0.6641484222427794, + "grad_norm": 1.145811915397644, + "learning_rate": 2.6776313347456075e-06, + "loss": 0.7068, + "step": 12923 + }, + { + "epoch": 0.6641998149861239, + "grad_norm": 0.75337815284729, + "learning_rate": 2.6768943336239713e-06, + "loss": 0.6577, + "step": 12924 + }, + { + "epoch": 0.6642512077294686, + "grad_norm": 1.0113253593444824, + "learning_rate": 2.676157396866006e-06, + "loss": 0.6871, + "step": 12925 + }, + { + "epoch": 0.6643026004728132, + "grad_norm": 1.1185624599456787, + "learning_rate": 2.6754205244921276e-06, + "loss": 0.6686, + "step": 12926 + }, + { + "epoch": 0.6643539932161578, + "grad_norm": 1.1275732517242432, + "learning_rate": 2.674683716522756e-06, + "loss": 0.7195, + "step": 12927 + }, + { + "epoch": 0.6644053859595025, + "grad_norm": 0.7710570096969604, + "learning_rate": 2.673946972978302e-06, + "loss": 0.6069, + "step": 12928 + }, + { + "epoch": 0.6644567787028471, + "grad_norm": 1.1093297004699707, + "learning_rate": 2.6732102938791758e-06, + "loss": 0.7286, + "step": 12929 + }, + { + "epoch": 0.6645081714461918, + "grad_norm": 1.0068796873092651, + "learning_rate": 2.672473679245795e-06, + "loss": 0.6898, + "step": 12930 + }, + { + "epoch": 0.6645595641895364, + "grad_norm": 1.092211127281189, + "learning_rate": 2.6717371290985596e-06, + "loss": 0.6837, + "step": 12931 + }, + { + "epoch": 0.6646109569328811, + "grad_norm": 1.2151323556900024, + "learning_rate": 2.6710006434578817e-06, + "loss": 0.689, + "step": 12932 + }, + { + "epoch": 0.6646623496762257, + "grad_norm": 0.7107070088386536, + "learning_rate": 2.670264222344163e-06, + "loss": 0.6365, + "step": 12933 + }, + { + "epoch": 0.6647137424195704, + "grad_norm": 1.0214279890060425, + "learning_rate": 2.6695278657778123e-06, + "loss": 0.6662, + "step": 12934 + }, + { + "epoch": 0.664765135162915, + "grad_norm": 1.0154188871383667, + "learning_rate": 2.6687915737792237e-06, + "loss": 0.6869, + "step": 12935 + }, + { + "epoch": 0.6648165279062597, + "grad_norm": 1.0958317518234253, + "learning_rate": 2.668055346368802e-06, + "loss": 0.739, + "step": 12936 + }, + { + "epoch": 0.6648679206496043, + "grad_norm": 0.9915972948074341, + "learning_rate": 2.6673191835669434e-06, + "loss": 0.7194, + "step": 12937 + }, + { + "epoch": 0.664919313392949, + "grad_norm": 1.2342073917388916, + "learning_rate": 2.666583085394041e-06, + "loss": 0.7498, + "step": 12938 + }, + { + "epoch": 0.6649707061362936, + "grad_norm": 0.6850623488426208, + "learning_rate": 2.665847051870494e-06, + "loss": 0.6408, + "step": 12939 + }, + { + "epoch": 0.6650220988796381, + "grad_norm": 1.1085309982299805, + "learning_rate": 2.665111083016694e-06, + "loss": 0.7008, + "step": 12940 + }, + { + "epoch": 0.6650734916229828, + "grad_norm": 0.9892670512199402, + "learning_rate": 2.6643751788530293e-06, + "loss": 0.6826, + "step": 12941 + }, + { + "epoch": 0.6651248843663274, + "grad_norm": 1.1044886112213135, + "learning_rate": 2.663639339399887e-06, + "loss": 0.7445, + "step": 12942 + }, + { + "epoch": 0.6651762771096721, + "grad_norm": 1.0368824005126953, + "learning_rate": 2.6629035646776625e-06, + "loss": 0.6899, + "step": 12943 + }, + { + "epoch": 0.6652276698530167, + "grad_norm": 1.1664491891860962, + "learning_rate": 2.662167854706731e-06, + "loss": 0.7509, + "step": 12944 + }, + { + "epoch": 0.6652790625963614, + "grad_norm": 1.092483639717102, + "learning_rate": 2.6614322095074827e-06, + "loss": 0.6688, + "step": 12945 + }, + { + "epoch": 0.665330455339706, + "grad_norm": 1.0791107416152954, + "learning_rate": 2.6606966291002956e-06, + "loss": 0.7076, + "step": 12946 + }, + { + "epoch": 0.6653818480830507, + "grad_norm": 1.0799063444137573, + "learning_rate": 2.6599611135055527e-06, + "loss": 0.7059, + "step": 12947 + }, + { + "epoch": 0.6654332408263953, + "grad_norm": 1.0485109090805054, + "learning_rate": 2.659225662743631e-06, + "loss": 0.7363, + "step": 12948 + }, + { + "epoch": 0.66548463356974, + "grad_norm": 1.0942001342773438, + "learning_rate": 2.658490276834905e-06, + "loss": 0.7132, + "step": 12949 + }, + { + "epoch": 0.6655360263130846, + "grad_norm": 1.1476563215255737, + "learning_rate": 2.6577549557997515e-06, + "loss": 0.7211, + "step": 12950 + }, + { + "epoch": 0.6655874190564293, + "grad_norm": 0.9851348400115967, + "learning_rate": 2.6570196996585407e-06, + "loss": 0.6968, + "step": 12951 + }, + { + "epoch": 0.6656388117997739, + "grad_norm": 1.1379748582839966, + "learning_rate": 2.6562845084316467e-06, + "loss": 0.6957, + "step": 12952 + }, + { + "epoch": 0.6656902045431186, + "grad_norm": 0.843419075012207, + "learning_rate": 2.6555493821394373e-06, + "loss": 0.6851, + "step": 12953 + }, + { + "epoch": 0.6657415972864632, + "grad_norm": 1.04120934009552, + "learning_rate": 2.6548143208022794e-06, + "loss": 0.6963, + "step": 12954 + }, + { + "epoch": 0.6657929900298077, + "grad_norm": 1.0185667276382446, + "learning_rate": 2.6540793244405364e-06, + "loss": 0.667, + "step": 12955 + }, + { + "epoch": 0.6658443827731524, + "grad_norm": 1.038561224937439, + "learning_rate": 2.6533443930745788e-06, + "loss": 0.6686, + "step": 12956 + }, + { + "epoch": 0.665895775516497, + "grad_norm": 1.105364441871643, + "learning_rate": 2.6526095267247598e-06, + "loss": 0.7036, + "step": 12957 + }, + { + "epoch": 0.6659471682598417, + "grad_norm": 0.9806315302848816, + "learning_rate": 2.651874725411445e-06, + "loss": 0.6941, + "step": 12958 + }, + { + "epoch": 0.6659985610031863, + "grad_norm": 1.0594521760940552, + "learning_rate": 2.6511399891549927e-06, + "loss": 0.7132, + "step": 12959 + }, + { + "epoch": 0.666049953746531, + "grad_norm": 1.1371122598648071, + "learning_rate": 2.650405317975755e-06, + "loss": 0.7084, + "step": 12960 + }, + { + "epoch": 0.6661013464898756, + "grad_norm": 1.045655369758606, + "learning_rate": 2.6496707118940924e-06, + "loss": 0.7327, + "step": 12961 + }, + { + "epoch": 0.6661527392332203, + "grad_norm": 1.119173288345337, + "learning_rate": 2.648936170930353e-06, + "loss": 0.7237, + "step": 12962 + }, + { + "epoch": 0.6662041319765649, + "grad_norm": 0.920009195804596, + "learning_rate": 2.648201695104894e-06, + "loss": 0.6461, + "step": 12963 + }, + { + "epoch": 0.6662555247199096, + "grad_norm": 0.7543112635612488, + "learning_rate": 2.6474672844380566e-06, + "loss": 0.6639, + "step": 12964 + }, + { + "epoch": 0.6663069174632542, + "grad_norm": 1.097749948501587, + "learning_rate": 2.6467329389501946e-06, + "loss": 0.7405, + "step": 12965 + }, + { + "epoch": 0.6663583102065989, + "grad_norm": 1.0818537473678589, + "learning_rate": 2.6459986586616515e-06, + "loss": 0.7568, + "step": 12966 + }, + { + "epoch": 0.6664097029499435, + "grad_norm": 1.1216773986816406, + "learning_rate": 2.6452644435927712e-06, + "loss": 0.7066, + "step": 12967 + }, + { + "epoch": 0.6664610956932882, + "grad_norm": 1.1157472133636475, + "learning_rate": 2.644530293763893e-06, + "loss": 0.7251, + "step": 12968 + }, + { + "epoch": 0.6665124884366328, + "grad_norm": 1.1142723560333252, + "learning_rate": 2.6437962091953628e-06, + "loss": 0.7961, + "step": 12969 + }, + { + "epoch": 0.6665638811799773, + "grad_norm": 0.7631222009658813, + "learning_rate": 2.643062189907516e-06, + "loss": 0.6527, + "step": 12970 + }, + { + "epoch": 0.666615273923322, + "grad_norm": 1.0619889497756958, + "learning_rate": 2.6423282359206877e-06, + "loss": 0.689, + "step": 12971 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 1.1143907308578491, + "learning_rate": 2.6415943472552176e-06, + "loss": 0.7299, + "step": 12972 + }, + { + "epoch": 0.6667180594100113, + "grad_norm": 0.7048237919807434, + "learning_rate": 2.640860523931432e-06, + "loss": 0.6148, + "step": 12973 + }, + { + "epoch": 0.6667694521533559, + "grad_norm": 1.0469070672988892, + "learning_rate": 2.6401267659696684e-06, + "loss": 0.7182, + "step": 12974 + }, + { + "epoch": 0.6668208448967006, + "grad_norm": 0.7999299168586731, + "learning_rate": 2.6393930733902525e-06, + "loss": 0.6549, + "step": 12975 + }, + { + "epoch": 0.6668722376400452, + "grad_norm": 1.1045491695404053, + "learning_rate": 2.6386594462135128e-06, + "loss": 0.702, + "step": 12976 + }, + { + "epoch": 0.6669236303833899, + "grad_norm": 1.201661467552185, + "learning_rate": 2.6379258844597738e-06, + "loss": 0.7121, + "step": 12977 + }, + { + "epoch": 0.6669750231267345, + "grad_norm": 1.1330125331878662, + "learning_rate": 2.6371923881493627e-06, + "loss": 0.6542, + "step": 12978 + }, + { + "epoch": 0.6670264158700792, + "grad_norm": 1.0941286087036133, + "learning_rate": 2.6364589573026e-06, + "loss": 0.6682, + "step": 12979 + }, + { + "epoch": 0.6670778086134238, + "grad_norm": 1.0680410861968994, + "learning_rate": 2.6357255919398027e-06, + "loss": 0.6904, + "step": 12980 + }, + { + "epoch": 0.6671292013567685, + "grad_norm": 1.1758029460906982, + "learning_rate": 2.634992292081296e-06, + "loss": 0.7069, + "step": 12981 + }, + { + "epoch": 0.6671805941001131, + "grad_norm": 1.1242934465408325, + "learning_rate": 2.6342590577473926e-06, + "loss": 0.7487, + "step": 12982 + }, + { + "epoch": 0.6672319868434577, + "grad_norm": 1.089093804359436, + "learning_rate": 2.633525888958407e-06, + "loss": 0.6794, + "step": 12983 + }, + { + "epoch": 0.6672833795868024, + "grad_norm": 1.077668309211731, + "learning_rate": 2.632792785734652e-06, + "loss": 0.7132, + "step": 12984 + }, + { + "epoch": 0.6673347723301469, + "grad_norm": 1.1061662435531616, + "learning_rate": 2.632059748096444e-06, + "loss": 0.6843, + "step": 12985 + }, + { + "epoch": 0.6673861650734916, + "grad_norm": 0.7946038246154785, + "learning_rate": 2.6313267760640842e-06, + "loss": 0.6564, + "step": 12986 + }, + { + "epoch": 0.6674375578168362, + "grad_norm": 1.0307117700576782, + "learning_rate": 2.6305938696578866e-06, + "loss": 0.6981, + "step": 12987 + }, + { + "epoch": 0.6674889505601809, + "grad_norm": 1.066892147064209, + "learning_rate": 2.6298610288981553e-06, + "loss": 0.7817, + "step": 12988 + }, + { + "epoch": 0.6675403433035255, + "grad_norm": 1.0612431764602661, + "learning_rate": 2.629128253805191e-06, + "loss": 0.7093, + "step": 12989 + }, + { + "epoch": 0.6675917360468702, + "grad_norm": 1.1109445095062256, + "learning_rate": 2.6283955443993015e-06, + "loss": 0.7142, + "step": 12990 + }, + { + "epoch": 0.6676431287902148, + "grad_norm": 1.0170804262161255, + "learning_rate": 2.6276629007007838e-06, + "loss": 0.6319, + "step": 12991 + }, + { + "epoch": 0.6676945215335595, + "grad_norm": 1.1019597053527832, + "learning_rate": 2.626930322729938e-06, + "loss": 0.6879, + "step": 12992 + }, + { + "epoch": 0.6677459142769041, + "grad_norm": 1.0517843961715698, + "learning_rate": 2.6261978105070575e-06, + "loss": 0.6755, + "step": 12993 + }, + { + "epoch": 0.6677973070202488, + "grad_norm": 1.1824597120285034, + "learning_rate": 2.625465364052441e-06, + "loss": 0.7672, + "step": 12994 + }, + { + "epoch": 0.6678486997635934, + "grad_norm": 1.098812460899353, + "learning_rate": 2.6247329833863804e-06, + "loss": 0.7355, + "step": 12995 + }, + { + "epoch": 0.667900092506938, + "grad_norm": 0.7628695964813232, + "learning_rate": 2.624000668529167e-06, + "loss": 0.6582, + "step": 12996 + }, + { + "epoch": 0.6679514852502827, + "grad_norm": 0.7828240394592285, + "learning_rate": 2.6232684195010876e-06, + "loss": 0.6547, + "step": 12997 + }, + { + "epoch": 0.6680028779936273, + "grad_norm": 1.0493202209472656, + "learning_rate": 2.6225362363224346e-06, + "loss": 0.7351, + "step": 12998 + }, + { + "epoch": 0.668054270736972, + "grad_norm": 1.0406752824783325, + "learning_rate": 2.621804119013491e-06, + "loss": 0.728, + "step": 12999 + }, + { + "epoch": 0.6681056634803165, + "grad_norm": 0.8195496201515198, + "learning_rate": 2.6210720675945407e-06, + "loss": 0.6664, + "step": 13000 + }, + { + "epoch": 0.6681570562236612, + "grad_norm": 1.0710667371749878, + "learning_rate": 2.620340082085866e-06, + "loss": 0.718, + "step": 13001 + }, + { + "epoch": 0.6682084489670058, + "grad_norm": 1.0106840133666992, + "learning_rate": 2.6196081625077463e-06, + "loss": 0.6745, + "step": 13002 + }, + { + "epoch": 0.6682598417103505, + "grad_norm": 1.0486606359481812, + "learning_rate": 2.6188763088804624e-06, + "loss": 0.7013, + "step": 13003 + }, + { + "epoch": 0.6683112344536951, + "grad_norm": 1.022236704826355, + "learning_rate": 2.6181445212242897e-06, + "loss": 0.6632, + "step": 13004 + }, + { + "epoch": 0.6683626271970398, + "grad_norm": 0.8202311992645264, + "learning_rate": 2.6174127995595035e-06, + "loss": 0.6832, + "step": 13005 + }, + { + "epoch": 0.6684140199403844, + "grad_norm": 1.1213316917419434, + "learning_rate": 2.6166811439063733e-06, + "loss": 0.7157, + "step": 13006 + }, + { + "epoch": 0.668465412683729, + "grad_norm": 1.0338537693023682, + "learning_rate": 2.615949554285178e-06, + "loss": 0.7103, + "step": 13007 + }, + { + "epoch": 0.6685168054270737, + "grad_norm": 1.1118669509887695, + "learning_rate": 2.6152180307161777e-06, + "loss": 0.7517, + "step": 13008 + }, + { + "epoch": 0.6685681981704183, + "grad_norm": 1.1068660020828247, + "learning_rate": 2.6144865732196467e-06, + "loss": 0.72, + "step": 13009 + }, + { + "epoch": 0.668619590913763, + "grad_norm": 1.0240241289138794, + "learning_rate": 2.6137551818158467e-06, + "loss": 0.6949, + "step": 13010 + }, + { + "epoch": 0.6686709836571076, + "grad_norm": 1.0715107917785645, + "learning_rate": 2.6130238565250443e-06, + "loss": 0.659, + "step": 13011 + }, + { + "epoch": 0.6687223764004523, + "grad_norm": 1.0885446071624756, + "learning_rate": 2.6122925973675006e-06, + "loss": 0.7081, + "step": 13012 + }, + { + "epoch": 0.6687737691437969, + "grad_norm": 1.142139196395874, + "learning_rate": 2.611561404363474e-06, + "loss": 0.7213, + "step": 13013 + }, + { + "epoch": 0.6688251618871416, + "grad_norm": 1.0383515357971191, + "learning_rate": 2.6108302775332285e-06, + "loss": 0.6533, + "step": 13014 + }, + { + "epoch": 0.6688765546304862, + "grad_norm": 1.1004455089569092, + "learning_rate": 2.610099216897012e-06, + "loss": 0.7198, + "step": 13015 + }, + { + "epoch": 0.6689279473738308, + "grad_norm": 1.1366934776306152, + "learning_rate": 2.6093682224750856e-06, + "loss": 0.7104, + "step": 13016 + }, + { + "epoch": 0.6689793401171754, + "grad_norm": 1.07968270778656, + "learning_rate": 2.6086372942877e-06, + "loss": 0.7275, + "step": 13017 + }, + { + "epoch": 0.6690307328605201, + "grad_norm": 0.7909573316574097, + "learning_rate": 2.6079064323551072e-06, + "loss": 0.6762, + "step": 13018 + }, + { + "epoch": 0.6690821256038647, + "grad_norm": 1.092541217803955, + "learning_rate": 2.607175636697553e-06, + "loss": 0.7093, + "step": 13019 + }, + { + "epoch": 0.6691335183472094, + "grad_norm": 1.0228408575057983, + "learning_rate": 2.606444907335289e-06, + "loss": 0.6737, + "step": 13020 + }, + { + "epoch": 0.669184911090554, + "grad_norm": 1.0342910289764404, + "learning_rate": 2.60571424428856e-06, + "loss": 0.6593, + "step": 13021 + }, + { + "epoch": 0.6692363038338986, + "grad_norm": 1.0905957221984863, + "learning_rate": 2.604983647577606e-06, + "loss": 0.7159, + "step": 13022 + }, + { + "epoch": 0.6692876965772433, + "grad_norm": 1.0802081823349, + "learning_rate": 2.6042531172226755e-06, + "loss": 0.7369, + "step": 13023 + }, + { + "epoch": 0.6693390893205879, + "grad_norm": 1.055462121963501, + "learning_rate": 2.6035226532440006e-06, + "loss": 0.7044, + "step": 13024 + }, + { + "epoch": 0.6693904820639326, + "grad_norm": 0.8474434018135071, + "learning_rate": 2.6027922556618252e-06, + "loss": 0.7038, + "step": 13025 + }, + { + "epoch": 0.6694418748072772, + "grad_norm": 1.0953435897827148, + "learning_rate": 2.602061924496382e-06, + "loss": 0.7057, + "step": 13026 + }, + { + "epoch": 0.6694932675506219, + "grad_norm": 1.085310697555542, + "learning_rate": 2.6013316597679105e-06, + "loss": 0.6776, + "step": 13027 + }, + { + "epoch": 0.6695446602939665, + "grad_norm": 0.6719794273376465, + "learning_rate": 2.6006014614966364e-06, + "loss": 0.6349, + "step": 13028 + }, + { + "epoch": 0.6695960530373112, + "grad_norm": 1.0670769214630127, + "learning_rate": 2.5998713297027955e-06, + "loss": 0.7228, + "step": 13029 + }, + { + "epoch": 0.6696474457806558, + "grad_norm": 1.145293951034546, + "learning_rate": 2.599141264406616e-06, + "loss": 0.755, + "step": 13030 + }, + { + "epoch": 0.6696988385240004, + "grad_norm": 0.8037762641906738, + "learning_rate": 2.5984112656283223e-06, + "loss": 0.6194, + "step": 13031 + }, + { + "epoch": 0.669750231267345, + "grad_norm": 1.149341106414795, + "learning_rate": 2.5976813333881436e-06, + "loss": 0.7279, + "step": 13032 + }, + { + "epoch": 0.6698016240106897, + "grad_norm": 1.0252666473388672, + "learning_rate": 2.596951467706301e-06, + "loss": 0.6754, + "step": 13033 + }, + { + "epoch": 0.6698530167540343, + "grad_norm": 1.0942459106445312, + "learning_rate": 2.5962216686030172e-06, + "loss": 0.6908, + "step": 13034 + }, + { + "epoch": 0.669904409497379, + "grad_norm": 1.129592776298523, + "learning_rate": 2.5954919360985086e-06, + "loss": 0.7408, + "step": 13035 + }, + { + "epoch": 0.6699558022407236, + "grad_norm": 1.0698856115341187, + "learning_rate": 2.5947622702129994e-06, + "loss": 0.7293, + "step": 13036 + }, + { + "epoch": 0.6700071949840682, + "grad_norm": 1.0777108669281006, + "learning_rate": 2.5940326709666986e-06, + "loss": 0.6972, + "step": 13037 + }, + { + "epoch": 0.6700585877274129, + "grad_norm": 1.1078829765319824, + "learning_rate": 2.5933031383798248e-06, + "loss": 0.7244, + "step": 13038 + }, + { + "epoch": 0.6701099804707575, + "grad_norm": 1.1121273040771484, + "learning_rate": 2.59257367247259e-06, + "loss": 0.6602, + "step": 13039 + }, + { + "epoch": 0.6701613732141022, + "grad_norm": 1.0727003812789917, + "learning_rate": 2.5918442732652017e-06, + "loss": 0.7541, + "step": 13040 + }, + { + "epoch": 0.6702127659574468, + "grad_norm": 1.0119311809539795, + "learning_rate": 2.5911149407778723e-06, + "loss": 0.7204, + "step": 13041 + }, + { + "epoch": 0.6702641587007915, + "grad_norm": 1.0637989044189453, + "learning_rate": 2.590385675030805e-06, + "loss": 0.7006, + "step": 13042 + }, + { + "epoch": 0.6703155514441361, + "grad_norm": 1.087314248085022, + "learning_rate": 2.589656476044211e-06, + "loss": 0.7378, + "step": 13043 + }, + { + "epoch": 0.6703669441874808, + "grad_norm": 1.0874582529067993, + "learning_rate": 2.588927343838285e-06, + "loss": 0.7493, + "step": 13044 + }, + { + "epoch": 0.6704183369308254, + "grad_norm": 0.7219361066818237, + "learning_rate": 2.5881982784332344e-06, + "loss": 0.6477, + "step": 13045 + }, + { + "epoch": 0.67046972967417, + "grad_norm": 1.0843855142593384, + "learning_rate": 2.5874692798492567e-06, + "loss": 0.6362, + "step": 13046 + }, + { + "epoch": 0.6705211224175146, + "grad_norm": 1.0886350870132446, + "learning_rate": 2.586740348106549e-06, + "loss": 0.6998, + "step": 13047 + }, + { + "epoch": 0.6705725151608593, + "grad_norm": 1.1643319129943848, + "learning_rate": 2.586011483225306e-06, + "loss": 0.7174, + "step": 13048 + }, + { + "epoch": 0.6706239079042039, + "grad_norm": 0.7661274671554565, + "learning_rate": 2.585282685225724e-06, + "loss": 0.65, + "step": 13049 + }, + { + "epoch": 0.6706753006475485, + "grad_norm": 1.0566658973693848, + "learning_rate": 2.5845539541279946e-06, + "loss": 0.6845, + "step": 13050 + }, + { + "epoch": 0.6707266933908932, + "grad_norm": 1.0441278219223022, + "learning_rate": 2.583825289952307e-06, + "loss": 0.6807, + "step": 13051 + }, + { + "epoch": 0.6707780861342378, + "grad_norm": 1.0378021001815796, + "learning_rate": 2.5830966927188495e-06, + "loss": 0.7092, + "step": 13052 + }, + { + "epoch": 0.6708294788775825, + "grad_norm": 1.061447024345398, + "learning_rate": 2.582368162447807e-06, + "loss": 0.7138, + "step": 13053 + }, + { + "epoch": 0.6708808716209271, + "grad_norm": 1.0806057453155518, + "learning_rate": 2.5816396991593672e-06, + "loss": 0.7193, + "step": 13054 + }, + { + "epoch": 0.6709322643642718, + "grad_norm": 1.2929465770721436, + "learning_rate": 2.5809113028737097e-06, + "loss": 0.7552, + "step": 13055 + }, + { + "epoch": 0.6709836571076164, + "grad_norm": 1.0460340976715088, + "learning_rate": 2.5801829736110206e-06, + "loss": 0.7011, + "step": 13056 + }, + { + "epoch": 0.6710350498509611, + "grad_norm": 1.1173027753829956, + "learning_rate": 2.579454711391472e-06, + "loss": 0.6868, + "step": 13057 + }, + { + "epoch": 0.6710864425943057, + "grad_norm": 1.0581424236297607, + "learning_rate": 2.578726516235246e-06, + "loss": 0.6749, + "step": 13058 + }, + { + "epoch": 0.6711378353376504, + "grad_norm": 0.7346340417861938, + "learning_rate": 2.5779983881625158e-06, + "loss": 0.6523, + "step": 13059 + }, + { + "epoch": 0.671189228080995, + "grad_norm": 0.7397363185882568, + "learning_rate": 2.577270327193456e-06, + "loss": 0.6498, + "step": 13060 + }, + { + "epoch": 0.6712406208243396, + "grad_norm": 1.037415862083435, + "learning_rate": 2.5765423333482355e-06, + "loss": 0.7242, + "step": 13061 + }, + { + "epoch": 0.6712920135676842, + "grad_norm": 1.1445791721343994, + "learning_rate": 2.575814406647027e-06, + "loss": 0.7272, + "step": 13062 + }, + { + "epoch": 0.6713434063110288, + "grad_norm": 1.1003161668777466, + "learning_rate": 2.575086547109997e-06, + "loss": 0.7337, + "step": 13063 + }, + { + "epoch": 0.6713947990543735, + "grad_norm": 1.170903205871582, + "learning_rate": 2.5743587547573114e-06, + "loss": 0.6773, + "step": 13064 + }, + { + "epoch": 0.6714461917977181, + "grad_norm": 1.1652607917785645, + "learning_rate": 2.573631029609137e-06, + "loss": 0.6811, + "step": 13065 + }, + { + "epoch": 0.6714975845410628, + "grad_norm": 1.06890869140625, + "learning_rate": 2.5729033716856313e-06, + "loss": 0.6962, + "step": 13066 + }, + { + "epoch": 0.6715489772844074, + "grad_norm": 1.0035547018051147, + "learning_rate": 2.572175781006958e-06, + "loss": 0.6714, + "step": 13067 + }, + { + "epoch": 0.6716003700277521, + "grad_norm": 1.0934373140335083, + "learning_rate": 2.5714482575932755e-06, + "loss": 0.6975, + "step": 13068 + }, + { + "epoch": 0.6716517627710967, + "grad_norm": 1.0862419605255127, + "learning_rate": 2.570720801464739e-06, + "loss": 0.7528, + "step": 13069 + }, + { + "epoch": 0.6717031555144414, + "grad_norm": 1.1682851314544678, + "learning_rate": 2.5699934126415027e-06, + "loss": 0.7053, + "step": 13070 + }, + { + "epoch": 0.671754548257786, + "grad_norm": 1.2616493701934814, + "learning_rate": 2.569266091143723e-06, + "loss": 0.704, + "step": 13071 + }, + { + "epoch": 0.6718059410011307, + "grad_norm": 1.0981091260910034, + "learning_rate": 2.5685388369915477e-06, + "loss": 0.7537, + "step": 13072 + }, + { + "epoch": 0.6718573337444753, + "grad_norm": 0.7039056420326233, + "learning_rate": 2.5678116502051263e-06, + "loss": 0.6654, + "step": 13073 + }, + { + "epoch": 0.67190872648782, + "grad_norm": 1.1649174690246582, + "learning_rate": 2.567084530804609e-06, + "loss": 0.7219, + "step": 13074 + }, + { + "epoch": 0.6719601192311646, + "grad_norm": 0.7009938359260559, + "learning_rate": 2.5663574788101385e-06, + "loss": 0.6424, + "step": 13075 + }, + { + "epoch": 0.6720115119745091, + "grad_norm": 0.9895606637001038, + "learning_rate": 2.5656304942418596e-06, + "loss": 0.6761, + "step": 13076 + }, + { + "epoch": 0.6720629047178538, + "grad_norm": 1.0448269844055176, + "learning_rate": 2.5649035771199114e-06, + "loss": 0.6861, + "step": 13077 + }, + { + "epoch": 0.6721142974611984, + "grad_norm": 1.188977599143982, + "learning_rate": 2.5641767274644408e-06, + "loss": 0.75, + "step": 13078 + }, + { + "epoch": 0.6721656902045431, + "grad_norm": 1.0759861469268799, + "learning_rate": 2.5634499452955765e-06, + "loss": 0.7192, + "step": 13079 + }, + { + "epoch": 0.6722170829478877, + "grad_norm": 0.6729865670204163, + "learning_rate": 2.5627232306334615e-06, + "loss": 0.6795, + "step": 13080 + }, + { + "epoch": 0.6722684756912324, + "grad_norm": 1.1020084619522095, + "learning_rate": 2.5619965834982275e-06, + "loss": 0.6903, + "step": 13081 + }, + { + "epoch": 0.672319868434577, + "grad_norm": 1.0308765172958374, + "learning_rate": 2.5612700039100053e-06, + "loss": 0.7, + "step": 13082 + }, + { + "epoch": 0.6723712611779217, + "grad_norm": 1.1218749284744263, + "learning_rate": 2.5605434918889287e-06, + "loss": 0.6872, + "step": 13083 + }, + { + "epoch": 0.6724226539212663, + "grad_norm": 1.1067495346069336, + "learning_rate": 2.5598170474551254e-06, + "loss": 0.6765, + "step": 13084 + }, + { + "epoch": 0.672474046664611, + "grad_norm": 1.04864501953125, + "learning_rate": 2.5590906706287217e-06, + "loss": 0.6928, + "step": 13085 + }, + { + "epoch": 0.6725254394079556, + "grad_norm": 1.1000691652297974, + "learning_rate": 2.55836436142984e-06, + "loss": 0.6992, + "step": 13086 + }, + { + "epoch": 0.6725768321513003, + "grad_norm": 1.046045184135437, + "learning_rate": 2.5576381198786084e-06, + "loss": 0.75, + "step": 13087 + }, + { + "epoch": 0.6726282248946449, + "grad_norm": 1.0704320669174194, + "learning_rate": 2.556911945995145e-06, + "loss": 0.6569, + "step": 13088 + }, + { + "epoch": 0.6726796176379896, + "grad_norm": 1.063007116317749, + "learning_rate": 2.5561858397995696e-06, + "loss": 0.7299, + "step": 13089 + }, + { + "epoch": 0.6727310103813342, + "grad_norm": 1.0866018533706665, + "learning_rate": 2.555459801311998e-06, + "loss": 0.7866, + "step": 13090 + }, + { + "epoch": 0.6727824031246787, + "grad_norm": 1.0894092321395874, + "learning_rate": 2.5547338305525492e-06, + "loss": 0.7167, + "step": 13091 + }, + { + "epoch": 0.6728337958680234, + "grad_norm": 0.7166454195976257, + "learning_rate": 2.554007927541334e-06, + "loss": 0.6988, + "step": 13092 + }, + { + "epoch": 0.672885188611368, + "grad_norm": 1.141790747642517, + "learning_rate": 2.5532820922984648e-06, + "loss": 0.7166, + "step": 13093 + }, + { + "epoch": 0.6729365813547127, + "grad_norm": 1.0133863687515259, + "learning_rate": 2.5525563248440542e-06, + "loss": 0.6717, + "step": 13094 + }, + { + "epoch": 0.6729879740980573, + "grad_norm": 1.2195563316345215, + "learning_rate": 2.551830625198205e-06, + "loss": 0.782, + "step": 13095 + }, + { + "epoch": 0.673039366841402, + "grad_norm": 1.040109634399414, + "learning_rate": 2.5511049933810274e-06, + "loss": 0.6378, + "step": 13096 + }, + { + "epoch": 0.6730907595847466, + "grad_norm": 1.1011369228363037, + "learning_rate": 2.550379429412625e-06, + "loss": 0.7248, + "step": 13097 + }, + { + "epoch": 0.6731421523280913, + "grad_norm": 1.0901676416397095, + "learning_rate": 2.549653933313099e-06, + "loss": 0.6657, + "step": 13098 + }, + { + "epoch": 0.6731935450714359, + "grad_norm": 1.1082632541656494, + "learning_rate": 2.5489285051025492e-06, + "loss": 0.6533, + "step": 13099 + }, + { + "epoch": 0.6732449378147806, + "grad_norm": 1.2068206071853638, + "learning_rate": 2.5482031448010767e-06, + "loss": 0.7539, + "step": 13100 + }, + { + "epoch": 0.6732963305581252, + "grad_norm": 1.1151360273361206, + "learning_rate": 2.547477852428778e-06, + "loss": 0.7532, + "step": 13101 + }, + { + "epoch": 0.6733477233014699, + "grad_norm": 1.0475815534591675, + "learning_rate": 2.5467526280057463e-06, + "loss": 0.7231, + "step": 13102 + }, + { + "epoch": 0.6733991160448145, + "grad_norm": 1.0282297134399414, + "learning_rate": 2.5460274715520737e-06, + "loss": 0.6675, + "step": 13103 + }, + { + "epoch": 0.6734505087881592, + "grad_norm": 1.0747696161270142, + "learning_rate": 2.5453023830878554e-06, + "loss": 0.6627, + "step": 13104 + }, + { + "epoch": 0.6735019015315038, + "grad_norm": 1.0827078819274902, + "learning_rate": 2.544577362633177e-06, + "loss": 0.7425, + "step": 13105 + }, + { + "epoch": 0.6735532942748484, + "grad_norm": 1.057867169380188, + "learning_rate": 2.5438524102081256e-06, + "loss": 0.6886, + "step": 13106 + }, + { + "epoch": 0.673604687018193, + "grad_norm": 1.0974222421646118, + "learning_rate": 2.5431275258327927e-06, + "loss": 0.6921, + "step": 13107 + }, + { + "epoch": 0.6736560797615376, + "grad_norm": 0.7434971332550049, + "learning_rate": 2.5424027095272517e-06, + "loss": 0.6501, + "step": 13108 + }, + { + "epoch": 0.6737074725048823, + "grad_norm": 0.9824162721633911, + "learning_rate": 2.5416779613115927e-06, + "loss": 0.6371, + "step": 13109 + }, + { + "epoch": 0.6737588652482269, + "grad_norm": 1.0667482614517212, + "learning_rate": 2.5409532812058923e-06, + "loss": 0.7152, + "step": 13110 + }, + { + "epoch": 0.6738102579915716, + "grad_norm": 1.1122701168060303, + "learning_rate": 2.540228669230228e-06, + "loss": 0.7292, + "step": 13111 + }, + { + "epoch": 0.6738616507349162, + "grad_norm": 1.0843881368637085, + "learning_rate": 2.5395041254046747e-06, + "loss": 0.7345, + "step": 13112 + }, + { + "epoch": 0.6739130434782609, + "grad_norm": 1.272737741470337, + "learning_rate": 2.538779649749311e-06, + "loss": 0.7153, + "step": 13113 + }, + { + "epoch": 0.6739644362216055, + "grad_norm": 1.0220656394958496, + "learning_rate": 2.5380552422842053e-06, + "loss": 0.6688, + "step": 13114 + }, + { + "epoch": 0.6740158289649502, + "grad_norm": 1.0114638805389404, + "learning_rate": 2.537330903029428e-06, + "loss": 0.6889, + "step": 13115 + }, + { + "epoch": 0.6740672217082948, + "grad_norm": 1.102752447128296, + "learning_rate": 2.536606632005052e-06, + "loss": 0.6953, + "step": 13116 + }, + { + "epoch": 0.6741186144516395, + "grad_norm": 1.0608450174331665, + "learning_rate": 2.535882429231136e-06, + "loss": 0.6504, + "step": 13117 + }, + { + "epoch": 0.6741700071949841, + "grad_norm": 1.0559003353118896, + "learning_rate": 2.535158294727752e-06, + "loss": 0.6457, + "step": 13118 + }, + { + "epoch": 0.6742213999383287, + "grad_norm": 1.0380640029907227, + "learning_rate": 2.534434228514957e-06, + "loss": 0.7546, + "step": 13119 + }, + { + "epoch": 0.6742727926816734, + "grad_norm": 1.0353333950042725, + "learning_rate": 2.533710230612819e-06, + "loss": 0.7126, + "step": 13120 + }, + { + "epoch": 0.674324185425018, + "grad_norm": 1.0319677591323853, + "learning_rate": 2.532986301041389e-06, + "loss": 0.7293, + "step": 13121 + }, + { + "epoch": 0.6743755781683626, + "grad_norm": 1.1223087310791016, + "learning_rate": 2.5322624398207297e-06, + "loss": 0.7445, + "step": 13122 + }, + { + "epoch": 0.6744269709117072, + "grad_norm": 1.0661731958389282, + "learning_rate": 2.531538646970894e-06, + "loss": 0.7617, + "step": 13123 + }, + { + "epoch": 0.6744783636550519, + "grad_norm": 1.0735751390457153, + "learning_rate": 2.5308149225119337e-06, + "loss": 0.6671, + "step": 13124 + }, + { + "epoch": 0.6745297563983965, + "grad_norm": 1.0273689031600952, + "learning_rate": 2.5300912664639047e-06, + "loss": 0.6753, + "step": 13125 + }, + { + "epoch": 0.6745811491417412, + "grad_norm": 0.8318812251091003, + "learning_rate": 2.529367678846854e-06, + "loss": 0.6313, + "step": 13126 + }, + { + "epoch": 0.6746325418850858, + "grad_norm": 0.7973877787590027, + "learning_rate": 2.5286441596808286e-06, + "loss": 0.6141, + "step": 13127 + }, + { + "epoch": 0.6746839346284305, + "grad_norm": 0.7554808259010315, + "learning_rate": 2.5279207089858736e-06, + "loss": 0.6792, + "step": 13128 + }, + { + "epoch": 0.6747353273717751, + "grad_norm": 1.1440098285675049, + "learning_rate": 2.5271973267820384e-06, + "loss": 0.7345, + "step": 13129 + }, + { + "epoch": 0.6747867201151198, + "grad_norm": 1.0106546878814697, + "learning_rate": 2.526474013089355e-06, + "loss": 0.697, + "step": 13130 + }, + { + "epoch": 0.6748381128584644, + "grad_norm": 1.041168451309204, + "learning_rate": 2.5257507679278723e-06, + "loss": 0.6767, + "step": 13131 + }, + { + "epoch": 0.674889505601809, + "grad_norm": 1.0405133962631226, + "learning_rate": 2.525027591317625e-06, + "loss": 0.6881, + "step": 13132 + }, + { + "epoch": 0.6749408983451537, + "grad_norm": 1.0461935997009277, + "learning_rate": 2.524304483278648e-06, + "loss": 0.6866, + "step": 13133 + }, + { + "epoch": 0.6749922910884983, + "grad_norm": 0.6814484596252441, + "learning_rate": 2.523581443830978e-06, + "loss": 0.6561, + "step": 13134 + }, + { + "epoch": 0.675043683831843, + "grad_norm": 1.0452812910079956, + "learning_rate": 2.5228584729946455e-06, + "loss": 0.6878, + "step": 13135 + }, + { + "epoch": 0.6750950765751876, + "grad_norm": 1.7889001369476318, + "learning_rate": 2.5221355707896855e-06, + "loss": 0.728, + "step": 13136 + }, + { + "epoch": 0.6751464693185322, + "grad_norm": 1.1480262279510498, + "learning_rate": 2.52141273723612e-06, + "loss": 0.744, + "step": 13137 + }, + { + "epoch": 0.6751978620618768, + "grad_norm": 1.0950225591659546, + "learning_rate": 2.520689972353981e-06, + "loss": 0.778, + "step": 13138 + }, + { + "epoch": 0.6752492548052215, + "grad_norm": 1.0076096057891846, + "learning_rate": 2.519967276163291e-06, + "loss": 0.6658, + "step": 13139 + }, + { + "epoch": 0.6753006475485661, + "grad_norm": 1.102768898010254, + "learning_rate": 2.5192446486840733e-06, + "loss": 0.7342, + "step": 13140 + }, + { + "epoch": 0.6753520402919108, + "grad_norm": 1.06729257106781, + "learning_rate": 2.518522089936347e-06, + "loss": 0.7352, + "step": 13141 + }, + { + "epoch": 0.6754034330352554, + "grad_norm": 1.1567792892456055, + "learning_rate": 2.517799599940135e-06, + "loss": 0.6884, + "step": 13142 + }, + { + "epoch": 0.6754548257786, + "grad_norm": 1.1541435718536377, + "learning_rate": 2.5170771787154523e-06, + "loss": 0.6953, + "step": 13143 + }, + { + "epoch": 0.6755062185219447, + "grad_norm": 0.9961956739425659, + "learning_rate": 2.5163548262823135e-06, + "loss": 0.7342, + "step": 13144 + }, + { + "epoch": 0.6755576112652893, + "grad_norm": 1.0637978315353394, + "learning_rate": 2.5156325426607363e-06, + "loss": 0.697, + "step": 13145 + }, + { + "epoch": 0.675609004008634, + "grad_norm": 0.7624397277832031, + "learning_rate": 2.5149103278707254e-06, + "loss": 0.6472, + "step": 13146 + }, + { + "epoch": 0.6756603967519786, + "grad_norm": 1.0833979845046997, + "learning_rate": 2.5141881819322955e-06, + "loss": 0.6561, + "step": 13147 + }, + { + "epoch": 0.6757117894953233, + "grad_norm": 1.073819875717163, + "learning_rate": 2.5134661048654534e-06, + "loss": 0.7123, + "step": 13148 + }, + { + "epoch": 0.6757631822386679, + "grad_norm": 0.8595784306526184, + "learning_rate": 2.512744096690204e-06, + "loss": 0.6467, + "step": 13149 + }, + { + "epoch": 0.6758145749820126, + "grad_norm": 0.7041749954223633, + "learning_rate": 2.512022157426549e-06, + "loss": 0.7005, + "step": 13150 + }, + { + "epoch": 0.6758659677253572, + "grad_norm": 0.7271613478660583, + "learning_rate": 2.5113002870944953e-06, + "loss": 0.6349, + "step": 13151 + }, + { + "epoch": 0.6759173604687018, + "grad_norm": 1.0554580688476562, + "learning_rate": 2.51057848571404e-06, + "loss": 0.6827, + "step": 13152 + }, + { + "epoch": 0.6759687532120464, + "grad_norm": 1.0741956233978271, + "learning_rate": 2.5098567533051813e-06, + "loss": 0.7252, + "step": 13153 + }, + { + "epoch": 0.6760201459553911, + "grad_norm": 0.8376405239105225, + "learning_rate": 2.509135089887914e-06, + "loss": 0.6402, + "step": 13154 + }, + { + "epoch": 0.6760715386987357, + "grad_norm": 1.0455571413040161, + "learning_rate": 2.508413495482237e-06, + "loss": 0.662, + "step": 13155 + }, + { + "epoch": 0.6761229314420804, + "grad_norm": 1.0987800359725952, + "learning_rate": 2.5076919701081395e-06, + "loss": 0.7186, + "step": 13156 + }, + { + "epoch": 0.676174324185425, + "grad_norm": 0.8191101551055908, + "learning_rate": 2.5069705137856095e-06, + "loss": 0.6314, + "step": 13157 + }, + { + "epoch": 0.6762257169287696, + "grad_norm": 1.1435883045196533, + "learning_rate": 2.506249126534643e-06, + "loss": 0.7153, + "step": 13158 + }, + { + "epoch": 0.6762771096721143, + "grad_norm": 0.7505073547363281, + "learning_rate": 2.505527808375218e-06, + "loss": 0.6463, + "step": 13159 + }, + { + "epoch": 0.6763285024154589, + "grad_norm": 1.098889708518982, + "learning_rate": 2.504806559327325e-06, + "loss": 0.72, + "step": 13160 + }, + { + "epoch": 0.6763798951588036, + "grad_norm": 1.1002869606018066, + "learning_rate": 2.504085379410946e-06, + "loss": 0.7659, + "step": 13161 + }, + { + "epoch": 0.6764312879021482, + "grad_norm": 0.8311879634857178, + "learning_rate": 2.503364268646061e-06, + "loss": 0.6268, + "step": 13162 + }, + { + "epoch": 0.6764826806454929, + "grad_norm": 1.0641425848007202, + "learning_rate": 2.5026432270526467e-06, + "loss": 0.6958, + "step": 13163 + }, + { + "epoch": 0.6765340733888375, + "grad_norm": 0.7869943976402283, + "learning_rate": 2.501922254650685e-06, + "loss": 0.6594, + "step": 13164 + }, + { + "epoch": 0.6765854661321822, + "grad_norm": 1.012304425239563, + "learning_rate": 2.501201351460148e-06, + "loss": 0.6975, + "step": 13165 + }, + { + "epoch": 0.6766368588755268, + "grad_norm": 1.1329584121704102, + "learning_rate": 2.500480517501008e-06, + "loss": 0.7034, + "step": 13166 + }, + { + "epoch": 0.6766882516188714, + "grad_norm": 1.1433192491531372, + "learning_rate": 2.49975975279324e-06, + "loss": 0.7134, + "step": 13167 + }, + { + "epoch": 0.676739644362216, + "grad_norm": 1.0830217599868774, + "learning_rate": 2.499039057356811e-06, + "loss": 0.752, + "step": 13168 + }, + { + "epoch": 0.6767910371055607, + "grad_norm": 1.05928373336792, + "learning_rate": 2.498318431211689e-06, + "loss": 0.7033, + "step": 13169 + }, + { + "epoch": 0.6768424298489053, + "grad_norm": 1.1782184839248657, + "learning_rate": 2.497597874377838e-06, + "loss": 0.7422, + "step": 13170 + }, + { + "epoch": 0.67689382259225, + "grad_norm": 1.0819706916809082, + "learning_rate": 2.4968773868752265e-06, + "loss": 0.6857, + "step": 13171 + }, + { + "epoch": 0.6769452153355946, + "grad_norm": 1.0625133514404297, + "learning_rate": 2.4961569687238092e-06, + "loss": 0.7047, + "step": 13172 + }, + { + "epoch": 0.6769966080789392, + "grad_norm": 1.0723130702972412, + "learning_rate": 2.4954366199435513e-06, + "loss": 0.7172, + "step": 13173 + }, + { + "epoch": 0.6770480008222839, + "grad_norm": 1.082820177078247, + "learning_rate": 2.4947163405544093e-06, + "loss": 0.7607, + "step": 13174 + }, + { + "epoch": 0.6770993935656285, + "grad_norm": 0.7482179403305054, + "learning_rate": 2.493996130576337e-06, + "loss": 0.6166, + "step": 13175 + }, + { + "epoch": 0.6771507863089732, + "grad_norm": 1.071678638458252, + "learning_rate": 2.4932759900292915e-06, + "loss": 0.6903, + "step": 13176 + }, + { + "epoch": 0.6772021790523178, + "grad_norm": 1.0330917835235596, + "learning_rate": 2.4925559189332237e-06, + "loss": 0.7554, + "step": 13177 + }, + { + "epoch": 0.6772535717956625, + "grad_norm": 1.0285574197769165, + "learning_rate": 2.4918359173080843e-06, + "loss": 0.6661, + "step": 13178 + }, + { + "epoch": 0.6773049645390071, + "grad_norm": 0.8143740296363831, + "learning_rate": 2.491115985173819e-06, + "loss": 0.6854, + "step": 13179 + }, + { + "epoch": 0.6773563572823518, + "grad_norm": 1.0581188201904297, + "learning_rate": 2.4903961225503775e-06, + "loss": 0.6696, + "step": 13180 + }, + { + "epoch": 0.6774077500256964, + "grad_norm": 1.0039702653884888, + "learning_rate": 2.4896763294577036e-06, + "loss": 0.6635, + "step": 13181 + }, + { + "epoch": 0.677459142769041, + "grad_norm": 0.7126507759094238, + "learning_rate": 2.488956605915739e-06, + "loss": 0.5948, + "step": 13182 + }, + { + "epoch": 0.6775105355123856, + "grad_norm": 1.0816986560821533, + "learning_rate": 2.4882369519444223e-06, + "loss": 0.7411, + "step": 13183 + }, + { + "epoch": 0.6775619282557303, + "grad_norm": 0.7402935028076172, + "learning_rate": 2.4875173675636965e-06, + "loss": 0.6537, + "step": 13184 + }, + { + "epoch": 0.6776133209990749, + "grad_norm": 1.0375361442565918, + "learning_rate": 2.4867978527934957e-06, + "loss": 0.7393, + "step": 13185 + }, + { + "epoch": 0.6776647137424195, + "grad_norm": 1.1466938257217407, + "learning_rate": 2.486078407653753e-06, + "loss": 0.7166, + "step": 13186 + }, + { + "epoch": 0.6777161064857642, + "grad_norm": 0.7103905081748962, + "learning_rate": 2.4853590321644074e-06, + "loss": 0.6382, + "step": 13187 + }, + { + "epoch": 0.6777674992291088, + "grad_norm": 1.08000910282135, + "learning_rate": 2.484639726345382e-06, + "loss": 0.7243, + "step": 13188 + }, + { + "epoch": 0.6778188919724535, + "grad_norm": 1.0228893756866455, + "learning_rate": 2.483920490216611e-06, + "loss": 0.7619, + "step": 13189 + }, + { + "epoch": 0.6778702847157981, + "grad_norm": 1.0764765739440918, + "learning_rate": 2.4832013237980203e-06, + "loss": 0.6977, + "step": 13190 + }, + { + "epoch": 0.6779216774591428, + "grad_norm": 1.0704166889190674, + "learning_rate": 2.4824822271095344e-06, + "loss": 0.6862, + "step": 13191 + }, + { + "epoch": 0.6779730702024874, + "grad_norm": 1.0351858139038086, + "learning_rate": 2.4817632001710753e-06, + "loss": 0.6875, + "step": 13192 + }, + { + "epoch": 0.6780244629458321, + "grad_norm": 0.7257068753242493, + "learning_rate": 2.481044243002567e-06, + "loss": 0.614, + "step": 13193 + }, + { + "epoch": 0.6780758556891767, + "grad_norm": 1.0680242776870728, + "learning_rate": 2.4803253556239282e-06, + "loss": 0.7121, + "step": 13194 + }, + { + "epoch": 0.6781272484325214, + "grad_norm": 1.045596718788147, + "learning_rate": 2.479606538055074e-06, + "loss": 0.7402, + "step": 13195 + }, + { + "epoch": 0.678178641175866, + "grad_norm": 1.0770820379257202, + "learning_rate": 2.478887790315923e-06, + "loss": 0.6673, + "step": 13196 + }, + { + "epoch": 0.6782300339192107, + "grad_norm": 1.0784111022949219, + "learning_rate": 2.4781691124263875e-06, + "loss": 0.6795, + "step": 13197 + }, + { + "epoch": 0.6782814266625552, + "grad_norm": 1.2641180753707886, + "learning_rate": 2.4774505044063785e-06, + "loss": 0.6842, + "step": 13198 + }, + { + "epoch": 0.6783328194058998, + "grad_norm": 1.0442169904708862, + "learning_rate": 2.476731966275805e-06, + "loss": 0.6943, + "step": 13199 + }, + { + "epoch": 0.6783842121492445, + "grad_norm": 1.047584891319275, + "learning_rate": 2.476013498054579e-06, + "loss": 0.7136, + "step": 13200 + }, + { + "epoch": 0.6784356048925891, + "grad_norm": 1.0777981281280518, + "learning_rate": 2.475295099762599e-06, + "loss": 0.74, + "step": 13201 + }, + { + "epoch": 0.6784869976359338, + "grad_norm": 1.1164733171463013, + "learning_rate": 2.4745767714197757e-06, + "loss": 0.7033, + "step": 13202 + }, + { + "epoch": 0.6785383903792784, + "grad_norm": 1.1021857261657715, + "learning_rate": 2.473858513046008e-06, + "loss": 0.7242, + "step": 13203 + }, + { + "epoch": 0.6785897831226231, + "grad_norm": 1.1564886569976807, + "learning_rate": 2.473140324661196e-06, + "loss": 0.718, + "step": 13204 + }, + { + "epoch": 0.6786411758659677, + "grad_norm": 0.8104077577590942, + "learning_rate": 2.4724222062852355e-06, + "loss": 0.6392, + "step": 13205 + }, + { + "epoch": 0.6786925686093124, + "grad_norm": 0.9967166185379028, + "learning_rate": 2.4717041579380273e-06, + "loss": 0.7127, + "step": 13206 + }, + { + "epoch": 0.678743961352657, + "grad_norm": 1.0755122900009155, + "learning_rate": 2.4709861796394634e-06, + "loss": 0.7394, + "step": 13207 + }, + { + "epoch": 0.6787953540960017, + "grad_norm": 0.7414858937263489, + "learning_rate": 2.470268271409434e-06, + "loss": 0.7221, + "step": 13208 + }, + { + "epoch": 0.6788467468393463, + "grad_norm": 1.034874677658081, + "learning_rate": 2.4695504332678345e-06, + "loss": 0.6585, + "step": 13209 + }, + { + "epoch": 0.678898139582691, + "grad_norm": 1.091726303100586, + "learning_rate": 2.468832665234546e-06, + "loss": 0.7072, + "step": 13210 + }, + { + "epoch": 0.6789495323260356, + "grad_norm": 0.8990628719329834, + "learning_rate": 2.468114967329462e-06, + "loss": 0.639, + "step": 13211 + }, + { + "epoch": 0.6790009250693803, + "grad_norm": 1.001253366470337, + "learning_rate": 2.4673973395724605e-06, + "loss": 0.7196, + "step": 13212 + }, + { + "epoch": 0.6790523178127248, + "grad_norm": 1.0703651905059814, + "learning_rate": 2.466679781983432e-06, + "loss": 0.7042, + "step": 13213 + }, + { + "epoch": 0.6791037105560694, + "grad_norm": 1.0038541555404663, + "learning_rate": 2.4659622945822483e-06, + "loss": 0.664, + "step": 13214 + }, + { + "epoch": 0.6791551032994141, + "grad_norm": 1.06258225440979, + "learning_rate": 2.465244877388794e-06, + "loss": 0.7247, + "step": 13215 + }, + { + "epoch": 0.6792064960427587, + "grad_norm": 0.6911278367042542, + "learning_rate": 2.464527530422943e-06, + "loss": 0.6154, + "step": 13216 + }, + { + "epoch": 0.6792578887861034, + "grad_norm": 0.7348666191101074, + "learning_rate": 2.46381025370457e-06, + "loss": 0.6736, + "step": 13217 + }, + { + "epoch": 0.679309281529448, + "grad_norm": 1.0497105121612549, + "learning_rate": 2.46309304725355e-06, + "loss": 0.6491, + "step": 13218 + }, + { + "epoch": 0.6793606742727927, + "grad_norm": 1.0711970329284668, + "learning_rate": 2.4623759110897528e-06, + "loss": 0.7084, + "step": 13219 + }, + { + "epoch": 0.6794120670161373, + "grad_norm": 1.1374893188476562, + "learning_rate": 2.4616588452330465e-06, + "loss": 0.7312, + "step": 13220 + }, + { + "epoch": 0.679463459759482, + "grad_norm": 1.112452745437622, + "learning_rate": 2.460941849703297e-06, + "loss": 0.7414, + "step": 13221 + }, + { + "epoch": 0.6795148525028266, + "grad_norm": 0.8322348594665527, + "learning_rate": 2.4602249245203746e-06, + "loss": 0.657, + "step": 13222 + }, + { + "epoch": 0.6795662452461713, + "grad_norm": 1.106318473815918, + "learning_rate": 2.4595080697041345e-06, + "loss": 0.6896, + "step": 13223 + }, + { + "epoch": 0.6796176379895159, + "grad_norm": 1.0241153240203857, + "learning_rate": 2.4587912852744435e-06, + "loss": 0.6776, + "step": 13224 + }, + { + "epoch": 0.6796690307328606, + "grad_norm": 1.0289332866668701, + "learning_rate": 2.4580745712511592e-06, + "loss": 0.7418, + "step": 13225 + }, + { + "epoch": 0.6797204234762052, + "grad_norm": 1.04972243309021, + "learning_rate": 2.457357927654137e-06, + "loss": 0.6447, + "step": 13226 + }, + { + "epoch": 0.6797718162195499, + "grad_norm": 0.7166408896446228, + "learning_rate": 2.456641354503235e-06, + "loss": 0.667, + "step": 13227 + }, + { + "epoch": 0.6798232089628944, + "grad_norm": 1.132569670677185, + "learning_rate": 2.4559248518183042e-06, + "loss": 0.6948, + "step": 13228 + }, + { + "epoch": 0.679874601706239, + "grad_norm": 1.0630922317504883, + "learning_rate": 2.4552084196192005e-06, + "loss": 0.6853, + "step": 13229 + }, + { + "epoch": 0.6799259944495837, + "grad_norm": 1.1038269996643066, + "learning_rate": 2.4544920579257658e-06, + "loss": 0.7022, + "step": 13230 + }, + { + "epoch": 0.6799773871929283, + "grad_norm": 1.0308912992477417, + "learning_rate": 2.453775766757853e-06, + "loss": 0.6921, + "step": 13231 + }, + { + "epoch": 0.680028779936273, + "grad_norm": 1.1923452615737915, + "learning_rate": 2.453059546135307e-06, + "loss": 0.6986, + "step": 13232 + }, + { + "epoch": 0.6800801726796176, + "grad_norm": 1.0724221467971802, + "learning_rate": 2.4523433960779695e-06, + "loss": 0.7207, + "step": 13233 + }, + { + "epoch": 0.6801315654229623, + "grad_norm": 1.0431641340255737, + "learning_rate": 2.4516273166056814e-06, + "loss": 0.7492, + "step": 13234 + }, + { + "epoch": 0.6801829581663069, + "grad_norm": 1.1156055927276611, + "learning_rate": 2.4509113077382855e-06, + "loss": 0.7093, + "step": 13235 + }, + { + "epoch": 0.6802343509096516, + "grad_norm": 0.8234076499938965, + "learning_rate": 2.4501953694956183e-06, + "loss": 0.6548, + "step": 13236 + }, + { + "epoch": 0.6802857436529962, + "grad_norm": 1.1025968790054321, + "learning_rate": 2.449479501897513e-06, + "loss": 0.6919, + "step": 13237 + }, + { + "epoch": 0.6803371363963409, + "grad_norm": 1.0031371116638184, + "learning_rate": 2.4487637049638085e-06, + "loss": 0.7183, + "step": 13238 + }, + { + "epoch": 0.6803885291396855, + "grad_norm": 1.0633676052093506, + "learning_rate": 2.4480479787143303e-06, + "loss": 0.7401, + "step": 13239 + }, + { + "epoch": 0.6804399218830302, + "grad_norm": 1.0840507745742798, + "learning_rate": 2.4473323231689127e-06, + "loss": 0.7186, + "step": 13240 + }, + { + "epoch": 0.6804913146263748, + "grad_norm": 1.0725923776626587, + "learning_rate": 2.4466167383473826e-06, + "loss": 0.692, + "step": 13241 + }, + { + "epoch": 0.6805427073697194, + "grad_norm": 0.7000992894172668, + "learning_rate": 2.4459012242695656e-06, + "loss": 0.6804, + "step": 13242 + }, + { + "epoch": 0.680594100113064, + "grad_norm": 1.0073797702789307, + "learning_rate": 2.4451857809552833e-06, + "loss": 0.6798, + "step": 13243 + }, + { + "epoch": 0.6806454928564086, + "grad_norm": 1.0870059728622437, + "learning_rate": 2.444470408424362e-06, + "loss": 0.7102, + "step": 13244 + }, + { + "epoch": 0.6806968855997533, + "grad_norm": 1.053800344467163, + "learning_rate": 2.443755106696619e-06, + "loss": 0.6913, + "step": 13245 + }, + { + "epoch": 0.6807482783430979, + "grad_norm": 1.0393702983856201, + "learning_rate": 2.4430398757918717e-06, + "loss": 0.7253, + "step": 13246 + }, + { + "epoch": 0.6807996710864426, + "grad_norm": 0.830040693283081, + "learning_rate": 2.4423247157299394e-06, + "loss": 0.637, + "step": 13247 + }, + { + "epoch": 0.6808510638297872, + "grad_norm": 0.7397857904434204, + "learning_rate": 2.4416096265306344e-06, + "loss": 0.667, + "step": 13248 + }, + { + "epoch": 0.6809024565731319, + "grad_norm": 0.798831582069397, + "learning_rate": 2.4408946082137684e-06, + "loss": 0.6415, + "step": 13249 + }, + { + "epoch": 0.6809538493164765, + "grad_norm": 1.1541287899017334, + "learning_rate": 2.440179660799151e-06, + "loss": 0.7792, + "step": 13250 + }, + { + "epoch": 0.6810052420598212, + "grad_norm": 1.0994206666946411, + "learning_rate": 2.439464784306594e-06, + "loss": 0.7289, + "step": 13251 + }, + { + "epoch": 0.6810566348031658, + "grad_norm": 0.9598152041435242, + "learning_rate": 2.4387499787558976e-06, + "loss": 0.6187, + "step": 13252 + }, + { + "epoch": 0.6811080275465105, + "grad_norm": 1.051993727684021, + "learning_rate": 2.438035244166872e-06, + "loss": 0.6985, + "step": 13253 + }, + { + "epoch": 0.6811594202898551, + "grad_norm": 0.7552441358566284, + "learning_rate": 2.437320580559317e-06, + "loss": 0.6809, + "step": 13254 + }, + { + "epoch": 0.6812108130331997, + "grad_norm": 1.1250953674316406, + "learning_rate": 2.4366059879530335e-06, + "loss": 0.7288, + "step": 13255 + }, + { + "epoch": 0.6812622057765444, + "grad_norm": 1.0953460931777954, + "learning_rate": 2.4358914663678173e-06, + "loss": 0.7486, + "step": 13256 + }, + { + "epoch": 0.681313598519889, + "grad_norm": 1.3267933130264282, + "learning_rate": 2.435177015823469e-06, + "loss": 0.7038, + "step": 13257 + }, + { + "epoch": 0.6813649912632336, + "grad_norm": 1.0587003231048584, + "learning_rate": 2.434462636339781e-06, + "loss": 0.6923, + "step": 13258 + }, + { + "epoch": 0.6814163840065782, + "grad_norm": 1.1268763542175293, + "learning_rate": 2.433748327936544e-06, + "loss": 0.6824, + "step": 13259 + }, + { + "epoch": 0.6814677767499229, + "grad_norm": 1.0469001531600952, + "learning_rate": 2.433034090633553e-06, + "loss": 0.697, + "step": 13260 + }, + { + "epoch": 0.6815191694932675, + "grad_norm": 1.0895551443099976, + "learning_rate": 2.4323199244505943e-06, + "loss": 0.7072, + "step": 13261 + }, + { + "epoch": 0.6815705622366122, + "grad_norm": 0.778810441493988, + "learning_rate": 2.431605829407454e-06, + "loss": 0.6563, + "step": 13262 + }, + { + "epoch": 0.6816219549799568, + "grad_norm": 1.102554202079773, + "learning_rate": 2.430891805523915e-06, + "loss": 0.6573, + "step": 13263 + }, + { + "epoch": 0.6816733477233015, + "grad_norm": 1.0904885530471802, + "learning_rate": 2.4301778528197654e-06, + "loss": 0.7017, + "step": 13264 + }, + { + "epoch": 0.6817247404666461, + "grad_norm": 1.1557235717773438, + "learning_rate": 2.4294639713147795e-06, + "loss": 0.7046, + "step": 13265 + }, + { + "epoch": 0.6817761332099908, + "grad_norm": 1.0727252960205078, + "learning_rate": 2.42875016102874e-06, + "loss": 0.6893, + "step": 13266 + }, + { + "epoch": 0.6818275259533354, + "grad_norm": 1.0447843074798584, + "learning_rate": 2.4280364219814234e-06, + "loss": 0.7351, + "step": 13267 + }, + { + "epoch": 0.68187891869668, + "grad_norm": 1.0589386224746704, + "learning_rate": 2.4273227541926015e-06, + "loss": 0.7276, + "step": 13268 + }, + { + "epoch": 0.6819303114400247, + "grad_norm": 1.015015959739685, + "learning_rate": 2.426609157682051e-06, + "loss": 0.7006, + "step": 13269 + }, + { + "epoch": 0.6819817041833693, + "grad_norm": 0.8710305094718933, + "learning_rate": 2.425895632469541e-06, + "loss": 0.6122, + "step": 13270 + }, + { + "epoch": 0.682033096926714, + "grad_norm": 0.677774965763092, + "learning_rate": 2.42518217857484e-06, + "loss": 0.6479, + "step": 13271 + }, + { + "epoch": 0.6820844896700586, + "grad_norm": 1.0322576761245728, + "learning_rate": 2.424468796017714e-06, + "loss": 0.6599, + "step": 13272 + }, + { + "epoch": 0.6821358824134033, + "grad_norm": 1.0469520092010498, + "learning_rate": 2.423755484817933e-06, + "loss": 0.6744, + "step": 13273 + }, + { + "epoch": 0.6821872751567478, + "grad_norm": 0.7185530066490173, + "learning_rate": 2.4230422449952517e-06, + "loss": 0.6228, + "step": 13274 + }, + { + "epoch": 0.6822386679000925, + "grad_norm": 1.095075011253357, + "learning_rate": 2.4223290765694375e-06, + "loss": 0.7384, + "step": 13275 + }, + { + "epoch": 0.6822900606434371, + "grad_norm": 1.0157147645950317, + "learning_rate": 2.4216159795602444e-06, + "loss": 0.657, + "step": 13276 + }, + { + "epoch": 0.6823414533867818, + "grad_norm": 0.8043586015701294, + "learning_rate": 2.420902953987435e-06, + "loss": 0.6369, + "step": 13277 + }, + { + "epoch": 0.6823928461301264, + "grad_norm": 0.8233100175857544, + "learning_rate": 2.4201899998707613e-06, + "loss": 0.6281, + "step": 13278 + }, + { + "epoch": 0.682444238873471, + "grad_norm": 1.0913152694702148, + "learning_rate": 2.4194771172299743e-06, + "loss": 0.7014, + "step": 13279 + }, + { + "epoch": 0.6824956316168157, + "grad_norm": 0.7298514246940613, + "learning_rate": 2.418764306084831e-06, + "loss": 0.6591, + "step": 13280 + }, + { + "epoch": 0.6825470243601603, + "grad_norm": 1.0800707340240479, + "learning_rate": 2.418051566455073e-06, + "loss": 0.7377, + "step": 13281 + }, + { + "epoch": 0.682598417103505, + "grad_norm": 1.0422908067703247, + "learning_rate": 2.4173388983604527e-06, + "loss": 0.6468, + "step": 13282 + }, + { + "epoch": 0.6826498098468496, + "grad_norm": 1.1076951026916504, + "learning_rate": 2.416626301820714e-06, + "loss": 0.6665, + "step": 13283 + }, + { + "epoch": 0.6827012025901943, + "grad_norm": 1.0576202869415283, + "learning_rate": 2.415913776855599e-06, + "loss": 0.6721, + "step": 13284 + }, + { + "epoch": 0.6827525953335389, + "grad_norm": 0.7094531059265137, + "learning_rate": 2.4152013234848476e-06, + "loss": 0.6243, + "step": 13285 + }, + { + "epoch": 0.6828039880768836, + "grad_norm": 1.1675782203674316, + "learning_rate": 2.4144889417282026e-06, + "loss": 0.7949, + "step": 13286 + }, + { + "epoch": 0.6828553808202282, + "grad_norm": 1.0918580293655396, + "learning_rate": 2.413776631605399e-06, + "loss": 0.7481, + "step": 13287 + }, + { + "epoch": 0.6829067735635729, + "grad_norm": 1.1489192247390747, + "learning_rate": 2.413064393136171e-06, + "loss": 0.74, + "step": 13288 + }, + { + "epoch": 0.6829581663069174, + "grad_norm": 1.0683095455169678, + "learning_rate": 2.4123522263402545e-06, + "loss": 0.7544, + "step": 13289 + }, + { + "epoch": 0.6830095590502621, + "grad_norm": 1.093979835510254, + "learning_rate": 2.4116401312373788e-06, + "loss": 0.7256, + "step": 13290 + }, + { + "epoch": 0.6830609517936067, + "grad_norm": 1.0903630256652832, + "learning_rate": 2.410928107847274e-06, + "loss": 0.6746, + "step": 13291 + }, + { + "epoch": 0.6831123445369514, + "grad_norm": 1.079805612564087, + "learning_rate": 2.4102161561896655e-06, + "loss": 0.7199, + "step": 13292 + }, + { + "epoch": 0.683163737280296, + "grad_norm": 1.100926160812378, + "learning_rate": 2.409504276284283e-06, + "loss": 0.7454, + "step": 13293 + }, + { + "epoch": 0.6832151300236406, + "grad_norm": 1.0561484098434448, + "learning_rate": 2.408792468150843e-06, + "loss": 0.6259, + "step": 13294 + }, + { + "epoch": 0.6832665227669853, + "grad_norm": 1.003684639930725, + "learning_rate": 2.4080807318090723e-06, + "loss": 0.6844, + "step": 13295 + }, + { + "epoch": 0.6833179155103299, + "grad_norm": 1.0702459812164307, + "learning_rate": 2.407369067278688e-06, + "loss": 0.6459, + "step": 13296 + }, + { + "epoch": 0.6833693082536746, + "grad_norm": 1.1424384117126465, + "learning_rate": 2.4066574745794064e-06, + "loss": 0.7039, + "step": 13297 + }, + { + "epoch": 0.6834207009970192, + "grad_norm": 1.064834475517273, + "learning_rate": 2.4059459537309452e-06, + "loss": 0.7194, + "step": 13298 + }, + { + "epoch": 0.6834720937403639, + "grad_norm": 1.0064125061035156, + "learning_rate": 2.4052345047530175e-06, + "loss": 0.7249, + "step": 13299 + }, + { + "epoch": 0.6835234864837085, + "grad_norm": 1.097679853439331, + "learning_rate": 2.404523127665333e-06, + "loss": 0.6973, + "step": 13300 + }, + { + "epoch": 0.6835748792270532, + "grad_norm": 1.0807406902313232, + "learning_rate": 2.4038118224876e-06, + "loss": 0.6983, + "step": 13301 + }, + { + "epoch": 0.6836262719703978, + "grad_norm": 1.0093823671340942, + "learning_rate": 2.4031005892395314e-06, + "loss": 0.6845, + "step": 13302 + }, + { + "epoch": 0.6836776647137425, + "grad_norm": 1.0717673301696777, + "learning_rate": 2.4023894279408257e-06, + "loss": 0.7054, + "step": 13303 + }, + { + "epoch": 0.683729057457087, + "grad_norm": 1.0829806327819824, + "learning_rate": 2.4016783386111904e-06, + "loss": 0.675, + "step": 13304 + }, + { + "epoch": 0.6837804502004317, + "grad_norm": 1.0419515371322632, + "learning_rate": 2.4009673212703233e-06, + "loss": 0.6973, + "step": 13305 + }, + { + "epoch": 0.6838318429437763, + "grad_norm": 1.0893298387527466, + "learning_rate": 2.4002563759379305e-06, + "loss": 0.73, + "step": 13306 + }, + { + "epoch": 0.683883235687121, + "grad_norm": 1.0366350412368774, + "learning_rate": 2.3995455026337015e-06, + "loss": 0.6772, + "step": 13307 + }, + { + "epoch": 0.6839346284304656, + "grad_norm": 1.0730319023132324, + "learning_rate": 2.3988347013773364e-06, + "loss": 0.7325, + "step": 13308 + }, + { + "epoch": 0.6839860211738102, + "grad_norm": 1.1614218950271606, + "learning_rate": 2.398123972188528e-06, + "loss": 0.7419, + "step": 13309 + }, + { + "epoch": 0.6840374139171549, + "grad_norm": 0.7046939730644226, + "learning_rate": 2.397413315086965e-06, + "loss": 0.6414, + "step": 13310 + }, + { + "epoch": 0.6840888066604995, + "grad_norm": 1.092928409576416, + "learning_rate": 2.396702730092341e-06, + "loss": 0.7321, + "step": 13311 + }, + { + "epoch": 0.6841401994038442, + "grad_norm": 1.149901032447815, + "learning_rate": 2.39599221722434e-06, + "loss": 0.7164, + "step": 13312 + }, + { + "epoch": 0.6841915921471888, + "grad_norm": 1.1414217948913574, + "learning_rate": 2.3952817765026494e-06, + "loss": 0.7404, + "step": 13313 + }, + { + "epoch": 0.6842429848905335, + "grad_norm": 1.098250150680542, + "learning_rate": 2.3945714079469494e-06, + "loss": 0.7233, + "step": 13314 + }, + { + "epoch": 0.6842943776338781, + "grad_norm": 0.7288789749145508, + "learning_rate": 2.3938611115769283e-06, + "loss": 0.6713, + "step": 13315 + }, + { + "epoch": 0.6843457703772228, + "grad_norm": 1.1219171285629272, + "learning_rate": 2.3931508874122565e-06, + "loss": 0.7707, + "step": 13316 + }, + { + "epoch": 0.6843971631205674, + "grad_norm": 0.8191121220588684, + "learning_rate": 2.3924407354726175e-06, + "loss": 0.6386, + "step": 13317 + }, + { + "epoch": 0.6844485558639121, + "grad_norm": 1.048741102218628, + "learning_rate": 2.3917306557776853e-06, + "loss": 0.6591, + "step": 13318 + }, + { + "epoch": 0.6844999486072566, + "grad_norm": 1.049906611442566, + "learning_rate": 2.391020648347131e-06, + "loss": 0.8278, + "step": 13319 + }, + { + "epoch": 0.6845513413506013, + "grad_norm": 1.100466251373291, + "learning_rate": 2.39031071320063e-06, + "loss": 0.7202, + "step": 13320 + }, + { + "epoch": 0.6846027340939459, + "grad_norm": 0.7114228010177612, + "learning_rate": 2.3896008503578476e-06, + "loss": 0.6507, + "step": 13321 + }, + { + "epoch": 0.6846541268372905, + "grad_norm": 1.1019946336746216, + "learning_rate": 2.3888910598384567e-06, + "loss": 0.6786, + "step": 13322 + }, + { + "epoch": 0.6847055195806352, + "grad_norm": 0.7408274412155151, + "learning_rate": 2.3881813416621157e-06, + "loss": 0.6671, + "step": 13323 + }, + { + "epoch": 0.6847569123239798, + "grad_norm": 0.7356608510017395, + "learning_rate": 2.3874716958484927e-06, + "loss": 0.6953, + "step": 13324 + }, + { + "epoch": 0.6848083050673245, + "grad_norm": 1.0569781064987183, + "learning_rate": 2.386762122417248e-06, + "loss": 0.6974, + "step": 13325 + }, + { + "epoch": 0.6848596978106691, + "grad_norm": 0.7239935994148254, + "learning_rate": 2.3860526213880413e-06, + "loss": 0.7091, + "step": 13326 + }, + { + "epoch": 0.6849110905540138, + "grad_norm": 1.1210187673568726, + "learning_rate": 2.3853431927805267e-06, + "loss": 0.6825, + "step": 13327 + }, + { + "epoch": 0.6849624832973584, + "grad_norm": 0.694304883480072, + "learning_rate": 2.3846338366143644e-06, + "loss": 0.7121, + "step": 13328 + }, + { + "epoch": 0.6850138760407031, + "grad_norm": 1.097902536392212, + "learning_rate": 2.3839245529092055e-06, + "loss": 0.685, + "step": 13329 + }, + { + "epoch": 0.6850652687840477, + "grad_norm": 1.0709285736083984, + "learning_rate": 2.3832153416846995e-06, + "loss": 0.6925, + "step": 13330 + }, + { + "epoch": 0.6851166615273924, + "grad_norm": 1.1218461990356445, + "learning_rate": 2.3825062029605014e-06, + "loss": 0.7291, + "step": 13331 + }, + { + "epoch": 0.685168054270737, + "grad_norm": 1.0941627025604248, + "learning_rate": 2.3817971367562505e-06, + "loss": 0.7527, + "step": 13332 + }, + { + "epoch": 0.6852194470140817, + "grad_norm": 1.0594651699066162, + "learning_rate": 2.3810881430915975e-06, + "loss": 0.7014, + "step": 13333 + }, + { + "epoch": 0.6852708397574262, + "grad_norm": 1.0984879732131958, + "learning_rate": 2.380379221986186e-06, + "loss": 0.6687, + "step": 13334 + }, + { + "epoch": 0.6853222325007708, + "grad_norm": 0.6971931457519531, + "learning_rate": 2.379670373459655e-06, + "loss": 0.6356, + "step": 13335 + }, + { + "epoch": 0.6853736252441155, + "grad_norm": 1.117618441581726, + "learning_rate": 2.3789615975316423e-06, + "loss": 0.6995, + "step": 13336 + }, + { + "epoch": 0.6854250179874601, + "grad_norm": 1.022958517074585, + "learning_rate": 2.37825289422179e-06, + "loss": 0.6433, + "step": 13337 + }, + { + "epoch": 0.6854764107308048, + "grad_norm": 1.023323893547058, + "learning_rate": 2.3775442635497297e-06, + "loss": 0.6601, + "step": 13338 + }, + { + "epoch": 0.6855278034741494, + "grad_norm": 1.0816253423690796, + "learning_rate": 2.3768357055350945e-06, + "loss": 0.7245, + "step": 13339 + }, + { + "epoch": 0.6855791962174941, + "grad_norm": 1.0332515239715576, + "learning_rate": 2.3761272201975184e-06, + "loss": 0.7225, + "step": 13340 + }, + { + "epoch": 0.6856305889608387, + "grad_norm": 1.1607924699783325, + "learning_rate": 2.375418807556629e-06, + "loss": 0.7492, + "step": 13341 + }, + { + "epoch": 0.6856819817041834, + "grad_norm": 1.0634335279464722, + "learning_rate": 2.3747104676320533e-06, + "loss": 0.6862, + "step": 13342 + }, + { + "epoch": 0.685733374447528, + "grad_norm": 1.1076996326446533, + "learning_rate": 2.3740022004434145e-06, + "loss": 0.745, + "step": 13343 + }, + { + "epoch": 0.6857847671908727, + "grad_norm": 1.1111207008361816, + "learning_rate": 2.3732940060103417e-06, + "loss": 0.7504, + "step": 13344 + }, + { + "epoch": 0.6858361599342173, + "grad_norm": 1.04714834690094, + "learning_rate": 2.3725858843524487e-06, + "loss": 0.7171, + "step": 13345 + }, + { + "epoch": 0.685887552677562, + "grad_norm": 0.6929269433021545, + "learning_rate": 2.37187783548936e-06, + "loss": 0.5974, + "step": 13346 + }, + { + "epoch": 0.6859389454209066, + "grad_norm": 1.1622371673583984, + "learning_rate": 2.371169859440691e-06, + "loss": 0.7771, + "step": 13347 + }, + { + "epoch": 0.6859903381642513, + "grad_norm": 1.1110748052597046, + "learning_rate": 2.3704619562260545e-06, + "loss": 0.7381, + "step": 13348 + }, + { + "epoch": 0.6860417309075958, + "grad_norm": 1.0892292261123657, + "learning_rate": 2.3697541258650676e-06, + "loss": 0.6758, + "step": 13349 + }, + { + "epoch": 0.6860931236509404, + "grad_norm": 0.8300483822822571, + "learning_rate": 2.36904636837734e-06, + "loss": 0.6711, + "step": 13350 + }, + { + "epoch": 0.6861445163942851, + "grad_norm": 1.0972872972488403, + "learning_rate": 2.3683386837824794e-06, + "loss": 0.657, + "step": 13351 + }, + { + "epoch": 0.6861959091376297, + "grad_norm": 0.759110152721405, + "learning_rate": 2.3676310721000922e-06, + "loss": 0.6485, + "step": 13352 + }, + { + "epoch": 0.6862473018809744, + "grad_norm": 1.0260483026504517, + "learning_rate": 2.3669235333497868e-06, + "loss": 0.6444, + "step": 13353 + }, + { + "epoch": 0.686298694624319, + "grad_norm": 1.0845814943313599, + "learning_rate": 2.366216067551164e-06, + "loss": 0.7671, + "step": 13354 + }, + { + "epoch": 0.6863500873676637, + "grad_norm": 1.1047589778900146, + "learning_rate": 2.365508674723825e-06, + "loss": 0.6906, + "step": 13355 + }, + { + "epoch": 0.6864014801110083, + "grad_norm": 1.0388273000717163, + "learning_rate": 2.3648013548873672e-06, + "loss": 0.6842, + "step": 13356 + }, + { + "epoch": 0.686452872854353, + "grad_norm": 1.0922319889068604, + "learning_rate": 2.3640941080613927e-06, + "loss": 0.7262, + "step": 13357 + }, + { + "epoch": 0.6865042655976976, + "grad_norm": 1.0639010667800903, + "learning_rate": 2.3633869342654884e-06, + "loss": 0.7291, + "step": 13358 + }, + { + "epoch": 0.6865556583410423, + "grad_norm": 0.7619389295578003, + "learning_rate": 2.362679833519254e-06, + "loss": 0.6772, + "step": 13359 + }, + { + "epoch": 0.6866070510843869, + "grad_norm": 1.097631812095642, + "learning_rate": 2.361972805842278e-06, + "loss": 0.767, + "step": 13360 + }, + { + "epoch": 0.6866584438277316, + "grad_norm": 1.1154394149780273, + "learning_rate": 2.3612658512541468e-06, + "loss": 0.7049, + "step": 13361 + }, + { + "epoch": 0.6867098365710762, + "grad_norm": 0.7978020310401917, + "learning_rate": 2.360558969774451e-06, + "loss": 0.6517, + "step": 13362 + }, + { + "epoch": 0.6867612293144209, + "grad_norm": 1.103040337562561, + "learning_rate": 2.359852161422774e-06, + "loss": 0.6785, + "step": 13363 + }, + { + "epoch": 0.6868126220577655, + "grad_norm": 0.9830541610717773, + "learning_rate": 2.359145426218699e-06, + "loss": 0.6595, + "step": 13364 + }, + { + "epoch": 0.68686401480111, + "grad_norm": 1.0240007638931274, + "learning_rate": 2.3584387641818035e-06, + "loss": 0.7125, + "step": 13365 + }, + { + "epoch": 0.6869154075444547, + "grad_norm": 1.100358247756958, + "learning_rate": 2.357732175331673e-06, + "loss": 0.6663, + "step": 13366 + }, + { + "epoch": 0.6869668002877993, + "grad_norm": 1.0087822675704956, + "learning_rate": 2.357025659687876e-06, + "loss": 0.7115, + "step": 13367 + }, + { + "epoch": 0.687018193031144, + "grad_norm": 1.0406197309494019, + "learning_rate": 2.356319217269993e-06, + "loss": 0.7565, + "step": 13368 + }, + { + "epoch": 0.6870695857744886, + "grad_norm": 1.1621811389923096, + "learning_rate": 2.3556128480975926e-06, + "loss": 0.7173, + "step": 13369 + }, + { + "epoch": 0.6871209785178333, + "grad_norm": 0.697023332118988, + "learning_rate": 2.3549065521902493e-06, + "loss": 0.6172, + "step": 13370 + }, + { + "epoch": 0.6871723712611779, + "grad_norm": 1.0437123775482178, + "learning_rate": 2.35420032956753e-06, + "loss": 0.6942, + "step": 13371 + }, + { + "epoch": 0.6872237640045226, + "grad_norm": 1.0900206565856934, + "learning_rate": 2.3534941802489985e-06, + "loss": 0.7209, + "step": 13372 + }, + { + "epoch": 0.6872751567478672, + "grad_norm": 1.0648189783096313, + "learning_rate": 2.352788104254226e-06, + "loss": 0.7509, + "step": 13373 + }, + { + "epoch": 0.6873265494912119, + "grad_norm": 0.8383201360702515, + "learning_rate": 2.3520821016027674e-06, + "loss": 0.6318, + "step": 13374 + }, + { + "epoch": 0.6873779422345565, + "grad_norm": 1.157762885093689, + "learning_rate": 2.3513761723141874e-06, + "loss": 0.7062, + "step": 13375 + }, + { + "epoch": 0.6874293349779012, + "grad_norm": 1.1178290843963623, + "learning_rate": 2.3506703164080443e-06, + "loss": 0.6757, + "step": 13376 + }, + { + "epoch": 0.6874807277212458, + "grad_norm": 0.7638944983482361, + "learning_rate": 2.349964533903893e-06, + "loss": 0.6576, + "step": 13377 + }, + { + "epoch": 0.6875321204645904, + "grad_norm": 1.0895625352859497, + "learning_rate": 2.3492588248212868e-06, + "loss": 0.7007, + "step": 13378 + }, + { + "epoch": 0.6875835132079351, + "grad_norm": 1.0964897871017456, + "learning_rate": 2.3485531891797812e-06, + "loss": 0.6726, + "step": 13379 + }, + { + "epoch": 0.6876349059512796, + "grad_norm": 1.1167876720428467, + "learning_rate": 2.3478476269989254e-06, + "loss": 0.8101, + "step": 13380 + }, + { + "epoch": 0.6876862986946243, + "grad_norm": 1.0434703826904297, + "learning_rate": 2.347142138298265e-06, + "loss": 0.69, + "step": 13381 + }, + { + "epoch": 0.6877376914379689, + "grad_norm": 1.1000373363494873, + "learning_rate": 2.3464367230973516e-06, + "loss": 0.717, + "step": 13382 + }, + { + "epoch": 0.6877890841813136, + "grad_norm": 1.0852264165878296, + "learning_rate": 2.345731381415723e-06, + "loss": 0.6297, + "step": 13383 + }, + { + "epoch": 0.6878404769246582, + "grad_norm": 1.1253141164779663, + "learning_rate": 2.345026113272925e-06, + "loss": 0.6463, + "step": 13384 + }, + { + "epoch": 0.6878918696680029, + "grad_norm": 1.0757182836532593, + "learning_rate": 2.3443209186884963e-06, + "loss": 0.7259, + "step": 13385 + }, + { + "epoch": 0.6879432624113475, + "grad_norm": 0.664968729019165, + "learning_rate": 2.343615797681979e-06, + "loss": 0.651, + "step": 13386 + }, + { + "epoch": 0.6879946551546922, + "grad_norm": 0.7261338829994202, + "learning_rate": 2.3429107502729016e-06, + "loss": 0.699, + "step": 13387 + }, + { + "epoch": 0.6880460478980368, + "grad_norm": 1.1462910175323486, + "learning_rate": 2.3422057764808043e-06, + "loss": 0.6695, + "step": 13388 + }, + { + "epoch": 0.6880974406413815, + "grad_norm": 1.0899831056594849, + "learning_rate": 2.3415008763252168e-06, + "loss": 0.6508, + "step": 13389 + }, + { + "epoch": 0.6881488333847261, + "grad_norm": 1.0417860746383667, + "learning_rate": 2.340796049825667e-06, + "loss": 0.7076, + "step": 13390 + }, + { + "epoch": 0.6882002261280707, + "grad_norm": 1.070149302482605, + "learning_rate": 2.3400912970016872e-06, + "loss": 0.6723, + "step": 13391 + }, + { + "epoch": 0.6882516188714154, + "grad_norm": 1.0535063743591309, + "learning_rate": 2.3393866178728e-06, + "loss": 0.7734, + "step": 13392 + }, + { + "epoch": 0.68830301161476, + "grad_norm": 1.0936784744262695, + "learning_rate": 2.3386820124585304e-06, + "loss": 0.7356, + "step": 13393 + }, + { + "epoch": 0.6883544043581047, + "grad_norm": 1.1746679544448853, + "learning_rate": 2.337977480778398e-06, + "loss": 0.7335, + "step": 13394 + }, + { + "epoch": 0.6884057971014492, + "grad_norm": 1.1055418252944946, + "learning_rate": 2.337273022851927e-06, + "loss": 0.657, + "step": 13395 + }, + { + "epoch": 0.6884571898447939, + "grad_norm": 1.104048728942871, + "learning_rate": 2.336568638698629e-06, + "loss": 0.7105, + "step": 13396 + }, + { + "epoch": 0.6885085825881385, + "grad_norm": 1.1065303087234497, + "learning_rate": 2.3358643283380243e-06, + "loss": 0.6949, + "step": 13397 + }, + { + "epoch": 0.6885599753314832, + "grad_norm": 1.1341803073883057, + "learning_rate": 2.335160091789625e-06, + "loss": 0.7281, + "step": 13398 + }, + { + "epoch": 0.6886113680748278, + "grad_norm": 1.0388866662979126, + "learning_rate": 2.33445592907294e-06, + "loss": 0.6661, + "step": 13399 + }, + { + "epoch": 0.6886627608181725, + "grad_norm": 0.7395660877227783, + "learning_rate": 2.333751840207484e-06, + "loss": 0.6482, + "step": 13400 + }, + { + "epoch": 0.6887141535615171, + "grad_norm": 1.0575189590454102, + "learning_rate": 2.333047825212761e-06, + "loss": 0.7135, + "step": 13401 + }, + { + "epoch": 0.6887655463048618, + "grad_norm": 1.059338092803955, + "learning_rate": 2.3323438841082767e-06, + "loss": 0.678, + "step": 13402 + }, + { + "epoch": 0.6888169390482064, + "grad_norm": 0.7790700793266296, + "learning_rate": 2.3316400169135332e-06, + "loss": 0.6319, + "step": 13403 + }, + { + "epoch": 0.688868331791551, + "grad_norm": 1.1395809650421143, + "learning_rate": 2.330936223648035e-06, + "loss": 0.7004, + "step": 13404 + }, + { + "epoch": 0.6889197245348957, + "grad_norm": 1.043841004371643, + "learning_rate": 2.330232504331279e-06, + "loss": 0.6711, + "step": 13405 + }, + { + "epoch": 0.6889711172782403, + "grad_norm": 1.0228657722473145, + "learning_rate": 2.329528858982763e-06, + "loss": 0.6974, + "step": 13406 + }, + { + "epoch": 0.689022510021585, + "grad_norm": 1.0640373229980469, + "learning_rate": 2.32882528762198e-06, + "loss": 0.7111, + "step": 13407 + }, + { + "epoch": 0.6890739027649296, + "grad_norm": 1.0011875629425049, + "learning_rate": 2.3281217902684292e-06, + "loss": 0.6827, + "step": 13408 + }, + { + "epoch": 0.6891252955082743, + "grad_norm": 1.0655626058578491, + "learning_rate": 2.3274183669415934e-06, + "loss": 0.6575, + "step": 13409 + }, + { + "epoch": 0.6891766882516188, + "grad_norm": 1.0391764640808105, + "learning_rate": 2.3267150176609676e-06, + "loss": 0.6831, + "step": 13410 + }, + { + "epoch": 0.6892280809949635, + "grad_norm": 1.046884536743164, + "learning_rate": 2.326011742446036e-06, + "loss": 0.7275, + "step": 13411 + }, + { + "epoch": 0.6892794737383081, + "grad_norm": 0.7641355991363525, + "learning_rate": 2.325308541316283e-06, + "loss": 0.6717, + "step": 13412 + }, + { + "epoch": 0.6893308664816528, + "grad_norm": 1.0446792840957642, + "learning_rate": 2.324605414291194e-06, + "loss": 0.6809, + "step": 13413 + }, + { + "epoch": 0.6893822592249974, + "grad_norm": 1.0867435932159424, + "learning_rate": 2.3239023613902466e-06, + "loss": 0.6979, + "step": 13414 + }, + { + "epoch": 0.689433651968342, + "grad_norm": 0.813461184501648, + "learning_rate": 2.3231993826329248e-06, + "loss": 0.6214, + "step": 13415 + }, + { + "epoch": 0.6894850447116867, + "grad_norm": 1.0633823871612549, + "learning_rate": 2.322496478038698e-06, + "loss": 0.7265, + "step": 13416 + }, + { + "epoch": 0.6895364374550313, + "grad_norm": 1.0876883268356323, + "learning_rate": 2.3217936476270457e-06, + "loss": 0.7279, + "step": 13417 + }, + { + "epoch": 0.689587830198376, + "grad_norm": 1.064242959022522, + "learning_rate": 2.3210908914174396e-06, + "loss": 0.686, + "step": 13418 + }, + { + "epoch": 0.6896392229417206, + "grad_norm": 1.1113409996032715, + "learning_rate": 2.3203882094293493e-06, + "loss": 0.7161, + "step": 13419 + }, + { + "epoch": 0.6896906156850653, + "grad_norm": 1.0891555547714233, + "learning_rate": 2.319685601682242e-06, + "loss": 0.6922, + "step": 13420 + }, + { + "epoch": 0.6897420084284099, + "grad_norm": 0.9466959238052368, + "learning_rate": 2.318983068195588e-06, + "loss": 0.6919, + "step": 13421 + }, + { + "epoch": 0.6897934011717546, + "grad_norm": 1.0454496145248413, + "learning_rate": 2.3182806089888486e-06, + "loss": 0.6857, + "step": 13422 + }, + { + "epoch": 0.6898447939150992, + "grad_norm": 0.6607553362846375, + "learning_rate": 2.3175782240814853e-06, + "loss": 0.6897, + "step": 13423 + }, + { + "epoch": 0.6898961866584439, + "grad_norm": 1.1564693450927734, + "learning_rate": 2.3168759134929634e-06, + "loss": 0.7195, + "step": 13424 + }, + { + "epoch": 0.6899475794017884, + "grad_norm": 1.0280423164367676, + "learning_rate": 2.3161736772427335e-06, + "loss": 0.7321, + "step": 13425 + }, + { + "epoch": 0.6899989721451331, + "grad_norm": 1.0199776887893677, + "learning_rate": 2.3154715153502577e-06, + "loss": 0.713, + "step": 13426 + }, + { + "epoch": 0.6900503648884777, + "grad_norm": 1.0492305755615234, + "learning_rate": 2.3147694278349873e-06, + "loss": 0.6872, + "step": 13427 + }, + { + "epoch": 0.6901017576318224, + "grad_norm": 1.0582655668258667, + "learning_rate": 2.314067414716375e-06, + "loss": 0.6894, + "step": 13428 + }, + { + "epoch": 0.690153150375167, + "grad_norm": 1.0092233419418335, + "learning_rate": 2.3133654760138683e-06, + "loss": 0.6479, + "step": 13429 + }, + { + "epoch": 0.6902045431185116, + "grad_norm": 1.145283579826355, + "learning_rate": 2.3126636117469193e-06, + "loss": 0.6867, + "step": 13430 + }, + { + "epoch": 0.6902559358618563, + "grad_norm": 1.1579627990722656, + "learning_rate": 2.3119618219349714e-06, + "loss": 0.6665, + "step": 13431 + }, + { + "epoch": 0.6903073286052009, + "grad_norm": 1.0809438228607178, + "learning_rate": 2.311260106597467e-06, + "loss": 0.7118, + "step": 13432 + }, + { + "epoch": 0.6903587213485456, + "grad_norm": 1.0483686923980713, + "learning_rate": 2.310558465753851e-06, + "loss": 0.7024, + "step": 13433 + }, + { + "epoch": 0.6904101140918902, + "grad_norm": 1.046481728553772, + "learning_rate": 2.3098568994235615e-06, + "loss": 0.6673, + "step": 13434 + }, + { + "epoch": 0.6904615068352349, + "grad_norm": 1.0845974683761597, + "learning_rate": 2.3091554076260352e-06, + "loss": 0.712, + "step": 13435 + }, + { + "epoch": 0.6905128995785795, + "grad_norm": 0.8732377886772156, + "learning_rate": 2.3084539903807068e-06, + "loss": 0.635, + "step": 13436 + }, + { + "epoch": 0.6905642923219242, + "grad_norm": 1.1515207290649414, + "learning_rate": 2.3077526477070146e-06, + "loss": 0.7139, + "step": 13437 + }, + { + "epoch": 0.6906156850652688, + "grad_norm": 0.6914734244346619, + "learning_rate": 2.307051379624382e-06, + "loss": 0.643, + "step": 13438 + }, + { + "epoch": 0.6906670778086135, + "grad_norm": 1.1208144426345825, + "learning_rate": 2.306350186152245e-06, + "loss": 0.6664, + "step": 13439 + }, + { + "epoch": 0.690718470551958, + "grad_norm": 0.7578917145729065, + "learning_rate": 2.305649067310028e-06, + "loss": 0.6688, + "step": 13440 + }, + { + "epoch": 0.6907698632953027, + "grad_norm": 1.1689575910568237, + "learning_rate": 2.3049480231171544e-06, + "loss": 0.7268, + "step": 13441 + }, + { + "epoch": 0.6908212560386473, + "grad_norm": 1.102469801902771, + "learning_rate": 2.3042470535930516e-06, + "loss": 0.8059, + "step": 13442 + }, + { + "epoch": 0.690872648781992, + "grad_norm": 1.068149209022522, + "learning_rate": 2.3035461587571378e-06, + "loss": 0.7272, + "step": 13443 + }, + { + "epoch": 0.6909240415253366, + "grad_norm": 1.055262565612793, + "learning_rate": 2.3028453386288323e-06, + "loss": 0.6787, + "step": 13444 + }, + { + "epoch": 0.6909754342686812, + "grad_norm": 1.060615062713623, + "learning_rate": 2.3021445932275506e-06, + "loss": 0.7376, + "step": 13445 + }, + { + "epoch": 0.6910268270120259, + "grad_norm": 1.119568943977356, + "learning_rate": 2.3014439225727103e-06, + "loss": 0.7477, + "step": 13446 + }, + { + "epoch": 0.6910782197553705, + "grad_norm": 1.0908480882644653, + "learning_rate": 2.300743326683723e-06, + "loss": 0.7012, + "step": 13447 + }, + { + "epoch": 0.6911296124987152, + "grad_norm": 1.092620611190796, + "learning_rate": 2.3000428055799984e-06, + "loss": 0.744, + "step": 13448 + }, + { + "epoch": 0.6911810052420598, + "grad_norm": 1.0482251644134521, + "learning_rate": 2.2993423592809445e-06, + "loss": 0.6821, + "step": 13449 + }, + { + "epoch": 0.6912323979854045, + "grad_norm": 1.1021394729614258, + "learning_rate": 2.2986419878059703e-06, + "loss": 0.6713, + "step": 13450 + }, + { + "epoch": 0.6912837907287491, + "grad_norm": 1.1428685188293457, + "learning_rate": 2.2979416911744784e-06, + "loss": 0.7413, + "step": 13451 + }, + { + "epoch": 0.6913351834720938, + "grad_norm": 1.078089714050293, + "learning_rate": 2.297241469405872e-06, + "loss": 0.6882, + "step": 13452 + }, + { + "epoch": 0.6913865762154384, + "grad_norm": 1.0971280336380005, + "learning_rate": 2.2965413225195502e-06, + "loss": 0.7274, + "step": 13453 + }, + { + "epoch": 0.6914379689587831, + "grad_norm": 0.7383515238761902, + "learning_rate": 2.295841250534911e-06, + "loss": 0.6681, + "step": 13454 + }, + { + "epoch": 0.6914893617021277, + "grad_norm": 1.099220871925354, + "learning_rate": 2.2951412534713525e-06, + "loss": 0.6791, + "step": 13455 + }, + { + "epoch": 0.6915407544454722, + "grad_norm": 1.2490869760513306, + "learning_rate": 2.294441331348268e-06, + "loss": 0.709, + "step": 13456 + }, + { + "epoch": 0.6915921471888169, + "grad_norm": 1.112113356590271, + "learning_rate": 2.293741484185048e-06, + "loss": 0.7566, + "step": 13457 + }, + { + "epoch": 0.6916435399321615, + "grad_norm": 0.7345757484436035, + "learning_rate": 2.293041712001083e-06, + "loss": 0.6528, + "step": 13458 + }, + { + "epoch": 0.6916949326755062, + "grad_norm": 1.0814324617385864, + "learning_rate": 2.292342014815762e-06, + "loss": 0.7344, + "step": 13459 + }, + { + "epoch": 0.6917463254188508, + "grad_norm": 1.0851945877075195, + "learning_rate": 2.2916423926484705e-06, + "loss": 0.6907, + "step": 13460 + }, + { + "epoch": 0.6917977181621955, + "grad_norm": 1.0835740566253662, + "learning_rate": 2.290942845518591e-06, + "loss": 0.7407, + "step": 13461 + }, + { + "epoch": 0.6918491109055401, + "grad_norm": 1.079351544380188, + "learning_rate": 2.2902433734455044e-06, + "loss": 0.7473, + "step": 13462 + }, + { + "epoch": 0.6919005036488848, + "grad_norm": 1.071176528930664, + "learning_rate": 2.2895439764485926e-06, + "loss": 0.6979, + "step": 13463 + }, + { + "epoch": 0.6919518963922294, + "grad_norm": 1.0512492656707764, + "learning_rate": 2.2888446545472324e-06, + "loss": 0.681, + "step": 13464 + }, + { + "epoch": 0.6920032891355741, + "grad_norm": 1.0513546466827393, + "learning_rate": 2.2881454077607968e-06, + "loss": 0.715, + "step": 13465 + }, + { + "epoch": 0.6920546818789187, + "grad_norm": 0.6778315305709839, + "learning_rate": 2.287446236108664e-06, + "loss": 0.643, + "step": 13466 + }, + { + "epoch": 0.6921060746222634, + "grad_norm": 1.071773886680603, + "learning_rate": 2.286747139610198e-06, + "loss": 0.6871, + "step": 13467 + }, + { + "epoch": 0.692157467365608, + "grad_norm": 1.0454281568527222, + "learning_rate": 2.2860481182847742e-06, + "loss": 0.7161, + "step": 13468 + }, + { + "epoch": 0.6922088601089527, + "grad_norm": 1.0766897201538086, + "learning_rate": 2.2853491721517574e-06, + "loss": 0.6666, + "step": 13469 + }, + { + "epoch": 0.6922602528522973, + "grad_norm": 1.078318476676941, + "learning_rate": 2.2846503012305123e-06, + "loss": 0.699, + "step": 13470 + }, + { + "epoch": 0.6923116455956418, + "grad_norm": 1.1118658781051636, + "learning_rate": 2.283951505540399e-06, + "loss": 0.7074, + "step": 13471 + }, + { + "epoch": 0.6923630383389865, + "grad_norm": 1.0923362970352173, + "learning_rate": 2.283252785100784e-06, + "loss": 0.7368, + "step": 13472 + }, + { + "epoch": 0.6924144310823311, + "grad_norm": 0.7332751750946045, + "learning_rate": 2.2825541399310223e-06, + "loss": 0.6362, + "step": 13473 + }, + { + "epoch": 0.6924658238256758, + "grad_norm": 1.035706877708435, + "learning_rate": 2.2818555700504697e-06, + "loss": 0.7355, + "step": 13474 + }, + { + "epoch": 0.6925172165690204, + "grad_norm": 1.084615707397461, + "learning_rate": 2.2811570754784856e-06, + "loss": 0.7225, + "step": 13475 + }, + { + "epoch": 0.6925686093123651, + "grad_norm": 0.7340329885482788, + "learning_rate": 2.2804586562344155e-06, + "loss": 0.6465, + "step": 13476 + }, + { + "epoch": 0.6926200020557097, + "grad_norm": 1.044554591178894, + "learning_rate": 2.279760312337615e-06, + "loss": 0.6787, + "step": 13477 + }, + { + "epoch": 0.6926713947990544, + "grad_norm": 1.0937559604644775, + "learning_rate": 2.2790620438074278e-06, + "loss": 0.6992, + "step": 13478 + }, + { + "epoch": 0.692722787542399, + "grad_norm": 1.1471049785614014, + "learning_rate": 2.2783638506632073e-06, + "loss": 0.7134, + "step": 13479 + }, + { + "epoch": 0.6927741802857437, + "grad_norm": 1.0870516300201416, + "learning_rate": 2.277665732924289e-06, + "loss": 0.7164, + "step": 13480 + }, + { + "epoch": 0.6928255730290883, + "grad_norm": 0.9905552268028259, + "learning_rate": 2.2769676906100207e-06, + "loss": 0.6769, + "step": 13481 + }, + { + "epoch": 0.692876965772433, + "grad_norm": 1.099609613418579, + "learning_rate": 2.2762697237397406e-06, + "loss": 0.7233, + "step": 13482 + }, + { + "epoch": 0.6929283585157776, + "grad_norm": 1.0504956245422363, + "learning_rate": 2.275571832332784e-06, + "loss": 0.7112, + "step": 13483 + }, + { + "epoch": 0.6929797512591223, + "grad_norm": 1.0653971433639526, + "learning_rate": 2.2748740164084907e-06, + "loss": 0.6814, + "step": 13484 + }, + { + "epoch": 0.6930311440024669, + "grad_norm": 1.154149055480957, + "learning_rate": 2.274176275986193e-06, + "loss": 0.7133, + "step": 13485 + }, + { + "epoch": 0.6930825367458114, + "grad_norm": 0.7868914008140564, + "learning_rate": 2.273478611085222e-06, + "loss": 0.6441, + "step": 13486 + }, + { + "epoch": 0.6931339294891561, + "grad_norm": 1.097804307937622, + "learning_rate": 2.2727810217249045e-06, + "loss": 0.7107, + "step": 13487 + }, + { + "epoch": 0.6931853222325007, + "grad_norm": 1.0980697870254517, + "learning_rate": 2.2720835079245744e-06, + "loss": 0.7395, + "step": 13488 + }, + { + "epoch": 0.6932367149758454, + "grad_norm": 1.0722997188568115, + "learning_rate": 2.271386069703549e-06, + "loss": 0.7372, + "step": 13489 + }, + { + "epoch": 0.69328810771919, + "grad_norm": 1.0293450355529785, + "learning_rate": 2.270688707081157e-06, + "loss": 0.7051, + "step": 13490 + }, + { + "epoch": 0.6933395004625347, + "grad_norm": 1.1046091318130493, + "learning_rate": 2.269991420076718e-06, + "loss": 0.7526, + "step": 13491 + }, + { + "epoch": 0.6933908932058793, + "grad_norm": 1.0929967164993286, + "learning_rate": 2.2692942087095487e-06, + "loss": 0.6725, + "step": 13492 + }, + { + "epoch": 0.693442285949224, + "grad_norm": 0.8360573053359985, + "learning_rate": 2.2685970729989696e-06, + "loss": 0.6657, + "step": 13493 + }, + { + "epoch": 0.6934936786925686, + "grad_norm": 0.7532451748847961, + "learning_rate": 2.267900012964292e-06, + "loss": 0.6644, + "step": 13494 + }, + { + "epoch": 0.6935450714359133, + "grad_norm": 1.1184346675872803, + "learning_rate": 2.2672030286248342e-06, + "loss": 0.7184, + "step": 13495 + }, + { + "epoch": 0.6935964641792579, + "grad_norm": 1.026456594467163, + "learning_rate": 2.2665061199998996e-06, + "loss": 0.6437, + "step": 13496 + }, + { + "epoch": 0.6936478569226026, + "grad_norm": 1.0560096502304077, + "learning_rate": 2.2658092871088013e-06, + "loss": 0.6751, + "step": 13497 + }, + { + "epoch": 0.6936992496659472, + "grad_norm": 1.028740644454956, + "learning_rate": 2.265112529970844e-06, + "loss": 0.7228, + "step": 13498 + }, + { + "epoch": 0.6937506424092919, + "grad_norm": 0.863117516040802, + "learning_rate": 2.2644158486053323e-06, + "loss": 0.598, + "step": 13499 + }, + { + "epoch": 0.6938020351526365, + "grad_norm": 1.0150525569915771, + "learning_rate": 2.263719243031567e-06, + "loss": 0.7142, + "step": 13500 + }, + { + "epoch": 0.693853427895981, + "grad_norm": 1.0095702409744263, + "learning_rate": 2.2630227132688515e-06, + "loss": 0.6977, + "step": 13501 + }, + { + "epoch": 0.6939048206393257, + "grad_norm": 1.0393542051315308, + "learning_rate": 2.2623262593364813e-06, + "loss": 0.6549, + "step": 13502 + }, + { + "epoch": 0.6939562133826703, + "grad_norm": 0.9956376552581787, + "learning_rate": 2.261629881253753e-06, + "loss": 0.6542, + "step": 13503 + }, + { + "epoch": 0.694007606126015, + "grad_norm": 1.1031354665756226, + "learning_rate": 2.2609335790399602e-06, + "loss": 0.6867, + "step": 13504 + }, + { + "epoch": 0.6940589988693596, + "grad_norm": 1.0696535110473633, + "learning_rate": 2.260237352714392e-06, + "loss": 0.7057, + "step": 13505 + }, + { + "epoch": 0.6941103916127043, + "grad_norm": 1.059991717338562, + "learning_rate": 2.2595412022963424e-06, + "loss": 0.691, + "step": 13506 + }, + { + "epoch": 0.6941617843560489, + "grad_norm": 1.232047438621521, + "learning_rate": 2.258845127805098e-06, + "loss": 0.6761, + "step": 13507 + }, + { + "epoch": 0.6942131770993936, + "grad_norm": 1.053749680519104, + "learning_rate": 2.2581491292599415e-06, + "loss": 0.6931, + "step": 13508 + }, + { + "epoch": 0.6942645698427382, + "grad_norm": 1.1948935985565186, + "learning_rate": 2.2574532066801567e-06, + "loss": 0.694, + "step": 13509 + }, + { + "epoch": 0.6943159625860829, + "grad_norm": 0.7406054139137268, + "learning_rate": 2.2567573600850273e-06, + "loss": 0.6674, + "step": 13510 + }, + { + "epoch": 0.6943673553294275, + "grad_norm": 1.170021414756775, + "learning_rate": 2.256061589493831e-06, + "loss": 0.6722, + "step": 13511 + }, + { + "epoch": 0.6944187480727722, + "grad_norm": 0.665573000907898, + "learning_rate": 2.2553658949258445e-06, + "loss": 0.6127, + "step": 13512 + }, + { + "epoch": 0.6944701408161168, + "grad_norm": 1.0770032405853271, + "learning_rate": 2.2546702764003403e-06, + "loss": 0.7105, + "step": 13513 + }, + { + "epoch": 0.6945215335594614, + "grad_norm": 1.0802977085113525, + "learning_rate": 2.2539747339365957e-06, + "loss": 0.6523, + "step": 13514 + }, + { + "epoch": 0.6945729263028061, + "grad_norm": 1.0573500394821167, + "learning_rate": 2.253279267553879e-06, + "loss": 0.6757, + "step": 13515 + }, + { + "epoch": 0.6946243190461506, + "grad_norm": 1.161020278930664, + "learning_rate": 2.2525838772714564e-06, + "loss": 0.6903, + "step": 13516 + }, + { + "epoch": 0.6946757117894953, + "grad_norm": 1.08132004737854, + "learning_rate": 2.251888563108601e-06, + "loss": 0.7068, + "step": 13517 + }, + { + "epoch": 0.6947271045328399, + "grad_norm": 0.6985142827033997, + "learning_rate": 2.251193325084569e-06, + "loss": 0.6941, + "step": 13518 + }, + { + "epoch": 0.6947784972761846, + "grad_norm": 1.0044920444488525, + "learning_rate": 2.2504981632186278e-06, + "loss": 0.7025, + "step": 13519 + }, + { + "epoch": 0.6948298900195292, + "grad_norm": 1.0493305921554565, + "learning_rate": 2.249803077530036e-06, + "loss": 0.7143, + "step": 13520 + }, + { + "epoch": 0.6948812827628739, + "grad_norm": 1.0844831466674805, + "learning_rate": 2.2491080680380515e-06, + "loss": 0.6629, + "step": 13521 + }, + { + "epoch": 0.6949326755062185, + "grad_norm": 0.7185050845146179, + "learning_rate": 2.2484131347619285e-06, + "loss": 0.6661, + "step": 13522 + }, + { + "epoch": 0.6949840682495632, + "grad_norm": 1.09365713596344, + "learning_rate": 2.2477182777209244e-06, + "loss": 0.6656, + "step": 13523 + }, + { + "epoch": 0.6950354609929078, + "grad_norm": 0.8049207329750061, + "learning_rate": 2.2470234969342887e-06, + "loss": 0.679, + "step": 13524 + }, + { + "epoch": 0.6950868537362525, + "grad_norm": 0.7386157512664795, + "learning_rate": 2.246328792421269e-06, + "loss": 0.6768, + "step": 13525 + }, + { + "epoch": 0.6951382464795971, + "grad_norm": 0.6995337009429932, + "learning_rate": 2.245634164201117e-06, + "loss": 0.6571, + "step": 13526 + }, + { + "epoch": 0.6951896392229417, + "grad_norm": 1.1221362352371216, + "learning_rate": 2.2449396122930755e-06, + "loss": 0.7141, + "step": 13527 + }, + { + "epoch": 0.6952410319662864, + "grad_norm": 1.2116707563400269, + "learning_rate": 2.2442451367163877e-06, + "loss": 0.7267, + "step": 13528 + }, + { + "epoch": 0.695292424709631, + "grad_norm": 1.0572103261947632, + "learning_rate": 2.243550737490293e-06, + "loss": 0.7153, + "step": 13529 + }, + { + "epoch": 0.6953438174529757, + "grad_norm": 1.1448333263397217, + "learning_rate": 2.2428564146340357e-06, + "loss": 0.7181, + "step": 13530 + }, + { + "epoch": 0.6953952101963203, + "grad_norm": 1.1060892343521118, + "learning_rate": 2.242162168166846e-06, + "loss": 0.7155, + "step": 13531 + }, + { + "epoch": 0.6954466029396649, + "grad_norm": 1.121079444885254, + "learning_rate": 2.2414679981079635e-06, + "loss": 0.7484, + "step": 13532 + }, + { + "epoch": 0.6954979956830095, + "grad_norm": 0.8193677663803101, + "learning_rate": 2.2407739044766187e-06, + "loss": 0.6458, + "step": 13533 + }, + { + "epoch": 0.6955493884263542, + "grad_norm": 1.0856173038482666, + "learning_rate": 2.2400798872920404e-06, + "loss": 0.7099, + "step": 13534 + }, + { + "epoch": 0.6956007811696988, + "grad_norm": 1.0929173231124878, + "learning_rate": 2.239385946573461e-06, + "loss": 0.6294, + "step": 13535 + }, + { + "epoch": 0.6956521739130435, + "grad_norm": 1.244460940361023, + "learning_rate": 2.238692082340104e-06, + "loss": 0.7127, + "step": 13536 + }, + { + "epoch": 0.6957035666563881, + "grad_norm": 1.0711065530776978, + "learning_rate": 2.237998294611195e-06, + "loss": 0.6519, + "step": 13537 + }, + { + "epoch": 0.6957549593997328, + "grad_norm": 1.119640827178955, + "learning_rate": 2.237304583405953e-06, + "loss": 0.6988, + "step": 13538 + }, + { + "epoch": 0.6958063521430774, + "grad_norm": 0.7877973914146423, + "learning_rate": 2.2366109487436016e-06, + "loss": 0.6568, + "step": 13539 + }, + { + "epoch": 0.695857744886422, + "grad_norm": 1.0677130222320557, + "learning_rate": 2.2359173906433573e-06, + "loss": 0.7198, + "step": 13540 + }, + { + "epoch": 0.6959091376297667, + "grad_norm": 1.0715129375457764, + "learning_rate": 2.2352239091244355e-06, + "loss": 0.7079, + "step": 13541 + }, + { + "epoch": 0.6959605303731113, + "grad_norm": 1.0717582702636719, + "learning_rate": 2.234530504206047e-06, + "loss": 0.6789, + "step": 13542 + }, + { + "epoch": 0.696011923116456, + "grad_norm": 1.1999636888504028, + "learning_rate": 2.2338371759074086e-06, + "loss": 0.7582, + "step": 13543 + }, + { + "epoch": 0.6960633158598006, + "grad_norm": 0.7118518948554993, + "learning_rate": 2.2331439242477266e-06, + "loss": 0.6599, + "step": 13544 + }, + { + "epoch": 0.6961147086031453, + "grad_norm": 1.1292623281478882, + "learning_rate": 2.232450749246206e-06, + "loss": 0.7757, + "step": 13545 + }, + { + "epoch": 0.6961661013464899, + "grad_norm": 1.00393545627594, + "learning_rate": 2.2317576509220583e-06, + "loss": 0.6949, + "step": 13546 + }, + { + "epoch": 0.6962174940898345, + "grad_norm": 0.772158682346344, + "learning_rate": 2.2310646292944782e-06, + "loss": 0.634, + "step": 13547 + }, + { + "epoch": 0.6962688868331791, + "grad_norm": 1.1463512182235718, + "learning_rate": 2.230371684382673e-06, + "loss": 0.7078, + "step": 13548 + }, + { + "epoch": 0.6963202795765238, + "grad_norm": 1.0920727252960205, + "learning_rate": 2.229678816205838e-06, + "loss": 0.6741, + "step": 13549 + }, + { + "epoch": 0.6963716723198684, + "grad_norm": 1.132951259613037, + "learning_rate": 2.2289860247831708e-06, + "loss": 0.7125, + "step": 13550 + }, + { + "epoch": 0.696423065063213, + "grad_norm": 1.0562787055969238, + "learning_rate": 2.228293310133864e-06, + "loss": 0.6815, + "step": 13551 + }, + { + "epoch": 0.6964744578065577, + "grad_norm": 1.0256842374801636, + "learning_rate": 2.227600672277113e-06, + "loss": 0.677, + "step": 13552 + }, + { + "epoch": 0.6965258505499023, + "grad_norm": 1.1402944326400757, + "learning_rate": 2.226908111232106e-06, + "loss": 0.7094, + "step": 13553 + }, + { + "epoch": 0.696577243293247, + "grad_norm": 1.100414514541626, + "learning_rate": 2.2262156270180325e-06, + "loss": 0.7393, + "step": 13554 + }, + { + "epoch": 0.6966286360365916, + "grad_norm": 1.1295472383499146, + "learning_rate": 2.2255232196540744e-06, + "loss": 0.734, + "step": 13555 + }, + { + "epoch": 0.6966800287799363, + "grad_norm": 1.111074686050415, + "learning_rate": 2.2248308891594205e-06, + "loss": 0.7036, + "step": 13556 + }, + { + "epoch": 0.6967314215232809, + "grad_norm": 1.144273281097412, + "learning_rate": 2.2241386355532502e-06, + "loss": 0.7026, + "step": 13557 + }, + { + "epoch": 0.6967828142666256, + "grad_norm": 0.6851841807365417, + "learning_rate": 2.223446458854741e-06, + "loss": 0.6114, + "step": 13558 + }, + { + "epoch": 0.6968342070099702, + "grad_norm": 1.1043798923492432, + "learning_rate": 2.2227543590830764e-06, + "loss": 0.6963, + "step": 13559 + }, + { + "epoch": 0.6968855997533149, + "grad_norm": 1.0370383262634277, + "learning_rate": 2.2220623362574235e-06, + "loss": 0.7016, + "step": 13560 + }, + { + "epoch": 0.6969369924966595, + "grad_norm": 1.183712363243103, + "learning_rate": 2.2213703903969607e-06, + "loss": 0.752, + "step": 13561 + }, + { + "epoch": 0.6969883852400041, + "grad_norm": 1.0715758800506592, + "learning_rate": 2.220678521520858e-06, + "loss": 0.707, + "step": 13562 + }, + { + "epoch": 0.6970397779833487, + "grad_norm": 1.1316934823989868, + "learning_rate": 2.2199867296482842e-06, + "loss": 0.693, + "step": 13563 + }, + { + "epoch": 0.6970911707266934, + "grad_norm": 1.0494171380996704, + "learning_rate": 2.219295014798404e-06, + "loss": 0.709, + "step": 13564 + }, + { + "epoch": 0.697142563470038, + "grad_norm": 1.0881223678588867, + "learning_rate": 2.2186033769903846e-06, + "loss": 0.7428, + "step": 13565 + }, + { + "epoch": 0.6971939562133826, + "grad_norm": 1.07633638381958, + "learning_rate": 2.217911816243388e-06, + "loss": 0.6992, + "step": 13566 + }, + { + "epoch": 0.6972453489567273, + "grad_norm": 1.093612551689148, + "learning_rate": 2.217220332576573e-06, + "loss": 0.7377, + "step": 13567 + }, + { + "epoch": 0.6972967417000719, + "grad_norm": 1.0358095169067383, + "learning_rate": 2.216528926009101e-06, + "loss": 0.7396, + "step": 13568 + }, + { + "epoch": 0.6973481344434166, + "grad_norm": 1.046341896057129, + "learning_rate": 2.215837596560123e-06, + "loss": 0.7273, + "step": 13569 + }, + { + "epoch": 0.6973995271867612, + "grad_norm": 1.0692846775054932, + "learning_rate": 2.215146344248797e-06, + "loss": 0.7117, + "step": 13570 + }, + { + "epoch": 0.6974509199301059, + "grad_norm": 1.111298680305481, + "learning_rate": 2.214455169094272e-06, + "loss": 0.7233, + "step": 13571 + }, + { + "epoch": 0.6975023126734505, + "grad_norm": 1.1238657236099243, + "learning_rate": 2.2137640711157023e-06, + "loss": 0.643, + "step": 13572 + }, + { + "epoch": 0.6975537054167952, + "grad_norm": 1.131379246711731, + "learning_rate": 2.2130730503322282e-06, + "loss": 0.7643, + "step": 13573 + }, + { + "epoch": 0.6976050981601398, + "grad_norm": 1.0902824401855469, + "learning_rate": 2.2123821067630012e-06, + "loss": 0.6756, + "step": 13574 + }, + { + "epoch": 0.6976564909034845, + "grad_norm": 0.7707480192184448, + "learning_rate": 2.2116912404271616e-06, + "loss": 0.6181, + "step": 13575 + }, + { + "epoch": 0.6977078836468291, + "grad_norm": 1.068686604499817, + "learning_rate": 2.2110004513438498e-06, + "loss": 0.6517, + "step": 13576 + }, + { + "epoch": 0.6977592763901737, + "grad_norm": 1.0500993728637695, + "learning_rate": 2.2103097395322074e-06, + "loss": 0.7644, + "step": 13577 + }, + { + "epoch": 0.6978106691335183, + "grad_norm": 1.1153894662857056, + "learning_rate": 2.2096191050113703e-06, + "loss": 0.6588, + "step": 13578 + }, + { + "epoch": 0.697862061876863, + "grad_norm": 1.0967663526535034, + "learning_rate": 2.2089285478004724e-06, + "loss": 0.6989, + "step": 13579 + }, + { + "epoch": 0.6979134546202076, + "grad_norm": 1.071048617362976, + "learning_rate": 2.2082380679186445e-06, + "loss": 0.7652, + "step": 13580 + }, + { + "epoch": 0.6979648473635522, + "grad_norm": 1.0856297016143799, + "learning_rate": 2.2075476653850227e-06, + "loss": 0.7067, + "step": 13581 + }, + { + "epoch": 0.6980162401068969, + "grad_norm": 0.8347252607345581, + "learning_rate": 2.206857340218727e-06, + "loss": 0.668, + "step": 13582 + }, + { + "epoch": 0.6980676328502415, + "grad_norm": 1.0944101810455322, + "learning_rate": 2.20616709243889e-06, + "loss": 0.6891, + "step": 13583 + }, + { + "epoch": 0.6981190255935862, + "grad_norm": 0.8433620929718018, + "learning_rate": 2.205476922064633e-06, + "loss": 0.6901, + "step": 13584 + }, + { + "epoch": 0.6981704183369308, + "grad_norm": 1.1295604705810547, + "learning_rate": 2.204786829115077e-06, + "loss": 0.7019, + "step": 13585 + }, + { + "epoch": 0.6982218110802755, + "grad_norm": 1.091489553451538, + "learning_rate": 2.2040968136093445e-06, + "loss": 0.7056, + "step": 13586 + }, + { + "epoch": 0.6982732038236201, + "grad_norm": 1.0570809841156006, + "learning_rate": 2.203406875566549e-06, + "loss": 0.7053, + "step": 13587 + }, + { + "epoch": 0.6983245965669648, + "grad_norm": 0.6707478761672974, + "learning_rate": 2.202717015005812e-06, + "loss": 0.698, + "step": 13588 + }, + { + "epoch": 0.6983759893103094, + "grad_norm": 1.073694109916687, + "learning_rate": 2.202027231946239e-06, + "loss": 0.7072, + "step": 13589 + }, + { + "epoch": 0.6984273820536541, + "grad_norm": 1.1681567430496216, + "learning_rate": 2.2013375264069468e-06, + "loss": 0.6689, + "step": 13590 + }, + { + "epoch": 0.6984787747969987, + "grad_norm": 1.2627829313278198, + "learning_rate": 2.200647898407043e-06, + "loss": 0.7366, + "step": 13591 + }, + { + "epoch": 0.6985301675403432, + "grad_norm": 1.079843521118164, + "learning_rate": 2.1999583479656327e-06, + "loss": 0.629, + "step": 13592 + }, + { + "epoch": 0.6985815602836879, + "grad_norm": 1.0948940515518188, + "learning_rate": 2.1992688751018203e-06, + "loss": 0.6984, + "step": 13593 + }, + { + "epoch": 0.6986329530270325, + "grad_norm": 1.0765708684921265, + "learning_rate": 2.198579479834711e-06, + "loss": 0.7202, + "step": 13594 + }, + { + "epoch": 0.6986843457703772, + "grad_norm": 1.0639162063598633, + "learning_rate": 2.1978901621834037e-06, + "loss": 0.6726, + "step": 13595 + }, + { + "epoch": 0.6987357385137218, + "grad_norm": 1.3543606996536255, + "learning_rate": 2.1972009221669943e-06, + "loss": 0.7086, + "step": 13596 + }, + { + "epoch": 0.6987871312570665, + "grad_norm": 1.0584834814071655, + "learning_rate": 2.1965117598045848e-06, + "loss": 0.7193, + "step": 13597 + }, + { + "epoch": 0.6988385240004111, + "grad_norm": 1.082047939300537, + "learning_rate": 2.1958226751152613e-06, + "loss": 0.6781, + "step": 13598 + }, + { + "epoch": 0.6988899167437558, + "grad_norm": 1.0912306308746338, + "learning_rate": 2.1951336681181214e-06, + "loss": 0.665, + "step": 13599 + }, + { + "epoch": 0.6989413094871004, + "grad_norm": 0.7504315376281738, + "learning_rate": 2.194444738832252e-06, + "loss": 0.6587, + "step": 13600 + }, + { + "epoch": 0.6989927022304451, + "grad_norm": 1.216542363166809, + "learning_rate": 2.1937558872767416e-06, + "loss": 0.705, + "step": 13601 + }, + { + "epoch": 0.6990440949737897, + "grad_norm": 0.9907384514808655, + "learning_rate": 2.1930671134706727e-06, + "loss": 0.6931, + "step": 13602 + }, + { + "epoch": 0.6990954877171344, + "grad_norm": 1.0784937143325806, + "learning_rate": 2.1923784174331323e-06, + "loss": 0.7274, + "step": 13603 + }, + { + "epoch": 0.699146880460479, + "grad_norm": 1.0981013774871826, + "learning_rate": 2.1916897991832e-06, + "loss": 0.7222, + "step": 13604 + }, + { + "epoch": 0.6991982732038237, + "grad_norm": 0.7300714254379272, + "learning_rate": 2.1910012587399544e-06, + "loss": 0.6654, + "step": 13605 + }, + { + "epoch": 0.6992496659471683, + "grad_norm": 0.6990795135498047, + "learning_rate": 2.190312796122469e-06, + "loss": 0.667, + "step": 13606 + }, + { + "epoch": 0.6993010586905128, + "grad_norm": 1.1190791130065918, + "learning_rate": 2.1896244113498237e-06, + "loss": 0.6827, + "step": 13607 + }, + { + "epoch": 0.6993524514338575, + "grad_norm": 1.0576037168502808, + "learning_rate": 2.188936104441088e-06, + "loss": 0.6927, + "step": 13608 + }, + { + "epoch": 0.6994038441772021, + "grad_norm": 1.2227036952972412, + "learning_rate": 2.188247875415331e-06, + "loss": 0.7733, + "step": 13609 + }, + { + "epoch": 0.6994552369205468, + "grad_norm": 0.752952516078949, + "learning_rate": 2.1875597242916246e-06, + "loss": 0.6075, + "step": 13610 + }, + { + "epoch": 0.6995066296638914, + "grad_norm": 1.0708752870559692, + "learning_rate": 2.186871651089029e-06, + "loss": 0.7218, + "step": 13611 + }, + { + "epoch": 0.6995580224072361, + "grad_norm": 1.0147924423217773, + "learning_rate": 2.1861836558266127e-06, + "loss": 0.7239, + "step": 13612 + }, + { + "epoch": 0.6996094151505807, + "grad_norm": 1.0690596103668213, + "learning_rate": 2.1854957385234347e-06, + "loss": 0.6639, + "step": 13613 + }, + { + "epoch": 0.6996608078939254, + "grad_norm": 1.1709914207458496, + "learning_rate": 2.184807899198556e-06, + "loss": 0.6993, + "step": 13614 + }, + { + "epoch": 0.69971220063727, + "grad_norm": 1.0704842805862427, + "learning_rate": 2.1841201378710303e-06, + "loss": 0.7126, + "step": 13615 + }, + { + "epoch": 0.6997635933806147, + "grad_norm": 1.31644606590271, + "learning_rate": 2.183432454559917e-06, + "loss": 0.6585, + "step": 13616 + }, + { + "epoch": 0.6998149861239593, + "grad_norm": 1.1318049430847168, + "learning_rate": 2.1827448492842675e-06, + "loss": 0.7502, + "step": 13617 + }, + { + "epoch": 0.699866378867304, + "grad_norm": 1.091261625289917, + "learning_rate": 2.1820573220631304e-06, + "loss": 0.7084, + "step": 13618 + }, + { + "epoch": 0.6999177716106486, + "grad_norm": 0.7350858449935913, + "learning_rate": 2.181369872915557e-06, + "loss": 0.6686, + "step": 13619 + }, + { + "epoch": 0.6999691643539933, + "grad_norm": 1.0864428281784058, + "learning_rate": 2.180682501860593e-06, + "loss": 0.7073, + "step": 13620 + }, + { + "epoch": 0.7000205570973379, + "grad_norm": 1.1340830326080322, + "learning_rate": 2.179995208917282e-06, + "loss": 0.7126, + "step": 13621 + }, + { + "epoch": 0.7000719498406826, + "grad_norm": 1.1045676469802856, + "learning_rate": 2.1793079941046653e-06, + "loss": 0.7106, + "step": 13622 + }, + { + "epoch": 0.7001233425840271, + "grad_norm": 1.1023622751235962, + "learning_rate": 2.1786208574417865e-06, + "loss": 0.7351, + "step": 13623 + }, + { + "epoch": 0.7001747353273717, + "grad_norm": 1.1220405101776123, + "learning_rate": 2.1779337989476773e-06, + "loss": 0.7003, + "step": 13624 + }, + { + "epoch": 0.7002261280707164, + "grad_norm": 1.0507183074951172, + "learning_rate": 2.177246818641378e-06, + "loss": 0.694, + "step": 13625 + }, + { + "epoch": 0.700277520814061, + "grad_norm": 1.1043332815170288, + "learning_rate": 2.176559916541921e-06, + "loss": 0.6942, + "step": 13626 + }, + { + "epoch": 0.7003289135574057, + "grad_norm": 1.080931544303894, + "learning_rate": 2.1758730926683353e-06, + "loss": 0.7695, + "step": 13627 + }, + { + "epoch": 0.7003803063007503, + "grad_norm": 1.1108373403549194, + "learning_rate": 2.1751863470396535e-06, + "loss": 0.7056, + "step": 13628 + }, + { + "epoch": 0.700431699044095, + "grad_norm": 1.075114369392395, + "learning_rate": 2.174499679674901e-06, + "loss": 0.6637, + "step": 13629 + }, + { + "epoch": 0.7004830917874396, + "grad_norm": 1.0331659317016602, + "learning_rate": 2.1738130905931015e-06, + "loss": 0.6879, + "step": 13630 + }, + { + "epoch": 0.7005344845307843, + "grad_norm": 0.952104926109314, + "learning_rate": 2.1731265798132765e-06, + "loss": 0.6385, + "step": 13631 + }, + { + "epoch": 0.7005858772741289, + "grad_norm": 1.091810941696167, + "learning_rate": 2.172440147354452e-06, + "loss": 0.7078, + "step": 13632 + }, + { + "epoch": 0.7006372700174736, + "grad_norm": 1.0442560911178589, + "learning_rate": 2.1717537932356387e-06, + "loss": 0.7162, + "step": 13633 + }, + { + "epoch": 0.7006886627608182, + "grad_norm": 1.119232177734375, + "learning_rate": 2.1710675174758585e-06, + "loss": 0.6665, + "step": 13634 + }, + { + "epoch": 0.7007400555041629, + "grad_norm": 1.1632204055786133, + "learning_rate": 2.170381320094121e-06, + "loss": 0.6902, + "step": 13635 + }, + { + "epoch": 0.7007914482475075, + "grad_norm": 1.0333095788955688, + "learning_rate": 2.1696952011094413e-06, + "loss": 0.7014, + "step": 13636 + }, + { + "epoch": 0.7008428409908521, + "grad_norm": 1.061029314994812, + "learning_rate": 2.169009160540828e-06, + "loss": 0.7383, + "step": 13637 + }, + { + "epoch": 0.7008942337341967, + "grad_norm": 1.0474401712417603, + "learning_rate": 2.1683231984072866e-06, + "loss": 0.741, + "step": 13638 + }, + { + "epoch": 0.7009456264775413, + "grad_norm": 1.0333564281463623, + "learning_rate": 2.167637314727827e-06, + "loss": 0.6946, + "step": 13639 + }, + { + "epoch": 0.700997019220886, + "grad_norm": 1.0708327293395996, + "learning_rate": 2.1669515095214457e-06, + "loss": 0.6804, + "step": 13640 + }, + { + "epoch": 0.7010484119642306, + "grad_norm": 1.0710110664367676, + "learning_rate": 2.1662657828071484e-06, + "loss": 0.6693, + "step": 13641 + }, + { + "epoch": 0.7010998047075753, + "grad_norm": 0.9644345045089722, + "learning_rate": 2.1655801346039317e-06, + "loss": 0.662, + "step": 13642 + }, + { + "epoch": 0.7011511974509199, + "grad_norm": 1.1154215335845947, + "learning_rate": 2.164894564930793e-06, + "loss": 0.7623, + "step": 13643 + }, + { + "epoch": 0.7012025901942646, + "grad_norm": 1.115875244140625, + "learning_rate": 2.164209073806724e-06, + "loss": 0.6539, + "step": 13644 + }, + { + "epoch": 0.7012539829376092, + "grad_norm": 1.1323747634887695, + "learning_rate": 2.1635236612507204e-06, + "loss": 0.7305, + "step": 13645 + }, + { + "epoch": 0.7013053756809539, + "grad_norm": 1.0729527473449707, + "learning_rate": 2.162838327281771e-06, + "loss": 0.6332, + "step": 13646 + }, + { + "epoch": 0.7013567684242985, + "grad_norm": 1.1192591190338135, + "learning_rate": 2.162153071918862e-06, + "loss": 0.7029, + "step": 13647 + }, + { + "epoch": 0.7014081611676432, + "grad_norm": 0.7902805805206299, + "learning_rate": 2.1614678951809814e-06, + "loss": 0.6678, + "step": 13648 + }, + { + "epoch": 0.7014595539109878, + "grad_norm": 1.0752100944519043, + "learning_rate": 2.160782797087112e-06, + "loss": 0.6562, + "step": 13649 + }, + { + "epoch": 0.7015109466543324, + "grad_norm": 1.044222116470337, + "learning_rate": 2.1600977776562343e-06, + "loss": 0.6867, + "step": 13650 + }, + { + "epoch": 0.7015623393976771, + "grad_norm": 1.077748417854309, + "learning_rate": 2.159412836907326e-06, + "loss": 0.6493, + "step": 13651 + }, + { + "epoch": 0.7016137321410217, + "grad_norm": 1.09913969039917, + "learning_rate": 2.1587279748593685e-06, + "loss": 0.6576, + "step": 13652 + }, + { + "epoch": 0.7016651248843663, + "grad_norm": 1.2124676704406738, + "learning_rate": 2.158043191531331e-06, + "loss": 0.7248, + "step": 13653 + }, + { + "epoch": 0.7017165176277109, + "grad_norm": 1.0583033561706543, + "learning_rate": 2.1573584869421897e-06, + "loss": 0.6929, + "step": 13654 + }, + { + "epoch": 0.7017679103710556, + "grad_norm": 1.1056854724884033, + "learning_rate": 2.156673861110914e-06, + "loss": 0.7566, + "step": 13655 + }, + { + "epoch": 0.7018193031144002, + "grad_norm": 1.1423996686935425, + "learning_rate": 2.155989314056472e-06, + "loss": 0.7329, + "step": 13656 + }, + { + "epoch": 0.7018706958577449, + "grad_norm": 1.0696130990982056, + "learning_rate": 2.1553048457978276e-06, + "loss": 0.7174, + "step": 13657 + }, + { + "epoch": 0.7019220886010895, + "grad_norm": 1.0908503532409668, + "learning_rate": 2.1546204563539486e-06, + "loss": 0.7047, + "step": 13658 + }, + { + "epoch": 0.7019734813444342, + "grad_norm": 1.066223382949829, + "learning_rate": 2.153936145743794e-06, + "loss": 0.6784, + "step": 13659 + }, + { + "epoch": 0.7020248740877788, + "grad_norm": 1.0545718669891357, + "learning_rate": 2.153251913986322e-06, + "loss": 0.7361, + "step": 13660 + }, + { + "epoch": 0.7020762668311235, + "grad_norm": 0.8303635120391846, + "learning_rate": 2.1525677611004964e-06, + "loss": 0.6801, + "step": 13661 + }, + { + "epoch": 0.7021276595744681, + "grad_norm": 0.7222477197647095, + "learning_rate": 2.151883687105263e-06, + "loss": 0.6509, + "step": 13662 + }, + { + "epoch": 0.7021790523178127, + "grad_norm": 1.2067302465438843, + "learning_rate": 2.151199692019581e-06, + "loss": 0.6995, + "step": 13663 + }, + { + "epoch": 0.7022304450611574, + "grad_norm": 1.164230465888977, + "learning_rate": 2.1505157758623974e-06, + "loss": 0.6388, + "step": 13664 + }, + { + "epoch": 0.702281837804502, + "grad_norm": 1.1490386724472046, + "learning_rate": 2.149831938652666e-06, + "loss": 0.7215, + "step": 13665 + }, + { + "epoch": 0.7023332305478467, + "grad_norm": 1.0562026500701904, + "learning_rate": 2.1491481804093263e-06, + "loss": 0.6535, + "step": 13666 + }, + { + "epoch": 0.7023846232911913, + "grad_norm": 0.7007404565811157, + "learning_rate": 2.1484645011513277e-06, + "loss": 0.6503, + "step": 13667 + }, + { + "epoch": 0.7024360160345359, + "grad_norm": 1.154572606086731, + "learning_rate": 2.14778090089761e-06, + "loss": 0.6918, + "step": 13668 + }, + { + "epoch": 0.7024874087778805, + "grad_norm": 1.0800907611846924, + "learning_rate": 2.1470973796671114e-06, + "loss": 0.6634, + "step": 13669 + }, + { + "epoch": 0.7025388015212252, + "grad_norm": 1.0401551723480225, + "learning_rate": 2.146413937478772e-06, + "loss": 0.6496, + "step": 13670 + }, + { + "epoch": 0.7025901942645698, + "grad_norm": 1.0757418870925903, + "learning_rate": 2.1457305743515278e-06, + "loss": 0.7151, + "step": 13671 + }, + { + "epoch": 0.7026415870079145, + "grad_norm": 1.047582745552063, + "learning_rate": 2.14504729030431e-06, + "loss": 0.6632, + "step": 13672 + }, + { + "epoch": 0.7026929797512591, + "grad_norm": 1.1095973253250122, + "learning_rate": 2.1443640853560484e-06, + "loss": 0.6873, + "step": 13673 + }, + { + "epoch": 0.7027443724946038, + "grad_norm": 1.0561424493789673, + "learning_rate": 2.1436809595256762e-06, + "loss": 0.6767, + "step": 13674 + }, + { + "epoch": 0.7027957652379484, + "grad_norm": 1.0903648138046265, + "learning_rate": 2.1429979128321147e-06, + "loss": 0.6625, + "step": 13675 + }, + { + "epoch": 0.702847157981293, + "grad_norm": 1.1231417655944824, + "learning_rate": 2.142314945294293e-06, + "loss": 0.6695, + "step": 13676 + }, + { + "epoch": 0.7028985507246377, + "grad_norm": 0.7106789350509644, + "learning_rate": 2.14163205693113e-06, + "loss": 0.6061, + "step": 13677 + }, + { + "epoch": 0.7029499434679823, + "grad_norm": 1.1653095483779907, + "learning_rate": 2.140949247761546e-06, + "loss": 0.7084, + "step": 13678 + }, + { + "epoch": 0.703001336211327, + "grad_norm": 0.7626535892486572, + "learning_rate": 2.1402665178044607e-06, + "loss": 0.6794, + "step": 13679 + }, + { + "epoch": 0.7030527289546716, + "grad_norm": 1.0738590955734253, + "learning_rate": 2.139583867078788e-06, + "loss": 0.7252, + "step": 13680 + }, + { + "epoch": 0.7031041216980163, + "grad_norm": 1.0791456699371338, + "learning_rate": 2.1389012956034446e-06, + "loss": 0.6915, + "step": 13681 + }, + { + "epoch": 0.7031555144413609, + "grad_norm": 1.16554856300354, + "learning_rate": 2.138218803397336e-06, + "loss": 0.6945, + "step": 13682 + }, + { + "epoch": 0.7032069071847055, + "grad_norm": 1.117922067642212, + "learning_rate": 2.1375363904793765e-06, + "loss": 0.7271, + "step": 13683 + }, + { + "epoch": 0.7032582999280501, + "grad_norm": 1.1187388896942139, + "learning_rate": 2.136854056868471e-06, + "loss": 0.6586, + "step": 13684 + }, + { + "epoch": 0.7033096926713948, + "grad_norm": 1.023292064666748, + "learning_rate": 2.1361718025835244e-06, + "loss": 0.7324, + "step": 13685 + }, + { + "epoch": 0.7033610854147394, + "grad_norm": 1.1801109313964844, + "learning_rate": 2.135489627643436e-06, + "loss": 0.6385, + "step": 13686 + }, + { + "epoch": 0.703412478158084, + "grad_norm": 1.1244659423828125, + "learning_rate": 2.1348075320671118e-06, + "loss": 0.7755, + "step": 13687 + }, + { + "epoch": 0.7034638709014287, + "grad_norm": 0.9937413334846497, + "learning_rate": 2.1341255158734463e-06, + "loss": 0.6908, + "step": 13688 + }, + { + "epoch": 0.7035152636447733, + "grad_norm": 1.0457442998886108, + "learning_rate": 2.1334435790813346e-06, + "loss": 0.7156, + "step": 13689 + }, + { + "epoch": 0.703566656388118, + "grad_norm": 1.1232115030288696, + "learning_rate": 2.132761721709675e-06, + "loss": 0.7111, + "step": 13690 + }, + { + "epoch": 0.7036180491314626, + "grad_norm": 1.0953432321548462, + "learning_rate": 2.132079943777352e-06, + "loss": 0.7031, + "step": 13691 + }, + { + "epoch": 0.7036694418748073, + "grad_norm": 1.060590147972107, + "learning_rate": 2.1313982453032606e-06, + "loss": 0.6321, + "step": 13692 + }, + { + "epoch": 0.7037208346181519, + "grad_norm": 1.0582643747329712, + "learning_rate": 2.1307166263062856e-06, + "loss": 0.7644, + "step": 13693 + }, + { + "epoch": 0.7037722273614966, + "grad_norm": 1.2292600870132446, + "learning_rate": 2.1300350868053123e-06, + "loss": 0.7469, + "step": 13694 + }, + { + "epoch": 0.7038236201048412, + "grad_norm": 1.0825990438461304, + "learning_rate": 2.129353626819221e-06, + "loss": 0.6431, + "step": 13695 + }, + { + "epoch": 0.7038750128481859, + "grad_norm": 1.0749919414520264, + "learning_rate": 2.128672246366896e-06, + "loss": 0.7363, + "step": 13696 + }, + { + "epoch": 0.7039264055915305, + "grad_norm": 1.0899678468704224, + "learning_rate": 2.1279909454672143e-06, + "loss": 0.7532, + "step": 13697 + }, + { + "epoch": 0.7039777983348752, + "grad_norm": 1.1461503505706787, + "learning_rate": 2.1273097241390494e-06, + "loss": 0.7137, + "step": 13698 + }, + { + "epoch": 0.7040291910782197, + "grad_norm": 1.0445613861083984, + "learning_rate": 2.1266285824012787e-06, + "loss": 0.7256, + "step": 13699 + }, + { + "epoch": 0.7040805838215644, + "grad_norm": 1.0993098020553589, + "learning_rate": 2.1259475202727724e-06, + "loss": 0.6933, + "step": 13700 + }, + { + "epoch": 0.704131976564909, + "grad_norm": 1.067650556564331, + "learning_rate": 2.1252665377724003e-06, + "loss": 0.7316, + "step": 13701 + }, + { + "epoch": 0.7041833693082536, + "grad_norm": 1.031410813331604, + "learning_rate": 2.1245856349190262e-06, + "loss": 0.7068, + "step": 13702 + }, + { + "epoch": 0.7042347620515983, + "grad_norm": 1.032565951347351, + "learning_rate": 2.1239048117315225e-06, + "loss": 0.6464, + "step": 13703 + }, + { + "epoch": 0.7042861547949429, + "grad_norm": 1.0571365356445312, + "learning_rate": 2.1232240682287437e-06, + "loss": 0.668, + "step": 13704 + }, + { + "epoch": 0.7043375475382876, + "grad_norm": 1.1523305177688599, + "learning_rate": 2.122543404429556e-06, + "loss": 0.639, + "step": 13705 + }, + { + "epoch": 0.7043889402816322, + "grad_norm": 1.123197317123413, + "learning_rate": 2.1218628203528165e-06, + "loss": 0.7134, + "step": 13706 + }, + { + "epoch": 0.7044403330249769, + "grad_norm": 0.6851266622543335, + "learning_rate": 2.12118231601738e-06, + "loss": 0.6797, + "step": 13707 + }, + { + "epoch": 0.7044917257683215, + "grad_norm": 1.1299890279769897, + "learning_rate": 2.1205018914421e-06, + "loss": 0.7074, + "step": 13708 + }, + { + "epoch": 0.7045431185116662, + "grad_norm": 1.061153769493103, + "learning_rate": 2.1198215466458317e-06, + "loss": 0.6165, + "step": 13709 + }, + { + "epoch": 0.7045945112550108, + "grad_norm": 1.0980801582336426, + "learning_rate": 2.1191412816474215e-06, + "loss": 0.7455, + "step": 13710 + }, + { + "epoch": 0.7046459039983555, + "grad_norm": 1.1461031436920166, + "learning_rate": 2.118461096465717e-06, + "loss": 0.7646, + "step": 13711 + }, + { + "epoch": 0.7046972967417001, + "grad_norm": 1.044925570487976, + "learning_rate": 2.1177809911195655e-06, + "loss": 0.6647, + "step": 13712 + }, + { + "epoch": 0.7047486894850448, + "grad_norm": 1.2419744729995728, + "learning_rate": 2.1171009656278086e-06, + "loss": 0.7271, + "step": 13713 + }, + { + "epoch": 0.7048000822283893, + "grad_norm": 1.0512958765029907, + "learning_rate": 2.116421020009287e-06, + "loss": 0.7044, + "step": 13714 + }, + { + "epoch": 0.704851474971734, + "grad_norm": 1.0639828443527222, + "learning_rate": 2.115741154282837e-06, + "loss": 0.7075, + "step": 13715 + }, + { + "epoch": 0.7049028677150786, + "grad_norm": 1.0687519311904907, + "learning_rate": 2.1150613684673012e-06, + "loss": 0.6832, + "step": 13716 + }, + { + "epoch": 0.7049542604584232, + "grad_norm": 1.0934782028198242, + "learning_rate": 2.1143816625815057e-06, + "loss": 0.689, + "step": 13717 + }, + { + "epoch": 0.7050056532017679, + "grad_norm": 1.0897703170776367, + "learning_rate": 2.113702036644287e-06, + "loss": 0.6598, + "step": 13718 + }, + { + "epoch": 0.7050570459451125, + "grad_norm": 1.0416258573532104, + "learning_rate": 2.1130224906744745e-06, + "loss": 0.6947, + "step": 13719 + }, + { + "epoch": 0.7051084386884572, + "grad_norm": 1.1381744146347046, + "learning_rate": 2.1123430246908933e-06, + "loss": 0.684, + "step": 13720 + }, + { + "epoch": 0.7051598314318018, + "grad_norm": 1.0941307544708252, + "learning_rate": 2.1116636387123717e-06, + "loss": 0.7423, + "step": 13721 + }, + { + "epoch": 0.7052112241751465, + "grad_norm": 1.0425844192504883, + "learning_rate": 2.1109843327577312e-06, + "loss": 0.6757, + "step": 13722 + }, + { + "epoch": 0.7052626169184911, + "grad_norm": 1.129780650138855, + "learning_rate": 2.110305106845793e-06, + "loss": 0.7123, + "step": 13723 + }, + { + "epoch": 0.7053140096618358, + "grad_norm": 1.0874446630477905, + "learning_rate": 2.109625960995373e-06, + "loss": 0.719, + "step": 13724 + }, + { + "epoch": 0.7053654024051804, + "grad_norm": 0.7113963961601257, + "learning_rate": 2.108946895225293e-06, + "loss": 0.6682, + "step": 13725 + }, + { + "epoch": 0.7054167951485251, + "grad_norm": 0.7825025320053101, + "learning_rate": 2.108267909554361e-06, + "loss": 0.6526, + "step": 13726 + }, + { + "epoch": 0.7054681878918697, + "grad_norm": 1.0926584005355835, + "learning_rate": 2.1075890040013937e-06, + "loss": 0.7228, + "step": 13727 + }, + { + "epoch": 0.7055195806352144, + "grad_norm": 1.1640859842300415, + "learning_rate": 2.1069101785851964e-06, + "loss": 0.7344, + "step": 13728 + }, + { + "epoch": 0.7055709733785589, + "grad_norm": 1.1008111238479614, + "learning_rate": 2.1062314333245807e-06, + "loss": 0.6915, + "step": 13729 + }, + { + "epoch": 0.7056223661219035, + "grad_norm": 1.0449261665344238, + "learning_rate": 2.105552768238351e-06, + "loss": 0.727, + "step": 13730 + }, + { + "epoch": 0.7056737588652482, + "grad_norm": 1.0833659172058105, + "learning_rate": 2.104874183345307e-06, + "loss": 0.6768, + "step": 13731 + }, + { + "epoch": 0.7057251516085928, + "grad_norm": 1.0926237106323242, + "learning_rate": 2.1041956786642554e-06, + "loss": 0.6703, + "step": 13732 + }, + { + "epoch": 0.7057765443519375, + "grad_norm": 0.732568085193634, + "learning_rate": 2.1035172542139876e-06, + "loss": 0.6521, + "step": 13733 + }, + { + "epoch": 0.7058279370952821, + "grad_norm": 1.1078683137893677, + "learning_rate": 2.1028389100133055e-06, + "loss": 0.693, + "step": 13734 + }, + { + "epoch": 0.7058793298386268, + "grad_norm": 1.1113091707229614, + "learning_rate": 2.102160646081002e-06, + "loss": 0.6908, + "step": 13735 + }, + { + "epoch": 0.7059307225819714, + "grad_norm": 1.0711385011672974, + "learning_rate": 2.101482462435867e-06, + "loss": 0.7083, + "step": 13736 + }, + { + "epoch": 0.7059821153253161, + "grad_norm": 1.0911685228347778, + "learning_rate": 2.100804359096691e-06, + "loss": 0.6975, + "step": 13737 + }, + { + "epoch": 0.7060335080686607, + "grad_norm": 1.0011365413665771, + "learning_rate": 2.100126336082263e-06, + "loss": 0.7599, + "step": 13738 + }, + { + "epoch": 0.7060849008120054, + "grad_norm": 1.0789313316345215, + "learning_rate": 2.099448393411367e-06, + "loss": 0.6865, + "step": 13739 + }, + { + "epoch": 0.70613629355535, + "grad_norm": 1.0851807594299316, + "learning_rate": 2.0987705311027845e-06, + "loss": 0.7197, + "step": 13740 + }, + { + "epoch": 0.7061876862986947, + "grad_norm": 1.0809543132781982, + "learning_rate": 2.0980927491753017e-06, + "loss": 0.6783, + "step": 13741 + }, + { + "epoch": 0.7062390790420393, + "grad_norm": 1.0799906253814697, + "learning_rate": 2.09741504764769e-06, + "loss": 0.7913, + "step": 13742 + }, + { + "epoch": 0.706290471785384, + "grad_norm": 1.0651135444641113, + "learning_rate": 2.096737426538731e-06, + "loss": 0.6883, + "step": 13743 + }, + { + "epoch": 0.7063418645287285, + "grad_norm": 0.6875742673873901, + "learning_rate": 2.0960598858671942e-06, + "loss": 0.6115, + "step": 13744 + }, + { + "epoch": 0.7063932572720731, + "grad_norm": 1.1060433387756348, + "learning_rate": 2.095382425651858e-06, + "loss": 0.687, + "step": 13745 + }, + { + "epoch": 0.7064446500154178, + "grad_norm": 1.0396978855133057, + "learning_rate": 2.094705045911485e-06, + "loss": 0.7132, + "step": 13746 + }, + { + "epoch": 0.7064960427587624, + "grad_norm": 1.1153088808059692, + "learning_rate": 2.0940277466648474e-06, + "loss": 0.7064, + "step": 13747 + }, + { + "epoch": 0.7065474355021071, + "grad_norm": 1.168765902519226, + "learning_rate": 2.0933505279307083e-06, + "loss": 0.7042, + "step": 13748 + }, + { + "epoch": 0.7065988282454517, + "grad_norm": 1.0957200527191162, + "learning_rate": 2.0926733897278294e-06, + "loss": 0.6664, + "step": 13749 + }, + { + "epoch": 0.7066502209887964, + "grad_norm": 0.757698118686676, + "learning_rate": 2.0919963320749746e-06, + "loss": 0.6588, + "step": 13750 + }, + { + "epoch": 0.706701613732141, + "grad_norm": 1.0701937675476074, + "learning_rate": 2.091319354990902e-06, + "loss": 0.7045, + "step": 13751 + }, + { + "epoch": 0.7067530064754857, + "grad_norm": 0.7378687262535095, + "learning_rate": 2.0906424584943653e-06, + "loss": 0.6012, + "step": 13752 + }, + { + "epoch": 0.7068043992188303, + "grad_norm": 1.1651259660720825, + "learning_rate": 2.0899656426041183e-06, + "loss": 0.7384, + "step": 13753 + }, + { + "epoch": 0.706855791962175, + "grad_norm": 1.07802152633667, + "learning_rate": 2.0892889073389184e-06, + "loss": 0.7122, + "step": 13754 + }, + { + "epoch": 0.7069071847055196, + "grad_norm": 1.0923494100570679, + "learning_rate": 2.0886122527175074e-06, + "loss": 0.7109, + "step": 13755 + }, + { + "epoch": 0.7069585774488643, + "grad_norm": 1.0939182043075562, + "learning_rate": 2.0879356787586374e-06, + "loss": 0.7248, + "step": 13756 + }, + { + "epoch": 0.7070099701922089, + "grad_norm": 0.7409538626670837, + "learning_rate": 2.0872591854810516e-06, + "loss": 0.6616, + "step": 13757 + }, + { + "epoch": 0.7070613629355536, + "grad_norm": 1.0640689134597778, + "learning_rate": 2.0865827729034947e-06, + "loss": 0.7159, + "step": 13758 + }, + { + "epoch": 0.7071127556788981, + "grad_norm": 1.0520906448364258, + "learning_rate": 2.0859064410447066e-06, + "loss": 0.7389, + "step": 13759 + }, + { + "epoch": 0.7071641484222427, + "grad_norm": 1.068968415260315, + "learning_rate": 2.0852301899234252e-06, + "loss": 0.7194, + "step": 13760 + }, + { + "epoch": 0.7072155411655874, + "grad_norm": 1.1356557607650757, + "learning_rate": 2.0845540195583862e-06, + "loss": 0.6747, + "step": 13761 + }, + { + "epoch": 0.707266933908932, + "grad_norm": 1.0984845161437988, + "learning_rate": 2.0838779299683225e-06, + "loss": 0.699, + "step": 13762 + }, + { + "epoch": 0.7073183266522767, + "grad_norm": 1.0935633182525635, + "learning_rate": 2.0832019211719696e-06, + "loss": 0.6528, + "step": 13763 + }, + { + "epoch": 0.7073697193956213, + "grad_norm": 1.1489708423614502, + "learning_rate": 2.0825259931880537e-06, + "loss": 0.7808, + "step": 13764 + }, + { + "epoch": 0.707421112138966, + "grad_norm": 1.0749751329421997, + "learning_rate": 2.081850146035303e-06, + "loss": 0.7348, + "step": 13765 + }, + { + "epoch": 0.7074725048823106, + "grad_norm": 1.0883599519729614, + "learning_rate": 2.081174379732441e-06, + "loss": 0.6776, + "step": 13766 + }, + { + "epoch": 0.7075238976256553, + "grad_norm": 1.0754282474517822, + "learning_rate": 2.080498694298195e-06, + "loss": 0.7203, + "step": 13767 + }, + { + "epoch": 0.7075752903689999, + "grad_norm": 1.0727996826171875, + "learning_rate": 2.079823089751278e-06, + "loss": 0.6779, + "step": 13768 + }, + { + "epoch": 0.7076266831123446, + "grad_norm": 1.0764833688735962, + "learning_rate": 2.0791475661104142e-06, + "loss": 0.7063, + "step": 13769 + }, + { + "epoch": 0.7076780758556892, + "grad_norm": 1.0532652139663696, + "learning_rate": 2.0784721233943183e-06, + "loss": 0.7222, + "step": 13770 + }, + { + "epoch": 0.7077294685990339, + "grad_norm": 1.0890300273895264, + "learning_rate": 2.0777967616217017e-06, + "loss": 0.7131, + "step": 13771 + }, + { + "epoch": 0.7077808613423785, + "grad_norm": 1.0651859045028687, + "learning_rate": 2.077121480811279e-06, + "loss": 0.6851, + "step": 13772 + }, + { + "epoch": 0.7078322540857231, + "grad_norm": 1.0930585861206055, + "learning_rate": 2.076446280981756e-06, + "loss": 0.706, + "step": 13773 + }, + { + "epoch": 0.7078836468290677, + "grad_norm": 0.7050151824951172, + "learning_rate": 2.0757711621518458e-06, + "loss": 0.6378, + "step": 13774 + }, + { + "epoch": 0.7079350395724123, + "grad_norm": 1.042059302330017, + "learning_rate": 2.0750961243402453e-06, + "loss": 0.6709, + "step": 13775 + }, + { + "epoch": 0.707986432315757, + "grad_norm": 0.9877983927726746, + "learning_rate": 2.074421167565663e-06, + "loss": 0.6792, + "step": 13776 + }, + { + "epoch": 0.7080378250591016, + "grad_norm": 1.1301740407943726, + "learning_rate": 2.0737462918467967e-06, + "loss": 0.7207, + "step": 13777 + }, + { + "epoch": 0.7080892178024463, + "grad_norm": 1.0695161819458008, + "learning_rate": 2.0730714972023447e-06, + "loss": 0.6362, + "step": 13778 + }, + { + "epoch": 0.7081406105457909, + "grad_norm": 1.102811336517334, + "learning_rate": 2.0723967836510004e-06, + "loss": 0.7513, + "step": 13779 + }, + { + "epoch": 0.7081920032891356, + "grad_norm": 1.2144571542739868, + "learning_rate": 2.0717221512114626e-06, + "loss": 0.7055, + "step": 13780 + }, + { + "epoch": 0.7082433960324802, + "grad_norm": 1.067043423652649, + "learning_rate": 2.0710475999024193e-06, + "loss": 0.713, + "step": 13781 + }, + { + "epoch": 0.7082947887758249, + "grad_norm": 1.0727665424346924, + "learning_rate": 2.0703731297425577e-06, + "loss": 0.7316, + "step": 13782 + }, + { + "epoch": 0.7083461815191695, + "grad_norm": 1.0858210325241089, + "learning_rate": 2.0696987407505707e-06, + "loss": 0.6639, + "step": 13783 + }, + { + "epoch": 0.7083975742625142, + "grad_norm": 1.067585825920105, + "learning_rate": 2.069024432945136e-06, + "loss": 0.7528, + "step": 13784 + }, + { + "epoch": 0.7084489670058588, + "grad_norm": 1.07047700881958, + "learning_rate": 2.06835020634494e-06, + "loss": 0.7175, + "step": 13785 + }, + { + "epoch": 0.7085003597492034, + "grad_norm": 1.1306891441345215, + "learning_rate": 2.0676760609686615e-06, + "loss": 0.6858, + "step": 13786 + }, + { + "epoch": 0.7085517524925481, + "grad_norm": 1.1133527755737305, + "learning_rate": 2.0670019968349787e-06, + "loss": 0.7053, + "step": 13787 + }, + { + "epoch": 0.7086031452358927, + "grad_norm": 1.0710773468017578, + "learning_rate": 2.0663280139625647e-06, + "loss": 0.7222, + "step": 13788 + }, + { + "epoch": 0.7086545379792374, + "grad_norm": 0.791092574596405, + "learning_rate": 2.065654112370097e-06, + "loss": 0.6752, + "step": 13789 + }, + { + "epoch": 0.7087059307225819, + "grad_norm": 1.046260118484497, + "learning_rate": 2.064980292076244e-06, + "loss": 0.7061, + "step": 13790 + }, + { + "epoch": 0.7087573234659266, + "grad_norm": 1.0017520189285278, + "learning_rate": 2.0643065530996736e-06, + "loss": 0.6519, + "step": 13791 + }, + { + "epoch": 0.7088087162092712, + "grad_norm": 1.0316901206970215, + "learning_rate": 2.0636328954590555e-06, + "loss": 0.6939, + "step": 13792 + }, + { + "epoch": 0.7088601089526159, + "grad_norm": 1.106510043144226, + "learning_rate": 2.0629593191730525e-06, + "loss": 0.7262, + "step": 13793 + }, + { + "epoch": 0.7089115016959605, + "grad_norm": 1.1042814254760742, + "learning_rate": 2.0622858242603254e-06, + "loss": 0.7186, + "step": 13794 + }, + { + "epoch": 0.7089628944393052, + "grad_norm": 1.098565697669983, + "learning_rate": 2.061612410739534e-06, + "loss": 0.6607, + "step": 13795 + }, + { + "epoch": 0.7090142871826498, + "grad_norm": 1.1267523765563965, + "learning_rate": 2.0609390786293397e-06, + "loss": 0.7097, + "step": 13796 + }, + { + "epoch": 0.7090656799259945, + "grad_norm": 1.0906375646591187, + "learning_rate": 2.0602658279483917e-06, + "loss": 0.6841, + "step": 13797 + }, + { + "epoch": 0.7091170726693391, + "grad_norm": 1.144810676574707, + "learning_rate": 2.0595926587153477e-06, + "loss": 0.6916, + "step": 13798 + }, + { + "epoch": 0.7091684654126837, + "grad_norm": 1.0772943496704102, + "learning_rate": 2.0589195709488564e-06, + "loss": 0.7012, + "step": 13799 + }, + { + "epoch": 0.7092198581560284, + "grad_norm": 1.0579253435134888, + "learning_rate": 2.058246564667565e-06, + "loss": 0.661, + "step": 13800 + }, + { + "epoch": 0.709271250899373, + "grad_norm": 1.1185640096664429, + "learning_rate": 2.057573639890123e-06, + "loss": 0.6695, + "step": 13801 + }, + { + "epoch": 0.7093226436427177, + "grad_norm": 0.7512776851654053, + "learning_rate": 2.0569007966351727e-06, + "loss": 0.6546, + "step": 13802 + }, + { + "epoch": 0.7093740363860623, + "grad_norm": 1.1023831367492676, + "learning_rate": 2.0562280349213564e-06, + "loss": 0.7313, + "step": 13803 + }, + { + "epoch": 0.709425429129407, + "grad_norm": 1.3334959745407104, + "learning_rate": 2.0555553547673114e-06, + "loss": 0.6624, + "step": 13804 + }, + { + "epoch": 0.7094768218727515, + "grad_norm": 1.0482275485992432, + "learning_rate": 2.0548827561916778e-06, + "loss": 0.7459, + "step": 13805 + }, + { + "epoch": 0.7095282146160962, + "grad_norm": 1.027115821838379, + "learning_rate": 2.054210239213089e-06, + "loss": 0.693, + "step": 13806 + }, + { + "epoch": 0.7095796073594408, + "grad_norm": 1.113142967224121, + "learning_rate": 2.0535378038501783e-06, + "loss": 0.7185, + "step": 13807 + }, + { + "epoch": 0.7096310001027855, + "grad_norm": 1.131225347518921, + "learning_rate": 2.052865450121574e-06, + "loss": 0.7162, + "step": 13808 + }, + { + "epoch": 0.7096823928461301, + "grad_norm": 1.0713504552841187, + "learning_rate": 2.0521931780459075e-06, + "loss": 0.7023, + "step": 13809 + }, + { + "epoch": 0.7097337855894748, + "grad_norm": 1.00968337059021, + "learning_rate": 2.0515209876418034e-06, + "loss": 0.6934, + "step": 13810 + }, + { + "epoch": 0.7097851783328194, + "grad_norm": 0.7862069606781006, + "learning_rate": 2.050848878927885e-06, + "loss": 0.6301, + "step": 13811 + }, + { + "epoch": 0.709836571076164, + "grad_norm": 1.2094237804412842, + "learning_rate": 2.0501768519227736e-06, + "loss": 0.702, + "step": 13812 + }, + { + "epoch": 0.7098879638195087, + "grad_norm": 1.0520743131637573, + "learning_rate": 2.0495049066450874e-06, + "loss": 0.7487, + "step": 13813 + }, + { + "epoch": 0.7099393565628533, + "grad_norm": 1.0909069776535034, + "learning_rate": 2.048833043113445e-06, + "loss": 0.6838, + "step": 13814 + }, + { + "epoch": 0.709990749306198, + "grad_norm": 1.0861800909042358, + "learning_rate": 2.0481612613464607e-06, + "loss": 0.6901, + "step": 13815 + }, + { + "epoch": 0.7100421420495426, + "grad_norm": 1.060874581336975, + "learning_rate": 2.0474895613627465e-06, + "loss": 0.7049, + "step": 13816 + }, + { + "epoch": 0.7100935347928873, + "grad_norm": 1.1056462526321411, + "learning_rate": 2.0468179431809106e-06, + "loss": 0.7195, + "step": 13817 + }, + { + "epoch": 0.7101449275362319, + "grad_norm": 0.744979202747345, + "learning_rate": 2.046146406819565e-06, + "loss": 0.6323, + "step": 13818 + }, + { + "epoch": 0.7101963202795766, + "grad_norm": 1.1755642890930176, + "learning_rate": 2.0454749522973098e-06, + "loss": 0.6866, + "step": 13819 + }, + { + "epoch": 0.7102477130229211, + "grad_norm": 1.148440957069397, + "learning_rate": 2.044803579632753e-06, + "loss": 0.6843, + "step": 13820 + }, + { + "epoch": 0.7102991057662658, + "grad_norm": 1.0315439701080322, + "learning_rate": 2.0441322888444913e-06, + "loss": 0.6905, + "step": 13821 + }, + { + "epoch": 0.7103504985096104, + "grad_norm": 0.800396203994751, + "learning_rate": 2.0434610799511286e-06, + "loss": 0.6693, + "step": 13822 + }, + { + "epoch": 0.710401891252955, + "grad_norm": 1.1546156406402588, + "learning_rate": 2.0427899529712577e-06, + "loss": 0.7169, + "step": 13823 + }, + { + "epoch": 0.7104532839962997, + "grad_norm": 1.1220064163208008, + "learning_rate": 2.0421189079234717e-06, + "loss": 0.7465, + "step": 13824 + }, + { + "epoch": 0.7105046767396443, + "grad_norm": 1.1018762588500977, + "learning_rate": 2.0414479448263674e-06, + "loss": 0.6829, + "step": 13825 + }, + { + "epoch": 0.710556069482989, + "grad_norm": 1.1131882667541504, + "learning_rate": 2.0407770636985285e-06, + "loss": 0.7004, + "step": 13826 + }, + { + "epoch": 0.7106074622263336, + "grad_norm": 1.0459210872650146, + "learning_rate": 2.0401062645585467e-06, + "loss": 0.6362, + "step": 13827 + }, + { + "epoch": 0.7106588549696783, + "grad_norm": 1.0698922872543335, + "learning_rate": 2.0394355474250053e-06, + "loss": 0.6971, + "step": 13828 + }, + { + "epoch": 0.7107102477130229, + "grad_norm": 1.2453351020812988, + "learning_rate": 2.0387649123164876e-06, + "loss": 0.674, + "step": 13829 + }, + { + "epoch": 0.7107616404563676, + "grad_norm": 1.2080566883087158, + "learning_rate": 2.038094359251572e-06, + "loss": 0.7013, + "step": 13830 + }, + { + "epoch": 0.7108130331997122, + "grad_norm": 1.1145142316818237, + "learning_rate": 2.0374238882488414e-06, + "loss": 0.6904, + "step": 13831 + }, + { + "epoch": 0.7108644259430569, + "grad_norm": 0.7786082029342651, + "learning_rate": 2.036753499326868e-06, + "loss": 0.652, + "step": 13832 + }, + { + "epoch": 0.7109158186864015, + "grad_norm": 1.110437273979187, + "learning_rate": 2.0360831925042246e-06, + "loss": 0.7317, + "step": 13833 + }, + { + "epoch": 0.7109672114297462, + "grad_norm": 1.044748306274414, + "learning_rate": 2.0354129677994883e-06, + "loss": 0.6938, + "step": 13834 + }, + { + "epoch": 0.7110186041730907, + "grad_norm": 1.1183432340621948, + "learning_rate": 2.0347428252312214e-06, + "loss": 0.6841, + "step": 13835 + }, + { + "epoch": 0.7110699969164354, + "grad_norm": 1.1269700527191162, + "learning_rate": 2.0340727648179954e-06, + "loss": 0.7813, + "step": 13836 + }, + { + "epoch": 0.71112138965978, + "grad_norm": 0.8584217429161072, + "learning_rate": 2.033402786578372e-06, + "loss": 0.6511, + "step": 13837 + }, + { + "epoch": 0.7111727824031246, + "grad_norm": 1.0577318668365479, + "learning_rate": 2.032732890530918e-06, + "loss": 0.6555, + "step": 13838 + }, + { + "epoch": 0.7112241751464693, + "grad_norm": 0.7752557992935181, + "learning_rate": 2.0320630766941866e-06, + "loss": 0.6371, + "step": 13839 + }, + { + "epoch": 0.7112755678898139, + "grad_norm": 1.0737941265106201, + "learning_rate": 2.0313933450867417e-06, + "loss": 0.6728, + "step": 13840 + }, + { + "epoch": 0.7113269606331586, + "grad_norm": 1.0756685733795166, + "learning_rate": 2.030723695727137e-06, + "loss": 0.6498, + "step": 13841 + }, + { + "epoch": 0.7113783533765032, + "grad_norm": 1.1216992139816284, + "learning_rate": 2.0300541286339227e-06, + "loss": 0.6904, + "step": 13842 + }, + { + "epoch": 0.7114297461198479, + "grad_norm": 0.7423892021179199, + "learning_rate": 2.029384643825654e-06, + "loss": 0.6925, + "step": 13843 + }, + { + "epoch": 0.7114811388631925, + "grad_norm": 1.0447964668273926, + "learning_rate": 2.0287152413208784e-06, + "loss": 0.7417, + "step": 13844 + }, + { + "epoch": 0.7115325316065372, + "grad_norm": 1.0752873420715332, + "learning_rate": 2.028045921138141e-06, + "loss": 0.679, + "step": 13845 + }, + { + "epoch": 0.7115839243498818, + "grad_norm": 1.0866988897323608, + "learning_rate": 2.027376683295985e-06, + "loss": 0.6897, + "step": 13846 + }, + { + "epoch": 0.7116353170932265, + "grad_norm": 1.0621236562728882, + "learning_rate": 2.026707527812958e-06, + "loss": 0.7151, + "step": 13847 + }, + { + "epoch": 0.7116867098365711, + "grad_norm": 1.0988514423370361, + "learning_rate": 2.026038454707591e-06, + "loss": 0.7421, + "step": 13848 + }, + { + "epoch": 0.7117381025799158, + "grad_norm": 1.0312613248825073, + "learning_rate": 2.025369463998428e-06, + "loss": 0.689, + "step": 13849 + }, + { + "epoch": 0.7117894953232603, + "grad_norm": 1.0904605388641357, + "learning_rate": 2.0247005557040007e-06, + "loss": 0.7156, + "step": 13850 + }, + { + "epoch": 0.711840888066605, + "grad_norm": 1.1624822616577148, + "learning_rate": 2.0240317298428412e-06, + "loss": 0.7459, + "step": 13851 + }, + { + "epoch": 0.7118922808099496, + "grad_norm": 1.103622317314148, + "learning_rate": 2.0233629864334837e-06, + "loss": 0.7406, + "step": 13852 + }, + { + "epoch": 0.7119436735532942, + "grad_norm": 1.1310709714889526, + "learning_rate": 2.0226943254944536e-06, + "loss": 0.7409, + "step": 13853 + }, + { + "epoch": 0.7119950662966389, + "grad_norm": 1.1085220575332642, + "learning_rate": 2.022025747044277e-06, + "loss": 0.6887, + "step": 13854 + }, + { + "epoch": 0.7120464590399835, + "grad_norm": 1.0527399778366089, + "learning_rate": 2.021357251101476e-06, + "loss": 0.6504, + "step": 13855 + }, + { + "epoch": 0.7120978517833282, + "grad_norm": 1.1091697216033936, + "learning_rate": 2.020688837684576e-06, + "loss": 0.7078, + "step": 13856 + }, + { + "epoch": 0.7121492445266728, + "grad_norm": 0.7286615371704102, + "learning_rate": 2.020020506812092e-06, + "loss": 0.6149, + "step": 13857 + }, + { + "epoch": 0.7122006372700175, + "grad_norm": 1.0779544115066528, + "learning_rate": 2.019352258502543e-06, + "loss": 0.6673, + "step": 13858 + }, + { + "epoch": 0.7122520300133621, + "grad_norm": 0.7192875742912292, + "learning_rate": 2.0186840927744407e-06, + "loss": 0.6374, + "step": 13859 + }, + { + "epoch": 0.7123034227567068, + "grad_norm": 1.079571008682251, + "learning_rate": 2.0180160096463005e-06, + "loss": 0.6542, + "step": 13860 + }, + { + "epoch": 0.7123548155000514, + "grad_norm": 1.1336817741394043, + "learning_rate": 2.0173480091366304e-06, + "loss": 0.6861, + "step": 13861 + }, + { + "epoch": 0.7124062082433961, + "grad_norm": 1.1800259351730347, + "learning_rate": 2.016680091263939e-06, + "loss": 0.7058, + "step": 13862 + }, + { + "epoch": 0.7124576009867407, + "grad_norm": 1.0751947164535522, + "learning_rate": 2.0160122560467306e-06, + "loss": 0.7187, + "step": 13863 + }, + { + "epoch": 0.7125089937300854, + "grad_norm": 1.0949985980987549, + "learning_rate": 2.015344503503506e-06, + "loss": 0.6907, + "step": 13864 + }, + { + "epoch": 0.7125603864734299, + "grad_norm": 1.0753933191299438, + "learning_rate": 2.014676833652771e-06, + "loss": 0.7159, + "step": 13865 + }, + { + "epoch": 0.7126117792167745, + "grad_norm": 0.7183886170387268, + "learning_rate": 2.014009246513019e-06, + "loss": 0.6559, + "step": 13866 + }, + { + "epoch": 0.7126631719601192, + "grad_norm": 1.0763211250305176, + "learning_rate": 2.0133417421027517e-06, + "loss": 0.7202, + "step": 13867 + }, + { + "epoch": 0.7127145647034638, + "grad_norm": 1.0732475519180298, + "learning_rate": 2.0126743204404565e-06, + "loss": 0.6899, + "step": 13868 + }, + { + "epoch": 0.7127659574468085, + "grad_norm": 1.0360926389694214, + "learning_rate": 2.0120069815446293e-06, + "loss": 0.6815, + "step": 13869 + }, + { + "epoch": 0.7128173501901531, + "grad_norm": 0.6912949085235596, + "learning_rate": 2.011339725433758e-06, + "loss": 0.6544, + "step": 13870 + }, + { + "epoch": 0.7128687429334978, + "grad_norm": 1.1039221286773682, + "learning_rate": 2.010672552126329e-06, + "loss": 0.7274, + "step": 13871 + }, + { + "epoch": 0.7129201356768424, + "grad_norm": 1.1151742935180664, + "learning_rate": 2.0100054616408266e-06, + "loss": 0.7279, + "step": 13872 + }, + { + "epoch": 0.7129715284201871, + "grad_norm": 1.0857514142990112, + "learning_rate": 2.009338453995735e-06, + "loss": 0.6484, + "step": 13873 + }, + { + "epoch": 0.7130229211635317, + "grad_norm": 0.9999823570251465, + "learning_rate": 2.0086715292095334e-06, + "loss": 0.72, + "step": 13874 + }, + { + "epoch": 0.7130743139068764, + "grad_norm": 0.7725629210472107, + "learning_rate": 2.008004687300697e-06, + "loss": 0.685, + "step": 13875 + }, + { + "epoch": 0.713125706650221, + "grad_norm": 1.081921935081482, + "learning_rate": 2.0073379282877076e-06, + "loss": 0.6604, + "step": 13876 + }, + { + "epoch": 0.7131770993935657, + "grad_norm": 1.0342121124267578, + "learning_rate": 2.0066712521890303e-06, + "loss": 0.6824, + "step": 13877 + }, + { + "epoch": 0.7132284921369103, + "grad_norm": 1.1725395917892456, + "learning_rate": 2.0060046590231414e-06, + "loss": 0.7204, + "step": 13878 + }, + { + "epoch": 0.713279884880255, + "grad_norm": 1.1094027757644653, + "learning_rate": 2.005338148808508e-06, + "loss": 0.7062, + "step": 13879 + }, + { + "epoch": 0.7133312776235996, + "grad_norm": 1.149617075920105, + "learning_rate": 2.004671721563597e-06, + "loss": 0.757, + "step": 13880 + }, + { + "epoch": 0.7133826703669441, + "grad_norm": 1.102684736251831, + "learning_rate": 2.004005377306869e-06, + "loss": 0.731, + "step": 13881 + }, + { + "epoch": 0.7134340631102888, + "grad_norm": 1.0622472763061523, + "learning_rate": 2.0033391160567906e-06, + "loss": 0.6775, + "step": 13882 + }, + { + "epoch": 0.7134854558536334, + "grad_norm": 0.7642338871955872, + "learning_rate": 2.0026729378318184e-06, + "loss": 0.6592, + "step": 13883 + }, + { + "epoch": 0.7135368485969781, + "grad_norm": 1.162483811378479, + "learning_rate": 2.002006842650408e-06, + "loss": 0.6824, + "step": 13884 + }, + { + "epoch": 0.7135882413403227, + "grad_norm": 1.0743509531021118, + "learning_rate": 2.0013408305310184e-06, + "loss": 0.6999, + "step": 13885 + }, + { + "epoch": 0.7136396340836674, + "grad_norm": 1.0861505270004272, + "learning_rate": 2.000674901492099e-06, + "loss": 0.6917, + "step": 13886 + }, + { + "epoch": 0.713691026827012, + "grad_norm": 1.0744562149047852, + "learning_rate": 2.0000090555521006e-06, + "loss": 0.715, + "step": 13887 + }, + { + "epoch": 0.7137424195703567, + "grad_norm": 1.0702255964279175, + "learning_rate": 1.99934329272947e-06, + "loss": 0.7236, + "step": 13888 + }, + { + "epoch": 0.7137938123137013, + "grad_norm": 1.0426336526870728, + "learning_rate": 1.9986776130426566e-06, + "loss": 0.7253, + "step": 13889 + }, + { + "epoch": 0.713845205057046, + "grad_norm": 1.0441347360610962, + "learning_rate": 1.9980120165100974e-06, + "loss": 0.6385, + "step": 13890 + }, + { + "epoch": 0.7138965978003906, + "grad_norm": 0.9434767961502075, + "learning_rate": 1.997346503150239e-06, + "loss": 0.6494, + "step": 13891 + }, + { + "epoch": 0.7139479905437353, + "grad_norm": 1.1863094568252563, + "learning_rate": 1.9966810729815175e-06, + "loss": 0.7334, + "step": 13892 + }, + { + "epoch": 0.7139993832870799, + "grad_norm": 1.0655384063720703, + "learning_rate": 1.9960157260223673e-06, + "loss": 0.7167, + "step": 13893 + }, + { + "epoch": 0.7140507760304245, + "grad_norm": 0.6862566471099854, + "learning_rate": 1.995350462291227e-06, + "loss": 0.6332, + "step": 13894 + }, + { + "epoch": 0.7141021687737692, + "grad_norm": 1.0659171342849731, + "learning_rate": 1.9946852818065253e-06, + "loss": 0.6854, + "step": 13895 + }, + { + "epoch": 0.7141535615171137, + "grad_norm": 1.0813877582550049, + "learning_rate": 1.9940201845866925e-06, + "loss": 0.6685, + "step": 13896 + }, + { + "epoch": 0.7142049542604584, + "grad_norm": 1.033320426940918, + "learning_rate": 1.9933551706501537e-06, + "loss": 0.6956, + "step": 13897 + }, + { + "epoch": 0.714256347003803, + "grad_norm": 0.7505856156349182, + "learning_rate": 1.9926902400153374e-06, + "loss": 0.6595, + "step": 13898 + }, + { + "epoch": 0.7143077397471477, + "grad_norm": 1.1093891859054565, + "learning_rate": 1.992025392700664e-06, + "loss": 0.7293, + "step": 13899 + }, + { + "epoch": 0.7143591324904923, + "grad_norm": 1.073056936264038, + "learning_rate": 1.9913606287245537e-06, + "loss": 0.6624, + "step": 13900 + }, + { + "epoch": 0.714410525233837, + "grad_norm": 1.0899986028671265, + "learning_rate": 1.990695948105423e-06, + "loss": 0.7161, + "step": 13901 + }, + { + "epoch": 0.7144619179771816, + "grad_norm": 0.7137531042098999, + "learning_rate": 1.9900313508616904e-06, + "loss": 0.6422, + "step": 13902 + }, + { + "epoch": 0.7145133107205263, + "grad_norm": 1.0512052774429321, + "learning_rate": 1.9893668370117677e-06, + "loss": 0.6713, + "step": 13903 + }, + { + "epoch": 0.7145647034638709, + "grad_norm": 0.7321227192878723, + "learning_rate": 1.9887024065740657e-06, + "loss": 0.6648, + "step": 13904 + }, + { + "epoch": 0.7146160962072156, + "grad_norm": 1.0789406299591064, + "learning_rate": 1.988038059566993e-06, + "loss": 0.7391, + "step": 13905 + }, + { + "epoch": 0.7146674889505602, + "grad_norm": 0.730172872543335, + "learning_rate": 1.9873737960089545e-06, + "loss": 0.6705, + "step": 13906 + }, + { + "epoch": 0.7147188816939049, + "grad_norm": 1.0356365442276, + "learning_rate": 1.9867096159183576e-06, + "loss": 0.6734, + "step": 13907 + }, + { + "epoch": 0.7147702744372495, + "grad_norm": 1.6314005851745605, + "learning_rate": 1.986045519313602e-06, + "loss": 0.7727, + "step": 13908 + }, + { + "epoch": 0.7148216671805941, + "grad_norm": 1.0658164024353027, + "learning_rate": 1.985381506213087e-06, + "loss": 0.7016, + "step": 13909 + }, + { + "epoch": 0.7148730599239388, + "grad_norm": 1.1861186027526855, + "learning_rate": 1.9847175766352083e-06, + "loss": 0.7104, + "step": 13910 + }, + { + "epoch": 0.7149244526672833, + "grad_norm": 1.0932592153549194, + "learning_rate": 1.9840537305983643e-06, + "loss": 0.7547, + "step": 13911 + }, + { + "epoch": 0.714975845410628, + "grad_norm": 0.8178917765617371, + "learning_rate": 1.9833899681209444e-06, + "loss": 0.6544, + "step": 13912 + }, + { + "epoch": 0.7150272381539726, + "grad_norm": 0.7472593784332275, + "learning_rate": 1.9827262892213408e-06, + "loss": 0.6445, + "step": 13913 + }, + { + "epoch": 0.7150786308973173, + "grad_norm": 0.9168313145637512, + "learning_rate": 1.9820626939179378e-06, + "loss": 0.6383, + "step": 13914 + }, + { + "epoch": 0.7151300236406619, + "grad_norm": 1.0839271545410156, + "learning_rate": 1.981399182229125e-06, + "loss": 0.7092, + "step": 13915 + }, + { + "epoch": 0.7151814163840066, + "grad_norm": 1.069693684577942, + "learning_rate": 1.980735754173284e-06, + "loss": 0.6845, + "step": 13916 + }, + { + "epoch": 0.7152328091273512, + "grad_norm": 0.9683200716972351, + "learning_rate": 1.980072409768794e-06, + "loss": 0.6869, + "step": 13917 + }, + { + "epoch": 0.7152842018706959, + "grad_norm": 0.9229991436004639, + "learning_rate": 1.9794091490340385e-06, + "loss": 0.6556, + "step": 13918 + }, + { + "epoch": 0.7153355946140405, + "grad_norm": 1.0928629636764526, + "learning_rate": 1.9787459719873857e-06, + "loss": 0.6958, + "step": 13919 + }, + { + "epoch": 0.7153869873573852, + "grad_norm": 1.0500705242156982, + "learning_rate": 1.9780828786472167e-06, + "loss": 0.6618, + "step": 13920 + }, + { + "epoch": 0.7154383801007298, + "grad_norm": 1.1658707857131958, + "learning_rate": 1.9774198690319004e-06, + "loss": 0.7347, + "step": 13921 + }, + { + "epoch": 0.7154897728440744, + "grad_norm": 1.0598798990249634, + "learning_rate": 1.976756943159806e-06, + "loss": 0.6932, + "step": 13922 + }, + { + "epoch": 0.7155411655874191, + "grad_norm": 1.0335803031921387, + "learning_rate": 1.9760941010492983e-06, + "loss": 0.694, + "step": 13923 + }, + { + "epoch": 0.7155925583307637, + "grad_norm": 1.1211134195327759, + "learning_rate": 1.9754313427187466e-06, + "loss": 0.6964, + "step": 13924 + }, + { + "epoch": 0.7156439510741084, + "grad_norm": 1.3624458312988281, + "learning_rate": 1.9747686681865107e-06, + "loss": 0.6904, + "step": 13925 + }, + { + "epoch": 0.7156953438174529, + "grad_norm": 1.302241325378418, + "learning_rate": 1.974106077470949e-06, + "loss": 0.6994, + "step": 13926 + }, + { + "epoch": 0.7157467365607976, + "grad_norm": 1.0483657121658325, + "learning_rate": 1.973443570590423e-06, + "loss": 0.672, + "step": 13927 + }, + { + "epoch": 0.7157981293041422, + "grad_norm": 1.1144992113113403, + "learning_rate": 1.9727811475632836e-06, + "loss": 0.7257, + "step": 13928 + }, + { + "epoch": 0.7158495220474869, + "grad_norm": 1.0696719884872437, + "learning_rate": 1.972118808407887e-06, + "loss": 0.7456, + "step": 13929 + }, + { + "epoch": 0.7159009147908315, + "grad_norm": 1.0393975973129272, + "learning_rate": 1.971456553142582e-06, + "loss": 0.6634, + "step": 13930 + }, + { + "epoch": 0.7159523075341762, + "grad_norm": 1.0471985340118408, + "learning_rate": 1.9707943817857206e-06, + "loss": 0.6918, + "step": 13931 + }, + { + "epoch": 0.7160037002775208, + "grad_norm": 1.1355485916137695, + "learning_rate": 1.970132294355643e-06, + "loss": 0.7217, + "step": 13932 + }, + { + "epoch": 0.7160550930208655, + "grad_norm": 0.7269394993782043, + "learning_rate": 1.969470290870697e-06, + "loss": 0.6349, + "step": 13933 + }, + { + "epoch": 0.7161064857642101, + "grad_norm": 1.1481826305389404, + "learning_rate": 1.968808371349224e-06, + "loss": 0.6968, + "step": 13934 + }, + { + "epoch": 0.7161578785075547, + "grad_norm": 0.8469072580337524, + "learning_rate": 1.9681465358095596e-06, + "loss": 0.6801, + "step": 13935 + }, + { + "epoch": 0.7162092712508994, + "grad_norm": 1.08064603805542, + "learning_rate": 1.967484784270045e-06, + "loss": 0.6688, + "step": 13936 + }, + { + "epoch": 0.716260663994244, + "grad_norm": 0.706304132938385, + "learning_rate": 1.9668231167490126e-06, + "loss": 0.6405, + "step": 13937 + }, + { + "epoch": 0.7163120567375887, + "grad_norm": 1.154129147529602, + "learning_rate": 1.9661615332647944e-06, + "loss": 0.6728, + "step": 13938 + }, + { + "epoch": 0.7163634494809333, + "grad_norm": 0.9919379949569702, + "learning_rate": 1.9655000338357184e-06, + "loss": 0.6142, + "step": 13939 + }, + { + "epoch": 0.716414842224278, + "grad_norm": 1.1220649480819702, + "learning_rate": 1.9648386184801173e-06, + "loss": 0.7197, + "step": 13940 + }, + { + "epoch": 0.7164662349676225, + "grad_norm": 1.0807689428329468, + "learning_rate": 1.964177287216309e-06, + "loss": 0.7458, + "step": 13941 + }, + { + "epoch": 0.7165176277109672, + "grad_norm": 1.0619655847549438, + "learning_rate": 1.9635160400626217e-06, + "loss": 0.6963, + "step": 13942 + }, + { + "epoch": 0.7165690204543118, + "grad_norm": 1.0736488103866577, + "learning_rate": 1.9628548770373745e-06, + "loss": 0.7633, + "step": 13943 + }, + { + "epoch": 0.7166204131976565, + "grad_norm": 1.0870729684829712, + "learning_rate": 1.9621937981588832e-06, + "loss": 0.7087, + "step": 13944 + }, + { + "epoch": 0.7166718059410011, + "grad_norm": 0.7145068049430847, + "learning_rate": 1.961532803445467e-06, + "loss": 0.688, + "step": 13945 + }, + { + "epoch": 0.7167231986843458, + "grad_norm": 1.0990524291992188, + "learning_rate": 1.960871892915437e-06, + "loss": 0.7296, + "step": 13946 + }, + { + "epoch": 0.7167745914276904, + "grad_norm": 1.1086374521255493, + "learning_rate": 1.9602110665871078e-06, + "loss": 0.7141, + "step": 13947 + }, + { + "epoch": 0.716825984171035, + "grad_norm": 1.03158438205719, + "learning_rate": 1.9595503244787816e-06, + "loss": 0.6675, + "step": 13948 + }, + { + "epoch": 0.7168773769143797, + "grad_norm": 1.1512248516082764, + "learning_rate": 1.9588896666087715e-06, + "loss": 0.6856, + "step": 13949 + }, + { + "epoch": 0.7169287696577243, + "grad_norm": 0.7473583221435547, + "learning_rate": 1.958229092995378e-06, + "loss": 0.675, + "step": 13950 + }, + { + "epoch": 0.716980162401069, + "grad_norm": 1.0900137424468994, + "learning_rate": 1.957568603656904e-06, + "loss": 0.7451, + "step": 13951 + }, + { + "epoch": 0.7170315551444136, + "grad_norm": 0.8128623962402344, + "learning_rate": 1.956908198611647e-06, + "loss": 0.6789, + "step": 13952 + }, + { + "epoch": 0.7170829478877583, + "grad_norm": 1.0715359449386597, + "learning_rate": 1.9562478778779072e-06, + "loss": 0.7375, + "step": 13953 + }, + { + "epoch": 0.7171343406311029, + "grad_norm": 1.1331875324249268, + "learning_rate": 1.9555876414739775e-06, + "loss": 0.7885, + "step": 13954 + }, + { + "epoch": 0.7171857333744476, + "grad_norm": 1.055611491203308, + "learning_rate": 1.954927489418151e-06, + "loss": 0.7203, + "step": 13955 + }, + { + "epoch": 0.7172371261177922, + "grad_norm": 1.1216692924499512, + "learning_rate": 1.9542674217287185e-06, + "loss": 0.7524, + "step": 13956 + }, + { + "epoch": 0.7172885188611368, + "grad_norm": 1.0702455043792725, + "learning_rate": 1.953607438423964e-06, + "loss": 0.7292, + "step": 13957 + }, + { + "epoch": 0.7173399116044814, + "grad_norm": 1.0641452074050903, + "learning_rate": 1.9529475395221773e-06, + "loss": 0.6756, + "step": 13958 + }, + { + "epoch": 0.717391304347826, + "grad_norm": 1.0655573606491089, + "learning_rate": 1.95228772504164e-06, + "loss": 0.7202, + "step": 13959 + }, + { + "epoch": 0.7174426970911707, + "grad_norm": 1.077936053276062, + "learning_rate": 1.951627995000633e-06, + "loss": 0.7355, + "step": 13960 + }, + { + "epoch": 0.7174940898345153, + "grad_norm": 0.7696951031684875, + "learning_rate": 1.9509683494174326e-06, + "loss": 0.6018, + "step": 13961 + }, + { + "epoch": 0.71754548257786, + "grad_norm": 1.0416902303695679, + "learning_rate": 1.9503087883103176e-06, + "loss": 0.7373, + "step": 13962 + }, + { + "epoch": 0.7175968753212046, + "grad_norm": 1.1297434568405151, + "learning_rate": 1.949649311697562e-06, + "loss": 0.7428, + "step": 13963 + }, + { + "epoch": 0.7176482680645493, + "grad_norm": 1.2995857000350952, + "learning_rate": 1.948989919597435e-06, + "loss": 0.7006, + "step": 13964 + }, + { + "epoch": 0.7176996608078939, + "grad_norm": 1.142939805984497, + "learning_rate": 1.9483306120282046e-06, + "loss": 0.7303, + "step": 13965 + }, + { + "epoch": 0.7177510535512386, + "grad_norm": 1.0437626838684082, + "learning_rate": 1.9476713890081416e-06, + "loss": 0.6739, + "step": 13966 + }, + { + "epoch": 0.7178024462945832, + "grad_norm": 1.120582103729248, + "learning_rate": 1.9470122505555083e-06, + "loss": 0.6981, + "step": 13967 + }, + { + "epoch": 0.7178538390379279, + "grad_norm": 1.157538890838623, + "learning_rate": 1.946353196688565e-06, + "loss": 0.6581, + "step": 13968 + }, + { + "epoch": 0.7179052317812725, + "grad_norm": 1.0822207927703857, + "learning_rate": 1.9456942274255762e-06, + "loss": 0.7081, + "step": 13969 + }, + { + "epoch": 0.7179566245246172, + "grad_norm": 0.7534598708152771, + "learning_rate": 1.9450353427847922e-06, + "loss": 0.6442, + "step": 13970 + }, + { + "epoch": 0.7180080172679618, + "grad_norm": 1.0986030101776123, + "learning_rate": 1.9443765427844743e-06, + "loss": 0.7262, + "step": 13971 + }, + { + "epoch": 0.7180594100113064, + "grad_norm": 1.1220083236694336, + "learning_rate": 1.943717827442872e-06, + "loss": 0.6488, + "step": 13972 + }, + { + "epoch": 0.718110802754651, + "grad_norm": 1.099743366241455, + "learning_rate": 1.9430591967782364e-06, + "loss": 0.6651, + "step": 13973 + }, + { + "epoch": 0.7181621954979956, + "grad_norm": 1.0417327880859375, + "learning_rate": 1.9424006508088134e-06, + "loss": 0.7712, + "step": 13974 + }, + { + "epoch": 0.7182135882413403, + "grad_norm": 1.0913273096084595, + "learning_rate": 1.941742189552852e-06, + "loss": 0.7461, + "step": 13975 + }, + { + "epoch": 0.7182649809846849, + "grad_norm": 1.1196143627166748, + "learning_rate": 1.9410838130285937e-06, + "loss": 0.6778, + "step": 13976 + }, + { + "epoch": 0.7183163737280296, + "grad_norm": 1.1071196794509888, + "learning_rate": 1.9404255212542783e-06, + "loss": 0.7394, + "step": 13977 + }, + { + "epoch": 0.7183677664713742, + "grad_norm": 1.0539600849151611, + "learning_rate": 1.939767314248147e-06, + "loss": 0.6849, + "step": 13978 + }, + { + "epoch": 0.7184191592147189, + "grad_norm": 1.1147326231002808, + "learning_rate": 1.9391091920284346e-06, + "loss": 0.7075, + "step": 13979 + }, + { + "epoch": 0.7184705519580635, + "grad_norm": 1.140195369720459, + "learning_rate": 1.938451154613375e-06, + "loss": 0.7137, + "step": 13980 + }, + { + "epoch": 0.7185219447014082, + "grad_norm": 1.1057136058807373, + "learning_rate": 1.9377932020211974e-06, + "loss": 0.7117, + "step": 13981 + }, + { + "epoch": 0.7185733374447528, + "grad_norm": 1.087666392326355, + "learning_rate": 1.9371353342701376e-06, + "loss": 0.6851, + "step": 13982 + }, + { + "epoch": 0.7186247301880975, + "grad_norm": 1.0839101076126099, + "learning_rate": 1.936477551378414e-06, + "loss": 0.7102, + "step": 13983 + }, + { + "epoch": 0.7186761229314421, + "grad_norm": 1.0515133142471313, + "learning_rate": 1.935819853364257e-06, + "loss": 0.7062, + "step": 13984 + }, + { + "epoch": 0.7187275156747868, + "grad_norm": 1.3131293058395386, + "learning_rate": 1.9351622402458864e-06, + "loss": 0.7499, + "step": 13985 + }, + { + "epoch": 0.7187789084181314, + "grad_norm": 0.992249608039856, + "learning_rate": 1.9345047120415207e-06, + "loss": 0.7061, + "step": 13986 + }, + { + "epoch": 0.718830301161476, + "grad_norm": 1.1263328790664673, + "learning_rate": 1.933847268769381e-06, + "loss": 0.7353, + "step": 13987 + }, + { + "epoch": 0.7188816939048206, + "grad_norm": 1.087959885597229, + "learning_rate": 1.93318991044768e-06, + "loss": 0.6852, + "step": 13988 + }, + { + "epoch": 0.7189330866481652, + "grad_norm": 1.0750374794006348, + "learning_rate": 1.93253263709463e-06, + "loss": 0.7051, + "step": 13989 + }, + { + "epoch": 0.7189844793915099, + "grad_norm": 1.0830811262130737, + "learning_rate": 1.9318754487284407e-06, + "loss": 0.7198, + "step": 13990 + }, + { + "epoch": 0.7190358721348545, + "grad_norm": 1.0481847524642944, + "learning_rate": 1.9312183453673232e-06, + "loss": 0.647, + "step": 13991 + }, + { + "epoch": 0.7190872648781992, + "grad_norm": 1.0567548274993896, + "learning_rate": 1.9305613270294808e-06, + "loss": 0.727, + "step": 13992 + }, + { + "epoch": 0.7191386576215438, + "grad_norm": 1.1105051040649414, + "learning_rate": 1.9299043937331173e-06, + "loss": 0.7782, + "step": 13993 + }, + { + "epoch": 0.7191900503648885, + "grad_norm": 1.083558440208435, + "learning_rate": 1.929247545496432e-06, + "loss": 0.7314, + "step": 13994 + }, + { + "epoch": 0.7192414431082331, + "grad_norm": 1.1892979145050049, + "learning_rate": 1.928590782337627e-06, + "loss": 0.6949, + "step": 13995 + }, + { + "epoch": 0.7192928358515778, + "grad_norm": 1.1118824481964111, + "learning_rate": 1.9279341042748963e-06, + "loss": 0.727, + "step": 13996 + }, + { + "epoch": 0.7193442285949224, + "grad_norm": 1.0165057182312012, + "learning_rate": 1.9272775113264313e-06, + "loss": 0.7082, + "step": 13997 + }, + { + "epoch": 0.7193956213382671, + "grad_norm": 1.100739598274231, + "learning_rate": 1.9266210035104305e-06, + "loss": 0.7027, + "step": 13998 + }, + { + "epoch": 0.7194470140816117, + "grad_norm": 1.0895593166351318, + "learning_rate": 1.925964580845075e-06, + "loss": 0.7116, + "step": 13999 + }, + { + "epoch": 0.7194984068249564, + "grad_norm": 1.1292909383773804, + "learning_rate": 1.9253082433485566e-06, + "loss": 0.7149, + "step": 14000 + }, + { + "epoch": 0.719549799568301, + "grad_norm": 1.0232349634170532, + "learning_rate": 1.9246519910390584e-06, + "loss": 0.6561, + "step": 14001 + }, + { + "epoch": 0.7196011923116455, + "grad_norm": 1.0902801752090454, + "learning_rate": 1.923995823934762e-06, + "loss": 0.7163, + "step": 14002 + }, + { + "epoch": 0.7196525850549902, + "grad_norm": 1.124680995941162, + "learning_rate": 1.923339742053846e-06, + "loss": 0.7194, + "step": 14003 + }, + { + "epoch": 0.7197039777983348, + "grad_norm": 1.135898232460022, + "learning_rate": 1.9226837454144914e-06, + "loss": 0.7171, + "step": 14004 + }, + { + "epoch": 0.7197553705416795, + "grad_norm": 1.0684188604354858, + "learning_rate": 1.92202783403487e-06, + "loss": 0.7454, + "step": 14005 + }, + { + "epoch": 0.7198067632850241, + "grad_norm": 0.7131996154785156, + "learning_rate": 1.921372007933156e-06, + "loss": 0.6487, + "step": 14006 + }, + { + "epoch": 0.7198581560283688, + "grad_norm": 1.1133602857589722, + "learning_rate": 1.9207162671275165e-06, + "loss": 0.6443, + "step": 14007 + }, + { + "epoch": 0.7199095487717134, + "grad_norm": 1.0943559408187866, + "learning_rate": 1.9200606116361243e-06, + "loss": 0.7142, + "step": 14008 + }, + { + "epoch": 0.7199609415150581, + "grad_norm": 1.0612866878509521, + "learning_rate": 1.9194050414771415e-06, + "loss": 0.675, + "step": 14009 + }, + { + "epoch": 0.7200123342584027, + "grad_norm": 1.0420299768447876, + "learning_rate": 1.918749556668731e-06, + "loss": 0.6321, + "step": 14010 + }, + { + "epoch": 0.7200637270017474, + "grad_norm": 0.9825411438941956, + "learning_rate": 1.918094157229058e-06, + "loss": 0.6837, + "step": 14011 + }, + { + "epoch": 0.720115119745092, + "grad_norm": 1.0787869691848755, + "learning_rate": 1.9174388431762744e-06, + "loss": 0.7198, + "step": 14012 + }, + { + "epoch": 0.7201665124884367, + "grad_norm": 1.0238080024719238, + "learning_rate": 1.9167836145285406e-06, + "loss": 0.7404, + "step": 14013 + }, + { + "epoch": 0.7202179052317813, + "grad_norm": 0.8096597790718079, + "learning_rate": 1.9161284713040097e-06, + "loss": 0.6377, + "step": 14014 + }, + { + "epoch": 0.720269297975126, + "grad_norm": 1.0968657732009888, + "learning_rate": 1.915473413520832e-06, + "loss": 0.6932, + "step": 14015 + }, + { + "epoch": 0.7203206907184706, + "grad_norm": 1.1483261585235596, + "learning_rate": 1.9148184411971555e-06, + "loss": 0.6613, + "step": 14016 + }, + { + "epoch": 0.7203720834618151, + "grad_norm": 1.1127554178237915, + "learning_rate": 1.91416355435113e-06, + "loss": 0.7074, + "step": 14017 + }, + { + "epoch": 0.7204234762051598, + "grad_norm": 0.795612096786499, + "learning_rate": 1.9135087530008977e-06, + "loss": 0.6438, + "step": 14018 + }, + { + "epoch": 0.7204748689485044, + "grad_norm": 1.0631234645843506, + "learning_rate": 1.9128540371645992e-06, + "loss": 0.664, + "step": 14019 + }, + { + "epoch": 0.7205262616918491, + "grad_norm": 1.9766687154769897, + "learning_rate": 1.912199406860379e-06, + "loss": 0.6613, + "step": 14020 + }, + { + "epoch": 0.7205776544351937, + "grad_norm": 1.1213140487670898, + "learning_rate": 1.911544862106367e-06, + "loss": 0.7122, + "step": 14021 + }, + { + "epoch": 0.7206290471785384, + "grad_norm": 1.2482061386108398, + "learning_rate": 1.910890402920703e-06, + "loss": 0.7507, + "step": 14022 + }, + { + "epoch": 0.720680439921883, + "grad_norm": 0.9869159460067749, + "learning_rate": 1.9102360293215164e-06, + "loss": 0.6705, + "step": 14023 + }, + { + "epoch": 0.7207318326652277, + "grad_norm": 1.0798088312149048, + "learning_rate": 1.9095817413269425e-06, + "loss": 0.6587, + "step": 14024 + }, + { + "epoch": 0.7207832254085723, + "grad_norm": 1.0550565719604492, + "learning_rate": 1.908927538955101e-06, + "loss": 0.7206, + "step": 14025 + }, + { + "epoch": 0.720834618151917, + "grad_norm": 2.960447311401367, + "learning_rate": 1.908273422224123e-06, + "loss": 0.7113, + "step": 14026 + }, + { + "epoch": 0.7208860108952616, + "grad_norm": 0.9569618105888367, + "learning_rate": 1.907619391152131e-06, + "loss": 0.6544, + "step": 14027 + }, + { + "epoch": 0.7209374036386063, + "grad_norm": 1.2490954399108887, + "learning_rate": 1.9069654457572411e-06, + "loss": 0.7229, + "step": 14028 + }, + { + "epoch": 0.7209887963819509, + "grad_norm": 0.6951115727424622, + "learning_rate": 1.9063115860575764e-06, + "loss": 0.6467, + "step": 14029 + }, + { + "epoch": 0.7210401891252955, + "grad_norm": 1.1127711534500122, + "learning_rate": 1.9056578120712517e-06, + "loss": 0.7608, + "step": 14030 + }, + { + "epoch": 0.7210915818686402, + "grad_norm": 0.7124976515769958, + "learning_rate": 1.9050041238163786e-06, + "loss": 0.633, + "step": 14031 + }, + { + "epoch": 0.7211429746119847, + "grad_norm": 1.055277943611145, + "learning_rate": 1.9043505213110668e-06, + "loss": 0.7155, + "step": 14032 + }, + { + "epoch": 0.7211943673553294, + "grad_norm": 1.1538535356521606, + "learning_rate": 1.9036970045734316e-06, + "loss": 0.75, + "step": 14033 + }, + { + "epoch": 0.721245760098674, + "grad_norm": 1.0537784099578857, + "learning_rate": 1.9030435736215708e-06, + "loss": 0.695, + "step": 14034 + }, + { + "epoch": 0.7212971528420187, + "grad_norm": 1.0950802564620972, + "learning_rate": 1.9023902284735934e-06, + "loss": 0.6873, + "step": 14035 + }, + { + "epoch": 0.7213485455853633, + "grad_norm": 1.0882608890533447, + "learning_rate": 1.9017369691476e-06, + "loss": 0.6744, + "step": 14036 + }, + { + "epoch": 0.721399938328708, + "grad_norm": 0.7362953424453735, + "learning_rate": 1.9010837956616878e-06, + "loss": 0.6281, + "step": 14037 + }, + { + "epoch": 0.7214513310720526, + "grad_norm": 1.0581896305084229, + "learning_rate": 1.9004307080339574e-06, + "loss": 0.7212, + "step": 14038 + }, + { + "epoch": 0.7215027238153973, + "grad_norm": 0.8562883734703064, + "learning_rate": 1.8997777062824984e-06, + "loss": 0.6542, + "step": 14039 + }, + { + "epoch": 0.7215541165587419, + "grad_norm": 1.0497134923934937, + "learning_rate": 1.899124790425409e-06, + "loss": 0.7343, + "step": 14040 + }, + { + "epoch": 0.7216055093020866, + "grad_norm": 1.1082801818847656, + "learning_rate": 1.8984719604807717e-06, + "loss": 0.7149, + "step": 14041 + }, + { + "epoch": 0.7216569020454312, + "grad_norm": 0.8463016152381897, + "learning_rate": 1.8978192164666786e-06, + "loss": 0.6197, + "step": 14042 + }, + { + "epoch": 0.7217082947887759, + "grad_norm": 0.6962987780570984, + "learning_rate": 1.897166558401214e-06, + "loss": 0.6048, + "step": 14043 + }, + { + "epoch": 0.7217596875321205, + "grad_norm": 1.0162689685821533, + "learning_rate": 1.8965139863024584e-06, + "loss": 0.7056, + "step": 14044 + }, + { + "epoch": 0.7218110802754651, + "grad_norm": 1.0665395259857178, + "learning_rate": 1.8958615001884917e-06, + "loss": 0.6721, + "step": 14045 + }, + { + "epoch": 0.7218624730188098, + "grad_norm": 0.8321764469146729, + "learning_rate": 1.8952091000773943e-06, + "loss": 0.6403, + "step": 14046 + }, + { + "epoch": 0.7219138657621544, + "grad_norm": 1.1409231424331665, + "learning_rate": 1.8945567859872399e-06, + "loss": 0.7102, + "step": 14047 + }, + { + "epoch": 0.721965258505499, + "grad_norm": 1.1195776462554932, + "learning_rate": 1.8939045579360998e-06, + "loss": 0.7258, + "step": 14048 + }, + { + "epoch": 0.7220166512488436, + "grad_norm": 1.047645926475525, + "learning_rate": 1.8932524159420502e-06, + "loss": 0.686, + "step": 14049 + }, + { + "epoch": 0.7220680439921883, + "grad_norm": 0.8034675121307373, + "learning_rate": 1.8926003600231512e-06, + "loss": 0.6225, + "step": 14050 + }, + { + "epoch": 0.7221194367355329, + "grad_norm": 1.1398842334747314, + "learning_rate": 1.891948390197475e-06, + "loss": 0.7359, + "step": 14051 + }, + { + "epoch": 0.7221708294788776, + "grad_norm": 1.0306109189987183, + "learning_rate": 1.891296506483083e-06, + "loss": 0.7065, + "step": 14052 + }, + { + "epoch": 0.7222222222222222, + "grad_norm": 1.0688304901123047, + "learning_rate": 1.8906447088980357e-06, + "loss": 0.7684, + "step": 14053 + }, + { + "epoch": 0.7222736149655669, + "grad_norm": 1.1035021543502808, + "learning_rate": 1.8899929974603897e-06, + "loss": 0.7223, + "step": 14054 + }, + { + "epoch": 0.7223250077089115, + "grad_norm": 1.081215739250183, + "learning_rate": 1.8893413721882059e-06, + "loss": 0.6959, + "step": 14055 + }, + { + "epoch": 0.7223764004522562, + "grad_norm": 1.0626963376998901, + "learning_rate": 1.8886898330995358e-06, + "loss": 0.7463, + "step": 14056 + }, + { + "epoch": 0.7224277931956008, + "grad_norm": 1.0905474424362183, + "learning_rate": 1.8880383802124302e-06, + "loss": 0.7273, + "step": 14057 + }, + { + "epoch": 0.7224791859389454, + "grad_norm": 0.6941466331481934, + "learning_rate": 1.8873870135449373e-06, + "loss": 0.6575, + "step": 14058 + }, + { + "epoch": 0.7225305786822901, + "grad_norm": 1.054403305053711, + "learning_rate": 1.8867357331151071e-06, + "loss": 0.7456, + "step": 14059 + }, + { + "epoch": 0.7225819714256347, + "grad_norm": 1.079525113105774, + "learning_rate": 1.886084538940982e-06, + "loss": 0.6352, + "step": 14060 + }, + { + "epoch": 0.7226333641689794, + "grad_norm": 1.1174298524856567, + "learning_rate": 1.8854334310406025e-06, + "loss": 0.7563, + "step": 14061 + }, + { + "epoch": 0.722684756912324, + "grad_norm": 0.6860200762748718, + "learning_rate": 1.8847824094320128e-06, + "loss": 0.6246, + "step": 14062 + }, + { + "epoch": 0.7227361496556686, + "grad_norm": 1.0960761308670044, + "learning_rate": 1.8841314741332429e-06, + "loss": 0.7113, + "step": 14063 + }, + { + "epoch": 0.7227875423990132, + "grad_norm": 1.1676884889602661, + "learning_rate": 1.8834806251623338e-06, + "loss": 0.7305, + "step": 14064 + }, + { + "epoch": 0.7228389351423579, + "grad_norm": 0.7589775919914246, + "learning_rate": 1.8828298625373148e-06, + "loss": 0.6474, + "step": 14065 + }, + { + "epoch": 0.7228903278857025, + "grad_norm": 1.184144139289856, + "learning_rate": 1.8821791862762173e-06, + "loss": 0.6919, + "step": 14066 + }, + { + "epoch": 0.7229417206290472, + "grad_norm": 1.256714940071106, + "learning_rate": 1.8815285963970658e-06, + "loss": 0.6732, + "step": 14067 + }, + { + "epoch": 0.7229931133723918, + "grad_norm": 0.6841539740562439, + "learning_rate": 1.8808780929178894e-06, + "loss": 0.6129, + "step": 14068 + }, + { + "epoch": 0.7230445061157365, + "grad_norm": 1.092739462852478, + "learning_rate": 1.8802276758567096e-06, + "loss": 0.6867, + "step": 14069 + }, + { + "epoch": 0.7230958988590811, + "grad_norm": 1.0674060583114624, + "learning_rate": 1.8795773452315441e-06, + "loss": 0.635, + "step": 14070 + }, + { + "epoch": 0.7231472916024257, + "grad_norm": 1.213179111480713, + "learning_rate": 1.8789271010604153e-06, + "loss": 0.6657, + "step": 14071 + }, + { + "epoch": 0.7231986843457704, + "grad_norm": 1.1276285648345947, + "learning_rate": 1.8782769433613368e-06, + "loss": 0.6452, + "step": 14072 + }, + { + "epoch": 0.723250077089115, + "grad_norm": 0.6878656148910522, + "learning_rate": 1.8776268721523212e-06, + "loss": 0.6281, + "step": 14073 + }, + { + "epoch": 0.7233014698324597, + "grad_norm": 1.158353567123413, + "learning_rate": 1.8769768874513778e-06, + "loss": 0.76, + "step": 14074 + }, + { + "epoch": 0.7233528625758043, + "grad_norm": 1.1134287118911743, + "learning_rate": 1.8763269892765206e-06, + "loss": 0.7207, + "step": 14075 + }, + { + "epoch": 0.723404255319149, + "grad_norm": 0.8018239140510559, + "learning_rate": 1.875677177645749e-06, + "loss": 0.6293, + "step": 14076 + }, + { + "epoch": 0.7234556480624936, + "grad_norm": 0.7907557487487793, + "learning_rate": 1.8750274525770707e-06, + "loss": 0.6646, + "step": 14077 + }, + { + "epoch": 0.7235070408058382, + "grad_norm": 1.0302646160125732, + "learning_rate": 1.8743778140884855e-06, + "loss": 0.6593, + "step": 14078 + }, + { + "epoch": 0.7235584335491828, + "grad_norm": 1.0931450128555298, + "learning_rate": 1.8737282621979908e-06, + "loss": 0.6881, + "step": 14079 + }, + { + "epoch": 0.7236098262925275, + "grad_norm": 1.0379433631896973, + "learning_rate": 1.8730787969235864e-06, + "loss": 0.6924, + "step": 14080 + }, + { + "epoch": 0.7236612190358721, + "grad_norm": 0.7393413186073303, + "learning_rate": 1.8724294182832642e-06, + "loss": 0.6743, + "step": 14081 + }, + { + "epoch": 0.7237126117792168, + "grad_norm": 1.1454415321350098, + "learning_rate": 1.8717801262950164e-06, + "loss": 0.7286, + "step": 14082 + }, + { + "epoch": 0.7237640045225614, + "grad_norm": 1.0606586933135986, + "learning_rate": 1.8711309209768303e-06, + "loss": 0.721, + "step": 14083 + }, + { + "epoch": 0.723815397265906, + "grad_norm": 1.0268588066101074, + "learning_rate": 1.8704818023466975e-06, + "loss": 0.7052, + "step": 14084 + }, + { + "epoch": 0.7238667900092507, + "grad_norm": 1.0326272249221802, + "learning_rate": 1.8698327704225955e-06, + "loss": 0.6871, + "step": 14085 + }, + { + "epoch": 0.7239181827525953, + "grad_norm": 1.0355812311172485, + "learning_rate": 1.8691838252225125e-06, + "loss": 0.662, + "step": 14086 + }, + { + "epoch": 0.72396957549594, + "grad_norm": 1.1086996793746948, + "learning_rate": 1.868534966764423e-06, + "loss": 0.6974, + "step": 14087 + }, + { + "epoch": 0.7240209682392846, + "grad_norm": 1.0858620405197144, + "learning_rate": 1.867886195066309e-06, + "loss": 0.6928, + "step": 14088 + }, + { + "epoch": 0.7240723609826293, + "grad_norm": 0.8011069297790527, + "learning_rate": 1.8672375101461433e-06, + "loss": 0.6587, + "step": 14089 + }, + { + "epoch": 0.7241237537259739, + "grad_norm": 1.1187071800231934, + "learning_rate": 1.8665889120218954e-06, + "loss": 0.7291, + "step": 14090 + }, + { + "epoch": 0.7241751464693186, + "grad_norm": 1.0703941583633423, + "learning_rate": 1.8659404007115422e-06, + "loss": 0.7174, + "step": 14091 + }, + { + "epoch": 0.7242265392126632, + "grad_norm": 0.7623540759086609, + "learning_rate": 1.8652919762330434e-06, + "loss": 0.604, + "step": 14092 + }, + { + "epoch": 0.7242779319560078, + "grad_norm": 1.1057769060134888, + "learning_rate": 1.8646436386043692e-06, + "loss": 0.7634, + "step": 14093 + }, + { + "epoch": 0.7243293246993524, + "grad_norm": 1.120051383972168, + "learning_rate": 1.8639953878434813e-06, + "loss": 0.7032, + "step": 14094 + }, + { + "epoch": 0.724380717442697, + "grad_norm": 1.0555294752120972, + "learning_rate": 1.8633472239683392e-06, + "loss": 0.7034, + "step": 14095 + }, + { + "epoch": 0.7244321101860417, + "grad_norm": 1.1033895015716553, + "learning_rate": 1.8626991469969002e-06, + "loss": 0.6716, + "step": 14096 + }, + { + "epoch": 0.7244835029293863, + "grad_norm": 1.047999382019043, + "learning_rate": 1.8620511569471222e-06, + "loss": 0.6336, + "step": 14097 + }, + { + "epoch": 0.724534895672731, + "grad_norm": 0.998139500617981, + "learning_rate": 1.8614032538369576e-06, + "loss": 0.6696, + "step": 14098 + }, + { + "epoch": 0.7245862884160756, + "grad_norm": 1.0759371519088745, + "learning_rate": 1.8607554376843546e-06, + "loss": 0.6873, + "step": 14099 + }, + { + "epoch": 0.7246376811594203, + "grad_norm": 0.6747581362724304, + "learning_rate": 1.860107708507265e-06, + "loss": 0.6293, + "step": 14100 + }, + { + "epoch": 0.7246890739027649, + "grad_norm": 1.1367789506912231, + "learning_rate": 1.8594600663236334e-06, + "loss": 0.7328, + "step": 14101 + }, + { + "epoch": 0.7247404666461096, + "grad_norm": 1.0425395965576172, + "learning_rate": 1.8588125111514032e-06, + "loss": 0.7091, + "step": 14102 + }, + { + "epoch": 0.7247918593894542, + "grad_norm": 1.0911006927490234, + "learning_rate": 1.8581650430085135e-06, + "loss": 0.7584, + "step": 14103 + }, + { + "epoch": 0.7248432521327989, + "grad_norm": 1.1549819707870483, + "learning_rate": 1.8575176619129086e-06, + "loss": 0.7299, + "step": 14104 + }, + { + "epoch": 0.7248946448761435, + "grad_norm": 1.0662363767623901, + "learning_rate": 1.8568703678825172e-06, + "loss": 0.7376, + "step": 14105 + }, + { + "epoch": 0.7249460376194882, + "grad_norm": 1.162440299987793, + "learning_rate": 1.8562231609352788e-06, + "loss": 0.7287, + "step": 14106 + }, + { + "epoch": 0.7249974303628328, + "grad_norm": 0.8354968428611755, + "learning_rate": 1.8555760410891232e-06, + "loss": 0.707, + "step": 14107 + }, + { + "epoch": 0.7250488231061774, + "grad_norm": 1.0202405452728271, + "learning_rate": 1.8549290083619776e-06, + "loss": 0.6983, + "step": 14108 + }, + { + "epoch": 0.725100215849522, + "grad_norm": 1.1565581560134888, + "learning_rate": 1.8542820627717717e-06, + "loss": 0.7545, + "step": 14109 + }, + { + "epoch": 0.7251516085928666, + "grad_norm": 1.0327166318893433, + "learning_rate": 1.8536352043364287e-06, + "loss": 0.7067, + "step": 14110 + }, + { + "epoch": 0.7252030013362113, + "grad_norm": 1.0117297172546387, + "learning_rate": 1.852988433073869e-06, + "loss": 0.6417, + "step": 14111 + }, + { + "epoch": 0.7252543940795559, + "grad_norm": 1.0451098680496216, + "learning_rate": 1.8523417490020119e-06, + "loss": 0.7, + "step": 14112 + }, + { + "epoch": 0.7253057868229006, + "grad_norm": 1.1725757122039795, + "learning_rate": 1.851695152138779e-06, + "loss": 0.7188, + "step": 14113 + }, + { + "epoch": 0.7253571795662452, + "grad_norm": 1.090002179145813, + "learning_rate": 1.8510486425020769e-06, + "loss": 0.7425, + "step": 14114 + }, + { + "epoch": 0.7254085723095899, + "grad_norm": 1.048673391342163, + "learning_rate": 1.8504022201098237e-06, + "loss": 0.7581, + "step": 14115 + }, + { + "epoch": 0.7254599650529345, + "grad_norm": 0.691389799118042, + "learning_rate": 1.8497558849799264e-06, + "loss": 0.6438, + "step": 14116 + }, + { + "epoch": 0.7255113577962792, + "grad_norm": 1.0125349760055542, + "learning_rate": 1.8491096371302962e-06, + "loss": 0.6702, + "step": 14117 + }, + { + "epoch": 0.7255627505396238, + "grad_norm": 1.1240495443344116, + "learning_rate": 1.8484634765788316e-06, + "loss": 0.6547, + "step": 14118 + }, + { + "epoch": 0.7256141432829685, + "grad_norm": 1.1280101537704468, + "learning_rate": 1.84781740334344e-06, + "loss": 0.6748, + "step": 14119 + }, + { + "epoch": 0.7256655360263131, + "grad_norm": 1.1014515161514282, + "learning_rate": 1.8471714174420202e-06, + "loss": 0.6859, + "step": 14120 + }, + { + "epoch": 0.7257169287696578, + "grad_norm": 1.1023160219192505, + "learning_rate": 1.8465255188924674e-06, + "loss": 0.6654, + "step": 14121 + }, + { + "epoch": 0.7257683215130024, + "grad_norm": 1.0864132642745972, + "learning_rate": 1.8458797077126806e-06, + "loss": 0.6781, + "step": 14122 + }, + { + "epoch": 0.725819714256347, + "grad_norm": 0.7458215355873108, + "learning_rate": 1.8452339839205512e-06, + "loss": 0.7035, + "step": 14123 + }, + { + "epoch": 0.7258711069996916, + "grad_norm": 1.095097303390503, + "learning_rate": 1.8445883475339688e-06, + "loss": 0.73, + "step": 14124 + }, + { + "epoch": 0.7259224997430362, + "grad_norm": 1.1990714073181152, + "learning_rate": 1.8439427985708202e-06, + "loss": 0.729, + "step": 14125 + }, + { + "epoch": 0.7259738924863809, + "grad_norm": 1.03083336353302, + "learning_rate": 1.8432973370489953e-06, + "loss": 0.7261, + "step": 14126 + }, + { + "epoch": 0.7260252852297255, + "grad_norm": 1.364574670791626, + "learning_rate": 1.8426519629863715e-06, + "loss": 0.6632, + "step": 14127 + }, + { + "epoch": 0.7260766779730702, + "grad_norm": 0.7561004161834717, + "learning_rate": 1.8420066764008338e-06, + "loss": 0.6426, + "step": 14128 + }, + { + "epoch": 0.7261280707164148, + "grad_norm": 1.0767942667007446, + "learning_rate": 1.8413614773102584e-06, + "loss": 0.6533, + "step": 14129 + }, + { + "epoch": 0.7261794634597595, + "grad_norm": 1.125990629196167, + "learning_rate": 1.84071636573252e-06, + "loss": 0.6825, + "step": 14130 + }, + { + "epoch": 0.7262308562031041, + "grad_norm": 0.7646001577377319, + "learning_rate": 1.8400713416854954e-06, + "loss": 0.6578, + "step": 14131 + }, + { + "epoch": 0.7262822489464488, + "grad_norm": 1.138761281967163, + "learning_rate": 1.8394264051870519e-06, + "loss": 0.7373, + "step": 14132 + }, + { + "epoch": 0.7263336416897934, + "grad_norm": 1.1558908224105835, + "learning_rate": 1.838781556255063e-06, + "loss": 0.6711, + "step": 14133 + }, + { + "epoch": 0.7263850344331381, + "grad_norm": 1.098095417022705, + "learning_rate": 1.8381367949073882e-06, + "loss": 0.6601, + "step": 14134 + }, + { + "epoch": 0.7264364271764827, + "grad_norm": 0.6936180591583252, + "learning_rate": 1.8374921211618969e-06, + "loss": 0.6593, + "step": 14135 + }, + { + "epoch": 0.7264878199198274, + "grad_norm": 1.1750619411468506, + "learning_rate": 1.8368475350364473e-06, + "loss": 0.6687, + "step": 14136 + }, + { + "epoch": 0.726539212663172, + "grad_norm": 1.1021217107772827, + "learning_rate": 1.8362030365488993e-06, + "loss": 0.729, + "step": 14137 + }, + { + "epoch": 0.7265906054065167, + "grad_norm": 0.8244487047195435, + "learning_rate": 1.835558625717107e-06, + "loss": 0.7265, + "step": 14138 + }, + { + "epoch": 0.7266419981498612, + "grad_norm": 1.092629313468933, + "learning_rate": 1.8349143025589278e-06, + "loss": 0.686, + "step": 14139 + }, + { + "epoch": 0.7266933908932058, + "grad_norm": 1.0888848304748535, + "learning_rate": 1.834270067092212e-06, + "loss": 0.683, + "step": 14140 + }, + { + "epoch": 0.7267447836365505, + "grad_norm": 1.0825302600860596, + "learning_rate": 1.8336259193348071e-06, + "loss": 0.7254, + "step": 14141 + }, + { + "epoch": 0.7267961763798951, + "grad_norm": 1.1431008577346802, + "learning_rate": 1.832981859304564e-06, + "loss": 0.707, + "step": 14142 + }, + { + "epoch": 0.7268475691232398, + "grad_norm": 1.1113783121109009, + "learning_rate": 1.832337887019321e-06, + "loss": 0.7117, + "step": 14143 + }, + { + "epoch": 0.7268989618665844, + "grad_norm": 1.0729255676269531, + "learning_rate": 1.8316940024969244e-06, + "loss": 0.6891, + "step": 14144 + }, + { + "epoch": 0.7269503546099291, + "grad_norm": 1.5255107879638672, + "learning_rate": 1.8310502057552127e-06, + "loss": 0.7599, + "step": 14145 + }, + { + "epoch": 0.7270017473532737, + "grad_norm": 1.0644080638885498, + "learning_rate": 1.8304064968120216e-06, + "loss": 0.6899, + "step": 14146 + }, + { + "epoch": 0.7270531400966184, + "grad_norm": 1.007764458656311, + "learning_rate": 1.8297628756851848e-06, + "loss": 0.6524, + "step": 14147 + }, + { + "epoch": 0.727104532839963, + "grad_norm": 1.0331767797470093, + "learning_rate": 1.829119342392538e-06, + "loss": 0.6395, + "step": 14148 + }, + { + "epoch": 0.7271559255833077, + "grad_norm": 1.0513592958450317, + "learning_rate": 1.8284758969519085e-06, + "loss": 0.6908, + "step": 14149 + }, + { + "epoch": 0.7272073183266523, + "grad_norm": 1.090409278869629, + "learning_rate": 1.8278325393811226e-06, + "loss": 0.6788, + "step": 14150 + }, + { + "epoch": 0.727258711069997, + "grad_norm": 1.162103295326233, + "learning_rate": 1.827189269698007e-06, + "loss": 0.7348, + "step": 14151 + }, + { + "epoch": 0.7273101038133416, + "grad_norm": 1.1262058019638062, + "learning_rate": 1.8265460879203839e-06, + "loss": 0.69, + "step": 14152 + }, + { + "epoch": 0.7273614965566862, + "grad_norm": 1.0211646556854248, + "learning_rate": 1.8259029940660728e-06, + "loss": 0.6366, + "step": 14153 + }, + { + "epoch": 0.7274128893000308, + "grad_norm": 0.7680974006652832, + "learning_rate": 1.8252599881528893e-06, + "loss": 0.6604, + "step": 14154 + }, + { + "epoch": 0.7274642820433754, + "grad_norm": 0.7253857851028442, + "learning_rate": 1.824617070198653e-06, + "loss": 0.64, + "step": 14155 + }, + { + "epoch": 0.7275156747867201, + "grad_norm": 1.0396854877471924, + "learning_rate": 1.8239742402211709e-06, + "loss": 0.6732, + "step": 14156 + }, + { + "epoch": 0.7275670675300647, + "grad_norm": 0.8034530282020569, + "learning_rate": 1.8233314982382571e-06, + "loss": 0.6612, + "step": 14157 + }, + { + "epoch": 0.7276184602734094, + "grad_norm": 1.0856924057006836, + "learning_rate": 1.8226888442677182e-06, + "loss": 0.6572, + "step": 14158 + }, + { + "epoch": 0.727669853016754, + "grad_norm": 1.221211314201355, + "learning_rate": 1.822046278327358e-06, + "loss": 0.762, + "step": 14159 + }, + { + "epoch": 0.7277212457600987, + "grad_norm": 1.0133737325668335, + "learning_rate": 1.8214038004349826e-06, + "loss": 0.721, + "step": 14160 + }, + { + "epoch": 0.7277726385034433, + "grad_norm": 1.088274359703064, + "learning_rate": 1.8207614106083905e-06, + "loss": 0.7186, + "step": 14161 + }, + { + "epoch": 0.727824031246788, + "grad_norm": 1.056518316268921, + "learning_rate": 1.820119108865379e-06, + "loss": 0.6927, + "step": 14162 + }, + { + "epoch": 0.7278754239901326, + "grad_norm": 1.1000196933746338, + "learning_rate": 1.8194768952237436e-06, + "loss": 0.6606, + "step": 14163 + }, + { + "epoch": 0.7279268167334773, + "grad_norm": 1.164321780204773, + "learning_rate": 1.8188347697012798e-06, + "loss": 0.7214, + "step": 14164 + }, + { + "epoch": 0.7279782094768219, + "grad_norm": 1.1015284061431885, + "learning_rate": 1.8181927323157765e-06, + "loss": 0.6996, + "step": 14165 + }, + { + "epoch": 0.7280296022201665, + "grad_norm": 1.1950316429138184, + "learning_rate": 1.817550783085022e-06, + "loss": 0.7327, + "step": 14166 + }, + { + "epoch": 0.7280809949635112, + "grad_norm": 1.1232049465179443, + "learning_rate": 1.8169089220268004e-06, + "loss": 0.7516, + "step": 14167 + }, + { + "epoch": 0.7281323877068558, + "grad_norm": 1.0463675260543823, + "learning_rate": 1.8162671491589e-06, + "loss": 0.6534, + "step": 14168 + }, + { + "epoch": 0.7281837804502004, + "grad_norm": 1.077032446861267, + "learning_rate": 1.815625464499095e-06, + "loss": 0.737, + "step": 14169 + }, + { + "epoch": 0.728235173193545, + "grad_norm": 1.099536418914795, + "learning_rate": 1.8149838680651694e-06, + "loss": 0.7212, + "step": 14170 + }, + { + "epoch": 0.7282865659368897, + "grad_norm": 1.050209403038025, + "learning_rate": 1.8143423598748967e-06, + "loss": 0.7196, + "step": 14171 + }, + { + "epoch": 0.7283379586802343, + "grad_norm": 1.2169212102890015, + "learning_rate": 1.8137009399460492e-06, + "loss": 0.7135, + "step": 14172 + }, + { + "epoch": 0.728389351423579, + "grad_norm": 1.1904245615005493, + "learning_rate": 1.8130596082964008e-06, + "loss": 0.7275, + "step": 14173 + }, + { + "epoch": 0.7284407441669236, + "grad_norm": 1.1048997640609741, + "learning_rate": 1.8124183649437193e-06, + "loss": 0.7154, + "step": 14174 + }, + { + "epoch": 0.7284921369102683, + "grad_norm": 1.1846435070037842, + "learning_rate": 1.8117772099057706e-06, + "loss": 0.6776, + "step": 14175 + }, + { + "epoch": 0.7285435296536129, + "grad_norm": 1.1215087175369263, + "learning_rate": 1.8111361432003166e-06, + "loss": 0.6722, + "step": 14176 + }, + { + "epoch": 0.7285949223969576, + "grad_norm": 1.1111115217208862, + "learning_rate": 1.810495164845124e-06, + "loss": 0.6578, + "step": 14177 + }, + { + "epoch": 0.7286463151403022, + "grad_norm": 1.0743190050125122, + "learning_rate": 1.8098542748579446e-06, + "loss": 0.642, + "step": 14178 + }, + { + "epoch": 0.7286977078836469, + "grad_norm": 1.1126295328140259, + "learning_rate": 1.8092134732565396e-06, + "loss": 0.672, + "step": 14179 + }, + { + "epoch": 0.7287491006269915, + "grad_norm": 1.0887190103530884, + "learning_rate": 1.8085727600586606e-06, + "loss": 0.6818, + "step": 14180 + }, + { + "epoch": 0.7288004933703361, + "grad_norm": 1.1138381958007812, + "learning_rate": 1.8079321352820616e-06, + "loss": 0.7149, + "step": 14181 + }, + { + "epoch": 0.7288518861136808, + "grad_norm": 1.0463840961456299, + "learning_rate": 1.80729159894449e-06, + "loss": 0.6846, + "step": 14182 + }, + { + "epoch": 0.7289032788570254, + "grad_norm": 0.7222232818603516, + "learning_rate": 1.8066511510636913e-06, + "loss": 0.6855, + "step": 14183 + }, + { + "epoch": 0.72895467160037, + "grad_norm": 1.0851243734359741, + "learning_rate": 1.8060107916574143e-06, + "loss": 0.7056, + "step": 14184 + }, + { + "epoch": 0.7290060643437146, + "grad_norm": 1.147598147392273, + "learning_rate": 1.8053705207433941e-06, + "loss": 0.7511, + "step": 14185 + }, + { + "epoch": 0.7290574570870593, + "grad_norm": 1.0548043251037598, + "learning_rate": 1.804730338339375e-06, + "loss": 0.7677, + "step": 14186 + }, + { + "epoch": 0.7291088498304039, + "grad_norm": 1.1363869905471802, + "learning_rate": 1.804090244463092e-06, + "loss": 0.7889, + "step": 14187 + }, + { + "epoch": 0.7291602425737486, + "grad_norm": 1.0757659673690796, + "learning_rate": 1.8034502391322794e-06, + "loss": 0.6833, + "step": 14188 + }, + { + "epoch": 0.7292116353170932, + "grad_norm": 1.0416321754455566, + "learning_rate": 1.802810322364668e-06, + "loss": 0.6796, + "step": 14189 + }, + { + "epoch": 0.7292630280604379, + "grad_norm": 1.0118740797042847, + "learning_rate": 1.80217049417799e-06, + "loss": 0.6859, + "step": 14190 + }, + { + "epoch": 0.7293144208037825, + "grad_norm": 0.7777758240699768, + "learning_rate": 1.8015307545899714e-06, + "loss": 0.6491, + "step": 14191 + }, + { + "epoch": 0.7293658135471272, + "grad_norm": 1.1217104196548462, + "learning_rate": 1.800891103618334e-06, + "loss": 0.6992, + "step": 14192 + }, + { + "epoch": 0.7294172062904718, + "grad_norm": 1.2467244863510132, + "learning_rate": 1.8002515412808058e-06, + "loss": 0.6686, + "step": 14193 + }, + { + "epoch": 0.7294685990338164, + "grad_norm": 1.090186595916748, + "learning_rate": 1.7996120675950994e-06, + "loss": 0.6892, + "step": 14194 + }, + { + "epoch": 0.7295199917771611, + "grad_norm": 1.0485175848007202, + "learning_rate": 1.7989726825789371e-06, + "loss": 0.694, + "step": 14195 + }, + { + "epoch": 0.7295713845205057, + "grad_norm": 1.1058467626571655, + "learning_rate": 1.7983333862500301e-06, + "loss": 0.7156, + "step": 14196 + }, + { + "epoch": 0.7296227772638504, + "grad_norm": 1.1257439851760864, + "learning_rate": 1.797694178626096e-06, + "loss": 0.7333, + "step": 14197 + }, + { + "epoch": 0.729674170007195, + "grad_norm": 1.0711151361465454, + "learning_rate": 1.7970550597248377e-06, + "loss": 0.6853, + "step": 14198 + }, + { + "epoch": 0.7297255627505396, + "grad_norm": 1.0446034669876099, + "learning_rate": 1.7964160295639678e-06, + "loss": 0.6607, + "step": 14199 + }, + { + "epoch": 0.7297769554938842, + "grad_norm": 0.6731815338134766, + "learning_rate": 1.7957770881611886e-06, + "loss": 0.6413, + "step": 14200 + }, + { + "epoch": 0.7298283482372289, + "grad_norm": 1.097685694694519, + "learning_rate": 1.7951382355342024e-06, + "loss": 0.7283, + "step": 14201 + }, + { + "epoch": 0.7298797409805735, + "grad_norm": 1.049747109413147, + "learning_rate": 1.7944994717007108e-06, + "loss": 0.7695, + "step": 14202 + }, + { + "epoch": 0.7299311337239182, + "grad_norm": 1.0693048238754272, + "learning_rate": 1.7938607966784111e-06, + "loss": 0.7518, + "step": 14203 + }, + { + "epoch": 0.7299825264672628, + "grad_norm": 1.0700889825820923, + "learning_rate": 1.7932222104849979e-06, + "loss": 0.7016, + "step": 14204 + }, + { + "epoch": 0.7300339192106075, + "grad_norm": 1.0565557479858398, + "learning_rate": 1.7925837131381618e-06, + "loss": 0.674, + "step": 14205 + }, + { + "epoch": 0.7300853119539521, + "grad_norm": 1.109625220298767, + "learning_rate": 1.791945304655598e-06, + "loss": 0.7059, + "step": 14206 + }, + { + "epoch": 0.7301367046972967, + "grad_norm": 1.0743491649627686, + "learning_rate": 1.7913069850549874e-06, + "loss": 0.6613, + "step": 14207 + }, + { + "epoch": 0.7301880974406414, + "grad_norm": 0.6909602284431458, + "learning_rate": 1.7906687543540212e-06, + "loss": 0.6621, + "step": 14208 + }, + { + "epoch": 0.730239490183986, + "grad_norm": 1.108607292175293, + "learning_rate": 1.7900306125703792e-06, + "loss": 0.7114, + "step": 14209 + }, + { + "epoch": 0.7302908829273307, + "grad_norm": 1.1064765453338623, + "learning_rate": 1.7893925597217404e-06, + "loss": 0.6997, + "step": 14210 + }, + { + "epoch": 0.7303422756706753, + "grad_norm": 1.1871590614318848, + "learning_rate": 1.7887545958257863e-06, + "loss": 0.7421, + "step": 14211 + }, + { + "epoch": 0.73039366841402, + "grad_norm": 0.7593753337860107, + "learning_rate": 1.78811672090019e-06, + "loss": 0.6463, + "step": 14212 + }, + { + "epoch": 0.7304450611573646, + "grad_norm": 1.0574991703033447, + "learning_rate": 1.7874789349626248e-06, + "loss": 0.7147, + "step": 14213 + }, + { + "epoch": 0.7304964539007093, + "grad_norm": 1.008554458618164, + "learning_rate": 1.7868412380307599e-06, + "loss": 0.7168, + "step": 14214 + }, + { + "epoch": 0.7305478466440538, + "grad_norm": 0.71867835521698, + "learning_rate": 1.7862036301222652e-06, + "loss": 0.6985, + "step": 14215 + }, + { + "epoch": 0.7305992393873985, + "grad_norm": 1.1462565660476685, + "learning_rate": 1.7855661112548056e-06, + "loss": 0.6941, + "step": 14216 + }, + { + "epoch": 0.7306506321307431, + "grad_norm": 1.1414536237716675, + "learning_rate": 1.7849286814460442e-06, + "loss": 0.702, + "step": 14217 + }, + { + "epoch": 0.7307020248740878, + "grad_norm": 0.8802616000175476, + "learning_rate": 1.7842913407136392e-06, + "loss": 0.6394, + "step": 14218 + }, + { + "epoch": 0.7307534176174324, + "grad_norm": 1.0863372087478638, + "learning_rate": 1.7836540890752546e-06, + "loss": 0.6592, + "step": 14219 + }, + { + "epoch": 0.730804810360777, + "grad_norm": 0.7025415301322937, + "learning_rate": 1.783016926548538e-06, + "loss": 0.6558, + "step": 14220 + }, + { + "epoch": 0.7308562031041217, + "grad_norm": 1.0959371328353882, + "learning_rate": 1.7823798531511487e-06, + "loss": 0.6766, + "step": 14221 + }, + { + "epoch": 0.7309075958474663, + "grad_norm": 1.0273948907852173, + "learning_rate": 1.7817428689007354e-06, + "loss": 0.6428, + "step": 14222 + }, + { + "epoch": 0.730958988590811, + "grad_norm": 1.3654849529266357, + "learning_rate": 1.7811059738149445e-06, + "loss": 0.6434, + "step": 14223 + }, + { + "epoch": 0.7310103813341556, + "grad_norm": 1.0721516609191895, + "learning_rate": 1.780469167911425e-06, + "loss": 0.6959, + "step": 14224 + }, + { + "epoch": 0.7310617740775003, + "grad_norm": 1.1090396642684937, + "learning_rate": 1.7798324512078174e-06, + "loss": 0.7262, + "step": 14225 + }, + { + "epoch": 0.7311131668208449, + "grad_norm": 1.0582424402236938, + "learning_rate": 1.7791958237217666e-06, + "loss": 0.6957, + "step": 14226 + }, + { + "epoch": 0.7311645595641896, + "grad_norm": 1.1268717050552368, + "learning_rate": 1.778559285470905e-06, + "loss": 0.71, + "step": 14227 + }, + { + "epoch": 0.7312159523075342, + "grad_norm": 1.053268551826477, + "learning_rate": 1.7779228364728729e-06, + "loss": 0.702, + "step": 14228 + }, + { + "epoch": 0.7312673450508789, + "grad_norm": 1.062662124633789, + "learning_rate": 1.777286476745303e-06, + "loss": 0.7051, + "step": 14229 + }, + { + "epoch": 0.7313187377942234, + "grad_norm": 1.0808087587356567, + "learning_rate": 1.7766502063058245e-06, + "loss": 0.718, + "step": 14230 + }, + { + "epoch": 0.731370130537568, + "grad_norm": 1.0907949209213257, + "learning_rate": 1.7760140251720658e-06, + "loss": 0.7524, + "step": 14231 + }, + { + "epoch": 0.7314215232809127, + "grad_norm": 0.7170012593269348, + "learning_rate": 1.775377933361655e-06, + "loss": 0.6203, + "step": 14232 + }, + { + "epoch": 0.7314729160242573, + "grad_norm": 1.159892201423645, + "learning_rate": 1.7747419308922147e-06, + "loss": 0.7086, + "step": 14233 + }, + { + "epoch": 0.731524308767602, + "grad_norm": 1.0847471952438354, + "learning_rate": 1.774106017781364e-06, + "loss": 0.7012, + "step": 14234 + }, + { + "epoch": 0.7315757015109466, + "grad_norm": 1.0239468812942505, + "learning_rate": 1.7734701940467263e-06, + "loss": 0.6976, + "step": 14235 + }, + { + "epoch": 0.7316270942542913, + "grad_norm": 1.0780420303344727, + "learning_rate": 1.7728344597059117e-06, + "loss": 0.7741, + "step": 14236 + }, + { + "epoch": 0.7316784869976359, + "grad_norm": 1.148107886314392, + "learning_rate": 1.7721988147765372e-06, + "loss": 0.7254, + "step": 14237 + }, + { + "epoch": 0.7317298797409806, + "grad_norm": 1.0864347219467163, + "learning_rate": 1.7715632592762138e-06, + "loss": 0.7095, + "step": 14238 + }, + { + "epoch": 0.7317812724843252, + "grad_norm": 1.0514644384384155, + "learning_rate": 1.7709277932225494e-06, + "loss": 0.6897, + "step": 14239 + }, + { + "epoch": 0.7318326652276699, + "grad_norm": 1.0319058895111084, + "learning_rate": 1.7702924166331487e-06, + "loss": 0.7307, + "step": 14240 + }, + { + "epoch": 0.7318840579710145, + "grad_norm": 1.0796493291854858, + "learning_rate": 1.7696571295256182e-06, + "loss": 0.73, + "step": 14241 + }, + { + "epoch": 0.7319354507143592, + "grad_norm": 1.0330322980880737, + "learning_rate": 1.7690219319175584e-06, + "loss": 0.6603, + "step": 14242 + }, + { + "epoch": 0.7319868434577038, + "grad_norm": 0.7170283198356628, + "learning_rate": 1.768386823826565e-06, + "loss": 0.6325, + "step": 14243 + }, + { + "epoch": 0.7320382362010485, + "grad_norm": 0.6943623423576355, + "learning_rate": 1.7677518052702387e-06, + "loss": 0.6328, + "step": 14244 + }, + { + "epoch": 0.732089628944393, + "grad_norm": 0.6924095749855042, + "learning_rate": 1.7671168762661712e-06, + "loss": 0.6872, + "step": 14245 + }, + { + "epoch": 0.7321410216877376, + "grad_norm": 1.1192182302474976, + "learning_rate": 1.7664820368319534e-06, + "loss": 0.752, + "step": 14246 + }, + { + "epoch": 0.7321924144310823, + "grad_norm": 1.1731281280517578, + "learning_rate": 1.7658472869851733e-06, + "loss": 0.7455, + "step": 14247 + }, + { + "epoch": 0.7322438071744269, + "grad_norm": 1.0350943803787231, + "learning_rate": 1.7652126267434217e-06, + "loss": 0.6899, + "step": 14248 + }, + { + "epoch": 0.7322951999177716, + "grad_norm": 1.086379885673523, + "learning_rate": 1.7645780561242748e-06, + "loss": 0.714, + "step": 14249 + }, + { + "epoch": 0.7323465926611162, + "grad_norm": 1.1786599159240723, + "learning_rate": 1.7639435751453205e-06, + "loss": 0.73, + "step": 14250 + }, + { + "epoch": 0.7323979854044609, + "grad_norm": 1.144720196723938, + "learning_rate": 1.7633091838241356e-06, + "loss": 0.7297, + "step": 14251 + }, + { + "epoch": 0.7324493781478055, + "grad_norm": 1.0773013830184937, + "learning_rate": 1.762674882178294e-06, + "loss": 0.7268, + "step": 14252 + }, + { + "epoch": 0.7325007708911502, + "grad_norm": 1.0976192951202393, + "learning_rate": 1.7620406702253734e-06, + "loss": 0.6985, + "step": 14253 + }, + { + "epoch": 0.7325521636344948, + "grad_norm": 1.1154916286468506, + "learning_rate": 1.761406547982944e-06, + "loss": 0.6963, + "step": 14254 + }, + { + "epoch": 0.7326035563778395, + "grad_norm": 1.089724063873291, + "learning_rate": 1.760772515468574e-06, + "loss": 0.7551, + "step": 14255 + }, + { + "epoch": 0.7326549491211841, + "grad_norm": 1.0380268096923828, + "learning_rate": 1.7601385726998282e-06, + "loss": 0.6359, + "step": 14256 + }, + { + "epoch": 0.7327063418645288, + "grad_norm": 1.1182377338409424, + "learning_rate": 1.7595047196942745e-06, + "loss": 0.6982, + "step": 14257 + }, + { + "epoch": 0.7327577346078734, + "grad_norm": 1.1102083921432495, + "learning_rate": 1.7588709564694724e-06, + "loss": 0.649, + "step": 14258 + }, + { + "epoch": 0.7328091273512181, + "grad_norm": 1.058334231376648, + "learning_rate": 1.75823728304298e-06, + "loss": 0.6842, + "step": 14259 + }, + { + "epoch": 0.7328605200945626, + "grad_norm": 1.0519452095031738, + "learning_rate": 1.7576036994323537e-06, + "loss": 0.7538, + "step": 14260 + }, + { + "epoch": 0.7329119128379072, + "grad_norm": 0.7923277616500854, + "learning_rate": 1.7569702056551492e-06, + "loss": 0.677, + "step": 14261 + }, + { + "epoch": 0.7329633055812519, + "grad_norm": 1.0972868204116821, + "learning_rate": 1.7563368017289178e-06, + "loss": 0.6621, + "step": 14262 + }, + { + "epoch": 0.7330146983245965, + "grad_norm": 1.1292816400527954, + "learning_rate": 1.7557034876712076e-06, + "loss": 0.7358, + "step": 14263 + }, + { + "epoch": 0.7330660910679412, + "grad_norm": 1.1095266342163086, + "learning_rate": 1.7550702634995647e-06, + "loss": 0.7365, + "step": 14264 + }, + { + "epoch": 0.7331174838112858, + "grad_norm": 1.0919475555419922, + "learning_rate": 1.7544371292315326e-06, + "loss": 0.6828, + "step": 14265 + }, + { + "epoch": 0.7331688765546305, + "grad_norm": 1.0480883121490479, + "learning_rate": 1.753804084884656e-06, + "loss": 0.7281, + "step": 14266 + }, + { + "epoch": 0.7332202692979751, + "grad_norm": 1.12640380859375, + "learning_rate": 1.7531711304764714e-06, + "loss": 0.6605, + "step": 14267 + }, + { + "epoch": 0.7332716620413198, + "grad_norm": 1.1045379638671875, + "learning_rate": 1.7525382660245166e-06, + "loss": 0.7229, + "step": 14268 + }, + { + "epoch": 0.7333230547846644, + "grad_norm": 0.7560640573501587, + "learning_rate": 1.751905491546323e-06, + "loss": 0.6923, + "step": 14269 + }, + { + "epoch": 0.7333744475280091, + "grad_norm": 1.1003350019454956, + "learning_rate": 1.751272807059427e-06, + "loss": 0.6773, + "step": 14270 + }, + { + "epoch": 0.7334258402713537, + "grad_norm": 1.0724040269851685, + "learning_rate": 1.7506402125813522e-06, + "loss": 0.7364, + "step": 14271 + }, + { + "epoch": 0.7334772330146984, + "grad_norm": 1.1029902696609497, + "learning_rate": 1.7500077081296284e-06, + "loss": 0.6844, + "step": 14272 + }, + { + "epoch": 0.733528625758043, + "grad_norm": 1.0249310731887817, + "learning_rate": 1.7493752937217784e-06, + "loss": 0.6801, + "step": 14273 + }, + { + "epoch": 0.7335800185013877, + "grad_norm": 1.2113527059555054, + "learning_rate": 1.748742969375326e-06, + "loss": 0.8207, + "step": 14274 + }, + { + "epoch": 0.7336314112447322, + "grad_norm": 1.0830127000808716, + "learning_rate": 1.7481107351077887e-06, + "loss": 0.7696, + "step": 14275 + }, + { + "epoch": 0.7336828039880768, + "grad_norm": 1.1088954210281372, + "learning_rate": 1.7474785909366808e-06, + "loss": 0.6499, + "step": 14276 + }, + { + "epoch": 0.7337341967314215, + "grad_norm": 1.0736351013183594, + "learning_rate": 1.7468465368795228e-06, + "loss": 0.6734, + "step": 14277 + }, + { + "epoch": 0.7337855894747661, + "grad_norm": 1.0742754936218262, + "learning_rate": 1.7462145729538193e-06, + "loss": 0.7059, + "step": 14278 + }, + { + "epoch": 0.7338369822181108, + "grad_norm": 1.0745306015014648, + "learning_rate": 1.7455826991770836e-06, + "loss": 0.6538, + "step": 14279 + }, + { + "epoch": 0.7338883749614554, + "grad_norm": 1.0449812412261963, + "learning_rate": 1.7449509155668208e-06, + "loss": 0.6604, + "step": 14280 + }, + { + "epoch": 0.7339397677048001, + "grad_norm": 1.1699858903884888, + "learning_rate": 1.744319222140536e-06, + "loss": 0.6723, + "step": 14281 + }, + { + "epoch": 0.7339911604481447, + "grad_norm": 1.1008058786392212, + "learning_rate": 1.743687618915728e-06, + "loss": 0.754, + "step": 14282 + }, + { + "epoch": 0.7340425531914894, + "grad_norm": 0.7757901549339294, + "learning_rate": 1.7430561059099e-06, + "loss": 0.6547, + "step": 14283 + }, + { + "epoch": 0.734093945934834, + "grad_norm": 1.1030393838882446, + "learning_rate": 1.7424246831405466e-06, + "loss": 0.7704, + "step": 14284 + }, + { + "epoch": 0.7341453386781787, + "grad_norm": 0.7826477289199829, + "learning_rate": 1.7417933506251605e-06, + "loss": 0.6921, + "step": 14285 + }, + { + "epoch": 0.7341967314215233, + "grad_norm": 1.1917823553085327, + "learning_rate": 1.741162108381238e-06, + "loss": 0.7574, + "step": 14286 + }, + { + "epoch": 0.734248124164868, + "grad_norm": 1.259516954421997, + "learning_rate": 1.7405309564262619e-06, + "loss": 0.6814, + "step": 14287 + }, + { + "epoch": 0.7342995169082126, + "grad_norm": 1.1350409984588623, + "learning_rate": 1.7398998947777235e-06, + "loss": 0.7587, + "step": 14288 + }, + { + "epoch": 0.7343509096515572, + "grad_norm": 1.0344046354293823, + "learning_rate": 1.7392689234531036e-06, + "loss": 0.6896, + "step": 14289 + }, + { + "epoch": 0.7344023023949018, + "grad_norm": 0.6549025177955627, + "learning_rate": 1.7386380424698889e-06, + "loss": 0.6381, + "step": 14290 + }, + { + "epoch": 0.7344536951382464, + "grad_norm": 1.049855351448059, + "learning_rate": 1.738007251845552e-06, + "loss": 0.6983, + "step": 14291 + }, + { + "epoch": 0.7345050878815911, + "grad_norm": 1.0957801342010498, + "learning_rate": 1.7373765515975744e-06, + "loss": 0.6404, + "step": 14292 + }, + { + "epoch": 0.7345564806249357, + "grad_norm": 1.079935073852539, + "learning_rate": 1.7367459417434285e-06, + "loss": 0.709, + "step": 14293 + }, + { + "epoch": 0.7346078733682804, + "grad_norm": 0.6613960862159729, + "learning_rate": 1.7361154223005839e-06, + "loss": 0.6348, + "step": 14294 + }, + { + "epoch": 0.734659266111625, + "grad_norm": 1.0420163869857788, + "learning_rate": 1.7354849932865136e-06, + "loss": 0.6925, + "step": 14295 + }, + { + "epoch": 0.7347106588549697, + "grad_norm": 1.0916849374771118, + "learning_rate": 1.7348546547186824e-06, + "loss": 0.6552, + "step": 14296 + }, + { + "epoch": 0.7347620515983143, + "grad_norm": 0.6854373216629028, + "learning_rate": 1.7342244066145542e-06, + "loss": 0.6627, + "step": 14297 + }, + { + "epoch": 0.734813444341659, + "grad_norm": 1.0763461589813232, + "learning_rate": 1.7335942489915892e-06, + "loss": 0.6931, + "step": 14298 + }, + { + "epoch": 0.7348648370850036, + "grad_norm": 1.142867088317871, + "learning_rate": 1.7329641818672505e-06, + "loss": 0.7603, + "step": 14299 + }, + { + "epoch": 0.7349162298283483, + "grad_norm": 1.0380083322525024, + "learning_rate": 1.7323342052589892e-06, + "loss": 0.6619, + "step": 14300 + }, + { + "epoch": 0.7349676225716929, + "grad_norm": 1.0602900981903076, + "learning_rate": 1.7317043191842642e-06, + "loss": 0.6645, + "step": 14301 + }, + { + "epoch": 0.7350190153150375, + "grad_norm": 1.081571102142334, + "learning_rate": 1.7310745236605248e-06, + "loss": 0.7094, + "step": 14302 + }, + { + "epoch": 0.7350704080583822, + "grad_norm": 1.0831965208053589, + "learning_rate": 1.7304448187052182e-06, + "loss": 0.6577, + "step": 14303 + }, + { + "epoch": 0.7351218008017268, + "grad_norm": 1.1221976280212402, + "learning_rate": 1.7298152043357952e-06, + "loss": 0.7251, + "step": 14304 + }, + { + "epoch": 0.7351731935450715, + "grad_norm": 1.0369336605072021, + "learning_rate": 1.7291856805696972e-06, + "loss": 0.6848, + "step": 14305 + }, + { + "epoch": 0.735224586288416, + "grad_norm": 1.2235352993011475, + "learning_rate": 1.7285562474243667e-06, + "loss": 0.6594, + "step": 14306 + }, + { + "epoch": 0.7352759790317607, + "grad_norm": 1.0369309186935425, + "learning_rate": 1.7279269049172403e-06, + "loss": 0.6617, + "step": 14307 + }, + { + "epoch": 0.7353273717751053, + "grad_norm": 0.7648594975471497, + "learning_rate": 1.7272976530657575e-06, + "loss": 0.6213, + "step": 14308 + }, + { + "epoch": 0.73537876451845, + "grad_norm": 1.1536024808883667, + "learning_rate": 1.7266684918873516e-06, + "loss": 0.7317, + "step": 14309 + }, + { + "epoch": 0.7354301572617946, + "grad_norm": 1.1113276481628418, + "learning_rate": 1.7260394213994536e-06, + "loss": 0.7238, + "step": 14310 + }, + { + "epoch": 0.7354815500051393, + "grad_norm": 0.8133655190467834, + "learning_rate": 1.72541044161949e-06, + "loss": 0.6997, + "step": 14311 + }, + { + "epoch": 0.7355329427484839, + "grad_norm": 1.0221985578536987, + "learning_rate": 1.724781552564892e-06, + "loss": 0.697, + "step": 14312 + }, + { + "epoch": 0.7355843354918286, + "grad_norm": 1.0915533304214478, + "learning_rate": 1.7241527542530812e-06, + "loss": 0.6756, + "step": 14313 + }, + { + "epoch": 0.7356357282351732, + "grad_norm": 1.094248652458191, + "learning_rate": 1.7235240467014785e-06, + "loss": 0.7057, + "step": 14314 + }, + { + "epoch": 0.7356871209785179, + "grad_norm": 1.0718410015106201, + "learning_rate": 1.7228954299275035e-06, + "loss": 0.7053, + "step": 14315 + }, + { + "epoch": 0.7357385137218625, + "grad_norm": 1.017695665359497, + "learning_rate": 1.7222669039485707e-06, + "loss": 0.7073, + "step": 14316 + }, + { + "epoch": 0.7357899064652071, + "grad_norm": 1.0942466259002686, + "learning_rate": 1.721638468782097e-06, + "loss": 0.6962, + "step": 14317 + }, + { + "epoch": 0.7358412992085518, + "grad_norm": 1.107970118522644, + "learning_rate": 1.7210101244454923e-06, + "loss": 0.6773, + "step": 14318 + }, + { + "epoch": 0.7358926919518964, + "grad_norm": 1.056878685951233, + "learning_rate": 1.720381870956166e-06, + "loss": 0.733, + "step": 14319 + }, + { + "epoch": 0.7359440846952411, + "grad_norm": 0.9937541484832764, + "learning_rate": 1.719753708331522e-06, + "loss": 0.6672, + "step": 14320 + }, + { + "epoch": 0.7359954774385856, + "grad_norm": 1.0694400072097778, + "learning_rate": 1.7191256365889674e-06, + "loss": 0.7122, + "step": 14321 + }, + { + "epoch": 0.7360468701819303, + "grad_norm": 0.792662501335144, + "learning_rate": 1.718497655745902e-06, + "loss": 0.6199, + "step": 14322 + }, + { + "epoch": 0.7360982629252749, + "grad_norm": 1.1098343133926392, + "learning_rate": 1.7178697658197246e-06, + "loss": 0.6926, + "step": 14323 + }, + { + "epoch": 0.7361496556686196, + "grad_norm": 0.7159417271614075, + "learning_rate": 1.71724196682783e-06, + "loss": 0.6301, + "step": 14324 + }, + { + "epoch": 0.7362010484119642, + "grad_norm": 1.1120346784591675, + "learning_rate": 1.716614258787615e-06, + "loss": 0.804, + "step": 14325 + }, + { + "epoch": 0.7362524411553089, + "grad_norm": 1.102393627166748, + "learning_rate": 1.7159866417164688e-06, + "loss": 0.7112, + "step": 14326 + }, + { + "epoch": 0.7363038338986535, + "grad_norm": 1.1136939525604248, + "learning_rate": 1.7153591156317796e-06, + "loss": 0.7227, + "step": 14327 + }, + { + "epoch": 0.7363552266419982, + "grad_norm": 1.145032286643982, + "learning_rate": 1.7147316805509367e-06, + "loss": 0.7819, + "step": 14328 + }, + { + "epoch": 0.7364066193853428, + "grad_norm": 1.1060703992843628, + "learning_rate": 1.714104336491319e-06, + "loss": 0.7155, + "step": 14329 + }, + { + "epoch": 0.7364580121286874, + "grad_norm": 1.4844474792480469, + "learning_rate": 1.7134770834703112e-06, + "loss": 0.7068, + "step": 14330 + }, + { + "epoch": 0.7365094048720321, + "grad_norm": 1.0718817710876465, + "learning_rate": 1.7128499215052908e-06, + "loss": 0.6977, + "step": 14331 + }, + { + "epoch": 0.7365607976153767, + "grad_norm": 1.1282298564910889, + "learning_rate": 1.712222850613634e-06, + "loss": 0.7441, + "step": 14332 + }, + { + "epoch": 0.7366121903587214, + "grad_norm": 1.1066234111785889, + "learning_rate": 1.7115958708127123e-06, + "loss": 0.7603, + "step": 14333 + }, + { + "epoch": 0.736663583102066, + "grad_norm": 1.1001508235931396, + "learning_rate": 1.7109689821199e-06, + "loss": 0.7377, + "step": 14334 + }, + { + "epoch": 0.7367149758454107, + "grad_norm": 1.1116912364959717, + "learning_rate": 1.7103421845525648e-06, + "loss": 0.6974, + "step": 14335 + }, + { + "epoch": 0.7367663685887552, + "grad_norm": 0.6937499046325684, + "learning_rate": 1.70971547812807e-06, + "loss": 0.6376, + "step": 14336 + }, + { + "epoch": 0.7368177613320999, + "grad_norm": 1.0903215408325195, + "learning_rate": 1.7090888628637825e-06, + "loss": 0.6859, + "step": 14337 + }, + { + "epoch": 0.7368691540754445, + "grad_norm": 1.060340166091919, + "learning_rate": 1.708462338777062e-06, + "loss": 0.689, + "step": 14338 + }, + { + "epoch": 0.7369205468187892, + "grad_norm": 1.1633929014205933, + "learning_rate": 1.7078359058852673e-06, + "loss": 0.7314, + "step": 14339 + }, + { + "epoch": 0.7369719395621338, + "grad_norm": 0.7298997640609741, + "learning_rate": 1.7072095642057512e-06, + "loss": 0.6711, + "step": 14340 + }, + { + "epoch": 0.7370233323054785, + "grad_norm": 1.212416648864746, + "learning_rate": 1.7065833137558736e-06, + "loss": 0.7083, + "step": 14341 + }, + { + "epoch": 0.7370747250488231, + "grad_norm": 1.0263867378234863, + "learning_rate": 1.7059571545529775e-06, + "loss": 0.6839, + "step": 14342 + }, + { + "epoch": 0.7371261177921677, + "grad_norm": 1.078334093093872, + "learning_rate": 1.7053310866144162e-06, + "loss": 0.6891, + "step": 14343 + }, + { + "epoch": 0.7371775105355124, + "grad_norm": 1.0823578834533691, + "learning_rate": 1.7047051099575345e-06, + "loss": 0.7408, + "step": 14344 + }, + { + "epoch": 0.737228903278857, + "grad_norm": 1.1141408681869507, + "learning_rate": 1.704079224599674e-06, + "loss": 0.6997, + "step": 14345 + }, + { + "epoch": 0.7372802960222017, + "grad_norm": 1.0204076766967773, + "learning_rate": 1.7034534305581785e-06, + "loss": 0.6229, + "step": 14346 + }, + { + "epoch": 0.7373316887655463, + "grad_norm": 1.1284871101379395, + "learning_rate": 1.7028277278503841e-06, + "loss": 0.7607, + "step": 14347 + }, + { + "epoch": 0.737383081508891, + "grad_norm": 1.080053448677063, + "learning_rate": 1.7022021164936265e-06, + "loss": 0.7151, + "step": 14348 + }, + { + "epoch": 0.7374344742522356, + "grad_norm": 1.167927622795105, + "learning_rate": 1.7015765965052377e-06, + "loss": 0.7632, + "step": 14349 + }, + { + "epoch": 0.7374858669955803, + "grad_norm": 1.137816309928894, + "learning_rate": 1.700951167902551e-06, + "loss": 0.6982, + "step": 14350 + }, + { + "epoch": 0.7375372597389248, + "grad_norm": 0.7807933688163757, + "learning_rate": 1.700325830702893e-06, + "loss": 0.6673, + "step": 14351 + }, + { + "epoch": 0.7375886524822695, + "grad_norm": 1.0947612524032593, + "learning_rate": 1.6997005849235897e-06, + "loss": 0.6932, + "step": 14352 + }, + { + "epoch": 0.7376400452256141, + "grad_norm": 0.917332112789154, + "learning_rate": 1.6990754305819618e-06, + "loss": 0.6411, + "step": 14353 + }, + { + "epoch": 0.7376914379689588, + "grad_norm": 1.0428218841552734, + "learning_rate": 1.6984503676953333e-06, + "loss": 0.7369, + "step": 14354 + }, + { + "epoch": 0.7377428307123034, + "grad_norm": 1.125120997428894, + "learning_rate": 1.6978253962810204e-06, + "loss": 0.6916, + "step": 14355 + }, + { + "epoch": 0.737794223455648, + "grad_norm": 1.096920132637024, + "learning_rate": 1.6972005163563387e-06, + "loss": 0.6965, + "step": 14356 + }, + { + "epoch": 0.7378456161989927, + "grad_norm": 1.1084500551223755, + "learning_rate": 1.6965757279386003e-06, + "loss": 0.7592, + "step": 14357 + }, + { + "epoch": 0.7378970089423373, + "grad_norm": 1.150902271270752, + "learning_rate": 1.695951031045115e-06, + "loss": 0.6657, + "step": 14358 + }, + { + "epoch": 0.737948401685682, + "grad_norm": 1.1046724319458008, + "learning_rate": 1.6953264256931928e-06, + "loss": 0.6891, + "step": 14359 + }, + { + "epoch": 0.7379997944290266, + "grad_norm": 0.7009958028793335, + "learning_rate": 1.6947019119001378e-06, + "loss": 0.6395, + "step": 14360 + }, + { + "epoch": 0.7380511871723713, + "grad_norm": 0.7393320798873901, + "learning_rate": 1.694077489683253e-06, + "loss": 0.6307, + "step": 14361 + }, + { + "epoch": 0.7381025799157159, + "grad_norm": 1.0800014734268188, + "learning_rate": 1.6934531590598363e-06, + "loss": 0.711, + "step": 14362 + }, + { + "epoch": 0.7381539726590606, + "grad_norm": 0.7191575765609741, + "learning_rate": 1.6928289200471893e-06, + "loss": 0.6079, + "step": 14363 + }, + { + "epoch": 0.7382053654024052, + "grad_norm": 1.0938640832901, + "learning_rate": 1.6922047726626045e-06, + "loss": 0.7293, + "step": 14364 + }, + { + "epoch": 0.7382567581457499, + "grad_norm": 1.196191430091858, + "learning_rate": 1.6915807169233756e-06, + "loss": 0.7094, + "step": 14365 + }, + { + "epoch": 0.7383081508890944, + "grad_norm": 0.7167832851409912, + "learning_rate": 1.6909567528467897e-06, + "loss": 0.6422, + "step": 14366 + }, + { + "epoch": 0.738359543632439, + "grad_norm": 0.7350412011146545, + "learning_rate": 1.6903328804501385e-06, + "loss": 0.6648, + "step": 14367 + }, + { + "epoch": 0.7384109363757837, + "grad_norm": 0.6949954032897949, + "learning_rate": 1.6897090997507054e-06, + "loss": 0.7046, + "step": 14368 + }, + { + "epoch": 0.7384623291191283, + "grad_norm": 1.0517213344573975, + "learning_rate": 1.68908541076577e-06, + "loss": 0.7248, + "step": 14369 + }, + { + "epoch": 0.738513721862473, + "grad_norm": 1.0870717763900757, + "learning_rate": 1.6884618135126179e-06, + "loss": 0.7081, + "step": 14370 + }, + { + "epoch": 0.7385651146058176, + "grad_norm": 1.0994977951049805, + "learning_rate": 1.6878383080085203e-06, + "loss": 0.7038, + "step": 14371 + }, + { + "epoch": 0.7386165073491623, + "grad_norm": 1.1249569654464722, + "learning_rate": 1.6872148942707561e-06, + "loss": 0.6783, + "step": 14372 + }, + { + "epoch": 0.7386679000925069, + "grad_norm": 1.1715688705444336, + "learning_rate": 1.686591572316596e-06, + "loss": 0.6919, + "step": 14373 + }, + { + "epoch": 0.7387192928358516, + "grad_norm": 1.0909693241119385, + "learning_rate": 1.68596834216331e-06, + "loss": 0.6743, + "step": 14374 + }, + { + "epoch": 0.7387706855791962, + "grad_norm": 1.146000623703003, + "learning_rate": 1.6853452038281631e-06, + "loss": 0.7643, + "step": 14375 + }, + { + "epoch": 0.7388220783225409, + "grad_norm": 1.1377971172332764, + "learning_rate": 1.6847221573284234e-06, + "loss": 0.7406, + "step": 14376 + }, + { + "epoch": 0.7388734710658855, + "grad_norm": 0.9911730885505676, + "learning_rate": 1.6840992026813518e-06, + "loss": 0.6448, + "step": 14377 + }, + { + "epoch": 0.7389248638092302, + "grad_norm": 1.0707061290740967, + "learning_rate": 1.6834763399042054e-06, + "loss": 0.6871, + "step": 14378 + }, + { + "epoch": 0.7389762565525748, + "grad_norm": 1.0724838972091675, + "learning_rate": 1.6828535690142467e-06, + "loss": 0.7014, + "step": 14379 + }, + { + "epoch": 0.7390276492959195, + "grad_norm": 1.0569941997528076, + "learning_rate": 1.682230890028723e-06, + "loss": 0.641, + "step": 14380 + }, + { + "epoch": 0.7390790420392641, + "grad_norm": 1.000306248664856, + "learning_rate": 1.6816083029648916e-06, + "loss": 0.6758, + "step": 14381 + }, + { + "epoch": 0.7391304347826086, + "grad_norm": 1.0481690168380737, + "learning_rate": 1.6809858078399983e-06, + "loss": 0.7056, + "step": 14382 + }, + { + "epoch": 0.7391818275259533, + "grad_norm": 1.120600938796997, + "learning_rate": 1.6803634046712946e-06, + "loss": 0.6824, + "step": 14383 + }, + { + "epoch": 0.7392332202692979, + "grad_norm": 1.11635160446167, + "learning_rate": 1.6797410934760184e-06, + "loss": 0.7611, + "step": 14384 + }, + { + "epoch": 0.7392846130126426, + "grad_norm": 1.0553030967712402, + "learning_rate": 1.679118874271416e-06, + "loss": 0.6849, + "step": 14385 + }, + { + "epoch": 0.7393360057559872, + "grad_norm": 1.143857717514038, + "learning_rate": 1.6784967470747255e-06, + "loss": 0.7238, + "step": 14386 + }, + { + "epoch": 0.7393873984993319, + "grad_norm": 1.0616540908813477, + "learning_rate": 1.6778747119031812e-06, + "loss": 0.6871, + "step": 14387 + }, + { + "epoch": 0.7394387912426765, + "grad_norm": 0.7484264969825745, + "learning_rate": 1.6772527687740208e-06, + "loss": 0.6454, + "step": 14388 + }, + { + "epoch": 0.7394901839860212, + "grad_norm": 0.7689594030380249, + "learning_rate": 1.6766309177044743e-06, + "loss": 0.6393, + "step": 14389 + }, + { + "epoch": 0.7395415767293658, + "grad_norm": 1.110905647277832, + "learning_rate": 1.67600915871177e-06, + "loss": 0.6834, + "step": 14390 + }, + { + "epoch": 0.7395929694727105, + "grad_norm": 0.7399776577949524, + "learning_rate": 1.675387491813133e-06, + "loss": 0.6214, + "step": 14391 + }, + { + "epoch": 0.7396443622160551, + "grad_norm": 1.1431277990341187, + "learning_rate": 1.674765917025792e-06, + "loss": 0.7284, + "step": 14392 + }, + { + "epoch": 0.7396957549593998, + "grad_norm": 0.7543904781341553, + "learning_rate": 1.6741444343669616e-06, + "loss": 0.6676, + "step": 14393 + }, + { + "epoch": 0.7397471477027444, + "grad_norm": 1.1097379922866821, + "learning_rate": 1.6735230438538657e-06, + "loss": 0.6909, + "step": 14394 + }, + { + "epoch": 0.7397985404460891, + "grad_norm": 1.19999098777771, + "learning_rate": 1.6729017455037188e-06, + "loss": 0.6795, + "step": 14395 + }, + { + "epoch": 0.7398499331894337, + "grad_norm": 0.8068374395370483, + "learning_rate": 1.6722805393337328e-06, + "loss": 0.6355, + "step": 14396 + }, + { + "epoch": 0.7399013259327782, + "grad_norm": 1.103184461593628, + "learning_rate": 1.6716594253611218e-06, + "loss": 0.6873, + "step": 14397 + }, + { + "epoch": 0.7399527186761229, + "grad_norm": 1.0564887523651123, + "learning_rate": 1.6710384036030913e-06, + "loss": 0.6572, + "step": 14398 + }, + { + "epoch": 0.7400041114194675, + "grad_norm": 1.0392671823501587, + "learning_rate": 1.670417474076852e-06, + "loss": 0.6831, + "step": 14399 + }, + { + "epoch": 0.7400555041628122, + "grad_norm": 1.2166334390640259, + "learning_rate": 1.669796636799601e-06, + "loss": 0.702, + "step": 14400 + }, + { + "epoch": 0.7401068969061568, + "grad_norm": 1.0539792776107788, + "learning_rate": 1.6691758917885436e-06, + "loss": 0.7066, + "step": 14401 + }, + { + "epoch": 0.7401582896495015, + "grad_norm": 1.0950050354003906, + "learning_rate": 1.6685552390608773e-06, + "loss": 0.7173, + "step": 14402 + }, + { + "epoch": 0.7402096823928461, + "grad_norm": 1.070680022239685, + "learning_rate": 1.6679346786337968e-06, + "loss": 0.6797, + "step": 14403 + }, + { + "epoch": 0.7402610751361908, + "grad_norm": 1.0760188102722168, + "learning_rate": 1.6673142105244944e-06, + "loss": 0.7342, + "step": 14404 + }, + { + "epoch": 0.7403124678795354, + "grad_norm": 1.0499087572097778, + "learning_rate": 1.6666938347501639e-06, + "loss": 0.6629, + "step": 14405 + }, + { + "epoch": 0.7403638606228801, + "grad_norm": 1.176893711090088, + "learning_rate": 1.6660735513279908e-06, + "loss": 0.67, + "step": 14406 + }, + { + "epoch": 0.7404152533662247, + "grad_norm": 1.0847781896591187, + "learning_rate": 1.66545336027516e-06, + "loss": 0.7105, + "step": 14407 + }, + { + "epoch": 0.7404666461095694, + "grad_norm": 1.1286346912384033, + "learning_rate": 1.6648332616088591e-06, + "loss": 0.713, + "step": 14408 + }, + { + "epoch": 0.740518038852914, + "grad_norm": 1.0948981046676636, + "learning_rate": 1.6642132553462614e-06, + "loss": 0.739, + "step": 14409 + }, + { + "epoch": 0.7405694315962587, + "grad_norm": 1.1173588037490845, + "learning_rate": 1.6635933415045508e-06, + "loss": 0.7488, + "step": 14410 + }, + { + "epoch": 0.7406208243396033, + "grad_norm": 1.100874423980713, + "learning_rate": 1.6629735201008995e-06, + "loss": 0.6462, + "step": 14411 + }, + { + "epoch": 0.7406722170829478, + "grad_norm": 1.084777593612671, + "learning_rate": 1.6623537911524811e-06, + "loss": 0.7555, + "step": 14412 + }, + { + "epoch": 0.7407236098262925, + "grad_norm": 1.1028956174850464, + "learning_rate": 1.6617341546764637e-06, + "loss": 0.7103, + "step": 14413 + }, + { + "epoch": 0.7407750025696371, + "grad_norm": 1.0656365156173706, + "learning_rate": 1.6611146106900188e-06, + "loss": 0.703, + "step": 14414 + }, + { + "epoch": 0.7408263953129818, + "grad_norm": 1.5592055320739746, + "learning_rate": 1.6604951592103085e-06, + "loss": 0.6662, + "step": 14415 + }, + { + "epoch": 0.7408777880563264, + "grad_norm": 1.1778279542922974, + "learning_rate": 1.6598758002544968e-06, + "loss": 0.7409, + "step": 14416 + }, + { + "epoch": 0.7409291807996711, + "grad_norm": 0.9984576106071472, + "learning_rate": 1.659256533839741e-06, + "loss": 0.6356, + "step": 14417 + }, + { + "epoch": 0.7409805735430157, + "grad_norm": 1.0771429538726807, + "learning_rate": 1.658637359983201e-06, + "loss": 0.6912, + "step": 14418 + }, + { + "epoch": 0.7410319662863604, + "grad_norm": 1.1313060522079468, + "learning_rate": 1.6580182787020316e-06, + "loss": 0.6706, + "step": 14419 + }, + { + "epoch": 0.741083359029705, + "grad_norm": 1.1150556802749634, + "learning_rate": 1.6573992900133829e-06, + "loss": 0.7111, + "step": 14420 + }, + { + "epoch": 0.7411347517730497, + "grad_norm": 0.7771188616752625, + "learning_rate": 1.6567803939344079e-06, + "loss": 0.679, + "step": 14421 + }, + { + "epoch": 0.7411861445163943, + "grad_norm": 1.1444941759109497, + "learning_rate": 1.6561615904822492e-06, + "loss": 0.6856, + "step": 14422 + }, + { + "epoch": 0.741237537259739, + "grad_norm": 0.7549691200256348, + "learning_rate": 1.6555428796740547e-06, + "loss": 0.6639, + "step": 14423 + }, + { + "epoch": 0.7412889300030836, + "grad_norm": 1.0170756578445435, + "learning_rate": 1.6549242615269657e-06, + "loss": 0.6378, + "step": 14424 + }, + { + "epoch": 0.7413403227464282, + "grad_norm": 0.9287945628166199, + "learning_rate": 1.6543057360581205e-06, + "loss": 0.6573, + "step": 14425 + }, + { + "epoch": 0.7413917154897729, + "grad_norm": 1.1105414628982544, + "learning_rate": 1.6536873032846557e-06, + "loss": 0.7085, + "step": 14426 + }, + { + "epoch": 0.7414431082331174, + "grad_norm": 1.11565363407135, + "learning_rate": 1.6530689632237074e-06, + "loss": 0.7008, + "step": 14427 + }, + { + "epoch": 0.7414945009764621, + "grad_norm": 1.0347980260849, + "learning_rate": 1.6524507158924059e-06, + "loss": 0.6648, + "step": 14428 + }, + { + "epoch": 0.7415458937198067, + "grad_norm": 1.012943983078003, + "learning_rate": 1.6518325613078796e-06, + "loss": 0.7093, + "step": 14429 + }, + { + "epoch": 0.7415972864631514, + "grad_norm": 0.7016245722770691, + "learning_rate": 1.651214499487257e-06, + "loss": 0.6068, + "step": 14430 + }, + { + "epoch": 0.741648679206496, + "grad_norm": 1.0012305974960327, + "learning_rate": 1.6505965304476612e-06, + "loss": 0.6458, + "step": 14431 + }, + { + "epoch": 0.7417000719498407, + "grad_norm": 0.7446156740188599, + "learning_rate": 1.6499786542062135e-06, + "loss": 0.6415, + "step": 14432 + }, + { + "epoch": 0.7417514646931853, + "grad_norm": 1.1068799495697021, + "learning_rate": 1.6493608707800307e-06, + "loss": 0.7141, + "step": 14433 + }, + { + "epoch": 0.74180285743653, + "grad_norm": 1.1145105361938477, + "learning_rate": 1.648743180186234e-06, + "loss": 0.693, + "step": 14434 + }, + { + "epoch": 0.7418542501798746, + "grad_norm": 0.6873275637626648, + "learning_rate": 1.6481255824419312e-06, + "loss": 0.6775, + "step": 14435 + }, + { + "epoch": 0.7419056429232193, + "grad_norm": 0.6833871603012085, + "learning_rate": 1.647508077564237e-06, + "loss": 0.6518, + "step": 14436 + }, + { + "epoch": 0.7419570356665639, + "grad_norm": 1.0355788469314575, + "learning_rate": 1.6468906655702598e-06, + "loss": 0.6826, + "step": 14437 + }, + { + "epoch": 0.7420084284099085, + "grad_norm": 1.0751303434371948, + "learning_rate": 1.646273346477103e-06, + "loss": 0.6574, + "step": 14438 + }, + { + "epoch": 0.7420598211532532, + "grad_norm": 0.7193439602851868, + "learning_rate": 1.6456561203018735e-06, + "loss": 0.6668, + "step": 14439 + }, + { + "epoch": 0.7421112138965978, + "grad_norm": 0.657912015914917, + "learning_rate": 1.6450389870616701e-06, + "loss": 0.6613, + "step": 14440 + }, + { + "epoch": 0.7421626066399425, + "grad_norm": 1.0806620121002197, + "learning_rate": 1.644421946773591e-06, + "loss": 0.6639, + "step": 14441 + }, + { + "epoch": 0.742213999383287, + "grad_norm": 1.0534764528274536, + "learning_rate": 1.6438049994547312e-06, + "loss": 0.6853, + "step": 14442 + }, + { + "epoch": 0.7422653921266317, + "grad_norm": 1.0801892280578613, + "learning_rate": 1.6431881451221876e-06, + "loss": 0.6718, + "step": 14443 + }, + { + "epoch": 0.7423167848699763, + "grad_norm": 0.8470436334609985, + "learning_rate": 1.6425713837930446e-06, + "loss": 0.6289, + "step": 14444 + }, + { + "epoch": 0.742368177613321, + "grad_norm": 1.0590234994888306, + "learning_rate": 1.6419547154843945e-06, + "loss": 0.7243, + "step": 14445 + }, + { + "epoch": 0.7424195703566656, + "grad_norm": 1.1374567747116089, + "learning_rate": 1.6413381402133199e-06, + "loss": 0.7092, + "step": 14446 + }, + { + "epoch": 0.7424709631000103, + "grad_norm": 1.0237888097763062, + "learning_rate": 1.640721657996907e-06, + "loss": 0.7368, + "step": 14447 + }, + { + "epoch": 0.7425223558433549, + "grad_norm": 0.7743707299232483, + "learning_rate": 1.640105268852234e-06, + "loss": 0.6768, + "step": 14448 + }, + { + "epoch": 0.7425737485866996, + "grad_norm": 1.0602787733078003, + "learning_rate": 1.6394889727963765e-06, + "loss": 0.7498, + "step": 14449 + }, + { + "epoch": 0.7426251413300442, + "grad_norm": 1.0571208000183105, + "learning_rate": 1.6388727698464151e-06, + "loss": 0.6865, + "step": 14450 + }, + { + "epoch": 0.7426765340733888, + "grad_norm": 1.067276120185852, + "learning_rate": 1.638256660019415e-06, + "loss": 0.7306, + "step": 14451 + }, + { + "epoch": 0.7427279268167335, + "grad_norm": 1.0972620248794556, + "learning_rate": 1.6376406433324521e-06, + "loss": 0.7325, + "step": 14452 + }, + { + "epoch": 0.7427793195600781, + "grad_norm": 1.0665996074676514, + "learning_rate": 1.6370247198025908e-06, + "loss": 0.6595, + "step": 14453 + }, + { + "epoch": 0.7428307123034228, + "grad_norm": 1.0487874746322632, + "learning_rate": 1.6364088894468966e-06, + "loss": 0.6999, + "step": 14454 + }, + { + "epoch": 0.7428821050467674, + "grad_norm": 1.130619764328003, + "learning_rate": 1.6357931522824294e-06, + "loss": 0.7126, + "step": 14455 + }, + { + "epoch": 0.7429334977901121, + "grad_norm": 1.067199468612671, + "learning_rate": 1.6351775083262527e-06, + "loss": 0.7005, + "step": 14456 + }, + { + "epoch": 0.7429848905334566, + "grad_norm": 0.9996190667152405, + "learning_rate": 1.6345619575954213e-06, + "loss": 0.7012, + "step": 14457 + }, + { + "epoch": 0.7430362832768013, + "grad_norm": 1.0502296686172485, + "learning_rate": 1.6339465001069881e-06, + "loss": 0.6348, + "step": 14458 + }, + { + "epoch": 0.7430876760201459, + "grad_norm": 1.1761276721954346, + "learning_rate": 1.633331135878008e-06, + "loss": 0.694, + "step": 14459 + }, + { + "epoch": 0.7431390687634906, + "grad_norm": 0.6698073148727417, + "learning_rate": 1.6327158649255292e-06, + "loss": 0.6386, + "step": 14460 + }, + { + "epoch": 0.7431904615068352, + "grad_norm": 1.075586199760437, + "learning_rate": 1.6321006872665973e-06, + "loss": 0.7532, + "step": 14461 + }, + { + "epoch": 0.7432418542501799, + "grad_norm": 1.1371773481369019, + "learning_rate": 1.6314856029182551e-06, + "loss": 0.7344, + "step": 14462 + }, + { + "epoch": 0.7432932469935245, + "grad_norm": 0.7527408599853516, + "learning_rate": 1.6308706118975497e-06, + "loss": 0.6385, + "step": 14463 + }, + { + "epoch": 0.7433446397368692, + "grad_norm": 1.0566409826278687, + "learning_rate": 1.6302557142215126e-06, + "loss": 0.744, + "step": 14464 + }, + { + "epoch": 0.7433960324802138, + "grad_norm": 1.1459152698516846, + "learning_rate": 1.6296409099071847e-06, + "loss": 0.656, + "step": 14465 + }, + { + "epoch": 0.7434474252235584, + "grad_norm": 1.0621795654296875, + "learning_rate": 1.6290261989715988e-06, + "loss": 0.7039, + "step": 14466 + }, + { + "epoch": 0.7434988179669031, + "grad_norm": 1.0709971189498901, + "learning_rate": 1.6284115814317858e-06, + "loss": 0.6479, + "step": 14467 + }, + { + "epoch": 0.7435502107102477, + "grad_norm": 1.2010709047317505, + "learning_rate": 1.6277970573047725e-06, + "loss": 0.6964, + "step": 14468 + }, + { + "epoch": 0.7436016034535924, + "grad_norm": 1.0765489339828491, + "learning_rate": 1.6271826266075879e-06, + "loss": 0.6754, + "step": 14469 + }, + { + "epoch": 0.743652996196937, + "grad_norm": 1.0910835266113281, + "learning_rate": 1.6265682893572542e-06, + "loss": 0.6746, + "step": 14470 + }, + { + "epoch": 0.7437043889402817, + "grad_norm": 1.1003977060317993, + "learning_rate": 1.6259540455707905e-06, + "loss": 0.6998, + "step": 14471 + }, + { + "epoch": 0.7437557816836263, + "grad_norm": 1.060490369796753, + "learning_rate": 1.6253398952652195e-06, + "loss": 0.6764, + "step": 14472 + }, + { + "epoch": 0.7438071744269709, + "grad_norm": 0.7307368516921997, + "learning_rate": 1.6247258384575498e-06, + "loss": 0.6816, + "step": 14473 + }, + { + "epoch": 0.7438585671703155, + "grad_norm": 1.0595277547836304, + "learning_rate": 1.6241118751648e-06, + "loss": 0.6952, + "step": 14474 + }, + { + "epoch": 0.7439099599136602, + "grad_norm": 1.1569379568099976, + "learning_rate": 1.6234980054039772e-06, + "loss": 0.6301, + "step": 14475 + }, + { + "epoch": 0.7439613526570048, + "grad_norm": 1.0260765552520752, + "learning_rate": 1.6228842291920944e-06, + "loss": 0.6693, + "step": 14476 + }, + { + "epoch": 0.7440127454003495, + "grad_norm": 0.7341328263282776, + "learning_rate": 1.622270546546149e-06, + "loss": 0.6419, + "step": 14477 + }, + { + "epoch": 0.7440641381436941, + "grad_norm": 1.0996835231781006, + "learning_rate": 1.6216569574831498e-06, + "loss": 0.718, + "step": 14478 + }, + { + "epoch": 0.7441155308870387, + "grad_norm": 1.1043181419372559, + "learning_rate": 1.6210434620200943e-06, + "loss": 0.712, + "step": 14479 + }, + { + "epoch": 0.7441669236303834, + "grad_norm": 1.1032187938690186, + "learning_rate": 1.6204300601739793e-06, + "loss": 0.7516, + "step": 14480 + }, + { + "epoch": 0.744218316373728, + "grad_norm": 1.1184886693954468, + "learning_rate": 1.619816751961802e-06, + "loss": 0.7377, + "step": 14481 + }, + { + "epoch": 0.7442697091170727, + "grad_norm": 1.0468957424163818, + "learning_rate": 1.6192035374005527e-06, + "loss": 0.6952, + "step": 14482 + }, + { + "epoch": 0.7443211018604173, + "grad_norm": 1.1374778747558594, + "learning_rate": 1.6185904165072224e-06, + "loss": 0.6656, + "step": 14483 + }, + { + "epoch": 0.744372494603762, + "grad_norm": 1.1544244289398193, + "learning_rate": 1.6179773892987954e-06, + "loss": 0.7108, + "step": 14484 + }, + { + "epoch": 0.7444238873471066, + "grad_norm": 1.0571223497390747, + "learning_rate": 1.6173644557922618e-06, + "loss": 0.7322, + "step": 14485 + }, + { + "epoch": 0.7444752800904513, + "grad_norm": 1.1012685298919678, + "learning_rate": 1.6167516160045966e-06, + "loss": 0.7105, + "step": 14486 + }, + { + "epoch": 0.7445266728337959, + "grad_norm": 1.0537281036376953, + "learning_rate": 1.6161388699527837e-06, + "loss": 0.7002, + "step": 14487 + }, + { + "epoch": 0.7445780655771405, + "grad_norm": 1.0902161598205566, + "learning_rate": 1.6155262176537984e-06, + "loss": 0.6969, + "step": 14488 + }, + { + "epoch": 0.7446294583204851, + "grad_norm": 0.7881810069084167, + "learning_rate": 1.6149136591246128e-06, + "loss": 0.6332, + "step": 14489 + }, + { + "epoch": 0.7446808510638298, + "grad_norm": 1.1265263557434082, + "learning_rate": 1.6143011943822023e-06, + "loss": 0.7117, + "step": 14490 + }, + { + "epoch": 0.7447322438071744, + "grad_norm": 1.0892252922058105, + "learning_rate": 1.6136888234435316e-06, + "loss": 0.7196, + "step": 14491 + }, + { + "epoch": 0.744783636550519, + "grad_norm": 1.1547425985336304, + "learning_rate": 1.6130765463255727e-06, + "loss": 0.7087, + "step": 14492 + }, + { + "epoch": 0.7448350292938637, + "grad_norm": 1.1573731899261475, + "learning_rate": 1.6124643630452824e-06, + "loss": 0.6912, + "step": 14493 + }, + { + "epoch": 0.7448864220372083, + "grad_norm": 1.1074811220169067, + "learning_rate": 1.6118522736196268e-06, + "loss": 0.7017, + "step": 14494 + }, + { + "epoch": 0.744937814780553, + "grad_norm": 1.10630464553833, + "learning_rate": 1.6112402780655628e-06, + "loss": 0.7021, + "step": 14495 + }, + { + "epoch": 0.7449892075238976, + "grad_norm": 1.064739465713501, + "learning_rate": 1.6106283764000457e-06, + "loss": 0.6311, + "step": 14496 + }, + { + "epoch": 0.7450406002672423, + "grad_norm": 1.0302397012710571, + "learning_rate": 1.6100165686400276e-06, + "loss": 0.6682, + "step": 14497 + }, + { + "epoch": 0.7450919930105869, + "grad_norm": 1.0948736667633057, + "learning_rate": 1.6094048548024627e-06, + "loss": 0.7271, + "step": 14498 + }, + { + "epoch": 0.7451433857539316, + "grad_norm": 1.002073049545288, + "learning_rate": 1.6087932349042972e-06, + "loss": 0.6741, + "step": 14499 + }, + { + "epoch": 0.7451947784972762, + "grad_norm": 0.7170271873474121, + "learning_rate": 1.6081817089624747e-06, + "loss": 0.6165, + "step": 14500 + }, + { + "epoch": 0.7452461712406209, + "grad_norm": 1.085148572921753, + "learning_rate": 1.6075702769939428e-06, + "loss": 0.6686, + "step": 14501 + }, + { + "epoch": 0.7452975639839655, + "grad_norm": 1.159510612487793, + "learning_rate": 1.6069589390156354e-06, + "loss": 0.6885, + "step": 14502 + }, + { + "epoch": 0.74534895672731, + "grad_norm": 0.6998153924942017, + "learning_rate": 1.6063476950444956e-06, + "loss": 0.6614, + "step": 14503 + }, + { + "epoch": 0.7454003494706547, + "grad_norm": 1.1275975704193115, + "learning_rate": 1.6057365450974565e-06, + "loss": 0.6925, + "step": 14504 + }, + { + "epoch": 0.7454517422139993, + "grad_norm": 0.7556573748588562, + "learning_rate": 1.6051254891914503e-06, + "loss": 0.6465, + "step": 14505 + }, + { + "epoch": 0.745503134957344, + "grad_norm": 1.081508755683899, + "learning_rate": 1.6045145273434049e-06, + "loss": 0.6697, + "step": 14506 + }, + { + "epoch": 0.7455545277006886, + "grad_norm": 1.098134160041809, + "learning_rate": 1.6039036595702516e-06, + "loss": 0.6929, + "step": 14507 + }, + { + "epoch": 0.7456059204440333, + "grad_norm": 1.0652738809585571, + "learning_rate": 1.6032928858889129e-06, + "loss": 0.7027, + "step": 14508 + }, + { + "epoch": 0.7456573131873779, + "grad_norm": 1.0449364185333252, + "learning_rate": 1.6026822063163094e-06, + "loss": 0.714, + "step": 14509 + }, + { + "epoch": 0.7457087059307226, + "grad_norm": 1.1428554058074951, + "learning_rate": 1.6020716208693638e-06, + "loss": 0.7099, + "step": 14510 + }, + { + "epoch": 0.7457600986740672, + "grad_norm": 1.1615890264511108, + "learning_rate": 1.6014611295649913e-06, + "loss": 0.6992, + "step": 14511 + }, + { + "epoch": 0.7458114914174119, + "grad_norm": 1.09706711769104, + "learning_rate": 1.6008507324201057e-06, + "loss": 0.7401, + "step": 14512 + }, + { + "epoch": 0.7458628841607565, + "grad_norm": 1.0818554162979126, + "learning_rate": 1.6002404294516172e-06, + "loss": 0.6539, + "step": 14513 + }, + { + "epoch": 0.7459142769041012, + "grad_norm": 1.068363904953003, + "learning_rate": 1.5996302206764397e-06, + "loss": 0.696, + "step": 14514 + }, + { + "epoch": 0.7459656696474458, + "grad_norm": 1.1185141801834106, + "learning_rate": 1.5990201061114735e-06, + "loss": 0.6795, + "step": 14515 + }, + { + "epoch": 0.7460170623907905, + "grad_norm": 1.102892279624939, + "learning_rate": 1.5984100857736262e-06, + "loss": 0.6981, + "step": 14516 + }, + { + "epoch": 0.7460684551341351, + "grad_norm": 1.1448637247085571, + "learning_rate": 1.5978001596797982e-06, + "loss": 0.7107, + "step": 14517 + }, + { + "epoch": 0.7461198478774796, + "grad_norm": 1.0524957180023193, + "learning_rate": 1.5971903278468876e-06, + "loss": 0.7193, + "step": 14518 + }, + { + "epoch": 0.7461712406208243, + "grad_norm": 1.0600471496582031, + "learning_rate": 1.5965805902917887e-06, + "loss": 0.709, + "step": 14519 + }, + { + "epoch": 0.7462226333641689, + "grad_norm": 0.7105703353881836, + "learning_rate": 1.5959709470313983e-06, + "loss": 0.6114, + "step": 14520 + }, + { + "epoch": 0.7462740261075136, + "grad_norm": 1.126447081565857, + "learning_rate": 1.595361398082605e-06, + "loss": 0.6907, + "step": 14521 + }, + { + "epoch": 0.7463254188508582, + "grad_norm": 0.76372891664505, + "learning_rate": 1.594751943462296e-06, + "loss": 0.6258, + "step": 14522 + }, + { + "epoch": 0.7463768115942029, + "grad_norm": 1.1397889852523804, + "learning_rate": 1.5941425831873598e-06, + "loss": 0.7151, + "step": 14523 + }, + { + "epoch": 0.7464282043375475, + "grad_norm": 1.0411078929901123, + "learning_rate": 1.593533317274677e-06, + "loss": 0.6991, + "step": 14524 + }, + { + "epoch": 0.7464795970808922, + "grad_norm": 1.0593031644821167, + "learning_rate": 1.5929241457411287e-06, + "loss": 0.6751, + "step": 14525 + }, + { + "epoch": 0.7465309898242368, + "grad_norm": 1.074785828590393, + "learning_rate": 1.5923150686035904e-06, + "loss": 0.7108, + "step": 14526 + }, + { + "epoch": 0.7465823825675815, + "grad_norm": 1.077697992324829, + "learning_rate": 1.5917060858789425e-06, + "loss": 0.6936, + "step": 14527 + }, + { + "epoch": 0.7466337753109261, + "grad_norm": 1.0711596012115479, + "learning_rate": 1.59109719758405e-06, + "loss": 0.7351, + "step": 14528 + }, + { + "epoch": 0.7466851680542708, + "grad_norm": 1.1203749179840088, + "learning_rate": 1.5904884037357881e-06, + "loss": 0.7026, + "step": 14529 + }, + { + "epoch": 0.7467365607976154, + "grad_norm": 1.076998233795166, + "learning_rate": 1.5898797043510222e-06, + "loss": 0.7283, + "step": 14530 + }, + { + "epoch": 0.7467879535409601, + "grad_norm": 1.0998997688293457, + "learning_rate": 1.589271099446616e-06, + "loss": 0.68, + "step": 14531 + }, + { + "epoch": 0.7468393462843047, + "grad_norm": 0.846117377281189, + "learning_rate": 1.588662589039433e-06, + "loss": 0.6679, + "step": 14532 + }, + { + "epoch": 0.7468907390276492, + "grad_norm": 1.0174329280853271, + "learning_rate": 1.5880541731463328e-06, + "loss": 0.6802, + "step": 14533 + }, + { + "epoch": 0.7469421317709939, + "grad_norm": 0.7177878618240356, + "learning_rate": 1.5874458517841706e-06, + "loss": 0.645, + "step": 14534 + }, + { + "epoch": 0.7469935245143385, + "grad_norm": 1.0515228509902954, + "learning_rate": 1.5868376249697997e-06, + "loss": 0.6867, + "step": 14535 + }, + { + "epoch": 0.7470449172576832, + "grad_norm": 1.0683770179748535, + "learning_rate": 1.5862294927200767e-06, + "loss": 0.7196, + "step": 14536 + }, + { + "epoch": 0.7470963100010278, + "grad_norm": 1.0987048149108887, + "learning_rate": 1.5856214550518428e-06, + "loss": 0.72, + "step": 14537 + }, + { + "epoch": 0.7471477027443725, + "grad_norm": 1.0804156064987183, + "learning_rate": 1.5850135119819493e-06, + "loss": 0.6493, + "step": 14538 + }, + { + "epoch": 0.7471990954877171, + "grad_norm": 1.1463483572006226, + "learning_rate": 1.5844056635272375e-06, + "loss": 0.7099, + "step": 14539 + }, + { + "epoch": 0.7472504882310618, + "grad_norm": 1.0617399215698242, + "learning_rate": 1.5837979097045513e-06, + "loss": 0.7329, + "step": 14540 + }, + { + "epoch": 0.7473018809744064, + "grad_norm": 1.160759687423706, + "learning_rate": 1.5831902505307266e-06, + "loss": 0.6865, + "step": 14541 + }, + { + "epoch": 0.7473532737177511, + "grad_norm": 1.0092291831970215, + "learning_rate": 1.5825826860225984e-06, + "loss": 0.6861, + "step": 14542 + }, + { + "epoch": 0.7474046664610957, + "grad_norm": 0.7182475328445435, + "learning_rate": 1.5819752161970041e-06, + "loss": 0.6849, + "step": 14543 + }, + { + "epoch": 0.7474560592044404, + "grad_norm": 1.113362431526184, + "learning_rate": 1.5813678410707683e-06, + "loss": 0.6957, + "step": 14544 + }, + { + "epoch": 0.747507451947785, + "grad_norm": 1.0936427116394043, + "learning_rate": 1.5807605606607234e-06, + "loss": 0.7136, + "step": 14545 + }, + { + "epoch": 0.7475588446911297, + "grad_norm": 1.088171362876892, + "learning_rate": 1.5801533749836928e-06, + "loss": 0.7113, + "step": 14546 + }, + { + "epoch": 0.7476102374344743, + "grad_norm": 1.0859395265579224, + "learning_rate": 1.5795462840564995e-06, + "loss": 0.7175, + "step": 14547 + }, + { + "epoch": 0.7476616301778188, + "grad_norm": 1.073412299156189, + "learning_rate": 1.5789392878959613e-06, + "loss": 0.7141, + "step": 14548 + }, + { + "epoch": 0.7477130229211635, + "grad_norm": 1.0909572839736938, + "learning_rate": 1.5783323865189e-06, + "loss": 0.6582, + "step": 14549 + }, + { + "epoch": 0.7477644156645081, + "grad_norm": 1.123697280883789, + "learning_rate": 1.577725579942127e-06, + "loss": 0.694, + "step": 14550 + }, + { + "epoch": 0.7478158084078528, + "grad_norm": 1.052595615386963, + "learning_rate": 1.577118868182454e-06, + "loss": 0.664, + "step": 14551 + }, + { + "epoch": 0.7478672011511974, + "grad_norm": 1.0164849758148193, + "learning_rate": 1.576512251256695e-06, + "loss": 0.7173, + "step": 14552 + }, + { + "epoch": 0.7479185938945421, + "grad_norm": 1.3022266626358032, + "learning_rate": 1.5759057291816504e-06, + "loss": 0.7396, + "step": 14553 + }, + { + "epoch": 0.7479699866378867, + "grad_norm": 1.105928659439087, + "learning_rate": 1.5752993019741293e-06, + "loss": 0.7448, + "step": 14554 + }, + { + "epoch": 0.7480213793812314, + "grad_norm": 0.6971480250358582, + "learning_rate": 1.5746929696509295e-06, + "loss": 0.6364, + "step": 14555 + }, + { + "epoch": 0.748072772124576, + "grad_norm": 1.050684928894043, + "learning_rate": 1.574086732228856e-06, + "loss": 0.7055, + "step": 14556 + }, + { + "epoch": 0.7481241648679207, + "grad_norm": 0.8172042965888977, + "learning_rate": 1.5734805897246975e-06, + "loss": 0.6546, + "step": 14557 + }, + { + "epoch": 0.7481755576112653, + "grad_norm": 1.0912295579910278, + "learning_rate": 1.5728745421552533e-06, + "loss": 0.6857, + "step": 14558 + }, + { + "epoch": 0.74822695035461, + "grad_norm": 1.078141212463379, + "learning_rate": 1.572268589537313e-06, + "loss": 0.6535, + "step": 14559 + }, + { + "epoch": 0.7482783430979546, + "grad_norm": 1.1294174194335938, + "learning_rate": 1.5716627318876625e-06, + "loss": 0.7245, + "step": 14560 + }, + { + "epoch": 0.7483297358412992, + "grad_norm": 1.2866188287734985, + "learning_rate": 1.5710569692230915e-06, + "loss": 0.6683, + "step": 14561 + }, + { + "epoch": 0.7483811285846439, + "grad_norm": 1.177375316619873, + "learning_rate": 1.5704513015603816e-06, + "loss": 0.7064, + "step": 14562 + }, + { + "epoch": 0.7484325213279885, + "grad_norm": 1.1644792556762695, + "learning_rate": 1.5698457289163133e-06, + "loss": 0.7272, + "step": 14563 + }, + { + "epoch": 0.7484839140713331, + "grad_norm": 1.1118727922439575, + "learning_rate": 1.5692402513076626e-06, + "loss": 0.7249, + "step": 14564 + }, + { + "epoch": 0.7485353068146777, + "grad_norm": 1.0487784147262573, + "learning_rate": 1.5686348687512104e-06, + "loss": 0.6323, + "step": 14565 + }, + { + "epoch": 0.7485866995580224, + "grad_norm": 1.0837593078613281, + "learning_rate": 1.568029581263722e-06, + "loss": 0.634, + "step": 14566 + }, + { + "epoch": 0.748638092301367, + "grad_norm": 0.6909868121147156, + "learning_rate": 1.5674243888619723e-06, + "loss": 0.6632, + "step": 14567 + }, + { + "epoch": 0.7486894850447117, + "grad_norm": 1.0808559656143188, + "learning_rate": 1.566819291562726e-06, + "loss": 0.697, + "step": 14568 + }, + { + "epoch": 0.7487408777880563, + "grad_norm": 1.1089955568313599, + "learning_rate": 1.5662142893827526e-06, + "loss": 0.657, + "step": 14569 + }, + { + "epoch": 0.748792270531401, + "grad_norm": 1.077532172203064, + "learning_rate": 1.5656093823388075e-06, + "loss": 0.6939, + "step": 14570 + }, + { + "epoch": 0.7488436632747456, + "grad_norm": 1.0215346813201904, + "learning_rate": 1.5650045704476551e-06, + "loss": 0.655, + "step": 14571 + }, + { + "epoch": 0.7488950560180903, + "grad_norm": 1.1013273000717163, + "learning_rate": 1.5643998537260508e-06, + "loss": 0.7004, + "step": 14572 + }, + { + "epoch": 0.7489464487614349, + "grad_norm": 1.0711328983306885, + "learning_rate": 1.5637952321907468e-06, + "loss": 0.7203, + "step": 14573 + }, + { + "epoch": 0.7489978415047795, + "grad_norm": 1.0642130374908447, + "learning_rate": 1.5631907058584984e-06, + "loss": 0.6854, + "step": 14574 + }, + { + "epoch": 0.7490492342481242, + "grad_norm": 1.0488909482955933, + "learning_rate": 1.5625862747460525e-06, + "loss": 0.6764, + "step": 14575 + }, + { + "epoch": 0.7491006269914688, + "grad_norm": 1.0491408109664917, + "learning_rate": 1.5619819388701556e-06, + "loss": 0.6731, + "step": 14576 + }, + { + "epoch": 0.7491520197348135, + "grad_norm": 0.727572500705719, + "learning_rate": 1.5613776982475498e-06, + "loss": 0.6767, + "step": 14577 + }, + { + "epoch": 0.7492034124781581, + "grad_norm": 1.0920815467834473, + "learning_rate": 1.5607735528949809e-06, + "loss": 0.6964, + "step": 14578 + }, + { + "epoch": 0.7492548052215027, + "grad_norm": 1.144430160522461, + "learning_rate": 1.5601695028291803e-06, + "loss": 0.7364, + "step": 14579 + }, + { + "epoch": 0.7493061979648473, + "grad_norm": 1.1286284923553467, + "learning_rate": 1.5595655480668892e-06, + "loss": 0.769, + "step": 14580 + }, + { + "epoch": 0.749357590708192, + "grad_norm": 1.0696187019348145, + "learning_rate": 1.5589616886248392e-06, + "loss": 0.6849, + "step": 14581 + }, + { + "epoch": 0.7494089834515366, + "grad_norm": 1.0763758420944214, + "learning_rate": 1.558357924519759e-06, + "loss": 0.736, + "step": 14582 + }, + { + "epoch": 0.7494603761948813, + "grad_norm": 1.0998051166534424, + "learning_rate": 1.5577542557683795e-06, + "loss": 0.766, + "step": 14583 + }, + { + "epoch": 0.7495117689382259, + "grad_norm": 1.0414323806762695, + "learning_rate": 1.5571506823874222e-06, + "loss": 0.7192, + "step": 14584 + }, + { + "epoch": 0.7495631616815706, + "grad_norm": 0.7306990623474121, + "learning_rate": 1.5565472043936153e-06, + "loss": 0.648, + "step": 14585 + }, + { + "epoch": 0.7496145544249152, + "grad_norm": 1.2100245952606201, + "learning_rate": 1.5559438218036715e-06, + "loss": 0.703, + "step": 14586 + }, + { + "epoch": 0.7496659471682598, + "grad_norm": 1.096915602684021, + "learning_rate": 1.5553405346343136e-06, + "loss": 0.7429, + "step": 14587 + }, + { + "epoch": 0.7497173399116045, + "grad_norm": 1.0621086359024048, + "learning_rate": 1.5547373429022538e-06, + "loss": 0.7181, + "step": 14588 + }, + { + "epoch": 0.7497687326549491, + "grad_norm": 1.1159355640411377, + "learning_rate": 1.5541342466242048e-06, + "loss": 0.7111, + "step": 14589 + }, + { + "epoch": 0.7498201253982938, + "grad_norm": 1.1127530336380005, + "learning_rate": 1.553531245816874e-06, + "loss": 0.716, + "step": 14590 + }, + { + "epoch": 0.7498715181416384, + "grad_norm": 1.049605369567871, + "learning_rate": 1.5529283404969708e-06, + "loss": 0.7121, + "step": 14591 + }, + { + "epoch": 0.7499229108849831, + "grad_norm": 1.033848762512207, + "learning_rate": 1.5523255306811985e-06, + "loss": 0.6896, + "step": 14592 + }, + { + "epoch": 0.7499743036283277, + "grad_norm": 1.0976485013961792, + "learning_rate": 1.5517228163862564e-06, + "loss": 0.7484, + "step": 14593 + }, + { + "epoch": 0.7500256963716723, + "grad_norm": 0.80330491065979, + "learning_rate": 1.5511201976288482e-06, + "loss": 0.654, + "step": 14594 + }, + { + "epoch": 0.7500770891150169, + "grad_norm": 1.0927814245224, + "learning_rate": 1.5505176744256635e-06, + "loss": 0.7452, + "step": 14595 + }, + { + "epoch": 0.7501284818583616, + "grad_norm": 1.0446815490722656, + "learning_rate": 1.5499152467933997e-06, + "loss": 0.6778, + "step": 14596 + }, + { + "epoch": 0.7501798746017062, + "grad_norm": 1.1000257730484009, + "learning_rate": 1.5493129147487472e-06, + "loss": 0.7208, + "step": 14597 + }, + { + "epoch": 0.7502312673450509, + "grad_norm": 1.0486077070236206, + "learning_rate": 1.5487106783083938e-06, + "loss": 0.7552, + "step": 14598 + }, + { + "epoch": 0.7502826600883955, + "grad_norm": 1.0124183893203735, + "learning_rate": 1.548108537489023e-06, + "loss": 0.7464, + "step": 14599 + }, + { + "epoch": 0.7503340528317402, + "grad_norm": 1.1284147500991821, + "learning_rate": 1.5475064923073218e-06, + "loss": 0.7104, + "step": 14600 + }, + { + "epoch": 0.7503854455750848, + "grad_norm": 1.072666049003601, + "learning_rate": 1.5469045427799673e-06, + "loss": 0.7136, + "step": 14601 + }, + { + "epoch": 0.7504368383184294, + "grad_norm": 1.0590327978134155, + "learning_rate": 1.5463026889236366e-06, + "loss": 0.6623, + "step": 14602 + }, + { + "epoch": 0.7504882310617741, + "grad_norm": 0.6954993009567261, + "learning_rate": 1.5457009307550075e-06, + "loss": 0.6194, + "step": 14603 + }, + { + "epoch": 0.7505396238051187, + "grad_norm": 1.0340416431427002, + "learning_rate": 1.5450992682907505e-06, + "loss": 0.6904, + "step": 14604 + }, + { + "epoch": 0.7505910165484634, + "grad_norm": 0.7251468896865845, + "learning_rate": 1.5444977015475348e-06, + "loss": 0.6404, + "step": 14605 + }, + { + "epoch": 0.750642409291808, + "grad_norm": 0.8599929809570312, + "learning_rate": 1.5438962305420268e-06, + "loss": 0.649, + "step": 14606 + }, + { + "epoch": 0.7506938020351527, + "grad_norm": 1.076378583908081, + "learning_rate": 1.543294855290895e-06, + "loss": 0.6665, + "step": 14607 + }, + { + "epoch": 0.7507451947784973, + "grad_norm": 1.1116105318069458, + "learning_rate": 1.542693575810794e-06, + "loss": 0.6896, + "step": 14608 + }, + { + "epoch": 0.7507965875218419, + "grad_norm": 1.1071462631225586, + "learning_rate": 1.5420923921183883e-06, + "loss": 0.716, + "step": 14609 + }, + { + "epoch": 0.7508479802651865, + "grad_norm": 1.0668339729309082, + "learning_rate": 1.5414913042303326e-06, + "loss": 0.6528, + "step": 14610 + }, + { + "epoch": 0.7508993730085312, + "grad_norm": 1.0244096517562866, + "learning_rate": 1.540890312163279e-06, + "loss": 0.6694, + "step": 14611 + }, + { + "epoch": 0.7509507657518758, + "grad_norm": 1.0975127220153809, + "learning_rate": 1.5402894159338815e-06, + "loss": 0.6952, + "step": 14612 + }, + { + "epoch": 0.7510021584952205, + "grad_norm": 1.1539146900177002, + "learning_rate": 1.5396886155587865e-06, + "loss": 0.6837, + "step": 14613 + }, + { + "epoch": 0.7510535512385651, + "grad_norm": 1.0508637428283691, + "learning_rate": 1.539087911054641e-06, + "loss": 0.7746, + "step": 14614 + }, + { + "epoch": 0.7511049439819097, + "grad_norm": 0.8155038952827454, + "learning_rate": 1.5384873024380847e-06, + "loss": 0.603, + "step": 14615 + }, + { + "epoch": 0.7511563367252544, + "grad_norm": 1.1555852890014648, + "learning_rate": 1.5378867897257621e-06, + "loss": 0.7162, + "step": 14616 + }, + { + "epoch": 0.751207729468599, + "grad_norm": 0.7882441282272339, + "learning_rate": 1.5372863729343095e-06, + "loss": 0.6445, + "step": 14617 + }, + { + "epoch": 0.7512591222119437, + "grad_norm": 1.050296664237976, + "learning_rate": 1.5366860520803617e-06, + "loss": 0.6924, + "step": 14618 + }, + { + "epoch": 0.7513105149552883, + "grad_norm": 1.0270732641220093, + "learning_rate": 1.5360858271805495e-06, + "loss": 0.7052, + "step": 14619 + }, + { + "epoch": 0.751361907698633, + "grad_norm": 1.0977312326431274, + "learning_rate": 1.5354856982515075e-06, + "loss": 0.6419, + "step": 14620 + }, + { + "epoch": 0.7514133004419776, + "grad_norm": 1.0647611618041992, + "learning_rate": 1.5348856653098564e-06, + "loss": 0.6847, + "step": 14621 + }, + { + "epoch": 0.7514646931853223, + "grad_norm": 0.7457675933837891, + "learning_rate": 1.5342857283722252e-06, + "loss": 0.6512, + "step": 14622 + }, + { + "epoch": 0.7515160859286669, + "grad_norm": 1.1003379821777344, + "learning_rate": 1.5336858874552347e-06, + "loss": 0.702, + "step": 14623 + }, + { + "epoch": 0.7515674786720115, + "grad_norm": 1.0859150886535645, + "learning_rate": 1.5330861425755016e-06, + "loss": 0.6916, + "step": 14624 + }, + { + "epoch": 0.7516188714153561, + "grad_norm": 1.0299313068389893, + "learning_rate": 1.5324864937496464e-06, + "loss": 0.6725, + "step": 14625 + }, + { + "epoch": 0.7516702641587008, + "grad_norm": 1.1331722736358643, + "learning_rate": 1.5318869409942805e-06, + "loss": 0.6525, + "step": 14626 + }, + { + "epoch": 0.7517216569020454, + "grad_norm": 1.0439374446868896, + "learning_rate": 1.5312874843260155e-06, + "loss": 0.6756, + "step": 14627 + }, + { + "epoch": 0.75177304964539, + "grad_norm": 1.1752055883407593, + "learning_rate": 1.5306881237614585e-06, + "loss": 0.7062, + "step": 14628 + }, + { + "epoch": 0.7518244423887347, + "grad_norm": 1.2122249603271484, + "learning_rate": 1.5300888593172197e-06, + "loss": 0.7152, + "step": 14629 + }, + { + "epoch": 0.7518758351320793, + "grad_norm": 0.7624787092208862, + "learning_rate": 1.529489691009896e-06, + "loss": 0.6764, + "step": 14630 + }, + { + "epoch": 0.751927227875424, + "grad_norm": 1.1658755540847778, + "learning_rate": 1.5288906188560921e-06, + "loss": 0.6781, + "step": 14631 + }, + { + "epoch": 0.7519786206187686, + "grad_norm": 1.1925791501998901, + "learning_rate": 1.5282916428724037e-06, + "loss": 0.7448, + "step": 14632 + }, + { + "epoch": 0.7520300133621133, + "grad_norm": 1.104042649269104, + "learning_rate": 1.5276927630754284e-06, + "loss": 0.7336, + "step": 14633 + }, + { + "epoch": 0.7520814061054579, + "grad_norm": 1.0433967113494873, + "learning_rate": 1.5270939794817568e-06, + "loss": 0.6799, + "step": 14634 + }, + { + "epoch": 0.7521327988488026, + "grad_norm": 0.7943648099899292, + "learning_rate": 1.5264952921079785e-06, + "loss": 0.6486, + "step": 14635 + }, + { + "epoch": 0.7521841915921472, + "grad_norm": 6.587066173553467, + "learning_rate": 1.5258967009706838e-06, + "loss": 0.8681, + "step": 14636 + }, + { + "epoch": 0.7522355843354919, + "grad_norm": 1.0755301713943481, + "learning_rate": 1.5252982060864519e-06, + "loss": 0.7224, + "step": 14637 + }, + { + "epoch": 0.7522869770788365, + "grad_norm": 1.0901204347610474, + "learning_rate": 1.524699807471869e-06, + "loss": 0.6499, + "step": 14638 + }, + { + "epoch": 0.7523383698221812, + "grad_norm": 1.1285970211029053, + "learning_rate": 1.5241015051435126e-06, + "loss": 0.6964, + "step": 14639 + }, + { + "epoch": 0.7523897625655257, + "grad_norm": 1.020275354385376, + "learning_rate": 1.5235032991179594e-06, + "loss": 0.6933, + "step": 14640 + }, + { + "epoch": 0.7524411553088703, + "grad_norm": 0.7390571236610413, + "learning_rate": 1.5229051894117814e-06, + "loss": 0.6698, + "step": 14641 + }, + { + "epoch": 0.752492548052215, + "grad_norm": 1.0690643787384033, + "learning_rate": 1.5223071760415531e-06, + "loss": 0.6811, + "step": 14642 + }, + { + "epoch": 0.7525439407955596, + "grad_norm": 1.2547463178634644, + "learning_rate": 1.5217092590238414e-06, + "loss": 0.6922, + "step": 14643 + }, + { + "epoch": 0.7525953335389043, + "grad_norm": 1.1222835779190063, + "learning_rate": 1.5211114383752107e-06, + "loss": 0.7404, + "step": 14644 + }, + { + "epoch": 0.7526467262822489, + "grad_norm": 1.0229933261871338, + "learning_rate": 1.5205137141122283e-06, + "loss": 0.6338, + "step": 14645 + }, + { + "epoch": 0.7526981190255936, + "grad_norm": 1.1105070114135742, + "learning_rate": 1.5199160862514489e-06, + "loss": 0.7126, + "step": 14646 + }, + { + "epoch": 0.7527495117689382, + "grad_norm": 0.6789641380310059, + "learning_rate": 1.5193185548094352e-06, + "loss": 0.6643, + "step": 14647 + }, + { + "epoch": 0.7528009045122829, + "grad_norm": 1.119982123374939, + "learning_rate": 1.5187211198027384e-06, + "loss": 0.714, + "step": 14648 + }, + { + "epoch": 0.7528522972556275, + "grad_norm": 1.0780493021011353, + "learning_rate": 1.5181237812479167e-06, + "loss": 0.7298, + "step": 14649 + }, + { + "epoch": 0.7529036899989722, + "grad_norm": 1.0204311609268188, + "learning_rate": 1.5175265391615124e-06, + "loss": 0.7045, + "step": 14650 + }, + { + "epoch": 0.7529550827423168, + "grad_norm": 1.0576122999191284, + "learning_rate": 1.5169293935600781e-06, + "loss": 0.7343, + "step": 14651 + }, + { + "epoch": 0.7530064754856615, + "grad_norm": 1.0363264083862305, + "learning_rate": 1.5163323444601564e-06, + "loss": 0.6365, + "step": 14652 + }, + { + "epoch": 0.7530578682290061, + "grad_norm": 0.7709307074546814, + "learning_rate": 1.515735391878288e-06, + "loss": 0.6544, + "step": 14653 + }, + { + "epoch": 0.7531092609723508, + "grad_norm": 1.1073668003082275, + "learning_rate": 1.5151385358310145e-06, + "loss": 0.7518, + "step": 14654 + }, + { + "epoch": 0.7531606537156953, + "grad_norm": 1.0389786958694458, + "learning_rate": 1.5145417763348707e-06, + "loss": 0.7049, + "step": 14655 + }, + { + "epoch": 0.7532120464590399, + "grad_norm": 1.0750513076782227, + "learning_rate": 1.513945113406391e-06, + "loss": 0.6797, + "step": 14656 + }, + { + "epoch": 0.7532634392023846, + "grad_norm": 1.1291142702102661, + "learning_rate": 1.5133485470621045e-06, + "loss": 0.7425, + "step": 14657 + }, + { + "epoch": 0.7533148319457292, + "grad_norm": 1.0670005083084106, + "learning_rate": 1.5127520773185444e-06, + "loss": 0.6299, + "step": 14658 + }, + { + "epoch": 0.7533662246890739, + "grad_norm": 0.7200586199760437, + "learning_rate": 1.5121557041922303e-06, + "loss": 0.6849, + "step": 14659 + }, + { + "epoch": 0.7534176174324185, + "grad_norm": 1.0573009252548218, + "learning_rate": 1.5115594276996892e-06, + "loss": 0.7155, + "step": 14660 + }, + { + "epoch": 0.7534690101757632, + "grad_norm": 1.1047613620758057, + "learning_rate": 1.510963247857441e-06, + "loss": 0.7314, + "step": 14661 + }, + { + "epoch": 0.7535204029191078, + "grad_norm": 1.0709912776947021, + "learning_rate": 1.5103671646820005e-06, + "loss": 0.6699, + "step": 14662 + }, + { + "epoch": 0.7535717956624525, + "grad_norm": 1.3862683773040771, + "learning_rate": 1.5097711781898872e-06, + "loss": 0.6999, + "step": 14663 + }, + { + "epoch": 0.7536231884057971, + "grad_norm": 1.0711385011672974, + "learning_rate": 1.5091752883976108e-06, + "loss": 0.6777, + "step": 14664 + }, + { + "epoch": 0.7536745811491418, + "grad_norm": 0.711447536945343, + "learning_rate": 1.5085794953216814e-06, + "loss": 0.6691, + "step": 14665 + }, + { + "epoch": 0.7537259738924864, + "grad_norm": 1.0052300691604614, + "learning_rate": 1.507983798978605e-06, + "loss": 0.6469, + "step": 14666 + }, + { + "epoch": 0.7537773666358311, + "grad_norm": 1.0860706567764282, + "learning_rate": 1.5073881993848878e-06, + "loss": 0.7218, + "step": 14667 + }, + { + "epoch": 0.7538287593791757, + "grad_norm": 1.0909597873687744, + "learning_rate": 1.5067926965570306e-06, + "loss": 0.6937, + "step": 14668 + }, + { + "epoch": 0.7538801521225204, + "grad_norm": 1.0869536399841309, + "learning_rate": 1.5061972905115325e-06, + "loss": 0.6805, + "step": 14669 + }, + { + "epoch": 0.7539315448658649, + "grad_norm": 1.1279218196868896, + "learning_rate": 1.505601981264887e-06, + "loss": 0.6851, + "step": 14670 + }, + { + "epoch": 0.7539829376092095, + "grad_norm": 0.9909319877624512, + "learning_rate": 1.505006768833594e-06, + "loss": 0.7182, + "step": 14671 + }, + { + "epoch": 0.7540343303525542, + "grad_norm": 1.1667131185531616, + "learning_rate": 1.5044116532341368e-06, + "loss": 0.7457, + "step": 14672 + }, + { + "epoch": 0.7540857230958988, + "grad_norm": 0.986508309841156, + "learning_rate": 1.503816634483009e-06, + "loss": 0.6818, + "step": 14673 + }, + { + "epoch": 0.7541371158392435, + "grad_norm": 1.0790176391601562, + "learning_rate": 1.5032217125966941e-06, + "loss": 0.6703, + "step": 14674 + }, + { + "epoch": 0.7541885085825881, + "grad_norm": 0.6582726240158081, + "learning_rate": 1.5026268875916744e-06, + "loss": 0.6245, + "step": 14675 + }, + { + "epoch": 0.7542399013259328, + "grad_norm": 1.1031982898712158, + "learning_rate": 1.5020321594844318e-06, + "loss": 0.7204, + "step": 14676 + }, + { + "epoch": 0.7542912940692774, + "grad_norm": 1.0822887420654297, + "learning_rate": 1.5014375282914428e-06, + "loss": 0.7385, + "step": 14677 + }, + { + "epoch": 0.7543426868126221, + "grad_norm": 0.7739769816398621, + "learning_rate": 1.5008429940291824e-06, + "loss": 0.6487, + "step": 14678 + }, + { + "epoch": 0.7543940795559667, + "grad_norm": 1.0661025047302246, + "learning_rate": 1.5002485567141206e-06, + "loss": 0.6887, + "step": 14679 + }, + { + "epoch": 0.7544454722993114, + "grad_norm": 1.2991708517074585, + "learning_rate": 1.4996542163627304e-06, + "loss": 0.7283, + "step": 14680 + }, + { + "epoch": 0.754496865042656, + "grad_norm": 1.0514014959335327, + "learning_rate": 1.4990599729914767e-06, + "loss": 0.627, + "step": 14681 + }, + { + "epoch": 0.7545482577860007, + "grad_norm": 1.0946869850158691, + "learning_rate": 1.4984658266168238e-06, + "loss": 0.6776, + "step": 14682 + }, + { + "epoch": 0.7545996505293453, + "grad_norm": 1.0848274230957031, + "learning_rate": 1.4978717772552315e-06, + "loss": 0.6517, + "step": 14683 + }, + { + "epoch": 0.75465104327269, + "grad_norm": 1.080322265625, + "learning_rate": 1.4972778249231612e-06, + "loss": 0.6928, + "step": 14684 + }, + { + "epoch": 0.7547024360160345, + "grad_norm": 1.077034831047058, + "learning_rate": 1.4966839696370672e-06, + "loss": 0.6815, + "step": 14685 + }, + { + "epoch": 0.7547538287593791, + "grad_norm": 0.8386796712875366, + "learning_rate": 1.4960902114134018e-06, + "loss": 0.668, + "step": 14686 + }, + { + "epoch": 0.7548052215027238, + "grad_norm": 0.7833622694015503, + "learning_rate": 1.4954965502686197e-06, + "loss": 0.6679, + "step": 14687 + }, + { + "epoch": 0.7548566142460684, + "grad_norm": 1.0739221572875977, + "learning_rate": 1.4949029862191633e-06, + "loss": 0.6743, + "step": 14688 + }, + { + "epoch": 0.7549080069894131, + "grad_norm": 1.1791303157806396, + "learning_rate": 1.4943095192814817e-06, + "loss": 0.7518, + "step": 14689 + }, + { + "epoch": 0.7549593997327577, + "grad_norm": 0.7052087187767029, + "learning_rate": 1.4937161494720165e-06, + "loss": 0.6566, + "step": 14690 + }, + { + "epoch": 0.7550107924761024, + "grad_norm": 1.0414574146270752, + "learning_rate": 1.4931228768072064e-06, + "loss": 0.7458, + "step": 14691 + }, + { + "epoch": 0.755062185219447, + "grad_norm": 1.0964345932006836, + "learning_rate": 1.4925297013034885e-06, + "loss": 0.697, + "step": 14692 + }, + { + "epoch": 0.7551135779627917, + "grad_norm": 1.0862501859664917, + "learning_rate": 1.4919366229772998e-06, + "loss": 0.7084, + "step": 14693 + }, + { + "epoch": 0.7551649707061363, + "grad_norm": 0.8173523545265198, + "learning_rate": 1.49134364184507e-06, + "loss": 0.6284, + "step": 14694 + }, + { + "epoch": 0.755216363449481, + "grad_norm": 1.1427497863769531, + "learning_rate": 1.4907507579232271e-06, + "loss": 0.717, + "step": 14695 + }, + { + "epoch": 0.7552677561928256, + "grad_norm": 1.0648058652877808, + "learning_rate": 1.4901579712282005e-06, + "loss": 0.6521, + "step": 14696 + }, + { + "epoch": 0.7553191489361702, + "grad_norm": 1.0985503196716309, + "learning_rate": 1.4895652817764123e-06, + "loss": 0.7094, + "step": 14697 + }, + { + "epoch": 0.7553705416795149, + "grad_norm": 0.7156886458396912, + "learning_rate": 1.488972689584283e-06, + "loss": 0.6671, + "step": 14698 + }, + { + "epoch": 0.7554219344228595, + "grad_norm": 0.6683071255683899, + "learning_rate": 1.4883801946682302e-06, + "loss": 0.619, + "step": 14699 + }, + { + "epoch": 0.7554733271662041, + "grad_norm": 1.0860047340393066, + "learning_rate": 1.4877877970446736e-06, + "loss": 0.7145, + "step": 14700 + }, + { + "epoch": 0.7555247199095487, + "grad_norm": 1.1880815029144287, + "learning_rate": 1.4871954967300195e-06, + "loss": 0.7069, + "step": 14701 + }, + { + "epoch": 0.7555761126528934, + "grad_norm": 1.0995229482650757, + "learning_rate": 1.4866032937406838e-06, + "loss": 0.7391, + "step": 14702 + }, + { + "epoch": 0.755627505396238, + "grad_norm": 0.9969474673271179, + "learning_rate": 1.4860111880930717e-06, + "loss": 0.6347, + "step": 14703 + }, + { + "epoch": 0.7556788981395827, + "grad_norm": 1.1377861499786377, + "learning_rate": 1.4854191798035872e-06, + "loss": 0.7374, + "step": 14704 + }, + { + "epoch": 0.7557302908829273, + "grad_norm": 1.0894659757614136, + "learning_rate": 1.4848272688886345e-06, + "loss": 0.678, + "step": 14705 + }, + { + "epoch": 0.755781683626272, + "grad_norm": 0.7088754773139954, + "learning_rate": 1.4842354553646116e-06, + "loss": 0.6116, + "step": 14706 + }, + { + "epoch": 0.7558330763696166, + "grad_norm": 0.8325372934341431, + "learning_rate": 1.4836437392479158e-06, + "loss": 0.6579, + "step": 14707 + }, + { + "epoch": 0.7558844691129613, + "grad_norm": 1.1061888933181763, + "learning_rate": 1.4830521205549391e-06, + "loss": 0.7424, + "step": 14708 + }, + { + "epoch": 0.7559358618563059, + "grad_norm": 1.202398419380188, + "learning_rate": 1.4824605993020768e-06, + "loss": 0.7193, + "step": 14709 + }, + { + "epoch": 0.7559872545996505, + "grad_norm": 0.6812346577644348, + "learning_rate": 1.4818691755057147e-06, + "loss": 0.6518, + "step": 14710 + }, + { + "epoch": 0.7560386473429952, + "grad_norm": 1.055953860282898, + "learning_rate": 1.4812778491822395e-06, + "loss": 0.6658, + "step": 14711 + }, + { + "epoch": 0.7560900400863398, + "grad_norm": 1.0800637006759644, + "learning_rate": 1.4806866203480325e-06, + "loss": 0.6986, + "step": 14712 + }, + { + "epoch": 0.7561414328296845, + "grad_norm": 1.1118544340133667, + "learning_rate": 1.4800954890194774e-06, + "loss": 0.6833, + "step": 14713 + }, + { + "epoch": 0.7561928255730291, + "grad_norm": 1.1362606287002563, + "learning_rate": 1.4795044552129506e-06, + "loss": 0.7288, + "step": 14714 + }, + { + "epoch": 0.7562442183163737, + "grad_norm": 1.050248146057129, + "learning_rate": 1.4789135189448272e-06, + "loss": 0.6767, + "step": 14715 + }, + { + "epoch": 0.7562956110597183, + "grad_norm": 1.141561508178711, + "learning_rate": 1.4783226802314793e-06, + "loss": 0.698, + "step": 14716 + }, + { + "epoch": 0.756347003803063, + "grad_norm": 1.0791537761688232, + "learning_rate": 1.4777319390892753e-06, + "loss": 0.6131, + "step": 14717 + }, + { + "epoch": 0.7563983965464076, + "grad_norm": 1.104942798614502, + "learning_rate": 1.4771412955345854e-06, + "loss": 0.6856, + "step": 14718 + }, + { + "epoch": 0.7564497892897523, + "grad_norm": 0.9015743732452393, + "learning_rate": 1.4765507495837723e-06, + "loss": 0.6687, + "step": 14719 + }, + { + "epoch": 0.7565011820330969, + "grad_norm": 0.8958661556243896, + "learning_rate": 1.4759603012531977e-06, + "loss": 0.6087, + "step": 14720 + }, + { + "epoch": 0.7565525747764416, + "grad_norm": 1.1765196323394775, + "learning_rate": 1.4753699505592183e-06, + "loss": 0.682, + "step": 14721 + }, + { + "epoch": 0.7566039675197862, + "grad_norm": 1.1300238370895386, + "learning_rate": 1.4747796975181955e-06, + "loss": 0.7107, + "step": 14722 + }, + { + "epoch": 0.7566553602631308, + "grad_norm": 1.0682175159454346, + "learning_rate": 1.4741895421464763e-06, + "loss": 0.7295, + "step": 14723 + }, + { + "epoch": 0.7567067530064755, + "grad_norm": 0.7258800268173218, + "learning_rate": 1.4735994844604168e-06, + "loss": 0.648, + "step": 14724 + }, + { + "epoch": 0.7567581457498201, + "grad_norm": 0.7390783429145813, + "learning_rate": 1.473009524476361e-06, + "loss": 0.6368, + "step": 14725 + }, + { + "epoch": 0.7568095384931648, + "grad_norm": 11.998215675354004, + "learning_rate": 1.472419662210658e-06, + "loss": 0.6954, + "step": 14726 + }, + { + "epoch": 0.7568609312365094, + "grad_norm": 1.0905632972717285, + "learning_rate": 1.4718298976796486e-06, + "loss": 0.6824, + "step": 14727 + }, + { + "epoch": 0.7569123239798541, + "grad_norm": 1.0100054740905762, + "learning_rate": 1.471240230899671e-06, + "loss": 0.6895, + "step": 14728 + }, + { + "epoch": 0.7569637167231987, + "grad_norm": 1.1355903148651123, + "learning_rate": 1.470650661887068e-06, + "loss": 0.7073, + "step": 14729 + }, + { + "epoch": 0.7570151094665434, + "grad_norm": 1.0258853435516357, + "learning_rate": 1.4700611906581668e-06, + "loss": 0.6989, + "step": 14730 + }, + { + "epoch": 0.7570665022098879, + "grad_norm": 1.1530462503433228, + "learning_rate": 1.4694718172293043e-06, + "loss": 0.7427, + "step": 14731 + }, + { + "epoch": 0.7571178949532326, + "grad_norm": 1.10906183719635, + "learning_rate": 1.4688825416168085e-06, + "loss": 0.7482, + "step": 14732 + }, + { + "epoch": 0.7571692876965772, + "grad_norm": 1.081913709640503, + "learning_rate": 1.468293363837005e-06, + "loss": 0.6975, + "step": 14733 + }, + { + "epoch": 0.7572206804399219, + "grad_norm": 1.0596214532852173, + "learning_rate": 1.4677042839062167e-06, + "loss": 0.6836, + "step": 14734 + }, + { + "epoch": 0.7572720731832665, + "grad_norm": 1.1221777200698853, + "learning_rate": 1.4671153018407675e-06, + "loss": 0.7091, + "step": 14735 + }, + { + "epoch": 0.7573234659266112, + "grad_norm": 1.0711395740509033, + "learning_rate": 1.4665264176569732e-06, + "loss": 0.71, + "step": 14736 + }, + { + "epoch": 0.7573748586699558, + "grad_norm": 1.0611110925674438, + "learning_rate": 1.4659376313711493e-06, + "loss": 0.7518, + "step": 14737 + }, + { + "epoch": 0.7574262514133004, + "grad_norm": 0.8284400701522827, + "learning_rate": 1.465348942999612e-06, + "loss": 0.621, + "step": 14738 + }, + { + "epoch": 0.7574776441566451, + "grad_norm": 0.8643609285354614, + "learning_rate": 1.4647603525586662e-06, + "loss": 0.6798, + "step": 14739 + }, + { + "epoch": 0.7575290368999897, + "grad_norm": 1.1170579195022583, + "learning_rate": 1.4641718600646232e-06, + "loss": 0.652, + "step": 14740 + }, + { + "epoch": 0.7575804296433344, + "grad_norm": 1.1139161586761475, + "learning_rate": 1.463583465533785e-06, + "loss": 0.7205, + "step": 14741 + }, + { + "epoch": 0.757631822386679, + "grad_norm": 0.8386148810386658, + "learning_rate": 1.462995168982458e-06, + "loss": 0.6349, + "step": 14742 + }, + { + "epoch": 0.7576832151300237, + "grad_norm": 1.07239830493927, + "learning_rate": 1.4624069704269356e-06, + "loss": 0.7042, + "step": 14743 + }, + { + "epoch": 0.7577346078733683, + "grad_norm": 1.2405868768692017, + "learning_rate": 1.4618188698835183e-06, + "loss": 0.7029, + "step": 14744 + }, + { + "epoch": 0.757786000616713, + "grad_norm": 1.2066837549209595, + "learning_rate": 1.4612308673684995e-06, + "loss": 0.7578, + "step": 14745 + }, + { + "epoch": 0.7578373933600575, + "grad_norm": 1.0945523977279663, + "learning_rate": 1.4606429628981684e-06, + "loss": 0.7309, + "step": 14746 + }, + { + "epoch": 0.7578887861034022, + "grad_norm": 1.1321070194244385, + "learning_rate": 1.4600551564888155e-06, + "loss": 0.6889, + "step": 14747 + }, + { + "epoch": 0.7579401788467468, + "grad_norm": 0.7038831114768982, + "learning_rate": 1.4594674481567261e-06, + "loss": 0.6646, + "step": 14748 + }, + { + "epoch": 0.7579915715900915, + "grad_norm": 0.767866313457489, + "learning_rate": 1.4588798379181828e-06, + "loss": 0.6535, + "step": 14749 + }, + { + "epoch": 0.7580429643334361, + "grad_norm": 1.0805637836456299, + "learning_rate": 1.4582923257894644e-06, + "loss": 0.6994, + "step": 14750 + }, + { + "epoch": 0.7580943570767807, + "grad_norm": 1.0691684484481812, + "learning_rate": 1.4577049117868531e-06, + "loss": 0.6924, + "step": 14751 + }, + { + "epoch": 0.7581457498201254, + "grad_norm": 1.1572397947311401, + "learning_rate": 1.457117595926617e-06, + "loss": 0.7133, + "step": 14752 + }, + { + "epoch": 0.75819714256347, + "grad_norm": 1.1316540241241455, + "learning_rate": 1.4565303782250334e-06, + "loss": 0.6821, + "step": 14753 + }, + { + "epoch": 0.7582485353068147, + "grad_norm": 1.01896333694458, + "learning_rate": 1.45594325869837e-06, + "loss": 0.6375, + "step": 14754 + }, + { + "epoch": 0.7582999280501593, + "grad_norm": 1.1571788787841797, + "learning_rate": 1.455356237362892e-06, + "loss": 0.7122, + "step": 14755 + }, + { + "epoch": 0.758351320793504, + "grad_norm": 0.7224657535552979, + "learning_rate": 1.4547693142348662e-06, + "loss": 0.6561, + "step": 14756 + }, + { + "epoch": 0.7584027135368486, + "grad_norm": 0.7299119830131531, + "learning_rate": 1.454182489330551e-06, + "loss": 0.6316, + "step": 14757 + }, + { + "epoch": 0.7584541062801933, + "grad_norm": 1.1546157598495483, + "learning_rate": 1.4535957626662094e-06, + "loss": 0.7291, + "step": 14758 + }, + { + "epoch": 0.7585054990235379, + "grad_norm": 1.2008512020111084, + "learning_rate": 1.453009134258091e-06, + "loss": 0.7141, + "step": 14759 + }, + { + "epoch": 0.7585568917668826, + "grad_norm": 1.039949893951416, + "learning_rate": 1.4524226041224531e-06, + "loss": 0.7258, + "step": 14760 + }, + { + "epoch": 0.7586082845102271, + "grad_norm": 1.1076335906982422, + "learning_rate": 1.4518361722755453e-06, + "loss": 0.7173, + "step": 14761 + }, + { + "epoch": 0.7586596772535718, + "grad_norm": 1.1180522441864014, + "learning_rate": 1.4512498387336143e-06, + "loss": 0.7612, + "step": 14762 + }, + { + "epoch": 0.7587110699969164, + "grad_norm": 1.1019178628921509, + "learning_rate": 1.450663603512904e-06, + "loss": 0.6551, + "step": 14763 + }, + { + "epoch": 0.758762462740261, + "grad_norm": 1.144449234008789, + "learning_rate": 1.4500774666296603e-06, + "loss": 0.7015, + "step": 14764 + }, + { + "epoch": 0.7588138554836057, + "grad_norm": 0.9088379144668579, + "learning_rate": 1.4494914281001194e-06, + "loss": 0.6546, + "step": 14765 + }, + { + "epoch": 0.7588652482269503, + "grad_norm": 1.0962613821029663, + "learning_rate": 1.448905487940519e-06, + "loss": 0.6962, + "step": 14766 + }, + { + "epoch": 0.758916640970295, + "grad_norm": 1.1001778841018677, + "learning_rate": 1.4483196461670934e-06, + "loss": 0.7164, + "step": 14767 + }, + { + "epoch": 0.7589680337136396, + "grad_norm": 1.1231653690338135, + "learning_rate": 1.447733902796072e-06, + "loss": 0.6976, + "step": 14768 + }, + { + "epoch": 0.7590194264569843, + "grad_norm": 0.7405795454978943, + "learning_rate": 1.4471482578436862e-06, + "loss": 0.6452, + "step": 14769 + }, + { + "epoch": 0.7590708192003289, + "grad_norm": 1.0876694917678833, + "learning_rate": 1.4465627113261604e-06, + "loss": 0.7455, + "step": 14770 + }, + { + "epoch": 0.7591222119436736, + "grad_norm": 1.0004351139068604, + "learning_rate": 1.4459772632597179e-06, + "loss": 0.632, + "step": 14771 + }, + { + "epoch": 0.7591736046870182, + "grad_norm": 1.0393434762954712, + "learning_rate": 1.445391913660577e-06, + "loss": 0.74, + "step": 14772 + }, + { + "epoch": 0.7592249974303629, + "grad_norm": 0.7711547017097473, + "learning_rate": 1.4448066625449587e-06, + "loss": 0.6707, + "step": 14773 + }, + { + "epoch": 0.7592763901737075, + "grad_norm": 0.8378944396972656, + "learning_rate": 1.4442215099290768e-06, + "loss": 0.6671, + "step": 14774 + }, + { + "epoch": 0.7593277829170522, + "grad_norm": 1.117581844329834, + "learning_rate": 1.4436364558291426e-06, + "loss": 0.7867, + "step": 14775 + }, + { + "epoch": 0.7593791756603967, + "grad_norm": 1.0259259939193726, + "learning_rate": 1.4430515002613643e-06, + "loss": 0.6509, + "step": 14776 + }, + { + "epoch": 0.7594305684037413, + "grad_norm": 1.0407464504241943, + "learning_rate": 1.442466643241952e-06, + "loss": 0.6471, + "step": 14777 + }, + { + "epoch": 0.759481961147086, + "grad_norm": 1.0910242795944214, + "learning_rate": 1.4418818847871086e-06, + "loss": 0.7256, + "step": 14778 + }, + { + "epoch": 0.7595333538904306, + "grad_norm": 1.0766626596450806, + "learning_rate": 1.4412972249130324e-06, + "loss": 0.7338, + "step": 14779 + }, + { + "epoch": 0.7595847466337753, + "grad_norm": 0.8886849284172058, + "learning_rate": 1.4407126636359275e-06, + "loss": 0.6092, + "step": 14780 + }, + { + "epoch": 0.7596361393771199, + "grad_norm": 1.1051263809204102, + "learning_rate": 1.440128200971983e-06, + "loss": 0.6475, + "step": 14781 + }, + { + "epoch": 0.7596875321204646, + "grad_norm": 1.1441853046417236, + "learning_rate": 1.439543836937397e-06, + "loss": 0.7735, + "step": 14782 + }, + { + "epoch": 0.7597389248638092, + "grad_norm": 1.1015620231628418, + "learning_rate": 1.4389595715483584e-06, + "loss": 0.6712, + "step": 14783 + }, + { + "epoch": 0.7597903176071539, + "grad_norm": 0.7938957810401917, + "learning_rate": 1.4383754048210547e-06, + "loss": 0.6771, + "step": 14784 + }, + { + "epoch": 0.7598417103504985, + "grad_norm": 1.0900685787200928, + "learning_rate": 1.4377913367716683e-06, + "loss": 0.6953, + "step": 14785 + }, + { + "epoch": 0.7598931030938432, + "grad_norm": 1.1259419918060303, + "learning_rate": 1.4372073674163856e-06, + "loss": 0.7018, + "step": 14786 + }, + { + "epoch": 0.7599444958371878, + "grad_norm": 1.1968770027160645, + "learning_rate": 1.4366234967713838e-06, + "loss": 0.6813, + "step": 14787 + }, + { + "epoch": 0.7599958885805325, + "grad_norm": 1.0884437561035156, + "learning_rate": 1.4360397248528385e-06, + "loss": 0.7358, + "step": 14788 + }, + { + "epoch": 0.7600472813238771, + "grad_norm": 0.9581515192985535, + "learning_rate": 1.435456051676926e-06, + "loss": 0.634, + "step": 14789 + }, + { + "epoch": 0.7600986740672218, + "grad_norm": 0.8046326041221619, + "learning_rate": 1.4348724772598166e-06, + "loss": 0.663, + "step": 14790 + }, + { + "epoch": 0.7601500668105663, + "grad_norm": 1.0776023864746094, + "learning_rate": 1.434289001617678e-06, + "loss": 0.6815, + "step": 14791 + }, + { + "epoch": 0.7602014595539109, + "grad_norm": 1.0970484018325806, + "learning_rate": 1.4337056247666754e-06, + "loss": 0.6592, + "step": 14792 + }, + { + "epoch": 0.7602528522972556, + "grad_norm": 1.1023536920547485, + "learning_rate": 1.4331223467229755e-06, + "loss": 0.7536, + "step": 14793 + }, + { + "epoch": 0.7603042450406002, + "grad_norm": 1.074668049812317, + "learning_rate": 1.4325391675027328e-06, + "loss": 0.6585, + "step": 14794 + }, + { + "epoch": 0.7603556377839449, + "grad_norm": 1.072434425354004, + "learning_rate": 1.431956087122109e-06, + "loss": 0.7494, + "step": 14795 + }, + { + "epoch": 0.7604070305272895, + "grad_norm": 0.6674770712852478, + "learning_rate": 1.4313731055972575e-06, + "loss": 0.6366, + "step": 14796 + }, + { + "epoch": 0.7604584232706342, + "grad_norm": 1.145717740058899, + "learning_rate": 1.4307902229443293e-06, + "loss": 0.7026, + "step": 14797 + }, + { + "epoch": 0.7605098160139788, + "grad_norm": 0.8038463592529297, + "learning_rate": 1.4302074391794758e-06, + "loss": 0.635, + "step": 14798 + }, + { + "epoch": 0.7605612087573235, + "grad_norm": 1.1579630374908447, + "learning_rate": 1.429624754318843e-06, + "loss": 0.7373, + "step": 14799 + }, + { + "epoch": 0.7606126015006681, + "grad_norm": 1.0624281167984009, + "learning_rate": 1.4290421683785738e-06, + "loss": 0.656, + "step": 14800 + }, + { + "epoch": 0.7606639942440128, + "grad_norm": 1.0585752725601196, + "learning_rate": 1.428459681374808e-06, + "loss": 0.6767, + "step": 14801 + }, + { + "epoch": 0.7607153869873574, + "grad_norm": 1.0929564237594604, + "learning_rate": 1.4278772933236873e-06, + "loss": 0.7143, + "step": 14802 + }, + { + "epoch": 0.7607667797307021, + "grad_norm": 0.697571337223053, + "learning_rate": 1.427295004241346e-06, + "loss": 0.7019, + "step": 14803 + }, + { + "epoch": 0.7608181724740467, + "grad_norm": 1.0117690563201904, + "learning_rate": 1.4267128141439157e-06, + "loss": 0.6817, + "step": 14804 + }, + { + "epoch": 0.7608695652173914, + "grad_norm": 1.1021381616592407, + "learning_rate": 1.4261307230475263e-06, + "loss": 0.7084, + "step": 14805 + }, + { + "epoch": 0.7609209579607359, + "grad_norm": 0.7960483431816101, + "learning_rate": 1.425548730968307e-06, + "loss": 0.6393, + "step": 14806 + }, + { + "epoch": 0.7609723507040805, + "grad_norm": 1.080965280532837, + "learning_rate": 1.4249668379223818e-06, + "loss": 0.7124, + "step": 14807 + }, + { + "epoch": 0.7610237434474252, + "grad_norm": 1.0404525995254517, + "learning_rate": 1.4243850439258705e-06, + "loss": 0.6919, + "step": 14808 + }, + { + "epoch": 0.7610751361907698, + "grad_norm": 1.1059893369674683, + "learning_rate": 1.4238033489948977e-06, + "loss": 0.7502, + "step": 14809 + }, + { + "epoch": 0.7611265289341145, + "grad_norm": 1.030486822128296, + "learning_rate": 1.4232217531455722e-06, + "loss": 0.7452, + "step": 14810 + }, + { + "epoch": 0.7611779216774591, + "grad_norm": 1.0789388418197632, + "learning_rate": 1.4226402563940133e-06, + "loss": 0.698, + "step": 14811 + }, + { + "epoch": 0.7612293144208038, + "grad_norm": 1.0779368877410889, + "learning_rate": 1.42205885875633e-06, + "loss": 0.6414, + "step": 14812 + }, + { + "epoch": 0.7612807071641484, + "grad_norm": 1.033353567123413, + "learning_rate": 1.4214775602486302e-06, + "loss": 0.7108, + "step": 14813 + }, + { + "epoch": 0.7613320999074931, + "grad_norm": 1.06680428981781, + "learning_rate": 1.420896360887018e-06, + "loss": 0.6727, + "step": 14814 + }, + { + "epoch": 0.7613834926508377, + "grad_norm": 1.1153900623321533, + "learning_rate": 1.4203152606875992e-06, + "loss": 0.6829, + "step": 14815 + }, + { + "epoch": 0.7614348853941824, + "grad_norm": 1.0962063074111938, + "learning_rate": 1.419734259666472e-06, + "loss": 0.6768, + "step": 14816 + }, + { + "epoch": 0.761486278137527, + "grad_norm": 1.1300619840621948, + "learning_rate": 1.4191533578397336e-06, + "loss": 0.677, + "step": 14817 + }, + { + "epoch": 0.7615376708808717, + "grad_norm": 0.736458420753479, + "learning_rate": 1.4185725552234769e-06, + "loss": 0.6705, + "step": 14818 + }, + { + "epoch": 0.7615890636242163, + "grad_norm": 1.1062917709350586, + "learning_rate": 1.4179918518337966e-06, + "loss": 0.7441, + "step": 14819 + }, + { + "epoch": 0.761640456367561, + "grad_norm": 1.133357048034668, + "learning_rate": 1.4174112476867796e-06, + "loss": 0.7031, + "step": 14820 + }, + { + "epoch": 0.7616918491109056, + "grad_norm": 0.7239965796470642, + "learning_rate": 1.4168307427985111e-06, + "loss": 0.6595, + "step": 14821 + }, + { + "epoch": 0.7617432418542501, + "grad_norm": 0.6841790080070496, + "learning_rate": 1.4162503371850795e-06, + "loss": 0.6471, + "step": 14822 + }, + { + "epoch": 0.7617946345975948, + "grad_norm": 1.050122857093811, + "learning_rate": 1.4156700308625582e-06, + "loss": 0.6615, + "step": 14823 + }, + { + "epoch": 0.7618460273409394, + "grad_norm": 0.7692776918411255, + "learning_rate": 1.41508982384703e-06, + "loss": 0.6293, + "step": 14824 + }, + { + "epoch": 0.7618974200842841, + "grad_norm": 0.7168476581573486, + "learning_rate": 1.4145097161545696e-06, + "loss": 0.6624, + "step": 14825 + }, + { + "epoch": 0.7619488128276287, + "grad_norm": 1.054405927658081, + "learning_rate": 1.4139297078012476e-06, + "loss": 0.6635, + "step": 14826 + }, + { + "epoch": 0.7620002055709734, + "grad_norm": 1.1064034700393677, + "learning_rate": 1.4133497988031337e-06, + "loss": 0.7106, + "step": 14827 + }, + { + "epoch": 0.762051598314318, + "grad_norm": 0.7077066898345947, + "learning_rate": 1.4127699891762963e-06, + "loss": 0.6495, + "step": 14828 + }, + { + "epoch": 0.7621029910576627, + "grad_norm": 1.1734358072280884, + "learning_rate": 1.4121902789367997e-06, + "loss": 0.7292, + "step": 14829 + }, + { + "epoch": 0.7621543838010073, + "grad_norm": 1.2431397438049316, + "learning_rate": 1.4116106681007024e-06, + "loss": 0.7022, + "step": 14830 + }, + { + "epoch": 0.762205776544352, + "grad_norm": 0.696506917476654, + "learning_rate": 1.4110311566840683e-06, + "loss": 0.6283, + "step": 14831 + }, + { + "epoch": 0.7622571692876966, + "grad_norm": 1.1775492429733276, + "learning_rate": 1.4104517447029475e-06, + "loss": 0.7581, + "step": 14832 + }, + { + "epoch": 0.7623085620310412, + "grad_norm": 1.12771475315094, + "learning_rate": 1.409872432173397e-06, + "loss": 0.6704, + "step": 14833 + }, + { + "epoch": 0.7623599547743859, + "grad_norm": 0.7990912795066833, + "learning_rate": 1.4092932191114639e-06, + "loss": 0.6794, + "step": 14834 + }, + { + "epoch": 0.7624113475177305, + "grad_norm": 1.093575358390808, + "learning_rate": 1.408714105533201e-06, + "loss": 0.7362, + "step": 14835 + }, + { + "epoch": 0.7624627402610752, + "grad_norm": 1.1284068822860718, + "learning_rate": 1.4081350914546465e-06, + "loss": 0.6783, + "step": 14836 + }, + { + "epoch": 0.7625141330044197, + "grad_norm": 1.047597050666809, + "learning_rate": 1.4075561768918477e-06, + "loss": 0.6806, + "step": 14837 + }, + { + "epoch": 0.7625655257477644, + "grad_norm": 1.091753602027893, + "learning_rate": 1.4069773618608423e-06, + "loss": 0.6769, + "step": 14838 + }, + { + "epoch": 0.762616918491109, + "grad_norm": 1.1113297939300537, + "learning_rate": 1.4063986463776646e-06, + "loss": 0.6935, + "step": 14839 + }, + { + "epoch": 0.7626683112344537, + "grad_norm": 1.0237194299697876, + "learning_rate": 1.4058200304583524e-06, + "loss": 0.6717, + "step": 14840 + }, + { + "epoch": 0.7627197039777983, + "grad_norm": 1.0335760116577148, + "learning_rate": 1.4052415141189346e-06, + "loss": 0.6838, + "step": 14841 + }, + { + "epoch": 0.762771096721143, + "grad_norm": 1.1562623977661133, + "learning_rate": 1.4046630973754399e-06, + "loss": 0.6877, + "step": 14842 + }, + { + "epoch": 0.7628224894644876, + "grad_norm": 1.0531816482543945, + "learning_rate": 1.4040847802438922e-06, + "loss": 0.6834, + "step": 14843 + }, + { + "epoch": 0.7628738822078323, + "grad_norm": 1.113742709159851, + "learning_rate": 1.4035065627403187e-06, + "loss": 0.7374, + "step": 14844 + }, + { + "epoch": 0.7629252749511769, + "grad_norm": 0.7361916899681091, + "learning_rate": 1.402928444880734e-06, + "loss": 0.6501, + "step": 14845 + }, + { + "epoch": 0.7629766676945215, + "grad_norm": 0.7651697993278503, + "learning_rate": 1.4023504266811588e-06, + "loss": 0.6502, + "step": 14846 + }, + { + "epoch": 0.7630280604378662, + "grad_norm": 1.1182572841644287, + "learning_rate": 1.4017725081576067e-06, + "loss": 0.7058, + "step": 14847 + }, + { + "epoch": 0.7630794531812108, + "grad_norm": 1.1113694906234741, + "learning_rate": 1.4011946893260881e-06, + "loss": 0.674, + "step": 14848 + }, + { + "epoch": 0.7631308459245555, + "grad_norm": 0.7384142279624939, + "learning_rate": 1.4006169702026146e-06, + "loss": 0.6358, + "step": 14849 + }, + { + "epoch": 0.7631822386679001, + "grad_norm": 1.1747992038726807, + "learning_rate": 1.4000393508031896e-06, + "loss": 0.7197, + "step": 14850 + }, + { + "epoch": 0.7632336314112448, + "grad_norm": 0.6869306564331055, + "learning_rate": 1.3994618311438214e-06, + "loss": 0.6508, + "step": 14851 + }, + { + "epoch": 0.7632850241545893, + "grad_norm": 1.0907849073410034, + "learning_rate": 1.3988844112405037e-06, + "loss": 0.6634, + "step": 14852 + }, + { + "epoch": 0.763336416897934, + "grad_norm": 0.7894008755683899, + "learning_rate": 1.39830709110924e-06, + "loss": 0.6114, + "step": 14853 + }, + { + "epoch": 0.7633878096412786, + "grad_norm": 0.693315327167511, + "learning_rate": 1.397729870766023e-06, + "loss": 0.6705, + "step": 14854 + }, + { + "epoch": 0.7634392023846233, + "grad_norm": 1.155637502670288, + "learning_rate": 1.397152750226846e-06, + "loss": 0.6436, + "step": 14855 + }, + { + "epoch": 0.7634905951279679, + "grad_norm": 0.7805944085121155, + "learning_rate": 1.3965757295076966e-06, + "loss": 0.631, + "step": 14856 + }, + { + "epoch": 0.7635419878713126, + "grad_norm": 1.0873417854309082, + "learning_rate": 1.3959988086245646e-06, + "loss": 0.701, + "step": 14857 + }, + { + "epoch": 0.7635933806146572, + "grad_norm": 1.0130430459976196, + "learning_rate": 1.3954219875934332e-06, + "loss": 0.7126, + "step": 14858 + }, + { + "epoch": 0.7636447733580018, + "grad_norm": 1.1657843589782715, + "learning_rate": 1.3948452664302814e-06, + "loss": 0.6496, + "step": 14859 + }, + { + "epoch": 0.7636961661013465, + "grad_norm": 1.0380887985229492, + "learning_rate": 1.394268645151093e-06, + "loss": 0.736, + "step": 14860 + }, + { + "epoch": 0.7637475588446911, + "grad_norm": 1.1529345512390137, + "learning_rate": 1.393692123771837e-06, + "loss": 0.7752, + "step": 14861 + }, + { + "epoch": 0.7637989515880358, + "grad_norm": 1.0513157844543457, + "learning_rate": 1.3931157023084918e-06, + "loss": 0.6373, + "step": 14862 + }, + { + "epoch": 0.7638503443313804, + "grad_norm": 1.035420298576355, + "learning_rate": 1.392539380777026e-06, + "loss": 0.7055, + "step": 14863 + }, + { + "epoch": 0.7639017370747251, + "grad_norm": 1.0435914993286133, + "learning_rate": 1.3919631591934063e-06, + "loss": 0.6596, + "step": 14864 + }, + { + "epoch": 0.7639531298180697, + "grad_norm": 1.1054826974868774, + "learning_rate": 1.3913870375735965e-06, + "loss": 0.7074, + "step": 14865 + }, + { + "epoch": 0.7640045225614144, + "grad_norm": 1.087292194366455, + "learning_rate": 1.390811015933562e-06, + "loss": 0.7279, + "step": 14866 + }, + { + "epoch": 0.7640559153047589, + "grad_norm": 1.1077383756637573, + "learning_rate": 1.3902350942892595e-06, + "loss": 0.7102, + "step": 14867 + }, + { + "epoch": 0.7641073080481036, + "grad_norm": 1.1028647422790527, + "learning_rate": 1.389659272656646e-06, + "loss": 0.7165, + "step": 14868 + }, + { + "epoch": 0.7641587007914482, + "grad_norm": 1.0598477125167847, + "learning_rate": 1.3890835510516738e-06, + "loss": 0.6959, + "step": 14869 + }, + { + "epoch": 0.7642100935347929, + "grad_norm": 1.1149882078170776, + "learning_rate": 1.3885079294902965e-06, + "loss": 0.6943, + "step": 14870 + }, + { + "epoch": 0.7642614862781375, + "grad_norm": 1.100407361984253, + "learning_rate": 1.387932407988461e-06, + "loss": 0.7161, + "step": 14871 + }, + { + "epoch": 0.7643128790214821, + "grad_norm": 1.3909032344818115, + "learning_rate": 1.38735698656211e-06, + "loss": 0.663, + "step": 14872 + }, + { + "epoch": 0.7643642717648268, + "grad_norm": 0.7601723074913025, + "learning_rate": 1.3867816652271921e-06, + "loss": 0.6769, + "step": 14873 + }, + { + "epoch": 0.7644156645081714, + "grad_norm": 1.1539779901504517, + "learning_rate": 1.3862064439996403e-06, + "loss": 0.746, + "step": 14874 + }, + { + "epoch": 0.7644670572515161, + "grad_norm": 1.1712108850479126, + "learning_rate": 1.385631322895396e-06, + "loss": 0.7196, + "step": 14875 + }, + { + "epoch": 0.7645184499948607, + "grad_norm": 0.7044581174850464, + "learning_rate": 1.385056301930392e-06, + "loss": 0.6592, + "step": 14876 + }, + { + "epoch": 0.7645698427382054, + "grad_norm": 1.1520460844039917, + "learning_rate": 1.3844813811205605e-06, + "loss": 0.707, + "step": 14877 + }, + { + "epoch": 0.76462123548155, + "grad_norm": 1.0719215869903564, + "learning_rate": 1.3839065604818276e-06, + "loss": 0.6557, + "step": 14878 + }, + { + "epoch": 0.7646726282248947, + "grad_norm": 3.792450189590454, + "learning_rate": 1.3833318400301227e-06, + "loss": 0.7009, + "step": 14879 + }, + { + "epoch": 0.7647240209682393, + "grad_norm": 0.7956342697143555, + "learning_rate": 1.3827572197813672e-06, + "loss": 0.6804, + "step": 14880 + }, + { + "epoch": 0.764775413711584, + "grad_norm": 1.1279836893081665, + "learning_rate": 1.3821826997514803e-06, + "loss": 0.6941, + "step": 14881 + }, + { + "epoch": 0.7648268064549285, + "grad_norm": 1.1273633241653442, + "learning_rate": 1.3816082799563825e-06, + "loss": 0.7042, + "step": 14882 + }, + { + "epoch": 0.7648781991982732, + "grad_norm": 1.061460256576538, + "learning_rate": 1.381033960411987e-06, + "loss": 0.6705, + "step": 14883 + }, + { + "epoch": 0.7649295919416178, + "grad_norm": 1.1251585483551025, + "learning_rate": 1.3804597411342064e-06, + "loss": 0.6871, + "step": 14884 + }, + { + "epoch": 0.7649809846849625, + "grad_norm": 1.101520299911499, + "learning_rate": 1.3798856221389473e-06, + "loss": 0.7176, + "step": 14885 + }, + { + "epoch": 0.7650323774283071, + "grad_norm": 0.98215651512146, + "learning_rate": 1.379311603442121e-06, + "loss": 0.6466, + "step": 14886 + }, + { + "epoch": 0.7650837701716517, + "grad_norm": 0.984893262386322, + "learning_rate": 1.3787376850596263e-06, + "loss": 0.6967, + "step": 14887 + }, + { + "epoch": 0.7651351629149964, + "grad_norm": 1.140602469444275, + "learning_rate": 1.3781638670073672e-06, + "loss": 0.7316, + "step": 14888 + }, + { + "epoch": 0.765186555658341, + "grad_norm": 1.0875083208084106, + "learning_rate": 1.3775901493012412e-06, + "loss": 0.7019, + "step": 14889 + }, + { + "epoch": 0.7652379484016857, + "grad_norm": 1.0610740184783936, + "learning_rate": 1.3770165319571416e-06, + "loss": 0.6589, + "step": 14890 + }, + { + "epoch": 0.7652893411450303, + "grad_norm": 1.0957528352737427, + "learning_rate": 1.3764430149909646e-06, + "loss": 0.6619, + "step": 14891 + }, + { + "epoch": 0.765340733888375, + "grad_norm": 1.1849086284637451, + "learning_rate": 1.3758695984185976e-06, + "loss": 0.6909, + "step": 14892 + }, + { + "epoch": 0.7653921266317196, + "grad_norm": 1.0704739093780518, + "learning_rate": 1.3752962822559285e-06, + "loss": 0.6758, + "step": 14893 + }, + { + "epoch": 0.7654435193750643, + "grad_norm": 1.0993798971176147, + "learning_rate": 1.3747230665188394e-06, + "loss": 0.734, + "step": 14894 + }, + { + "epoch": 0.7654949121184089, + "grad_norm": 1.117469072341919, + "learning_rate": 1.374149951223217e-06, + "loss": 0.7104, + "step": 14895 + }, + { + "epoch": 0.7655463048617536, + "grad_norm": 1.0320302248001099, + "learning_rate": 1.3735769363849327e-06, + "loss": 0.7013, + "step": 14896 + }, + { + "epoch": 0.7655976976050982, + "grad_norm": 1.1136685609817505, + "learning_rate": 1.3730040220198682e-06, + "loss": 0.7212, + "step": 14897 + }, + { + "epoch": 0.7656490903484428, + "grad_norm": 1.1024675369262695, + "learning_rate": 1.3724312081438928e-06, + "loss": 0.6983, + "step": 14898 + }, + { + "epoch": 0.7657004830917874, + "grad_norm": 1.1504346132278442, + "learning_rate": 1.3718584947728802e-06, + "loss": 0.6847, + "step": 14899 + }, + { + "epoch": 0.765751875835132, + "grad_norm": 1.0730969905853271, + "learning_rate": 1.3712858819226959e-06, + "loss": 0.6796, + "step": 14900 + }, + { + "epoch": 0.7658032685784767, + "grad_norm": 1.0776852369308472, + "learning_rate": 1.370713369609204e-06, + "loss": 0.6921, + "step": 14901 + }, + { + "epoch": 0.7658546613218213, + "grad_norm": 1.0063329935073853, + "learning_rate": 1.3701409578482699e-06, + "loss": 0.6096, + "step": 14902 + }, + { + "epoch": 0.765906054065166, + "grad_norm": 1.2496848106384277, + "learning_rate": 1.3695686466557479e-06, + "loss": 0.7476, + "step": 14903 + }, + { + "epoch": 0.7659574468085106, + "grad_norm": 1.0727111101150513, + "learning_rate": 1.3689964360474977e-06, + "loss": 0.6952, + "step": 14904 + }, + { + "epoch": 0.7660088395518553, + "grad_norm": 1.101004958152771, + "learning_rate": 1.3684243260393727e-06, + "loss": 0.6648, + "step": 14905 + }, + { + "epoch": 0.7660602322951999, + "grad_norm": 1.0858149528503418, + "learning_rate": 1.3678523166472224e-06, + "loss": 0.7236, + "step": 14906 + }, + { + "epoch": 0.7661116250385446, + "grad_norm": 1.024114727973938, + "learning_rate": 1.3672804078868939e-06, + "loss": 0.6261, + "step": 14907 + }, + { + "epoch": 0.7661630177818892, + "grad_norm": 1.0218740701675415, + "learning_rate": 1.3667085997742357e-06, + "loss": 0.6815, + "step": 14908 + }, + { + "epoch": 0.7662144105252339, + "grad_norm": 1.0070140361785889, + "learning_rate": 1.3661368923250884e-06, + "loss": 0.6892, + "step": 14909 + }, + { + "epoch": 0.7662658032685785, + "grad_norm": 1.0742318630218506, + "learning_rate": 1.3655652855552903e-06, + "loss": 0.7174, + "step": 14910 + }, + { + "epoch": 0.7663171960119232, + "grad_norm": 1.1130491495132446, + "learning_rate": 1.3649937794806828e-06, + "loss": 0.709, + "step": 14911 + }, + { + "epoch": 0.7663685887552678, + "grad_norm": 1.0463439226150513, + "learning_rate": 1.3644223741170937e-06, + "loss": 0.6848, + "step": 14912 + }, + { + "epoch": 0.7664199814986123, + "grad_norm": 1.0667227506637573, + "learning_rate": 1.3638510694803592e-06, + "loss": 0.665, + "step": 14913 + }, + { + "epoch": 0.766471374241957, + "grad_norm": 1.1058173179626465, + "learning_rate": 1.3632798655863044e-06, + "loss": 0.6682, + "step": 14914 + }, + { + "epoch": 0.7665227669853016, + "grad_norm": 1.1155399084091187, + "learning_rate": 1.3627087624507595e-06, + "loss": 0.7422, + "step": 14915 + }, + { + "epoch": 0.7665741597286463, + "grad_norm": 1.0278761386871338, + "learning_rate": 1.362137760089542e-06, + "loss": 0.6727, + "step": 14916 + }, + { + "epoch": 0.7666255524719909, + "grad_norm": 1.0731332302093506, + "learning_rate": 1.3615668585184755e-06, + "loss": 0.6743, + "step": 14917 + }, + { + "epoch": 0.7666769452153356, + "grad_norm": 1.1312066316604614, + "learning_rate": 1.3609960577533771e-06, + "loss": 0.6897, + "step": 14918 + }, + { + "epoch": 0.7667283379586802, + "grad_norm": 1.134623646736145, + "learning_rate": 1.36042535781006e-06, + "loss": 0.7043, + "step": 14919 + }, + { + "epoch": 0.7667797307020249, + "grad_norm": 0.6827690601348877, + "learning_rate": 1.3598547587043353e-06, + "loss": 0.669, + "step": 14920 + }, + { + "epoch": 0.7668311234453695, + "grad_norm": 0.7556540966033936, + "learning_rate": 1.359284260452015e-06, + "loss": 0.6947, + "step": 14921 + }, + { + "epoch": 0.7668825161887142, + "grad_norm": 1.1143728494644165, + "learning_rate": 1.3587138630689034e-06, + "loss": 0.695, + "step": 14922 + }, + { + "epoch": 0.7669339089320588, + "grad_norm": 1.123047113418579, + "learning_rate": 1.3581435665708026e-06, + "loss": 0.7115, + "step": 14923 + }, + { + "epoch": 0.7669853016754035, + "grad_norm": 1.070523977279663, + "learning_rate": 1.3575733709735173e-06, + "loss": 0.6743, + "step": 14924 + }, + { + "epoch": 0.7670366944187481, + "grad_norm": 1.0614688396453857, + "learning_rate": 1.3570032762928393e-06, + "loss": 0.7387, + "step": 14925 + }, + { + "epoch": 0.7670880871620928, + "grad_norm": 1.0980879068374634, + "learning_rate": 1.3564332825445686e-06, + "loss": 0.7061, + "step": 14926 + }, + { + "epoch": 0.7671394799054374, + "grad_norm": 1.0785927772521973, + "learning_rate": 1.355863389744494e-06, + "loss": 0.763, + "step": 14927 + }, + { + "epoch": 0.7671908726487819, + "grad_norm": 1.0811175107955933, + "learning_rate": 1.355293597908409e-06, + "loss": 0.6883, + "step": 14928 + }, + { + "epoch": 0.7672422653921266, + "grad_norm": 1.1359682083129883, + "learning_rate": 1.3547239070520952e-06, + "loss": 0.6881, + "step": 14929 + }, + { + "epoch": 0.7672936581354712, + "grad_norm": 1.1414706707000732, + "learning_rate": 1.3541543171913402e-06, + "loss": 0.7133, + "step": 14930 + }, + { + "epoch": 0.7673450508788159, + "grad_norm": 1.1339125633239746, + "learning_rate": 1.353584828341923e-06, + "loss": 0.6387, + "step": 14931 + }, + { + "epoch": 0.7673964436221605, + "grad_norm": 0.865020215511322, + "learning_rate": 1.3530154405196212e-06, + "loss": 0.6716, + "step": 14932 + }, + { + "epoch": 0.7674478363655052, + "grad_norm": 1.1197324991226196, + "learning_rate": 1.3524461537402128e-06, + "loss": 0.728, + "step": 14933 + }, + { + "epoch": 0.7674992291088498, + "grad_norm": 1.1189656257629395, + "learning_rate": 1.3518769680194689e-06, + "loss": 0.7117, + "step": 14934 + }, + { + "epoch": 0.7675506218521945, + "grad_norm": 1.1070607900619507, + "learning_rate": 1.351307883373159e-06, + "loss": 0.65, + "step": 14935 + }, + { + "epoch": 0.7676020145955391, + "grad_norm": 1.1507623195648193, + "learning_rate": 1.3507388998170495e-06, + "loss": 0.7271, + "step": 14936 + }, + { + "epoch": 0.7676534073388838, + "grad_norm": 0.7905425429344177, + "learning_rate": 1.350170017366908e-06, + "loss": 0.5978, + "step": 14937 + }, + { + "epoch": 0.7677048000822284, + "grad_norm": 1.0898300409317017, + "learning_rate": 1.3496012360384909e-06, + "loss": 0.647, + "step": 14938 + }, + { + "epoch": 0.7677561928255731, + "grad_norm": 1.1691676378250122, + "learning_rate": 1.3490325558475604e-06, + "loss": 0.6905, + "step": 14939 + }, + { + "epoch": 0.7678075855689177, + "grad_norm": 1.0773496627807617, + "learning_rate": 1.3484639768098707e-06, + "loss": 0.6641, + "step": 14940 + }, + { + "epoch": 0.7678589783122624, + "grad_norm": 0.9809443354606628, + "learning_rate": 1.3478954989411741e-06, + "loss": 0.6975, + "step": 14941 + }, + { + "epoch": 0.767910371055607, + "grad_norm": 1.1087486743927002, + "learning_rate": 1.347327122257223e-06, + "loss": 0.6884, + "step": 14942 + }, + { + "epoch": 0.7679617637989515, + "grad_norm": 1.082742691040039, + "learning_rate": 1.346758846773763e-06, + "loss": 0.6886, + "step": 14943 + }, + { + "epoch": 0.7680131565422962, + "grad_norm": 1.0822051763534546, + "learning_rate": 1.3461906725065415e-06, + "loss": 0.6829, + "step": 14944 + }, + { + "epoch": 0.7680645492856408, + "grad_norm": 1.1438727378845215, + "learning_rate": 1.3456225994712957e-06, + "loss": 0.7385, + "step": 14945 + }, + { + "epoch": 0.7681159420289855, + "grad_norm": 0.7072693109512329, + "learning_rate": 1.3450546276837683e-06, + "loss": 0.6002, + "step": 14946 + }, + { + "epoch": 0.7681673347723301, + "grad_norm": 1.0974787473678589, + "learning_rate": 1.3444867571596936e-06, + "loss": 0.6895, + "step": 14947 + }, + { + "epoch": 0.7682187275156748, + "grad_norm": 1.055829644203186, + "learning_rate": 1.3439189879148063e-06, + "loss": 0.6673, + "step": 14948 + }, + { + "epoch": 0.7682701202590194, + "grad_norm": 1.089518427848816, + "learning_rate": 1.343351319964834e-06, + "loss": 0.6743, + "step": 14949 + }, + { + "epoch": 0.7683215130023641, + "grad_norm": 1.088774561882019, + "learning_rate": 1.342783753325508e-06, + "loss": 0.6744, + "step": 14950 + }, + { + "epoch": 0.7683729057457087, + "grad_norm": 1.1432021856307983, + "learning_rate": 1.3422162880125516e-06, + "loss": 0.6767, + "step": 14951 + }, + { + "epoch": 0.7684242984890534, + "grad_norm": 1.0914415121078491, + "learning_rate": 1.3416489240416863e-06, + "loss": 0.6912, + "step": 14952 + }, + { + "epoch": 0.768475691232398, + "grad_norm": 1.0884901285171509, + "learning_rate": 1.3410816614286343e-06, + "loss": 0.6841, + "step": 14953 + }, + { + "epoch": 0.7685270839757427, + "grad_norm": 1.0588417053222656, + "learning_rate": 1.340514500189108e-06, + "loss": 0.6725, + "step": 14954 + }, + { + "epoch": 0.7685784767190873, + "grad_norm": 0.8193063139915466, + "learning_rate": 1.3399474403388236e-06, + "loss": 0.6368, + "step": 14955 + }, + { + "epoch": 0.768629869462432, + "grad_norm": 0.7181390523910522, + "learning_rate": 1.339380481893492e-06, + "loss": 0.6361, + "step": 14956 + }, + { + "epoch": 0.7686812622057766, + "grad_norm": 1.122700572013855, + "learning_rate": 1.3388136248688204e-06, + "loss": 0.6439, + "step": 14957 + }, + { + "epoch": 0.7687326549491211, + "grad_norm": 1.11155366897583, + "learning_rate": 1.3382468692805134e-06, + "loss": 0.7646, + "step": 14958 + }, + { + "epoch": 0.7687840476924658, + "grad_norm": 1.2224559783935547, + "learning_rate": 1.3376802151442758e-06, + "loss": 0.7152, + "step": 14959 + }, + { + "epoch": 0.7688354404358104, + "grad_norm": 1.1393802165985107, + "learning_rate": 1.3371136624758062e-06, + "loss": 0.7458, + "step": 14960 + }, + { + "epoch": 0.7688868331791551, + "grad_norm": 1.054969072341919, + "learning_rate": 1.3365472112907996e-06, + "loss": 0.7064, + "step": 14961 + }, + { + "epoch": 0.7689382259224997, + "grad_norm": 1.2036988735198975, + "learning_rate": 1.3359808616049523e-06, + "loss": 0.6983, + "step": 14962 + }, + { + "epoch": 0.7689896186658444, + "grad_norm": 0.832382321357727, + "learning_rate": 1.3354146134339556e-06, + "loss": 0.6447, + "step": 14963 + }, + { + "epoch": 0.769041011409189, + "grad_norm": 1.0457384586334229, + "learning_rate": 1.3348484667934968e-06, + "loss": 0.662, + "step": 14964 + }, + { + "epoch": 0.7690924041525337, + "grad_norm": 1.051928997039795, + "learning_rate": 1.3342824216992606e-06, + "loss": 0.6897, + "step": 14965 + }, + { + "epoch": 0.7691437968958783, + "grad_norm": 1.0934467315673828, + "learning_rate": 1.3337164781669338e-06, + "loss": 0.7029, + "step": 14966 + }, + { + "epoch": 0.769195189639223, + "grad_norm": 1.10590398311615, + "learning_rate": 1.3331506362121905e-06, + "loss": 0.6498, + "step": 14967 + }, + { + "epoch": 0.7692465823825676, + "grad_norm": 0.7254303693771362, + "learning_rate": 1.3325848958507126e-06, + "loss": 0.6183, + "step": 14968 + }, + { + "epoch": 0.7692979751259122, + "grad_norm": 1.1386992931365967, + "learning_rate": 1.3320192570981728e-06, + "loss": 0.7404, + "step": 14969 + }, + { + "epoch": 0.7693493678692569, + "grad_norm": 1.0649176836013794, + "learning_rate": 1.3314537199702422e-06, + "loss": 0.693, + "step": 14970 + }, + { + "epoch": 0.7694007606126015, + "grad_norm": 1.1090952157974243, + "learning_rate": 1.3308882844825882e-06, + "loss": 0.6947, + "step": 14971 + }, + { + "epoch": 0.7694521533559462, + "grad_norm": 1.0703777074813843, + "learning_rate": 1.3303229506508796e-06, + "loss": 0.7141, + "step": 14972 + }, + { + "epoch": 0.7695035460992907, + "grad_norm": 1.0385386943817139, + "learning_rate": 1.329757718490779e-06, + "loss": 0.7045, + "step": 14973 + }, + { + "epoch": 0.7695549388426354, + "grad_norm": 1.0577046871185303, + "learning_rate": 1.3291925880179435e-06, + "loss": 0.7178, + "step": 14974 + }, + { + "epoch": 0.76960633158598, + "grad_norm": 1.0998378992080688, + "learning_rate": 1.3286275592480347e-06, + "loss": 0.6835, + "step": 14975 + }, + { + "epoch": 0.7696577243293247, + "grad_norm": 1.1373803615570068, + "learning_rate": 1.3280626321967054e-06, + "loss": 0.6773, + "step": 14976 + }, + { + "epoch": 0.7697091170726693, + "grad_norm": 1.0475999116897583, + "learning_rate": 1.3274978068796074e-06, + "loss": 0.6601, + "step": 14977 + }, + { + "epoch": 0.769760509816014, + "grad_norm": 1.0746185779571533, + "learning_rate": 1.3269330833123883e-06, + "loss": 0.7003, + "step": 14978 + }, + { + "epoch": 0.7698119025593586, + "grad_norm": 1.1823216676712036, + "learning_rate": 1.3263684615106986e-06, + "loss": 0.7216, + "step": 14979 + }, + { + "epoch": 0.7698632953027033, + "grad_norm": 1.2271422147750854, + "learning_rate": 1.3258039414901753e-06, + "loss": 0.7208, + "step": 14980 + }, + { + "epoch": 0.7699146880460479, + "grad_norm": 0.740078330039978, + "learning_rate": 1.3252395232664638e-06, + "loss": 0.7025, + "step": 14981 + }, + { + "epoch": 0.7699660807893925, + "grad_norm": 1.1314533948898315, + "learning_rate": 1.3246752068552005e-06, + "loss": 0.6747, + "step": 14982 + }, + { + "epoch": 0.7700174735327372, + "grad_norm": 1.1109609603881836, + "learning_rate": 1.3241109922720185e-06, + "loss": 0.6648, + "step": 14983 + }, + { + "epoch": 0.7700688662760818, + "grad_norm": 0.7932596802711487, + "learning_rate": 1.323546879532553e-06, + "loss": 0.6468, + "step": 14984 + }, + { + "epoch": 0.7701202590194265, + "grad_norm": 1.5118470191955566, + "learning_rate": 1.3229828686524316e-06, + "loss": 0.7246, + "step": 14985 + }, + { + "epoch": 0.7701716517627711, + "grad_norm": 1.0029833316802979, + "learning_rate": 1.3224189596472802e-06, + "loss": 0.6867, + "step": 14986 + }, + { + "epoch": 0.7702230445061158, + "grad_norm": 1.1663767099380493, + "learning_rate": 1.3218551525327223e-06, + "loss": 0.6746, + "step": 14987 + }, + { + "epoch": 0.7702744372494604, + "grad_norm": 1.1489217281341553, + "learning_rate": 1.321291447324382e-06, + "loss": 0.73, + "step": 14988 + }, + { + "epoch": 0.770325829992805, + "grad_norm": 1.1388678550720215, + "learning_rate": 1.3207278440378712e-06, + "loss": 0.6422, + "step": 14989 + }, + { + "epoch": 0.7703772227361496, + "grad_norm": 1.083748459815979, + "learning_rate": 1.3201643426888105e-06, + "loss": 0.7003, + "step": 14990 + }, + { + "epoch": 0.7704286154794943, + "grad_norm": 1.0556535720825195, + "learning_rate": 1.319600943292808e-06, + "loss": 0.644, + "step": 14991 + }, + { + "epoch": 0.7704800082228389, + "grad_norm": 0.81584632396698, + "learning_rate": 1.319037645865477e-06, + "loss": 0.6414, + "step": 14992 + }, + { + "epoch": 0.7705314009661836, + "grad_norm": 1.117989420890808, + "learning_rate": 1.3184744504224223e-06, + "loss": 0.6837, + "step": 14993 + }, + { + "epoch": 0.7705827937095282, + "grad_norm": 1.0433285236358643, + "learning_rate": 1.3179113569792468e-06, + "loss": 0.6268, + "step": 14994 + }, + { + "epoch": 0.7706341864528728, + "grad_norm": 1.0543354749679565, + "learning_rate": 1.3173483655515551e-06, + "loss": 0.6874, + "step": 14995 + }, + { + "epoch": 0.7706855791962175, + "grad_norm": 0.692695140838623, + "learning_rate": 1.3167854761549398e-06, + "loss": 0.6265, + "step": 14996 + }, + { + "epoch": 0.7707369719395621, + "grad_norm": 1.086273431777954, + "learning_rate": 1.3162226888050006e-06, + "loss": 0.653, + "step": 14997 + }, + { + "epoch": 0.7707883646829068, + "grad_norm": 1.1172932386398315, + "learning_rate": 1.315660003517329e-06, + "loss": 0.719, + "step": 14998 + }, + { + "epoch": 0.7708397574262514, + "grad_norm": 1.1586240530014038, + "learning_rate": 1.315097420307514e-06, + "loss": 0.7082, + "step": 14999 + }, + { + "epoch": 0.7708911501695961, + "grad_norm": 1.1955419778823853, + "learning_rate": 1.3145349391911411e-06, + "loss": 0.7632, + "step": 15000 + }, + { + "epoch": 0.7709425429129407, + "grad_norm": 1.1116758584976196, + "learning_rate": 1.3139725601837983e-06, + "loss": 0.76, + "step": 15001 + }, + { + "epoch": 0.7709939356562854, + "grad_norm": 1.1061992645263672, + "learning_rate": 1.3134102833010636e-06, + "loss": 0.7492, + "step": 15002 + }, + { + "epoch": 0.77104532839963, + "grad_norm": 1.1486345529556274, + "learning_rate": 1.3128481085585155e-06, + "loss": 0.7697, + "step": 15003 + }, + { + "epoch": 0.7710967211429746, + "grad_norm": 1.0619031190872192, + "learning_rate": 1.3122860359717331e-06, + "loss": 0.6692, + "step": 15004 + }, + { + "epoch": 0.7711481138863192, + "grad_norm": 1.133262276649475, + "learning_rate": 1.311724065556283e-06, + "loss": 0.7028, + "step": 15005 + }, + { + "epoch": 0.7711995066296639, + "grad_norm": 1.0883941650390625, + "learning_rate": 1.3111621973277406e-06, + "loss": 0.633, + "step": 15006 + }, + { + "epoch": 0.7712508993730085, + "grad_norm": 1.0570839643478394, + "learning_rate": 1.310600431301669e-06, + "loss": 0.651, + "step": 15007 + }, + { + "epoch": 0.7713022921163531, + "grad_norm": 1.1378048658370972, + "learning_rate": 1.3100387674936371e-06, + "loss": 0.7082, + "step": 15008 + }, + { + "epoch": 0.7713536848596978, + "grad_norm": 1.053115963935852, + "learning_rate": 1.3094772059192e-06, + "loss": 0.6867, + "step": 15009 + }, + { + "epoch": 0.7714050776030424, + "grad_norm": 1.1144675016403198, + "learning_rate": 1.3089157465939217e-06, + "loss": 0.6988, + "step": 15010 + }, + { + "epoch": 0.7714564703463871, + "grad_norm": 0.7625012993812561, + "learning_rate": 1.3083543895333555e-06, + "loss": 0.6512, + "step": 15011 + }, + { + "epoch": 0.7715078630897317, + "grad_norm": 1.072308897972107, + "learning_rate": 1.3077931347530537e-06, + "loss": 0.6754, + "step": 15012 + }, + { + "epoch": 0.7715592558330764, + "grad_norm": 1.020975947380066, + "learning_rate": 1.307231982268568e-06, + "loss": 0.694, + "step": 15013 + }, + { + "epoch": 0.771610648576421, + "grad_norm": 1.1260422468185425, + "learning_rate": 1.3066709320954457e-06, + "loss": 0.706, + "step": 15014 + }, + { + "epoch": 0.7716620413197657, + "grad_norm": 1.1120591163635254, + "learning_rate": 1.3061099842492298e-06, + "loss": 0.6959, + "step": 15015 + }, + { + "epoch": 0.7717134340631103, + "grad_norm": 0.7297281622886658, + "learning_rate": 1.3055491387454616e-06, + "loss": 0.605, + "step": 15016 + }, + { + "epoch": 0.771764826806455, + "grad_norm": 1.133056402206421, + "learning_rate": 1.3049883955996827e-06, + "loss": 0.697, + "step": 15017 + }, + { + "epoch": 0.7718162195497996, + "grad_norm": 1.0469683408737183, + "learning_rate": 1.3044277548274248e-06, + "loss": 0.7059, + "step": 15018 + }, + { + "epoch": 0.7718676122931442, + "grad_norm": 1.0993491411209106, + "learning_rate": 1.3038672164442245e-06, + "loss": 0.6438, + "step": 15019 + }, + { + "epoch": 0.7719190050364888, + "grad_norm": 1.0702852010726929, + "learning_rate": 1.3033067804656103e-06, + "loss": 0.6761, + "step": 15020 + }, + { + "epoch": 0.7719703977798335, + "grad_norm": 1.047142505645752, + "learning_rate": 1.30274644690711e-06, + "loss": 0.6255, + "step": 15021 + }, + { + "epoch": 0.7720217905231781, + "grad_norm": 1.1507539749145508, + "learning_rate": 1.3021862157842463e-06, + "loss": 0.7017, + "step": 15022 + }, + { + "epoch": 0.7720731832665227, + "grad_norm": 1.118160605430603, + "learning_rate": 1.301626087112544e-06, + "loss": 0.7276, + "step": 15023 + }, + { + "epoch": 0.7721245760098674, + "grad_norm": 1.0919917821884155, + "learning_rate": 1.3010660609075209e-06, + "loss": 0.6976, + "step": 15024 + }, + { + "epoch": 0.772175968753212, + "grad_norm": 1.0616801977157593, + "learning_rate": 1.300506137184691e-06, + "loss": 0.6918, + "step": 15025 + }, + { + "epoch": 0.7722273614965567, + "grad_norm": 1.1338061094284058, + "learning_rate": 1.299946315959571e-06, + "loss": 0.7087, + "step": 15026 + }, + { + "epoch": 0.7722787542399013, + "grad_norm": 1.1329405307769775, + "learning_rate": 1.2993865972476688e-06, + "loss": 0.7991, + "step": 15027 + }, + { + "epoch": 0.772330146983246, + "grad_norm": 1.0974472761154175, + "learning_rate": 1.2988269810644928e-06, + "loss": 0.6758, + "step": 15028 + }, + { + "epoch": 0.7723815397265906, + "grad_norm": 1.0620429515838623, + "learning_rate": 1.2982674674255457e-06, + "loss": 0.6248, + "step": 15029 + }, + { + "epoch": 0.7724329324699353, + "grad_norm": 1.0933665037155151, + "learning_rate": 1.297708056346334e-06, + "loss": 0.7396, + "step": 15030 + }, + { + "epoch": 0.7724843252132799, + "grad_norm": 1.067371129989624, + "learning_rate": 1.2971487478423505e-06, + "loss": 0.73, + "step": 15031 + }, + { + "epoch": 0.7725357179566246, + "grad_norm": 1.1072922945022583, + "learning_rate": 1.296589541929096e-06, + "loss": 0.7467, + "step": 15032 + }, + { + "epoch": 0.7725871106999692, + "grad_norm": 1.0448694229125977, + "learning_rate": 1.2960304386220623e-06, + "loss": 0.6815, + "step": 15033 + }, + { + "epoch": 0.7726385034433138, + "grad_norm": 0.7934486865997314, + "learning_rate": 1.2954714379367377e-06, + "loss": 0.6184, + "step": 15034 + }, + { + "epoch": 0.7726898961866584, + "grad_norm": 1.0337358713150024, + "learning_rate": 1.2949125398886142e-06, + "loss": 0.6445, + "step": 15035 + }, + { + "epoch": 0.772741288930003, + "grad_norm": 1.1336174011230469, + "learning_rate": 1.294353744493172e-06, + "loss": 0.7281, + "step": 15036 + }, + { + "epoch": 0.7727926816733477, + "grad_norm": 1.1540257930755615, + "learning_rate": 1.2937950517658988e-06, + "loss": 0.7515, + "step": 15037 + }, + { + "epoch": 0.7728440744166923, + "grad_norm": 1.073453426361084, + "learning_rate": 1.293236461722267e-06, + "loss": 0.6647, + "step": 15038 + }, + { + "epoch": 0.772895467160037, + "grad_norm": 1.1061946153640747, + "learning_rate": 1.2926779743777573e-06, + "loss": 0.6697, + "step": 15039 + }, + { + "epoch": 0.7729468599033816, + "grad_norm": 1.3312920331954956, + "learning_rate": 1.2921195897478417e-06, + "loss": 0.7546, + "step": 15040 + }, + { + "epoch": 0.7729982526467263, + "grad_norm": 1.1164494752883911, + "learning_rate": 1.2915613078479905e-06, + "loss": 0.6776, + "step": 15041 + }, + { + "epoch": 0.7730496453900709, + "grad_norm": 1.0668320655822754, + "learning_rate": 1.2910031286936708e-06, + "loss": 0.708, + "step": 15042 + }, + { + "epoch": 0.7731010381334156, + "grad_norm": 1.1103875637054443, + "learning_rate": 1.2904450523003493e-06, + "loss": 0.6616, + "step": 15043 + }, + { + "epoch": 0.7731524308767602, + "grad_norm": 1.1029402017593384, + "learning_rate": 1.2898870786834872e-06, + "loss": 0.6941, + "step": 15044 + }, + { + "epoch": 0.7732038236201049, + "grad_norm": 1.1395201683044434, + "learning_rate": 1.2893292078585418e-06, + "loss": 0.6971, + "step": 15045 + }, + { + "epoch": 0.7732552163634495, + "grad_norm": 1.071264386177063, + "learning_rate": 1.288771439840974e-06, + "loss": 0.6728, + "step": 15046 + }, + { + "epoch": 0.7733066091067942, + "grad_norm": 1.0800262689590454, + "learning_rate": 1.2882137746462309e-06, + "loss": 0.6482, + "step": 15047 + }, + { + "epoch": 0.7733580018501388, + "grad_norm": 0.7044035196304321, + "learning_rate": 1.2876562122897684e-06, + "loss": 0.6256, + "step": 15048 + }, + { + "epoch": 0.7734093945934833, + "grad_norm": 1.07472825050354, + "learning_rate": 1.2870987527870326e-06, + "loss": 0.665, + "step": 15049 + }, + { + "epoch": 0.773460787336828, + "grad_norm": 0.7600728869438171, + "learning_rate": 1.2865413961534679e-06, + "loss": 0.6658, + "step": 15050 + }, + { + "epoch": 0.7735121800801726, + "grad_norm": 1.0378344058990479, + "learning_rate": 1.2859841424045145e-06, + "loss": 0.7015, + "step": 15051 + }, + { + "epoch": 0.7735635728235173, + "grad_norm": 1.1569468975067139, + "learning_rate": 1.2854269915556155e-06, + "loss": 0.7062, + "step": 15052 + }, + { + "epoch": 0.7736149655668619, + "grad_norm": 1.0347727537155151, + "learning_rate": 1.2848699436222057e-06, + "loss": 0.7039, + "step": 15053 + }, + { + "epoch": 0.7736663583102066, + "grad_norm": 1.0622179508209229, + "learning_rate": 1.2843129986197167e-06, + "loss": 0.715, + "step": 15054 + }, + { + "epoch": 0.7737177510535512, + "grad_norm": 1.0950325727462769, + "learning_rate": 1.2837561565635826e-06, + "loss": 0.6871, + "step": 15055 + }, + { + "epoch": 0.7737691437968959, + "grad_norm": 0.9359648823738098, + "learning_rate": 1.2831994174692287e-06, + "loss": 0.6615, + "step": 15056 + }, + { + "epoch": 0.7738205365402405, + "grad_norm": 1.0624192953109741, + "learning_rate": 1.2826427813520808e-06, + "loss": 0.7033, + "step": 15057 + }, + { + "epoch": 0.7738719292835852, + "grad_norm": 0.9694546461105347, + "learning_rate": 1.2820862482275597e-06, + "loss": 0.6468, + "step": 15058 + }, + { + "epoch": 0.7739233220269298, + "grad_norm": 1.0054559707641602, + "learning_rate": 1.2815298181110885e-06, + "loss": 0.6781, + "step": 15059 + }, + { + "epoch": 0.7739747147702745, + "grad_norm": 1.1061476469039917, + "learning_rate": 1.2809734910180776e-06, + "loss": 0.681, + "step": 15060 + }, + { + "epoch": 0.7740261075136191, + "grad_norm": 0.656936764717102, + "learning_rate": 1.280417266963946e-06, + "loss": 0.6281, + "step": 15061 + }, + { + "epoch": 0.7740775002569638, + "grad_norm": 1.0671902894973755, + "learning_rate": 1.2798611459641014e-06, + "loss": 0.6318, + "step": 15062 + }, + { + "epoch": 0.7741288930003084, + "grad_norm": 1.0009052753448486, + "learning_rate": 1.2793051280339507e-06, + "loss": 0.6545, + "step": 15063 + }, + { + "epoch": 0.774180285743653, + "grad_norm": 1.0898479223251343, + "learning_rate": 1.2787492131889024e-06, + "loss": 0.6765, + "step": 15064 + }, + { + "epoch": 0.7742316784869976, + "grad_norm": 1.1622505187988281, + "learning_rate": 1.2781934014443564e-06, + "loss": 0.7107, + "step": 15065 + }, + { + "epoch": 0.7742830712303422, + "grad_norm": 1.1035284996032715, + "learning_rate": 1.2776376928157124e-06, + "loss": 0.6869, + "step": 15066 + }, + { + "epoch": 0.7743344639736869, + "grad_norm": 1.0921597480773926, + "learning_rate": 1.2770820873183653e-06, + "loss": 0.6996, + "step": 15067 + }, + { + "epoch": 0.7743858567170315, + "grad_norm": 0.7903449535369873, + "learning_rate": 1.2765265849677117e-06, + "loss": 0.6826, + "step": 15068 + }, + { + "epoch": 0.7744372494603762, + "grad_norm": 1.0121012926101685, + "learning_rate": 1.2759711857791402e-06, + "loss": 0.6794, + "step": 15069 + }, + { + "epoch": 0.7744886422037208, + "grad_norm": 0.6857595443725586, + "learning_rate": 1.2754158897680391e-06, + "loss": 0.649, + "step": 15070 + }, + { + "epoch": 0.7745400349470655, + "grad_norm": 1.1309070587158203, + "learning_rate": 1.2748606969497918e-06, + "loss": 0.7093, + "step": 15071 + }, + { + "epoch": 0.7745914276904101, + "grad_norm": 1.2390469312667847, + "learning_rate": 1.2743056073397847e-06, + "loss": 0.6574, + "step": 15072 + }, + { + "epoch": 0.7746428204337548, + "grad_norm": 1.2025063037872314, + "learning_rate": 1.2737506209533913e-06, + "loss": 0.7497, + "step": 15073 + }, + { + "epoch": 0.7746942131770994, + "grad_norm": 1.122231125831604, + "learning_rate": 1.2731957378059923e-06, + "loss": 0.6793, + "step": 15074 + }, + { + "epoch": 0.7747456059204441, + "grad_norm": 1.0672181844711304, + "learning_rate": 1.2726409579129596e-06, + "loss": 0.689, + "step": 15075 + }, + { + "epoch": 0.7747969986637887, + "grad_norm": 1.0475307703018188, + "learning_rate": 1.2720862812896628e-06, + "loss": 0.6629, + "step": 15076 + }, + { + "epoch": 0.7748483914071334, + "grad_norm": 1.089813232421875, + "learning_rate": 1.2715317079514723e-06, + "loss": 0.7069, + "step": 15077 + }, + { + "epoch": 0.774899784150478, + "grad_norm": 1.0784388780593872, + "learning_rate": 1.270977237913752e-06, + "loss": 0.707, + "step": 15078 + }, + { + "epoch": 0.7749511768938226, + "grad_norm": 0.7371863722801208, + "learning_rate": 1.2704228711918631e-06, + "loss": 0.6438, + "step": 15079 + }, + { + "epoch": 0.7750025696371672, + "grad_norm": 1.062500238418579, + "learning_rate": 1.269868607801164e-06, + "loss": 0.7122, + "step": 15080 + }, + { + "epoch": 0.7750539623805118, + "grad_norm": 1.16629159450531, + "learning_rate": 1.2693144477570156e-06, + "loss": 0.7199, + "step": 15081 + }, + { + "epoch": 0.7751053551238565, + "grad_norm": 1.1163733005523682, + "learning_rate": 1.268760391074766e-06, + "loss": 0.6973, + "step": 15082 + }, + { + "epoch": 0.7751567478672011, + "grad_norm": 1.0068421363830566, + "learning_rate": 1.2682064377697684e-06, + "loss": 0.6833, + "step": 15083 + }, + { + "epoch": 0.7752081406105458, + "grad_norm": 1.196900486946106, + "learning_rate": 1.2676525878573693e-06, + "loss": 0.7338, + "step": 15084 + }, + { + "epoch": 0.7752595333538904, + "grad_norm": 1.1061978340148926, + "learning_rate": 1.2670988413529157e-06, + "loss": 0.7436, + "step": 15085 + }, + { + "epoch": 0.7753109260972351, + "grad_norm": 1.0913710594177246, + "learning_rate": 1.2665451982717486e-06, + "loss": 0.7292, + "step": 15086 + }, + { + "epoch": 0.7753623188405797, + "grad_norm": 1.090458631515503, + "learning_rate": 1.2659916586292054e-06, + "loss": 0.6974, + "step": 15087 + }, + { + "epoch": 0.7754137115839244, + "grad_norm": 1.120648980140686, + "learning_rate": 1.265438222440627e-06, + "loss": 0.7051, + "step": 15088 + }, + { + "epoch": 0.775465104327269, + "grad_norm": 1.091753363609314, + "learning_rate": 1.2648848897213412e-06, + "loss": 0.6921, + "step": 15089 + }, + { + "epoch": 0.7755164970706137, + "grad_norm": 1.0596282482147217, + "learning_rate": 1.2643316604866822e-06, + "loss": 0.6449, + "step": 15090 + }, + { + "epoch": 0.7755678898139583, + "grad_norm": 1.0974825620651245, + "learning_rate": 1.2637785347519771e-06, + "loss": 0.6889, + "step": 15091 + }, + { + "epoch": 0.775619282557303, + "grad_norm": 1.1222844123840332, + "learning_rate": 1.26322551253255e-06, + "loss": 0.7257, + "step": 15092 + }, + { + "epoch": 0.7756706753006476, + "grad_norm": 1.111581802368164, + "learning_rate": 1.2626725938437217e-06, + "loss": 0.7162, + "step": 15093 + }, + { + "epoch": 0.7757220680439922, + "grad_norm": 1.0950855016708374, + "learning_rate": 1.2621197787008144e-06, + "loss": 0.6725, + "step": 15094 + }, + { + "epoch": 0.7757734607873368, + "grad_norm": 1.0455998182296753, + "learning_rate": 1.2615670671191426e-06, + "loss": 0.6864, + "step": 15095 + }, + { + "epoch": 0.7758248535306814, + "grad_norm": 1.1095768213272095, + "learning_rate": 1.2610144591140188e-06, + "loss": 0.6393, + "step": 15096 + }, + { + "epoch": 0.7758762462740261, + "grad_norm": 1.0568294525146484, + "learning_rate": 1.2604619547007568e-06, + "loss": 0.6678, + "step": 15097 + }, + { + "epoch": 0.7759276390173707, + "grad_norm": 1.0725914239883423, + "learning_rate": 1.2599095538946593e-06, + "loss": 0.6584, + "step": 15098 + }, + { + "epoch": 0.7759790317607154, + "grad_norm": 1.1389981508255005, + "learning_rate": 1.2593572567110345e-06, + "loss": 0.7026, + "step": 15099 + }, + { + "epoch": 0.77603042450406, + "grad_norm": 1.1912168264389038, + "learning_rate": 1.2588050631651826e-06, + "loss": 0.7231, + "step": 15100 + }, + { + "epoch": 0.7760818172474047, + "grad_norm": 1.0318366289138794, + "learning_rate": 1.2582529732724063e-06, + "loss": 0.6063, + "step": 15101 + }, + { + "epoch": 0.7761332099907493, + "grad_norm": 1.0504744052886963, + "learning_rate": 1.2577009870479956e-06, + "loss": 0.6213, + "step": 15102 + }, + { + "epoch": 0.776184602734094, + "grad_norm": 1.0184589624404907, + "learning_rate": 1.2571491045072487e-06, + "loss": 0.7065, + "step": 15103 + }, + { + "epoch": 0.7762359954774386, + "grad_norm": 0.8171871900558472, + "learning_rate": 1.256597325665454e-06, + "loss": 0.6558, + "step": 15104 + }, + { + "epoch": 0.7762873882207832, + "grad_norm": 1.0988686084747314, + "learning_rate": 1.2560456505378976e-06, + "loss": 0.717, + "step": 15105 + }, + { + "epoch": 0.7763387809641279, + "grad_norm": 1.0973354578018188, + "learning_rate": 1.255494079139868e-06, + "loss": 0.6966, + "step": 15106 + }, + { + "epoch": 0.7763901737074725, + "grad_norm": 1.0841270685195923, + "learning_rate": 1.2549426114866443e-06, + "loss": 0.7029, + "step": 15107 + }, + { + "epoch": 0.7764415664508172, + "grad_norm": 1.041652798652649, + "learning_rate": 1.2543912475935062e-06, + "loss": 0.6904, + "step": 15108 + }, + { + "epoch": 0.7764929591941618, + "grad_norm": 1.1016466617584229, + "learning_rate": 1.2538399874757279e-06, + "loss": 0.642, + "step": 15109 + }, + { + "epoch": 0.7765443519375064, + "grad_norm": 1.1137545108795166, + "learning_rate": 1.253288831148587e-06, + "loss": 0.6431, + "step": 15110 + }, + { + "epoch": 0.776595744680851, + "grad_norm": 1.0999317169189453, + "learning_rate": 1.2527377786273477e-06, + "loss": 0.6882, + "step": 15111 + }, + { + "epoch": 0.7766471374241957, + "grad_norm": 1.1158596277236938, + "learning_rate": 1.2521868299272815e-06, + "loss": 0.7171, + "step": 15112 + }, + { + "epoch": 0.7766985301675403, + "grad_norm": 1.0803213119506836, + "learning_rate": 1.2516359850636528e-06, + "loss": 0.6983, + "step": 15113 + }, + { + "epoch": 0.776749922910885, + "grad_norm": 1.068266749382019, + "learning_rate": 1.2510852440517207e-06, + "loss": 0.7012, + "step": 15114 + }, + { + "epoch": 0.7768013156542296, + "grad_norm": 1.1030173301696777, + "learning_rate": 1.250534606906747e-06, + "loss": 0.6707, + "step": 15115 + }, + { + "epoch": 0.7768527083975743, + "grad_norm": 1.0002933740615845, + "learning_rate": 1.249984073643986e-06, + "loss": 0.6658, + "step": 15116 + }, + { + "epoch": 0.7769041011409189, + "grad_norm": 1.0675413608551025, + "learning_rate": 1.2494336442786913e-06, + "loss": 0.7577, + "step": 15117 + }, + { + "epoch": 0.7769554938842635, + "grad_norm": 1.1166417598724365, + "learning_rate": 1.2488833188261107e-06, + "loss": 0.7587, + "step": 15118 + }, + { + "epoch": 0.7770068866276082, + "grad_norm": 1.1370668411254883, + "learning_rate": 1.248333097301495e-06, + "loss": 0.7457, + "step": 15119 + }, + { + "epoch": 0.7770582793709528, + "grad_norm": 1.0643632411956787, + "learning_rate": 1.2477829797200874e-06, + "loss": 0.7122, + "step": 15120 + }, + { + "epoch": 0.7771096721142975, + "grad_norm": 0.7214568853378296, + "learning_rate": 1.2472329660971289e-06, + "loss": 0.6655, + "step": 15121 + }, + { + "epoch": 0.7771610648576421, + "grad_norm": 1.1130198240280151, + "learning_rate": 1.246683056447856e-06, + "loss": 0.6996, + "step": 15122 + }, + { + "epoch": 0.7772124576009868, + "grad_norm": 0.9522799849510193, + "learning_rate": 1.2461332507875085e-06, + "loss": 0.6574, + "step": 15123 + }, + { + "epoch": 0.7772638503443314, + "grad_norm": 1.0614334344863892, + "learning_rate": 1.2455835491313173e-06, + "loss": 0.7139, + "step": 15124 + }, + { + "epoch": 0.777315243087676, + "grad_norm": 1.0643733739852905, + "learning_rate": 1.2450339514945126e-06, + "loss": 0.6592, + "step": 15125 + }, + { + "epoch": 0.7773666358310206, + "grad_norm": 1.0488003492355347, + "learning_rate": 1.244484457892321e-06, + "loss": 0.6923, + "step": 15126 + }, + { + "epoch": 0.7774180285743653, + "grad_norm": 0.7421144843101501, + "learning_rate": 1.243935068339966e-06, + "loss": 0.6331, + "step": 15127 + }, + { + "epoch": 0.7774694213177099, + "grad_norm": 1.0389087200164795, + "learning_rate": 1.2433857828526707e-06, + "loss": 0.6883, + "step": 15128 + }, + { + "epoch": 0.7775208140610546, + "grad_norm": 0.9748851656913757, + "learning_rate": 1.2428366014456527e-06, + "loss": 0.6395, + "step": 15129 + }, + { + "epoch": 0.7775722068043992, + "grad_norm": 1.0269935131072998, + "learning_rate": 1.242287524134128e-06, + "loss": 0.6585, + "step": 15130 + }, + { + "epoch": 0.7776235995477438, + "grad_norm": 1.0559674501419067, + "learning_rate": 1.2417385509333075e-06, + "loss": 0.7055, + "step": 15131 + }, + { + "epoch": 0.7776749922910885, + "grad_norm": 0.7085198163986206, + "learning_rate": 1.2411896818584035e-06, + "loss": 0.6683, + "step": 15132 + }, + { + "epoch": 0.7777263850344331, + "grad_norm": 0.6997009515762329, + "learning_rate": 1.2406409169246214e-06, + "loss": 0.6628, + "step": 15133 + }, + { + "epoch": 0.7777777777777778, + "grad_norm": 1.1082754135131836, + "learning_rate": 1.2400922561471663e-06, + "loss": 0.6936, + "step": 15134 + }, + { + "epoch": 0.7778291705211224, + "grad_norm": 0.7113308906555176, + "learning_rate": 1.2395436995412362e-06, + "loss": 0.622, + "step": 15135 + }, + { + "epoch": 0.7778805632644671, + "grad_norm": 1.0882643461227417, + "learning_rate": 1.238995247122034e-06, + "loss": 0.6737, + "step": 15136 + }, + { + "epoch": 0.7779319560078117, + "grad_norm": 0.8400201201438904, + "learning_rate": 1.238446898904752e-06, + "loss": 0.6753, + "step": 15137 + }, + { + "epoch": 0.7779833487511564, + "grad_norm": 1.024397373199463, + "learning_rate": 1.2378986549045823e-06, + "loss": 0.6465, + "step": 15138 + }, + { + "epoch": 0.778034741494501, + "grad_norm": 1.0820677280426025, + "learning_rate": 1.2373505151367187e-06, + "loss": 0.6479, + "step": 15139 + }, + { + "epoch": 0.7780861342378456, + "grad_norm": 1.1748380661010742, + "learning_rate": 1.236802479616342e-06, + "loss": 0.6763, + "step": 15140 + }, + { + "epoch": 0.7781375269811902, + "grad_norm": 0.7659570574760437, + "learning_rate": 1.2362545483586397e-06, + "loss": 0.6296, + "step": 15141 + }, + { + "epoch": 0.7781889197245349, + "grad_norm": 1.075256109237671, + "learning_rate": 1.235706721378792e-06, + "loss": 0.6817, + "step": 15142 + }, + { + "epoch": 0.7782403124678795, + "grad_norm": 1.0412192344665527, + "learning_rate": 1.2351589986919772e-06, + "loss": 0.7378, + "step": 15143 + }, + { + "epoch": 0.7782917052112241, + "grad_norm": 0.7334829568862915, + "learning_rate": 1.234611380313368e-06, + "loss": 0.624, + "step": 15144 + }, + { + "epoch": 0.7783430979545688, + "grad_norm": 1.034638524055481, + "learning_rate": 1.23406386625814e-06, + "loss": 0.656, + "step": 15145 + }, + { + "epoch": 0.7783944906979134, + "grad_norm": 1.365031361579895, + "learning_rate": 1.2335164565414615e-06, + "loss": 0.6989, + "step": 15146 + }, + { + "epoch": 0.7784458834412581, + "grad_norm": 0.7359990477561951, + "learning_rate": 1.2329691511784968e-06, + "loss": 0.5912, + "step": 15147 + }, + { + "epoch": 0.7784972761846027, + "grad_norm": 1.104994297027588, + "learning_rate": 1.232421950184413e-06, + "loss": 0.7589, + "step": 15148 + }, + { + "epoch": 0.7785486689279474, + "grad_norm": 1.056930661201477, + "learning_rate": 1.2318748535743692e-06, + "loss": 0.7097, + "step": 15149 + }, + { + "epoch": 0.778600061671292, + "grad_norm": 0.7590477466583252, + "learning_rate": 1.2313278613635227e-06, + "loss": 0.6598, + "step": 15150 + }, + { + "epoch": 0.7786514544146367, + "grad_norm": 1.1030726432800293, + "learning_rate": 1.2307809735670278e-06, + "loss": 0.6233, + "step": 15151 + }, + { + "epoch": 0.7787028471579813, + "grad_norm": 1.1402710676193237, + "learning_rate": 1.2302341902000404e-06, + "loss": 0.664, + "step": 15152 + }, + { + "epoch": 0.778754239901326, + "grad_norm": 1.0985804796218872, + "learning_rate": 1.229687511277703e-06, + "loss": 0.6254, + "step": 15153 + }, + { + "epoch": 0.7788056326446706, + "grad_norm": 1.1713682413101196, + "learning_rate": 1.2291409368151674e-06, + "loss": 0.6847, + "step": 15154 + }, + { + "epoch": 0.7788570253880153, + "grad_norm": 1.0770275592803955, + "learning_rate": 1.2285944668275751e-06, + "loss": 0.7433, + "step": 15155 + }, + { + "epoch": 0.7789084181313598, + "grad_norm": 1.106186032295227, + "learning_rate": 1.2280481013300655e-06, + "loss": 0.7381, + "step": 15156 + }, + { + "epoch": 0.7789598108747045, + "grad_norm": 1.0342353582382202, + "learning_rate": 1.2275018403377775e-06, + "loss": 0.6545, + "step": 15157 + }, + { + "epoch": 0.7790112036180491, + "grad_norm": 0.7113685011863708, + "learning_rate": 1.226955683865846e-06, + "loss": 0.6543, + "step": 15158 + }, + { + "epoch": 0.7790625963613937, + "grad_norm": 1.1921164989471436, + "learning_rate": 1.2264096319294022e-06, + "loss": 0.669, + "step": 15159 + }, + { + "epoch": 0.7791139891047384, + "grad_norm": 1.063372015953064, + "learning_rate": 1.2258636845435728e-06, + "loss": 0.7078, + "step": 15160 + }, + { + "epoch": 0.779165381848083, + "grad_norm": 1.0098252296447754, + "learning_rate": 1.2253178417234872e-06, + "loss": 0.6882, + "step": 15161 + }, + { + "epoch": 0.7792167745914277, + "grad_norm": 0.8311984539031982, + "learning_rate": 1.2247721034842674e-06, + "loss": 0.6027, + "step": 15162 + }, + { + "epoch": 0.7792681673347723, + "grad_norm": 1.0969524383544922, + "learning_rate": 1.2242264698410333e-06, + "loss": 0.6666, + "step": 15163 + }, + { + "epoch": 0.779319560078117, + "grad_norm": 1.117874264717102, + "learning_rate": 1.2236809408089e-06, + "loss": 0.6585, + "step": 15164 + }, + { + "epoch": 0.7793709528214616, + "grad_norm": 1.074995994567871, + "learning_rate": 1.2231355164029862e-06, + "loss": 0.6696, + "step": 15165 + }, + { + "epoch": 0.7794223455648063, + "grad_norm": 1.0222442150115967, + "learning_rate": 1.2225901966384002e-06, + "loss": 0.7345, + "step": 15166 + }, + { + "epoch": 0.7794737383081509, + "grad_norm": 1.0814427137374878, + "learning_rate": 1.222044981530252e-06, + "loss": 0.6676, + "step": 15167 + }, + { + "epoch": 0.7795251310514956, + "grad_norm": 1.0580130815505981, + "learning_rate": 1.221499871093646e-06, + "loss": 0.6562, + "step": 15168 + }, + { + "epoch": 0.7795765237948402, + "grad_norm": 1.1052751541137695, + "learning_rate": 1.2209548653436848e-06, + "loss": 0.7139, + "step": 15169 + }, + { + "epoch": 0.7796279165381849, + "grad_norm": 1.0552520751953125, + "learning_rate": 1.2204099642954702e-06, + "loss": 0.6606, + "step": 15170 + }, + { + "epoch": 0.7796793092815294, + "grad_norm": 1.0600188970565796, + "learning_rate": 1.2198651679640983e-06, + "loss": 0.6948, + "step": 15171 + }, + { + "epoch": 0.779730702024874, + "grad_norm": 0.8103666305541992, + "learning_rate": 1.2193204763646626e-06, + "loss": 0.6701, + "step": 15172 + }, + { + "epoch": 0.7797820947682187, + "grad_norm": 1.1309090852737427, + "learning_rate": 1.218775889512253e-06, + "loss": 0.6662, + "step": 15173 + }, + { + "epoch": 0.7798334875115633, + "grad_norm": 1.0502512454986572, + "learning_rate": 1.2182314074219615e-06, + "loss": 0.6774, + "step": 15174 + }, + { + "epoch": 0.779884880254908, + "grad_norm": 0.7737354636192322, + "learning_rate": 1.2176870301088706e-06, + "loss": 0.6206, + "step": 15175 + }, + { + "epoch": 0.7799362729982526, + "grad_norm": 1.1204779148101807, + "learning_rate": 1.2171427575880634e-06, + "loss": 0.7456, + "step": 15176 + }, + { + "epoch": 0.7799876657415973, + "grad_norm": 1.0258342027664185, + "learning_rate": 1.216598589874618e-06, + "loss": 0.6855, + "step": 15177 + }, + { + "epoch": 0.7800390584849419, + "grad_norm": 1.162435531616211, + "learning_rate": 1.2160545269836144e-06, + "loss": 0.7076, + "step": 15178 + }, + { + "epoch": 0.7800904512282866, + "grad_norm": 0.7873539328575134, + "learning_rate": 1.215510568930124e-06, + "loss": 0.6741, + "step": 15179 + }, + { + "epoch": 0.7801418439716312, + "grad_norm": 1.03671133518219, + "learning_rate": 1.2149667157292167e-06, + "loss": 0.6566, + "step": 15180 + }, + { + "epoch": 0.7801932367149759, + "grad_norm": 1.0335502624511719, + "learning_rate": 1.2144229673959652e-06, + "loss": 0.6418, + "step": 15181 + }, + { + "epoch": 0.7802446294583205, + "grad_norm": 1.0655313730239868, + "learning_rate": 1.2138793239454277e-06, + "loss": 0.7284, + "step": 15182 + }, + { + "epoch": 0.7802960222016652, + "grad_norm": 1.1996794939041138, + "learning_rate": 1.213335785392671e-06, + "loss": 0.7062, + "step": 15183 + }, + { + "epoch": 0.7803474149450098, + "grad_norm": 0.7722945809364319, + "learning_rate": 1.2127923517527535e-06, + "loss": 0.6413, + "step": 15184 + }, + { + "epoch": 0.7803988076883545, + "grad_norm": 1.2419521808624268, + "learning_rate": 1.2122490230407307e-06, + "loss": 0.7201, + "step": 15185 + }, + { + "epoch": 0.780450200431699, + "grad_norm": 1.0601229667663574, + "learning_rate": 1.2117057992716553e-06, + "loss": 0.6556, + "step": 15186 + }, + { + "epoch": 0.7805015931750436, + "grad_norm": 0.6914064884185791, + "learning_rate": 1.2111626804605798e-06, + "loss": 0.6338, + "step": 15187 + }, + { + "epoch": 0.7805529859183883, + "grad_norm": 1.099282145500183, + "learning_rate": 1.210619666622551e-06, + "loss": 0.6962, + "step": 15188 + }, + { + "epoch": 0.7806043786617329, + "grad_norm": 1.1018799543380737, + "learning_rate": 1.210076757772612e-06, + "loss": 0.7409, + "step": 15189 + }, + { + "epoch": 0.7806557714050776, + "grad_norm": 1.0707240104675293, + "learning_rate": 1.2095339539258088e-06, + "loss": 0.6935, + "step": 15190 + }, + { + "epoch": 0.7807071641484222, + "grad_norm": 1.0771310329437256, + "learning_rate": 1.208991255097175e-06, + "loss": 0.6624, + "step": 15191 + }, + { + "epoch": 0.7807585568917669, + "grad_norm": 1.1325247287750244, + "learning_rate": 1.20844866130175e-06, + "loss": 0.7262, + "step": 15192 + }, + { + "epoch": 0.7808099496351115, + "grad_norm": 1.0927236080169678, + "learning_rate": 1.2079061725545638e-06, + "loss": 0.7139, + "step": 15193 + }, + { + "epoch": 0.7808613423784562, + "grad_norm": 1.1666457653045654, + "learning_rate": 1.2073637888706518e-06, + "loss": 0.6611, + "step": 15194 + }, + { + "epoch": 0.7809127351218008, + "grad_norm": 1.1042253971099854, + "learning_rate": 1.2068215102650355e-06, + "loss": 0.6723, + "step": 15195 + }, + { + "epoch": 0.7809641278651455, + "grad_norm": 0.7333838939666748, + "learning_rate": 1.2062793367527425e-06, + "loss": 0.6692, + "step": 15196 + }, + { + "epoch": 0.7810155206084901, + "grad_norm": 0.9074286818504333, + "learning_rate": 1.2057372683487938e-06, + "loss": 0.6638, + "step": 15197 + }, + { + "epoch": 0.7810669133518348, + "grad_norm": 1.0616259574890137, + "learning_rate": 1.2051953050682058e-06, + "loss": 0.7319, + "step": 15198 + }, + { + "epoch": 0.7811183060951794, + "grad_norm": 1.1737288236618042, + "learning_rate": 1.204653446925997e-06, + "loss": 0.7286, + "step": 15199 + }, + { + "epoch": 0.781169698838524, + "grad_norm": 1.0466140508651733, + "learning_rate": 1.2041116939371782e-06, + "loss": 0.6698, + "step": 15200 + }, + { + "epoch": 0.7812210915818686, + "grad_norm": 1.0597411394119263, + "learning_rate": 1.2035700461167605e-06, + "loss": 0.6609, + "step": 15201 + }, + { + "epoch": 0.7812724843252132, + "grad_norm": 1.098227858543396, + "learning_rate": 1.203028503479748e-06, + "loss": 0.7091, + "step": 15202 + }, + { + "epoch": 0.7813238770685579, + "grad_norm": 1.0967403650283813, + "learning_rate": 1.2024870660411497e-06, + "loss": 0.7121, + "step": 15203 + }, + { + "epoch": 0.7813752698119025, + "grad_norm": 0.6840171217918396, + "learning_rate": 1.20194573381596e-06, + "loss": 0.6192, + "step": 15204 + }, + { + "epoch": 0.7814266625552472, + "grad_norm": 1.0861412286758423, + "learning_rate": 1.2014045068191822e-06, + "loss": 0.7349, + "step": 15205 + }, + { + "epoch": 0.7814780552985918, + "grad_norm": 1.1589401960372925, + "learning_rate": 1.2008633850658097e-06, + "loss": 0.6742, + "step": 15206 + }, + { + "epoch": 0.7815294480419365, + "grad_norm": 1.1869323253631592, + "learning_rate": 1.200322368570833e-06, + "loss": 0.7438, + "step": 15207 + }, + { + "epoch": 0.7815808407852811, + "grad_norm": 1.146286129951477, + "learning_rate": 1.1997814573492445e-06, + "loss": 0.6719, + "step": 15208 + }, + { + "epoch": 0.7816322335286258, + "grad_norm": 1.1644409894943237, + "learning_rate": 1.1992406514160281e-06, + "loss": 0.711, + "step": 15209 + }, + { + "epoch": 0.7816836262719704, + "grad_norm": 1.0913883447647095, + "learning_rate": 1.1986999507861714e-06, + "loss": 0.6735, + "step": 15210 + }, + { + "epoch": 0.7817350190153151, + "grad_norm": 1.1090627908706665, + "learning_rate": 1.1981593554746485e-06, + "loss": 0.6973, + "step": 15211 + }, + { + "epoch": 0.7817864117586597, + "grad_norm": 0.744002103805542, + "learning_rate": 1.1976188654964427e-06, + "loss": 0.6588, + "step": 15212 + }, + { + "epoch": 0.7818378045020044, + "grad_norm": 1.063124418258667, + "learning_rate": 1.1970784808665264e-06, + "loss": 0.7322, + "step": 15213 + }, + { + "epoch": 0.781889197245349, + "grad_norm": 0.7242627143859863, + "learning_rate": 1.1965382015998716e-06, + "loss": 0.6701, + "step": 15214 + }, + { + "epoch": 0.7819405899886936, + "grad_norm": 1.0454884767532349, + "learning_rate": 1.1959980277114458e-06, + "loss": 0.6623, + "step": 15215 + }, + { + "epoch": 0.7819919827320382, + "grad_norm": 1.0639580488204956, + "learning_rate": 1.195457959216218e-06, + "loss": 0.7034, + "step": 15216 + }, + { + "epoch": 0.7820433754753828, + "grad_norm": 1.209542989730835, + "learning_rate": 1.1949179961291497e-06, + "loss": 0.7425, + "step": 15217 + }, + { + "epoch": 0.7820947682187275, + "grad_norm": 1.1384985446929932, + "learning_rate": 1.194378138465201e-06, + "loss": 0.6406, + "step": 15218 + }, + { + "epoch": 0.7821461609620721, + "grad_norm": 1.1262634992599487, + "learning_rate": 1.1938383862393294e-06, + "loss": 0.6952, + "step": 15219 + }, + { + "epoch": 0.7821975537054168, + "grad_norm": 1.0201456546783447, + "learning_rate": 1.1932987394664874e-06, + "loss": 0.6561, + "step": 15220 + }, + { + "epoch": 0.7822489464487614, + "grad_norm": 1.0551997423171997, + "learning_rate": 1.1927591981616294e-06, + "loss": 0.7186, + "step": 15221 + }, + { + "epoch": 0.7823003391921061, + "grad_norm": 1.1329519748687744, + "learning_rate": 1.1922197623397025e-06, + "loss": 0.7296, + "step": 15222 + }, + { + "epoch": 0.7823517319354507, + "grad_norm": 1.069242000579834, + "learning_rate": 1.1916804320156522e-06, + "loss": 0.7311, + "step": 15223 + }, + { + "epoch": 0.7824031246787954, + "grad_norm": 1.2301722764968872, + "learning_rate": 1.1911412072044198e-06, + "loss": 0.6621, + "step": 15224 + }, + { + "epoch": 0.78245451742214, + "grad_norm": 1.0648915767669678, + "learning_rate": 1.190602087920948e-06, + "loss": 0.7255, + "step": 15225 + }, + { + "epoch": 0.7825059101654847, + "grad_norm": 1.0701582431793213, + "learning_rate": 1.1900630741801717e-06, + "loss": 0.7313, + "step": 15226 + }, + { + "epoch": 0.7825573029088293, + "grad_norm": 1.0615111589431763, + "learning_rate": 1.1895241659970248e-06, + "loss": 0.6473, + "step": 15227 + }, + { + "epoch": 0.782608695652174, + "grad_norm": 1.0918419361114502, + "learning_rate": 1.1889853633864367e-06, + "loss": 0.6589, + "step": 15228 + }, + { + "epoch": 0.7826600883955186, + "grad_norm": 1.0165928602218628, + "learning_rate": 1.1884466663633388e-06, + "loss": 0.6802, + "step": 15229 + }, + { + "epoch": 0.7827114811388632, + "grad_norm": 1.063353419303894, + "learning_rate": 1.1879080749426542e-06, + "loss": 0.7442, + "step": 15230 + }, + { + "epoch": 0.7827628738822078, + "grad_norm": 1.0520106554031372, + "learning_rate": 1.1873695891393045e-06, + "loss": 0.6641, + "step": 15231 + }, + { + "epoch": 0.7828142666255524, + "grad_norm": 1.113895297050476, + "learning_rate": 1.1868312089682115e-06, + "loss": 0.7177, + "step": 15232 + }, + { + "epoch": 0.7828656593688971, + "grad_norm": 1.0736457109451294, + "learning_rate": 1.1862929344442876e-06, + "loss": 0.6614, + "step": 15233 + }, + { + "epoch": 0.7829170521122417, + "grad_norm": 1.0752116441726685, + "learning_rate": 1.1857547655824498e-06, + "loss": 0.6932, + "step": 15234 + }, + { + "epoch": 0.7829684448555864, + "grad_norm": 1.212699294090271, + "learning_rate": 1.1852167023976063e-06, + "loss": 0.7333, + "step": 15235 + }, + { + "epoch": 0.783019837598931, + "grad_norm": 1.0576047897338867, + "learning_rate": 1.1846787449046653e-06, + "loss": 0.6776, + "step": 15236 + }, + { + "epoch": 0.7830712303422757, + "grad_norm": 0.7595146298408508, + "learning_rate": 1.1841408931185304e-06, + "loss": 0.6211, + "step": 15237 + }, + { + "epoch": 0.7831226230856203, + "grad_norm": 1.0573607683181763, + "learning_rate": 1.183603147054106e-06, + "loss": 0.6799, + "step": 15238 + }, + { + "epoch": 0.783174015828965, + "grad_norm": 0.7038010954856873, + "learning_rate": 1.1830655067262886e-06, + "loss": 0.6533, + "step": 15239 + }, + { + "epoch": 0.7832254085723096, + "grad_norm": 1.1273553371429443, + "learning_rate": 1.1825279721499726e-06, + "loss": 0.7197, + "step": 15240 + }, + { + "epoch": 0.7832768013156542, + "grad_norm": 1.1088274717330933, + "learning_rate": 1.1819905433400547e-06, + "loss": 0.6839, + "step": 15241 + }, + { + "epoch": 0.7833281940589989, + "grad_norm": 1.1247124671936035, + "learning_rate": 1.1814532203114226e-06, + "loss": 0.7254, + "step": 15242 + }, + { + "epoch": 0.7833795868023435, + "grad_norm": 1.101311445236206, + "learning_rate": 1.1809160030789641e-06, + "loss": 0.6767, + "step": 15243 + }, + { + "epoch": 0.7834309795456882, + "grad_norm": 1.079833984375, + "learning_rate": 1.1803788916575603e-06, + "loss": 0.7373, + "step": 15244 + }, + { + "epoch": 0.7834823722890328, + "grad_norm": 1.0802195072174072, + "learning_rate": 1.1798418860620985e-06, + "loss": 0.7036, + "step": 15245 + }, + { + "epoch": 0.7835337650323775, + "grad_norm": 1.0954346656799316, + "learning_rate": 1.1793049863074503e-06, + "loss": 0.7337, + "step": 15246 + }, + { + "epoch": 0.783585157775722, + "grad_norm": 1.1220605373382568, + "learning_rate": 1.1787681924084948e-06, + "loss": 0.76, + "step": 15247 + }, + { + "epoch": 0.7836365505190667, + "grad_norm": 1.0888350009918213, + "learning_rate": 1.1782315043801034e-06, + "loss": 0.685, + "step": 15248 + }, + { + "epoch": 0.7836879432624113, + "grad_norm": 1.0027142763137817, + "learning_rate": 1.1776949222371442e-06, + "loss": 0.617, + "step": 15249 + }, + { + "epoch": 0.783739336005756, + "grad_norm": 1.1378785371780396, + "learning_rate": 1.1771584459944862e-06, + "loss": 0.7059, + "step": 15250 + }, + { + "epoch": 0.7837907287491006, + "grad_norm": 1.0144895315170288, + "learning_rate": 1.176622075666992e-06, + "loss": 0.6381, + "step": 15251 + }, + { + "epoch": 0.7838421214924453, + "grad_norm": 1.144880771636963, + "learning_rate": 1.1760858112695222e-06, + "loss": 0.6633, + "step": 15252 + }, + { + "epoch": 0.7838935142357899, + "grad_norm": 1.119289755821228, + "learning_rate": 1.1755496528169325e-06, + "loss": 0.7189, + "step": 15253 + }, + { + "epoch": 0.7839449069791345, + "grad_norm": 1.0901485681533813, + "learning_rate": 1.175013600324082e-06, + "loss": 0.7165, + "step": 15254 + }, + { + "epoch": 0.7839962997224792, + "grad_norm": 1.0795358419418335, + "learning_rate": 1.1744776538058173e-06, + "loss": 0.6789, + "step": 15255 + }, + { + "epoch": 0.7840476924658238, + "grad_norm": 1.0920484066009521, + "learning_rate": 1.1739418132769915e-06, + "loss": 0.6735, + "step": 15256 + }, + { + "epoch": 0.7840990852091685, + "grad_norm": 1.137935996055603, + "learning_rate": 1.173406078752447e-06, + "loss": 0.72, + "step": 15257 + }, + { + "epoch": 0.7841504779525131, + "grad_norm": 1.0923997163772583, + "learning_rate": 1.1728704502470302e-06, + "loss": 0.7203, + "step": 15258 + }, + { + "epoch": 0.7842018706958578, + "grad_norm": 1.0696948766708374, + "learning_rate": 1.172334927775579e-06, + "loss": 0.7199, + "step": 15259 + }, + { + "epoch": 0.7842532634392024, + "grad_norm": 1.1216126680374146, + "learning_rate": 1.1717995113529306e-06, + "loss": 0.6549, + "step": 15260 + }, + { + "epoch": 0.7843046561825471, + "grad_norm": 0.7955917716026306, + "learning_rate": 1.1712642009939219e-06, + "loss": 0.6891, + "step": 15261 + }, + { + "epoch": 0.7843560489258916, + "grad_norm": 1.0609737634658813, + "learning_rate": 1.1707289967133794e-06, + "loss": 0.7095, + "step": 15262 + }, + { + "epoch": 0.7844074416692363, + "grad_norm": 1.0039712190628052, + "learning_rate": 1.1701938985261353e-06, + "loss": 0.6901, + "step": 15263 + }, + { + "epoch": 0.7844588344125809, + "grad_norm": 1.0993261337280273, + "learning_rate": 1.1696589064470138e-06, + "loss": 0.6642, + "step": 15264 + }, + { + "epoch": 0.7845102271559256, + "grad_norm": 1.063266396522522, + "learning_rate": 1.1691240204908366e-06, + "loss": 0.6465, + "step": 15265 + }, + { + "epoch": 0.7845616198992702, + "grad_norm": 1.0830645561218262, + "learning_rate": 1.1685892406724225e-06, + "loss": 0.7528, + "step": 15266 + }, + { + "epoch": 0.7846130126426148, + "grad_norm": 1.0366984605789185, + "learning_rate": 1.1680545670065907e-06, + "loss": 0.6849, + "step": 15267 + }, + { + "epoch": 0.7846644053859595, + "grad_norm": 1.0818408727645874, + "learning_rate": 1.1675199995081538e-06, + "loss": 0.665, + "step": 15268 + }, + { + "epoch": 0.7847157981293041, + "grad_norm": 1.0609105825424194, + "learning_rate": 1.1669855381919214e-06, + "loss": 0.6838, + "step": 15269 + }, + { + "epoch": 0.7847671908726488, + "grad_norm": 1.129634976387024, + "learning_rate": 1.1664511830727004e-06, + "loss": 0.7014, + "step": 15270 + }, + { + "epoch": 0.7848185836159934, + "grad_norm": 1.104885458946228, + "learning_rate": 1.1659169341652986e-06, + "loss": 0.6391, + "step": 15271 + }, + { + "epoch": 0.7848699763593381, + "grad_norm": 1.1087974309921265, + "learning_rate": 1.1653827914845162e-06, + "loss": 0.6553, + "step": 15272 + }, + { + "epoch": 0.7849213691026827, + "grad_norm": 1.07155442237854, + "learning_rate": 1.1648487550451509e-06, + "loss": 0.7328, + "step": 15273 + }, + { + "epoch": 0.7849727618460274, + "grad_norm": 1.0907989740371704, + "learning_rate": 1.1643148248620024e-06, + "loss": 0.7626, + "step": 15274 + }, + { + "epoch": 0.785024154589372, + "grad_norm": 1.080358862876892, + "learning_rate": 1.1637810009498584e-06, + "loss": 0.6801, + "step": 15275 + }, + { + "epoch": 0.7850755473327167, + "grad_norm": 1.0830459594726562, + "learning_rate": 1.163247283323513e-06, + "loss": 0.7129, + "step": 15276 + }, + { + "epoch": 0.7851269400760612, + "grad_norm": 1.1767476797103882, + "learning_rate": 1.1627136719977521e-06, + "loss": 0.7469, + "step": 15277 + }, + { + "epoch": 0.7851783328194059, + "grad_norm": 1.0379974842071533, + "learning_rate": 1.16218016698736e-06, + "loss": 0.7045, + "step": 15278 + }, + { + "epoch": 0.7852297255627505, + "grad_norm": 0.7395645380020142, + "learning_rate": 1.1616467683071164e-06, + "loss": 0.641, + "step": 15279 + }, + { + "epoch": 0.7852811183060951, + "grad_norm": 0.718439519405365, + "learning_rate": 1.1611134759718024e-06, + "loss": 0.6423, + "step": 15280 + }, + { + "epoch": 0.7853325110494398, + "grad_norm": 1.0622670650482178, + "learning_rate": 1.1605802899961916e-06, + "loss": 0.7016, + "step": 15281 + }, + { + "epoch": 0.7853839037927844, + "grad_norm": 1.12211275100708, + "learning_rate": 1.160047210395055e-06, + "loss": 0.6991, + "step": 15282 + }, + { + "epoch": 0.7854352965361291, + "grad_norm": 1.0998120307922363, + "learning_rate": 1.1595142371831664e-06, + "loss": 0.6689, + "step": 15283 + }, + { + "epoch": 0.7854866892794737, + "grad_norm": 1.0639524459838867, + "learning_rate": 1.1589813703752873e-06, + "loss": 0.7095, + "step": 15284 + }, + { + "epoch": 0.7855380820228184, + "grad_norm": 1.0444622039794922, + "learning_rate": 1.1584486099861846e-06, + "loss": 0.7119, + "step": 15285 + }, + { + "epoch": 0.785589474766163, + "grad_norm": 1.0309337377548218, + "learning_rate": 1.1579159560306162e-06, + "loss": 0.6581, + "step": 15286 + }, + { + "epoch": 0.7856408675095077, + "grad_norm": 1.1340227127075195, + "learning_rate": 1.1573834085233443e-06, + "loss": 0.712, + "step": 15287 + }, + { + "epoch": 0.7856922602528523, + "grad_norm": 1.0500249862670898, + "learning_rate": 1.1568509674791178e-06, + "loss": 0.6363, + "step": 15288 + }, + { + "epoch": 0.785743652996197, + "grad_norm": 1.0418407917022705, + "learning_rate": 1.1563186329126925e-06, + "loss": 0.6394, + "step": 15289 + }, + { + "epoch": 0.7857950457395416, + "grad_norm": 1.14478600025177, + "learning_rate": 1.1557864048388161e-06, + "loss": 0.7266, + "step": 15290 + }, + { + "epoch": 0.7858464384828863, + "grad_norm": 1.0929548740386963, + "learning_rate": 1.155254283272233e-06, + "loss": 0.6493, + "step": 15291 + }, + { + "epoch": 0.7858978312262308, + "grad_norm": 0.7912446856498718, + "learning_rate": 1.1547222682276882e-06, + "loss": 0.6176, + "step": 15292 + }, + { + "epoch": 0.7859492239695755, + "grad_norm": 1.0888994932174683, + "learning_rate": 1.1541903597199216e-06, + "loss": 0.7002, + "step": 15293 + }, + { + "epoch": 0.7860006167129201, + "grad_norm": 1.1142997741699219, + "learning_rate": 1.1536585577636688e-06, + "loss": 0.6936, + "step": 15294 + }, + { + "epoch": 0.7860520094562647, + "grad_norm": 1.058814287185669, + "learning_rate": 1.1531268623736636e-06, + "loss": 0.6909, + "step": 15295 + }, + { + "epoch": 0.7861034021996094, + "grad_norm": 1.095106840133667, + "learning_rate": 1.15259527356464e-06, + "loss": 0.7284, + "step": 15296 + }, + { + "epoch": 0.786154794942954, + "grad_norm": 0.8747300505638123, + "learning_rate": 1.1520637913513222e-06, + "loss": 0.6686, + "step": 15297 + }, + { + "epoch": 0.7862061876862987, + "grad_norm": 1.1078985929489136, + "learning_rate": 1.1515324157484382e-06, + "loss": 0.7278, + "step": 15298 + }, + { + "epoch": 0.7862575804296433, + "grad_norm": 1.117053747177124, + "learning_rate": 1.151001146770709e-06, + "loss": 0.6755, + "step": 15299 + }, + { + "epoch": 0.786308973172988, + "grad_norm": 1.0862746238708496, + "learning_rate": 1.1504699844328527e-06, + "loss": 0.6879, + "step": 15300 + }, + { + "epoch": 0.7863603659163326, + "grad_norm": 1.131516456604004, + "learning_rate": 1.1499389287495888e-06, + "loss": 0.7534, + "step": 15301 + }, + { + "epoch": 0.7864117586596773, + "grad_norm": 1.0414159297943115, + "learning_rate": 1.1494079797356271e-06, + "loss": 0.6801, + "step": 15302 + }, + { + "epoch": 0.7864631514030219, + "grad_norm": 1.0961095094680786, + "learning_rate": 1.1488771374056834e-06, + "loss": 0.6936, + "step": 15303 + }, + { + "epoch": 0.7865145441463666, + "grad_norm": 1.0260212421417236, + "learning_rate": 1.1483464017744583e-06, + "loss": 0.6569, + "step": 15304 + }, + { + "epoch": 0.7865659368897112, + "grad_norm": 1.1671698093414307, + "learning_rate": 1.1478157728566608e-06, + "loss": 0.6708, + "step": 15305 + }, + { + "epoch": 0.7866173296330559, + "grad_norm": 1.0307930707931519, + "learning_rate": 1.1472852506669917e-06, + "loss": 0.6759, + "step": 15306 + }, + { + "epoch": 0.7866687223764004, + "grad_norm": 0.755401611328125, + "learning_rate": 1.146754835220149e-06, + "loss": 0.6727, + "step": 15307 + }, + { + "epoch": 0.786720115119745, + "grad_norm": 1.0879428386688232, + "learning_rate": 1.1462245265308264e-06, + "loss": 0.7193, + "step": 15308 + }, + { + "epoch": 0.7867715078630897, + "grad_norm": 0.7504734992980957, + "learning_rate": 1.145694324613721e-06, + "loss": 0.6725, + "step": 15309 + }, + { + "epoch": 0.7868229006064343, + "grad_norm": 1.0496242046356201, + "learning_rate": 1.1451642294835192e-06, + "loss": 0.6197, + "step": 15310 + }, + { + "epoch": 0.786874293349779, + "grad_norm": 0.7963430285453796, + "learning_rate": 1.1446342411549071e-06, + "loss": 0.5974, + "step": 15311 + }, + { + "epoch": 0.7869256860931236, + "grad_norm": 1.0633864402770996, + "learning_rate": 1.1441043596425738e-06, + "loss": 0.7066, + "step": 15312 + }, + { + "epoch": 0.7869770788364683, + "grad_norm": 0.7577975988388062, + "learning_rate": 1.143574584961193e-06, + "loss": 0.6446, + "step": 15313 + }, + { + "epoch": 0.7870284715798129, + "grad_norm": 0.7951050400733948, + "learning_rate": 1.1430449171254472e-06, + "loss": 0.6478, + "step": 15314 + }, + { + "epoch": 0.7870798643231576, + "grad_norm": 1.067762851715088, + "learning_rate": 1.1425153561500103e-06, + "loss": 0.6979, + "step": 15315 + }, + { + "epoch": 0.7871312570665022, + "grad_norm": 1.1863747835159302, + "learning_rate": 1.141985902049554e-06, + "loss": 0.6375, + "step": 15316 + }, + { + "epoch": 0.7871826498098469, + "grad_norm": 1.1192349195480347, + "learning_rate": 1.141456554838745e-06, + "loss": 0.6847, + "step": 15317 + }, + { + "epoch": 0.7872340425531915, + "grad_norm": 1.0687884092330933, + "learning_rate": 1.140927314532254e-06, + "loss": 0.6998, + "step": 15318 + }, + { + "epoch": 0.7872854352965362, + "grad_norm": 0.7029162049293518, + "learning_rate": 1.1403981811447412e-06, + "loss": 0.6393, + "step": 15319 + }, + { + "epoch": 0.7873368280398808, + "grad_norm": 1.165034294128418, + "learning_rate": 1.1398691546908674e-06, + "loss": 0.6864, + "step": 15320 + }, + { + "epoch": 0.7873882207832255, + "grad_norm": 1.159232258796692, + "learning_rate": 1.1393402351852884e-06, + "loss": 0.7412, + "step": 15321 + }, + { + "epoch": 0.7874396135265701, + "grad_norm": 1.13481605052948, + "learning_rate": 1.13881142264266e-06, + "loss": 0.7026, + "step": 15322 + }, + { + "epoch": 0.7874910062699146, + "grad_norm": 1.14860999584198, + "learning_rate": 1.1382827170776334e-06, + "loss": 0.7037, + "step": 15323 + }, + { + "epoch": 0.7875423990132593, + "grad_norm": 1.0993632078170776, + "learning_rate": 1.137754118504855e-06, + "loss": 0.7057, + "step": 15324 + }, + { + "epoch": 0.7875937917566039, + "grad_norm": 1.106679916381836, + "learning_rate": 1.1372256269389742e-06, + "loss": 0.7162, + "step": 15325 + }, + { + "epoch": 0.7876451844999486, + "grad_norm": 0.7836660146713257, + "learning_rate": 1.1366972423946276e-06, + "loss": 0.6259, + "step": 15326 + }, + { + "epoch": 0.7876965772432932, + "grad_norm": 1.051287293434143, + "learning_rate": 1.1361689648864592e-06, + "loss": 0.6736, + "step": 15327 + }, + { + "epoch": 0.7877479699866379, + "grad_norm": 1.0648866891860962, + "learning_rate": 1.1356407944291037e-06, + "loss": 0.7138, + "step": 15328 + }, + { + "epoch": 0.7877993627299825, + "grad_norm": 1.0551220178604126, + "learning_rate": 1.1351127310371946e-06, + "loss": 0.6718, + "step": 15329 + }, + { + "epoch": 0.7878507554733272, + "grad_norm": 1.0599972009658813, + "learning_rate": 1.1345847747253602e-06, + "loss": 0.6995, + "step": 15330 + }, + { + "epoch": 0.7879021482166718, + "grad_norm": 0.6620064377784729, + "learning_rate": 1.1340569255082318e-06, + "loss": 0.6081, + "step": 15331 + }, + { + "epoch": 0.7879535409600165, + "grad_norm": 0.7149621248245239, + "learning_rate": 1.1335291834004324e-06, + "loss": 0.6475, + "step": 15332 + }, + { + "epoch": 0.7880049337033611, + "grad_norm": 1.1159652471542358, + "learning_rate": 1.133001548416582e-06, + "loss": 0.7404, + "step": 15333 + }, + { + "epoch": 0.7880563264467058, + "grad_norm": 1.1008071899414062, + "learning_rate": 1.1324740205713014e-06, + "loss": 0.7867, + "step": 15334 + }, + { + "epoch": 0.7881077191900504, + "grad_norm": 1.0862202644348145, + "learning_rate": 1.1319465998792057e-06, + "loss": 0.6913, + "step": 15335 + }, + { + "epoch": 0.788159111933395, + "grad_norm": 1.120643973350525, + "learning_rate": 1.1314192863549072e-06, + "loss": 0.7168, + "step": 15336 + }, + { + "epoch": 0.7882105046767397, + "grad_norm": 1.07773756980896, + "learning_rate": 1.1308920800130146e-06, + "loss": 0.6948, + "step": 15337 + }, + { + "epoch": 0.7882618974200842, + "grad_norm": 1.143275260925293, + "learning_rate": 1.1303649808681377e-06, + "loss": 0.7255, + "step": 15338 + }, + { + "epoch": 0.7883132901634289, + "grad_norm": 1.0647166967391968, + "learning_rate": 1.1298379889348759e-06, + "loss": 0.6771, + "step": 15339 + }, + { + "epoch": 0.7883646829067735, + "grad_norm": 1.062179684638977, + "learning_rate": 1.1293111042278332e-06, + "loss": 0.6931, + "step": 15340 + }, + { + "epoch": 0.7884160756501182, + "grad_norm": 0.6962597966194153, + "learning_rate": 1.1287843267616067e-06, + "loss": 0.6333, + "step": 15341 + }, + { + "epoch": 0.7884674683934628, + "grad_norm": 1.111122727394104, + "learning_rate": 1.128257656550789e-06, + "loss": 0.7336, + "step": 15342 + }, + { + "epoch": 0.7885188611368075, + "grad_norm": 1.185849905014038, + "learning_rate": 1.1277310936099762e-06, + "loss": 0.736, + "step": 15343 + }, + { + "epoch": 0.7885702538801521, + "grad_norm": 1.091580867767334, + "learning_rate": 1.1272046379537538e-06, + "loss": 0.678, + "step": 15344 + }, + { + "epoch": 0.7886216466234968, + "grad_norm": 0.7518974542617798, + "learning_rate": 1.1266782895967098e-06, + "loss": 0.6122, + "step": 15345 + }, + { + "epoch": 0.7886730393668414, + "grad_norm": 1.0606197118759155, + "learning_rate": 1.1261520485534238e-06, + "loss": 0.7046, + "step": 15346 + }, + { + "epoch": 0.7887244321101861, + "grad_norm": 1.096616506576538, + "learning_rate": 1.1256259148384818e-06, + "loss": 0.6493, + "step": 15347 + }, + { + "epoch": 0.7887758248535307, + "grad_norm": 1.1386187076568604, + "learning_rate": 1.125099888466454e-06, + "loss": 0.6984, + "step": 15348 + }, + { + "epoch": 0.7888272175968754, + "grad_norm": 1.1115142107009888, + "learning_rate": 1.1245739694519187e-06, + "loss": 0.7221, + "step": 15349 + }, + { + "epoch": 0.78887861034022, + "grad_norm": 1.0191231966018677, + "learning_rate": 1.1240481578094448e-06, + "loss": 0.6827, + "step": 15350 + }, + { + "epoch": 0.7889300030835646, + "grad_norm": 1.1206845045089722, + "learning_rate": 1.123522453553602e-06, + "loss": 0.6595, + "step": 15351 + }, + { + "epoch": 0.7889813958269093, + "grad_norm": 1.060234546661377, + "learning_rate": 1.1229968566989552e-06, + "loss": 0.6212, + "step": 15352 + }, + { + "epoch": 0.7890327885702538, + "grad_norm": 1.059466004371643, + "learning_rate": 1.1224713672600646e-06, + "loss": 0.6671, + "step": 15353 + }, + { + "epoch": 0.7890841813135985, + "grad_norm": 0.873365044593811, + "learning_rate": 1.1219459852514937e-06, + "loss": 0.6293, + "step": 15354 + }, + { + "epoch": 0.7891355740569431, + "grad_norm": 0.9886416792869568, + "learning_rate": 1.1214207106877928e-06, + "loss": 0.7025, + "step": 15355 + }, + { + "epoch": 0.7891869668002878, + "grad_norm": 1.0985876321792603, + "learning_rate": 1.1208955435835201e-06, + "loss": 0.7038, + "step": 15356 + }, + { + "epoch": 0.7892383595436324, + "grad_norm": 0.6853715777397156, + "learning_rate": 1.1203704839532232e-06, + "loss": 0.6188, + "step": 15357 + }, + { + "epoch": 0.7892897522869771, + "grad_norm": 1.1219056844711304, + "learning_rate": 1.1198455318114499e-06, + "loss": 0.6938, + "step": 15358 + }, + { + "epoch": 0.7893411450303217, + "grad_norm": 1.0656853914260864, + "learning_rate": 1.119320687172743e-06, + "loss": 0.6616, + "step": 15359 + }, + { + "epoch": 0.7893925377736664, + "grad_norm": 1.1165146827697754, + "learning_rate": 1.1187959500516465e-06, + "loss": 0.6651, + "step": 15360 + }, + { + "epoch": 0.789443930517011, + "grad_norm": 0.7125632762908936, + "learning_rate": 1.1182713204626978e-06, + "loss": 0.6287, + "step": 15361 + }, + { + "epoch": 0.7894953232603557, + "grad_norm": 1.0847622156143188, + "learning_rate": 1.1177467984204304e-06, + "loss": 0.6631, + "step": 15362 + }, + { + "epoch": 0.7895467160037003, + "grad_norm": 1.1944921016693115, + "learning_rate": 1.1172223839393808e-06, + "loss": 0.6907, + "step": 15363 + }, + { + "epoch": 0.789598108747045, + "grad_norm": 1.137958288192749, + "learning_rate": 1.116698077034073e-06, + "loss": 0.6962, + "step": 15364 + }, + { + "epoch": 0.7896495014903896, + "grad_norm": 1.092236876487732, + "learning_rate": 1.1161738777190374e-06, + "loss": 0.6685, + "step": 15365 + }, + { + "epoch": 0.7897008942337342, + "grad_norm": 1.1049565076828003, + "learning_rate": 1.1156497860087945e-06, + "loss": 0.7135, + "step": 15366 + }, + { + "epoch": 0.7897522869770789, + "grad_norm": 1.1603111028671265, + "learning_rate": 1.115125801917869e-06, + "loss": 0.6997, + "step": 15367 + }, + { + "epoch": 0.7898036797204234, + "grad_norm": 1.22853684425354, + "learning_rate": 1.1146019254607732e-06, + "loss": 0.7021, + "step": 15368 + }, + { + "epoch": 0.7898550724637681, + "grad_norm": 1.1579856872558594, + "learning_rate": 1.1140781566520248e-06, + "loss": 0.6951, + "step": 15369 + }, + { + "epoch": 0.7899064652071127, + "grad_norm": 1.136399745941162, + "learning_rate": 1.1135544955061344e-06, + "loss": 0.7212, + "step": 15370 + }, + { + "epoch": 0.7899578579504574, + "grad_norm": 1.0282552242279053, + "learning_rate": 1.1130309420376112e-06, + "loss": 0.6759, + "step": 15371 + }, + { + "epoch": 0.790009250693802, + "grad_norm": 1.2187716960906982, + "learning_rate": 1.1125074962609584e-06, + "loss": 0.7373, + "step": 15372 + }, + { + "epoch": 0.7900606434371467, + "grad_norm": 0.6978018879890442, + "learning_rate": 1.1119841581906815e-06, + "loss": 0.6225, + "step": 15373 + }, + { + "epoch": 0.7901120361804913, + "grad_norm": 1.1205804347991943, + "learning_rate": 1.1114609278412785e-06, + "loss": 0.6929, + "step": 15374 + }, + { + "epoch": 0.790163428923836, + "grad_norm": 1.1470102071762085, + "learning_rate": 1.1109378052272446e-06, + "loss": 0.6497, + "step": 15375 + }, + { + "epoch": 0.7902148216671806, + "grad_norm": 1.073972463607788, + "learning_rate": 1.110414790363078e-06, + "loss": 0.6911, + "step": 15376 + }, + { + "epoch": 0.7902662144105252, + "grad_norm": 1.080496072769165, + "learning_rate": 1.1098918832632632e-06, + "loss": 0.7286, + "step": 15377 + }, + { + "epoch": 0.7903176071538699, + "grad_norm": 1.0411713123321533, + "learning_rate": 1.1093690839422927e-06, + "loss": 0.7172, + "step": 15378 + }, + { + "epoch": 0.7903689998972145, + "grad_norm": 1.1401727199554443, + "learning_rate": 1.1088463924146487e-06, + "loss": 0.6686, + "step": 15379 + }, + { + "epoch": 0.7904203926405592, + "grad_norm": 1.0712045431137085, + "learning_rate": 1.1083238086948133e-06, + "loss": 0.6868, + "step": 15380 + }, + { + "epoch": 0.7904717853839038, + "grad_norm": 0.747482180595398, + "learning_rate": 1.1078013327972636e-06, + "loss": 0.6288, + "step": 15381 + }, + { + "epoch": 0.7905231781272485, + "grad_norm": 0.8077569007873535, + "learning_rate": 1.107278964736479e-06, + "loss": 0.6822, + "step": 15382 + }, + { + "epoch": 0.790574570870593, + "grad_norm": 1.1478655338287354, + "learning_rate": 1.1067567045269295e-06, + "loss": 0.7183, + "step": 15383 + }, + { + "epoch": 0.7906259636139377, + "grad_norm": 1.1394224166870117, + "learning_rate": 1.1062345521830837e-06, + "loss": 0.6814, + "step": 15384 + }, + { + "epoch": 0.7906773563572823, + "grad_norm": 1.108833909034729, + "learning_rate": 1.1057125077194113e-06, + "loss": 0.69, + "step": 15385 + }, + { + "epoch": 0.790728749100627, + "grad_norm": 1.116007685661316, + "learning_rate": 1.1051905711503746e-06, + "loss": 0.647, + "step": 15386 + }, + { + "epoch": 0.7907801418439716, + "grad_norm": 1.1368153095245361, + "learning_rate": 1.104668742490434e-06, + "loss": 0.6832, + "step": 15387 + }, + { + "epoch": 0.7908315345873163, + "grad_norm": 0.707216203212738, + "learning_rate": 1.1041470217540467e-06, + "loss": 0.6569, + "step": 15388 + }, + { + "epoch": 0.7908829273306609, + "grad_norm": 1.1801246404647827, + "learning_rate": 1.1036254089556702e-06, + "loss": 0.7399, + "step": 15389 + }, + { + "epoch": 0.7909343200740055, + "grad_norm": 0.8007926344871521, + "learning_rate": 1.1031039041097518e-06, + "loss": 0.6408, + "step": 15390 + }, + { + "epoch": 0.7909857128173502, + "grad_norm": 1.0790886878967285, + "learning_rate": 1.1025825072307445e-06, + "loss": 0.6522, + "step": 15391 + }, + { + "epoch": 0.7910371055606948, + "grad_norm": 1.1342428922653198, + "learning_rate": 1.1020612183330914e-06, + "loss": 0.7072, + "step": 15392 + }, + { + "epoch": 0.7910884983040395, + "grad_norm": 1.1169917583465576, + "learning_rate": 1.101540037431235e-06, + "loss": 0.7417, + "step": 15393 + }, + { + "epoch": 0.7911398910473841, + "grad_norm": 1.0921988487243652, + "learning_rate": 1.101018964539618e-06, + "loss": 0.7439, + "step": 15394 + }, + { + "epoch": 0.7911912837907288, + "grad_norm": 1.0874522924423218, + "learning_rate": 1.100497999672674e-06, + "loss": 0.7239, + "step": 15395 + }, + { + "epoch": 0.7912426765340734, + "grad_norm": 1.16403329372406, + "learning_rate": 1.0999771428448403e-06, + "loss": 0.7077, + "step": 15396 + }, + { + "epoch": 0.7912940692774181, + "grad_norm": 1.1167305707931519, + "learning_rate": 1.0994563940705433e-06, + "loss": 0.7549, + "step": 15397 + }, + { + "epoch": 0.7913454620207626, + "grad_norm": 1.0662370920181274, + "learning_rate": 1.0989357533642138e-06, + "loss": 0.7008, + "step": 15398 + }, + { + "epoch": 0.7913968547641073, + "grad_norm": 1.1400477886199951, + "learning_rate": 1.0984152207402766e-06, + "loss": 0.6512, + "step": 15399 + }, + { + "epoch": 0.7914482475074519, + "grad_norm": 1.0472333431243896, + "learning_rate": 1.097894796213152e-06, + "loss": 0.7194, + "step": 15400 + }, + { + "epoch": 0.7914996402507966, + "grad_norm": 1.099311113357544, + "learning_rate": 1.0973744797972585e-06, + "loss": 0.6885, + "step": 15401 + }, + { + "epoch": 0.7915510329941412, + "grad_norm": 0.9624381065368652, + "learning_rate": 1.096854271507014e-06, + "loss": 0.6412, + "step": 15402 + }, + { + "epoch": 0.7916024257374858, + "grad_norm": 0.995807945728302, + "learning_rate": 1.096334171356831e-06, + "loss": 0.6849, + "step": 15403 + }, + { + "epoch": 0.7916538184808305, + "grad_norm": 1.0772331953048706, + "learning_rate": 1.095814179361117e-06, + "loss": 0.7222, + "step": 15404 + }, + { + "epoch": 0.7917052112241751, + "grad_norm": 0.7315527200698853, + "learning_rate": 1.0952942955342833e-06, + "loss": 0.6396, + "step": 15405 + }, + { + "epoch": 0.7917566039675198, + "grad_norm": 1.0514954328536987, + "learning_rate": 1.0947745198907279e-06, + "loss": 0.6554, + "step": 15406 + }, + { + "epoch": 0.7918079967108644, + "grad_norm": 1.1457874774932861, + "learning_rate": 1.094254852444856e-06, + "loss": 0.7222, + "step": 15407 + }, + { + "epoch": 0.7918593894542091, + "grad_norm": 1.104487419128418, + "learning_rate": 1.093735293211064e-06, + "loss": 0.727, + "step": 15408 + }, + { + "epoch": 0.7919107821975537, + "grad_norm": 0.6998841762542725, + "learning_rate": 1.093215842203747e-06, + "loss": 0.6702, + "step": 15409 + }, + { + "epoch": 0.7919621749408984, + "grad_norm": 0.7906347513198853, + "learning_rate": 1.0926964994372952e-06, + "loss": 0.6085, + "step": 15410 + }, + { + "epoch": 0.792013567684243, + "grad_norm": 1.0360815525054932, + "learning_rate": 1.0921772649261003e-06, + "loss": 0.7055, + "step": 15411 + }, + { + "epoch": 0.7920649604275877, + "grad_norm": 1.1705381870269775, + "learning_rate": 1.091658138684547e-06, + "loss": 0.7185, + "step": 15412 + }, + { + "epoch": 0.7921163531709323, + "grad_norm": 0.7343947887420654, + "learning_rate": 1.0911391207270167e-06, + "loss": 0.6142, + "step": 15413 + }, + { + "epoch": 0.7921677459142769, + "grad_norm": 0.7360662221908569, + "learning_rate": 1.0906202110678909e-06, + "loss": 0.6221, + "step": 15414 + }, + { + "epoch": 0.7922191386576215, + "grad_norm": 1.0816441774368286, + "learning_rate": 1.0901014097215468e-06, + "loss": 0.7096, + "step": 15415 + }, + { + "epoch": 0.7922705314009661, + "grad_norm": 1.0699526071548462, + "learning_rate": 1.089582716702357e-06, + "loss": 0.6984, + "step": 15416 + }, + { + "epoch": 0.7923219241443108, + "grad_norm": 0.679084062576294, + "learning_rate": 1.089064132024692e-06, + "loss": 0.6247, + "step": 15417 + }, + { + "epoch": 0.7923733168876554, + "grad_norm": 1.0251060724258423, + "learning_rate": 1.0885456557029227e-06, + "loss": 0.6763, + "step": 15418 + }, + { + "epoch": 0.7924247096310001, + "grad_norm": 1.1182841062545776, + "learning_rate": 1.0880272877514093e-06, + "loss": 0.7153, + "step": 15419 + }, + { + "epoch": 0.7924761023743447, + "grad_norm": 1.0087915658950806, + "learning_rate": 1.087509028184517e-06, + "loss": 0.67, + "step": 15420 + }, + { + "epoch": 0.7925274951176894, + "grad_norm": 0.7238808870315552, + "learning_rate": 1.0869908770166037e-06, + "loss": 0.6492, + "step": 15421 + }, + { + "epoch": 0.792578887861034, + "grad_norm": 1.124735713005066, + "learning_rate": 1.0864728342620235e-06, + "loss": 0.7536, + "step": 15422 + }, + { + "epoch": 0.7926302806043787, + "grad_norm": 1.2828058004379272, + "learning_rate": 1.0859548999351327e-06, + "loss": 0.6964, + "step": 15423 + }, + { + "epoch": 0.7926816733477233, + "grad_norm": 1.071630835533142, + "learning_rate": 1.085437074050279e-06, + "loss": 0.7005, + "step": 15424 + }, + { + "epoch": 0.792733066091068, + "grad_norm": 1.17682945728302, + "learning_rate": 1.0849193566218097e-06, + "loss": 0.7131, + "step": 15425 + }, + { + "epoch": 0.7927844588344126, + "grad_norm": 1.1434905529022217, + "learning_rate": 1.0844017476640673e-06, + "loss": 0.7137, + "step": 15426 + }, + { + "epoch": 0.7928358515777573, + "grad_norm": 1.0828107595443726, + "learning_rate": 1.0838842471913945e-06, + "loss": 0.7025, + "step": 15427 + }, + { + "epoch": 0.7928872443211019, + "grad_norm": 1.1218725442886353, + "learning_rate": 1.0833668552181292e-06, + "loss": 0.6961, + "step": 15428 + }, + { + "epoch": 0.7929386370644464, + "grad_norm": 1.1331369876861572, + "learning_rate": 1.0828495717586046e-06, + "loss": 0.6944, + "step": 15429 + }, + { + "epoch": 0.7929900298077911, + "grad_norm": 1.12319016456604, + "learning_rate": 1.082332396827152e-06, + "loss": 0.7044, + "step": 15430 + }, + { + "epoch": 0.7930414225511357, + "grad_norm": 1.061699390411377, + "learning_rate": 1.0818153304381046e-06, + "loss": 0.6747, + "step": 15431 + }, + { + "epoch": 0.7930928152944804, + "grad_norm": 1.1284937858581543, + "learning_rate": 1.0812983726057818e-06, + "loss": 0.7328, + "step": 15432 + }, + { + "epoch": 0.793144208037825, + "grad_norm": 1.093911051750183, + "learning_rate": 1.0807815233445113e-06, + "loss": 0.6951, + "step": 15433 + }, + { + "epoch": 0.7931956007811697, + "grad_norm": 0.8249751925468445, + "learning_rate": 1.0802647826686107e-06, + "loss": 0.6463, + "step": 15434 + }, + { + "epoch": 0.7932469935245143, + "grad_norm": 1.108451247215271, + "learning_rate": 1.0797481505923962e-06, + "loss": 0.6782, + "step": 15435 + }, + { + "epoch": 0.793298386267859, + "grad_norm": 0.9854241013526917, + "learning_rate": 1.079231627130184e-06, + "loss": 0.6602, + "step": 15436 + }, + { + "epoch": 0.7933497790112036, + "grad_norm": 0.8660914897918701, + "learning_rate": 1.0787152122962829e-06, + "loss": 0.6254, + "step": 15437 + }, + { + "epoch": 0.7934011717545483, + "grad_norm": 0.7369109988212585, + "learning_rate": 1.0781989061050013e-06, + "loss": 0.6672, + "step": 15438 + }, + { + "epoch": 0.7934525644978929, + "grad_norm": 1.0640764236450195, + "learning_rate": 1.0776827085706425e-06, + "loss": 0.7052, + "step": 15439 + }, + { + "epoch": 0.7935039572412376, + "grad_norm": 1.1049494743347168, + "learning_rate": 1.077166619707512e-06, + "loss": 0.7184, + "step": 15440 + }, + { + "epoch": 0.7935553499845822, + "grad_norm": 0.8125507831573486, + "learning_rate": 1.0766506395299032e-06, + "loss": 0.6624, + "step": 15441 + }, + { + "epoch": 0.7936067427279269, + "grad_norm": 1.0871307849884033, + "learning_rate": 1.0761347680521157e-06, + "loss": 0.7105, + "step": 15442 + }, + { + "epoch": 0.7936581354712715, + "grad_norm": 1.1494214534759521, + "learning_rate": 1.0756190052884396e-06, + "loss": 0.6716, + "step": 15443 + }, + { + "epoch": 0.793709528214616, + "grad_norm": 1.099273920059204, + "learning_rate": 1.0751033512531672e-06, + "loss": 0.7484, + "step": 15444 + }, + { + "epoch": 0.7937609209579607, + "grad_norm": 0.7576067447662354, + "learning_rate": 1.074587805960584e-06, + "loss": 0.6579, + "step": 15445 + }, + { + "epoch": 0.7938123137013053, + "grad_norm": 0.6758350729942322, + "learning_rate": 1.0740723694249722e-06, + "loss": 0.6545, + "step": 15446 + }, + { + "epoch": 0.79386370644465, + "grad_norm": 1.1015040874481201, + "learning_rate": 1.0735570416606161e-06, + "loss": 0.7055, + "step": 15447 + }, + { + "epoch": 0.7939150991879946, + "grad_norm": 1.1130210161209106, + "learning_rate": 1.0730418226817885e-06, + "loss": 0.7451, + "step": 15448 + }, + { + "epoch": 0.7939664919313393, + "grad_norm": 1.0987616777420044, + "learning_rate": 1.0725267125027676e-06, + "loss": 0.7341, + "step": 15449 + }, + { + "epoch": 0.7940178846746839, + "grad_norm": 1.0817186832427979, + "learning_rate": 1.0720117111378236e-06, + "loss": 0.6707, + "step": 15450 + }, + { + "epoch": 0.7940692774180286, + "grad_norm": 1.2252196073532104, + "learning_rate": 1.0714968186012254e-06, + "loss": 0.6502, + "step": 15451 + }, + { + "epoch": 0.7941206701613732, + "grad_norm": 1.096917986869812, + "learning_rate": 1.070982034907237e-06, + "loss": 0.7314, + "step": 15452 + }, + { + "epoch": 0.7941720629047179, + "grad_norm": 0.806534469127655, + "learning_rate": 1.0704673600701237e-06, + "loss": 0.6288, + "step": 15453 + }, + { + "epoch": 0.7942234556480625, + "grad_norm": 1.2084870338439941, + "learning_rate": 1.0699527941041438e-06, + "loss": 0.7791, + "step": 15454 + }, + { + "epoch": 0.7942748483914072, + "grad_norm": 0.7811590433120728, + "learning_rate": 1.0694383370235523e-06, + "loss": 0.6449, + "step": 15455 + }, + { + "epoch": 0.7943262411347518, + "grad_norm": 0.7063359022140503, + "learning_rate": 1.0689239888426062e-06, + "loss": 0.6451, + "step": 15456 + }, + { + "epoch": 0.7943776338780965, + "grad_norm": 1.0560468435287476, + "learning_rate": 1.0684097495755514e-06, + "loss": 0.7243, + "step": 15457 + }, + { + "epoch": 0.7944290266214411, + "grad_norm": 1.0755469799041748, + "learning_rate": 1.067895619236639e-06, + "loss": 0.6501, + "step": 15458 + }, + { + "epoch": 0.7944804193647856, + "grad_norm": 1.1115938425064087, + "learning_rate": 1.0673815978401108e-06, + "loss": 0.6624, + "step": 15459 + }, + { + "epoch": 0.7945318121081303, + "grad_norm": 1.1269582509994507, + "learning_rate": 1.0668676854002124e-06, + "loss": 0.706, + "step": 15460 + }, + { + "epoch": 0.7945832048514749, + "grad_norm": 1.1205912828445435, + "learning_rate": 1.066353881931177e-06, + "loss": 0.7469, + "step": 15461 + }, + { + "epoch": 0.7946345975948196, + "grad_norm": 0.8226692080497742, + "learning_rate": 1.065840187447243e-06, + "loss": 0.6725, + "step": 15462 + }, + { + "epoch": 0.7946859903381642, + "grad_norm": 0.7626643776893616, + "learning_rate": 1.0653266019626424e-06, + "loss": 0.6521, + "step": 15463 + }, + { + "epoch": 0.7947373830815089, + "grad_norm": 1.1196069717407227, + "learning_rate": 1.0648131254916027e-06, + "loss": 0.6626, + "step": 15464 + }, + { + "epoch": 0.7947887758248535, + "grad_norm": 1.0794119834899902, + "learning_rate": 1.0642997580483532e-06, + "loss": 0.7218, + "step": 15465 + }, + { + "epoch": 0.7948401685681982, + "grad_norm": 1.02251398563385, + "learning_rate": 1.063786499647116e-06, + "loss": 0.6871, + "step": 15466 + }, + { + "epoch": 0.7948915613115428, + "grad_norm": 1.0550682544708252, + "learning_rate": 1.063273350302111e-06, + "loss": 0.7014, + "step": 15467 + }, + { + "epoch": 0.7949429540548875, + "grad_norm": 1.0607175827026367, + "learning_rate": 1.062760310027554e-06, + "loss": 0.6451, + "step": 15468 + }, + { + "epoch": 0.7949943467982321, + "grad_norm": 1.029678225517273, + "learning_rate": 1.0622473788376636e-06, + "loss": 0.7026, + "step": 15469 + }, + { + "epoch": 0.7950457395415768, + "grad_norm": 1.1250770092010498, + "learning_rate": 1.0617345567466453e-06, + "loss": 0.7139, + "step": 15470 + }, + { + "epoch": 0.7950971322849214, + "grad_norm": 1.1087660789489746, + "learning_rate": 1.0612218437687117e-06, + "loss": 0.7356, + "step": 15471 + }, + { + "epoch": 0.795148525028266, + "grad_norm": 1.1242544651031494, + "learning_rate": 1.0607092399180662e-06, + "loss": 0.7083, + "step": 15472 + }, + { + "epoch": 0.7951999177716107, + "grad_norm": 1.0315282344818115, + "learning_rate": 1.06019674520891e-06, + "loss": 0.7517, + "step": 15473 + }, + { + "epoch": 0.7952513105149552, + "grad_norm": 1.2478526830673218, + "learning_rate": 1.0596843596554452e-06, + "loss": 0.6419, + "step": 15474 + }, + { + "epoch": 0.7953027032582999, + "grad_norm": 1.0698009729385376, + "learning_rate": 1.0591720832718654e-06, + "loss": 0.6791, + "step": 15475 + }, + { + "epoch": 0.7953540960016445, + "grad_norm": 1.1302348375320435, + "learning_rate": 1.0586599160723643e-06, + "loss": 0.7517, + "step": 15476 + }, + { + "epoch": 0.7954054887449892, + "grad_norm": 1.3847373723983765, + "learning_rate": 1.0581478580711307e-06, + "loss": 0.661, + "step": 15477 + }, + { + "epoch": 0.7954568814883338, + "grad_norm": 0.8708206415176392, + "learning_rate": 1.0576359092823546e-06, + "loss": 0.6355, + "step": 15478 + }, + { + "epoch": 0.7955082742316785, + "grad_norm": 1.0562161207199097, + "learning_rate": 1.057124069720218e-06, + "loss": 0.6408, + "step": 15479 + }, + { + "epoch": 0.7955596669750231, + "grad_norm": 1.0930520296096802, + "learning_rate": 1.0566123393989025e-06, + "loss": 0.663, + "step": 15480 + }, + { + "epoch": 0.7956110597183678, + "grad_norm": 1.1158961057662964, + "learning_rate": 1.0561007183325839e-06, + "loss": 0.6789, + "step": 15481 + }, + { + "epoch": 0.7956624524617124, + "grad_norm": 0.692581057548523, + "learning_rate": 1.0555892065354416e-06, + "loss": 0.6434, + "step": 15482 + }, + { + "epoch": 0.7957138452050571, + "grad_norm": 1.0573724508285522, + "learning_rate": 1.0550778040216426e-06, + "loss": 0.6777, + "step": 15483 + }, + { + "epoch": 0.7957652379484017, + "grad_norm": 1.0789636373519897, + "learning_rate": 1.0545665108053588e-06, + "loss": 0.6642, + "step": 15484 + }, + { + "epoch": 0.7958166306917464, + "grad_norm": 1.0245907306671143, + "learning_rate": 1.0540553269007547e-06, + "loss": 0.6543, + "step": 15485 + }, + { + "epoch": 0.795868023435091, + "grad_norm": 1.120058536529541, + "learning_rate": 1.053544252321993e-06, + "loss": 0.6469, + "step": 15486 + }, + { + "epoch": 0.7959194161784356, + "grad_norm": 0.7369247078895569, + "learning_rate": 1.053033287083235e-06, + "loss": 0.6577, + "step": 15487 + }, + { + "epoch": 0.7959708089217803, + "grad_norm": 1.1340211629867554, + "learning_rate": 1.0525224311986365e-06, + "loss": 0.6867, + "step": 15488 + }, + { + "epoch": 0.7960222016651248, + "grad_norm": 0.7096269130706787, + "learning_rate": 1.0520116846823514e-06, + "loss": 0.6239, + "step": 15489 + }, + { + "epoch": 0.7960735944084695, + "grad_norm": 0.7398607134819031, + "learning_rate": 1.0515010475485283e-06, + "loss": 0.6242, + "step": 15490 + }, + { + "epoch": 0.7961249871518141, + "grad_norm": 1.0964971780776978, + "learning_rate": 1.050990519811318e-06, + "loss": 0.664, + "step": 15491 + }, + { + "epoch": 0.7961763798951588, + "grad_norm": 1.0639350414276123, + "learning_rate": 1.0504801014848642e-06, + "loss": 0.6732, + "step": 15492 + }, + { + "epoch": 0.7962277726385034, + "grad_norm": 1.0505406856536865, + "learning_rate": 1.049969792583308e-06, + "loss": 0.6634, + "step": 15493 + }, + { + "epoch": 0.7962791653818481, + "grad_norm": 1.1659843921661377, + "learning_rate": 1.0494595931207868e-06, + "loss": 0.7412, + "step": 15494 + }, + { + "epoch": 0.7963305581251927, + "grad_norm": 0.6134080290794373, + "learning_rate": 1.0489495031114383e-06, + "loss": 0.5861, + "step": 15495 + }, + { + "epoch": 0.7963819508685374, + "grad_norm": 1.1059802770614624, + "learning_rate": 1.0484395225693944e-06, + "loss": 0.718, + "step": 15496 + }, + { + "epoch": 0.796433343611882, + "grad_norm": 0.7163943648338318, + "learning_rate": 1.0479296515087829e-06, + "loss": 0.6066, + "step": 15497 + }, + { + "epoch": 0.7964847363552267, + "grad_norm": 1.0998553037643433, + "learning_rate": 1.0474198899437338e-06, + "loss": 0.6791, + "step": 15498 + }, + { + "epoch": 0.7965361290985713, + "grad_norm": 1.0531493425369263, + "learning_rate": 1.0469102378883655e-06, + "loss": 0.689, + "step": 15499 + }, + { + "epoch": 0.796587521841916, + "grad_norm": 1.0829755067825317, + "learning_rate": 1.0464006953568028e-06, + "loss": 0.6864, + "step": 15500 + }, + { + "epoch": 0.7966389145852606, + "grad_norm": 1.0901342630386353, + "learning_rate": 1.0458912623631612e-06, + "loss": 0.7151, + "step": 15501 + }, + { + "epoch": 0.7966903073286052, + "grad_norm": 1.1875600814819336, + "learning_rate": 1.0453819389215552e-06, + "loss": 0.65, + "step": 15502 + }, + { + "epoch": 0.7967417000719499, + "grad_norm": 1.1538118124008179, + "learning_rate": 1.0448727250460945e-06, + "loss": 0.6925, + "step": 15503 + }, + { + "epoch": 0.7967930928152945, + "grad_norm": 1.0133543014526367, + "learning_rate": 1.0443636207508901e-06, + "loss": 0.6742, + "step": 15504 + }, + { + "epoch": 0.7968444855586391, + "grad_norm": 1.0463714599609375, + "learning_rate": 1.0438546260500455e-06, + "loss": 0.6745, + "step": 15505 + }, + { + "epoch": 0.7968958783019837, + "grad_norm": 1.0775257349014282, + "learning_rate": 1.043345740957662e-06, + "loss": 0.7062, + "step": 15506 + }, + { + "epoch": 0.7969472710453284, + "grad_norm": 1.1011615991592407, + "learning_rate": 1.0428369654878411e-06, + "loss": 0.6864, + "step": 15507 + }, + { + "epoch": 0.796998663788673, + "grad_norm": 1.0516574382781982, + "learning_rate": 1.042328299654678e-06, + "loss": 0.6813, + "step": 15508 + }, + { + "epoch": 0.7970500565320177, + "grad_norm": 1.0544841289520264, + "learning_rate": 1.0418197434722654e-06, + "loss": 0.6963, + "step": 15509 + }, + { + "epoch": 0.7971014492753623, + "grad_norm": 1.0776758193969727, + "learning_rate": 1.0413112969546919e-06, + "loss": 0.7233, + "step": 15510 + }, + { + "epoch": 0.797152842018707, + "grad_norm": 1.0615752935409546, + "learning_rate": 1.040802960116048e-06, + "loss": 0.7203, + "step": 15511 + }, + { + "epoch": 0.7972042347620516, + "grad_norm": 0.8504183292388916, + "learning_rate": 1.0402947329704128e-06, + "loss": 0.6262, + "step": 15512 + }, + { + "epoch": 0.7972556275053962, + "grad_norm": 1.1260610818862915, + "learning_rate": 1.0397866155318715e-06, + "loss": 0.7167, + "step": 15513 + }, + { + "epoch": 0.7973070202487409, + "grad_norm": 1.0777418613433838, + "learning_rate": 1.0392786078145e-06, + "loss": 0.7125, + "step": 15514 + }, + { + "epoch": 0.7973584129920855, + "grad_norm": 1.129145860671997, + "learning_rate": 1.0387707098323723e-06, + "loss": 0.7395, + "step": 15515 + }, + { + "epoch": 0.7974098057354302, + "grad_norm": 1.115346908569336, + "learning_rate": 1.0382629215995627e-06, + "loss": 0.7768, + "step": 15516 + }, + { + "epoch": 0.7974611984787748, + "grad_norm": 1.2034854888916016, + "learning_rate": 1.037755243130138e-06, + "loss": 0.6906, + "step": 15517 + }, + { + "epoch": 0.7975125912221195, + "grad_norm": 1.0902252197265625, + "learning_rate": 1.0372476744381644e-06, + "loss": 0.6732, + "step": 15518 + }, + { + "epoch": 0.7975639839654641, + "grad_norm": 1.0775169134140015, + "learning_rate": 1.0367402155377031e-06, + "loss": 0.6899, + "step": 15519 + }, + { + "epoch": 0.7976153767088087, + "grad_norm": 1.1425414085388184, + "learning_rate": 1.0362328664428163e-06, + "loss": 0.6947, + "step": 15520 + }, + { + "epoch": 0.7976667694521533, + "grad_norm": 1.0651463270187378, + "learning_rate": 1.0357256271675592e-06, + "loss": 0.6408, + "step": 15521 + }, + { + "epoch": 0.797718162195498, + "grad_norm": 1.1091195344924927, + "learning_rate": 1.0352184977259855e-06, + "loss": 0.6675, + "step": 15522 + }, + { + "epoch": 0.7977695549388426, + "grad_norm": 1.086436152458191, + "learning_rate": 1.0347114781321443e-06, + "loss": 0.7005, + "step": 15523 + }, + { + "epoch": 0.7978209476821873, + "grad_norm": 1.1279137134552002, + "learning_rate": 1.0342045684000857e-06, + "loss": 0.7386, + "step": 15524 + }, + { + "epoch": 0.7978723404255319, + "grad_norm": 1.048555612564087, + "learning_rate": 1.033697768543852e-06, + "loss": 0.7227, + "step": 15525 + }, + { + "epoch": 0.7979237331688765, + "grad_norm": 1.0486903190612793, + "learning_rate": 1.0331910785774856e-06, + "loss": 0.6992, + "step": 15526 + }, + { + "epoch": 0.7979751259122212, + "grad_norm": 1.0420340299606323, + "learning_rate": 1.0326844985150248e-06, + "loss": 0.686, + "step": 15527 + }, + { + "epoch": 0.7980265186555658, + "grad_norm": 1.1786748170852661, + "learning_rate": 1.0321780283705023e-06, + "loss": 0.675, + "step": 15528 + }, + { + "epoch": 0.7980779113989105, + "grad_norm": 0.7970410585403442, + "learning_rate": 1.0316716681579542e-06, + "loss": 0.6326, + "step": 15529 + }, + { + "epoch": 0.7981293041422551, + "grad_norm": 1.1323000192642212, + "learning_rate": 1.0311654178914077e-06, + "loss": 0.7366, + "step": 15530 + }, + { + "epoch": 0.7981806968855998, + "grad_norm": 1.047073245048523, + "learning_rate": 1.0306592775848895e-06, + "loss": 0.5966, + "step": 15531 + }, + { + "epoch": 0.7982320896289444, + "grad_norm": 1.116508960723877, + "learning_rate": 1.03015324725242e-06, + "loss": 0.6602, + "step": 15532 + }, + { + "epoch": 0.7982834823722891, + "grad_norm": 1.1088706254959106, + "learning_rate": 1.0296473269080248e-06, + "loss": 0.712, + "step": 15533 + }, + { + "epoch": 0.7983348751156337, + "grad_norm": 0.7519833445549011, + "learning_rate": 1.0291415165657143e-06, + "loss": 0.6156, + "step": 15534 + }, + { + "epoch": 0.7983862678589783, + "grad_norm": 1.0853588581085205, + "learning_rate": 1.0286358162395078e-06, + "loss": 0.7073, + "step": 15535 + }, + { + "epoch": 0.7984376606023229, + "grad_norm": 1.1213548183441162, + "learning_rate": 1.0281302259434117e-06, + "loss": 0.7099, + "step": 15536 + }, + { + "epoch": 0.7984890533456676, + "grad_norm": 1.0626076459884644, + "learning_rate": 1.0276247456914379e-06, + "loss": 0.6133, + "step": 15537 + }, + { + "epoch": 0.7985404460890122, + "grad_norm": 1.1052672863006592, + "learning_rate": 1.0271193754975895e-06, + "loss": 0.7001, + "step": 15538 + }, + { + "epoch": 0.7985918388323568, + "grad_norm": 0.8310667276382446, + "learning_rate": 1.0266141153758663e-06, + "loss": 0.6647, + "step": 15539 + }, + { + "epoch": 0.7986432315757015, + "grad_norm": 1.1308774948120117, + "learning_rate": 1.0261089653402718e-06, + "loss": 0.6995, + "step": 15540 + }, + { + "epoch": 0.7986946243190461, + "grad_norm": 1.0704110860824585, + "learning_rate": 1.0256039254047957e-06, + "loss": 0.6167, + "step": 15541 + }, + { + "epoch": 0.7987460170623908, + "grad_norm": 1.0361599922180176, + "learning_rate": 1.025098995583435e-06, + "loss": 0.7248, + "step": 15542 + }, + { + "epoch": 0.7987974098057354, + "grad_norm": 1.0793198347091675, + "learning_rate": 1.0245941758901773e-06, + "loss": 0.6968, + "step": 15543 + }, + { + "epoch": 0.7988488025490801, + "grad_norm": 0.6834498047828674, + "learning_rate": 1.0240894663390099e-06, + "loss": 0.6779, + "step": 15544 + }, + { + "epoch": 0.7989001952924247, + "grad_norm": 1.0209816694259644, + "learning_rate": 1.0235848669439142e-06, + "loss": 0.6628, + "step": 15545 + }, + { + "epoch": 0.7989515880357694, + "grad_norm": 1.0612114667892456, + "learning_rate": 1.0230803777188735e-06, + "loss": 0.7005, + "step": 15546 + }, + { + "epoch": 0.799002980779114, + "grad_norm": 1.0969185829162598, + "learning_rate": 1.0225759986778639e-06, + "loss": 0.7234, + "step": 15547 + }, + { + "epoch": 0.7990543735224587, + "grad_norm": 1.0239009857177734, + "learning_rate": 1.022071729834858e-06, + "loss": 0.663, + "step": 15548 + }, + { + "epoch": 0.7991057662658033, + "grad_norm": 1.0455204248428345, + "learning_rate": 1.0215675712038314e-06, + "loss": 0.6833, + "step": 15549 + }, + { + "epoch": 0.7991571590091479, + "grad_norm": 1.0256009101867676, + "learning_rate": 1.0210635227987464e-06, + "loss": 0.6624, + "step": 15550 + }, + { + "epoch": 0.7992085517524925, + "grad_norm": 1.0611093044281006, + "learning_rate": 1.0205595846335725e-06, + "loss": 0.6867, + "step": 15551 + }, + { + "epoch": 0.7992599444958371, + "grad_norm": 1.095306158065796, + "learning_rate": 1.020055756722269e-06, + "loss": 0.6809, + "step": 15552 + }, + { + "epoch": 0.7993113372391818, + "grad_norm": 1.1080607175827026, + "learning_rate": 1.0195520390787994e-06, + "loss": 0.7465, + "step": 15553 + }, + { + "epoch": 0.7993627299825264, + "grad_norm": 0.6830762624740601, + "learning_rate": 1.0190484317171135e-06, + "loss": 0.6493, + "step": 15554 + }, + { + "epoch": 0.7994141227258711, + "grad_norm": 1.0532892942428589, + "learning_rate": 1.0185449346511682e-06, + "loss": 0.693, + "step": 15555 + }, + { + "epoch": 0.7994655154692157, + "grad_norm": 1.1248561143875122, + "learning_rate": 1.0180415478949124e-06, + "loss": 0.7388, + "step": 15556 + }, + { + "epoch": 0.7995169082125604, + "grad_norm": 1.112711787223816, + "learning_rate": 1.0175382714622918e-06, + "loss": 0.6962, + "step": 15557 + }, + { + "epoch": 0.799568300955905, + "grad_norm": 1.0684731006622314, + "learning_rate": 1.0170351053672516e-06, + "loss": 0.7177, + "step": 15558 + }, + { + "epoch": 0.7996196936992497, + "grad_norm": 1.0826455354690552, + "learning_rate": 1.0165320496237324e-06, + "loss": 0.6994, + "step": 15559 + }, + { + "epoch": 0.7996710864425943, + "grad_norm": 1.101013422012329, + "learning_rate": 1.0160291042456711e-06, + "loss": 0.6792, + "step": 15560 + }, + { + "epoch": 0.799722479185939, + "grad_norm": 0.7452312707901001, + "learning_rate": 1.0155262692470013e-06, + "loss": 0.6396, + "step": 15561 + }, + { + "epoch": 0.7997738719292836, + "grad_norm": 1.071894884109497, + "learning_rate": 1.0150235446416585e-06, + "loss": 0.6931, + "step": 15562 + }, + { + "epoch": 0.7998252646726283, + "grad_norm": 0.8513612747192383, + "learning_rate": 1.014520930443565e-06, + "loss": 0.6294, + "step": 15563 + }, + { + "epoch": 0.7998766574159729, + "grad_norm": 1.0244433879852295, + "learning_rate": 1.0140184266666508e-06, + "loss": 0.6574, + "step": 15564 + }, + { + "epoch": 0.7999280501593174, + "grad_norm": 1.205319881439209, + "learning_rate": 1.0135160333248373e-06, + "loss": 0.6913, + "step": 15565 + }, + { + "epoch": 0.7999794429026621, + "grad_norm": 1.0883944034576416, + "learning_rate": 1.013013750432042e-06, + "loss": 0.7568, + "step": 15566 + }, + { + "epoch": 0.8000308356460067, + "grad_norm": 1.0591858625411987, + "learning_rate": 1.0125115780021833e-06, + "loss": 0.6339, + "step": 15567 + }, + { + "epoch": 0.8000822283893514, + "grad_norm": 1.0410797595977783, + "learning_rate": 1.0120095160491732e-06, + "loss": 0.6937, + "step": 15568 + }, + { + "epoch": 0.800133621132696, + "grad_norm": 0.7365021109580994, + "learning_rate": 1.0115075645869221e-06, + "loss": 0.6459, + "step": 15569 + }, + { + "epoch": 0.8001850138760407, + "grad_norm": 1.1553517580032349, + "learning_rate": 1.0110057236293358e-06, + "loss": 0.6838, + "step": 15570 + }, + { + "epoch": 0.8002364066193853, + "grad_norm": 1.0949389934539795, + "learning_rate": 1.0105039931903203e-06, + "loss": 0.6504, + "step": 15571 + }, + { + "epoch": 0.80028779936273, + "grad_norm": 1.1046245098114014, + "learning_rate": 1.010002373283776e-06, + "loss": 0.6931, + "step": 15572 + }, + { + "epoch": 0.8003391921060746, + "grad_norm": 1.1114506721496582, + "learning_rate": 1.0095008639235997e-06, + "loss": 0.6671, + "step": 15573 + }, + { + "epoch": 0.8003905848494193, + "grad_norm": 1.2202574014663696, + "learning_rate": 1.0089994651236857e-06, + "loss": 0.6798, + "step": 15574 + }, + { + "epoch": 0.8004419775927639, + "grad_norm": 1.0099931955337524, + "learning_rate": 1.0084981768979275e-06, + "loss": 0.7015, + "step": 15575 + }, + { + "epoch": 0.8004933703361086, + "grad_norm": 1.0747121572494507, + "learning_rate": 1.007996999260213e-06, + "loss": 0.7437, + "step": 15576 + }, + { + "epoch": 0.8005447630794532, + "grad_norm": 1.162327766418457, + "learning_rate": 1.0074959322244277e-06, + "loss": 0.7627, + "step": 15577 + }, + { + "epoch": 0.8005961558227979, + "grad_norm": 0.7410362362861633, + "learning_rate": 1.006994975804454e-06, + "loss": 0.6269, + "step": 15578 + }, + { + "epoch": 0.8006475485661425, + "grad_norm": 1.1591094732284546, + "learning_rate": 1.0064941300141701e-06, + "loss": 0.7315, + "step": 15579 + }, + { + "epoch": 0.8006989413094872, + "grad_norm": 1.108821153640747, + "learning_rate": 1.0059933948674549e-06, + "loss": 0.6776, + "step": 15580 + }, + { + "epoch": 0.8007503340528317, + "grad_norm": 1.0784499645233154, + "learning_rate": 1.0054927703781803e-06, + "loss": 0.6911, + "step": 15581 + }, + { + "epoch": 0.8008017267961763, + "grad_norm": 1.0099598169326782, + "learning_rate": 1.0049922565602172e-06, + "loss": 0.6709, + "step": 15582 + }, + { + "epoch": 0.800853119539521, + "grad_norm": 1.070371150970459, + "learning_rate": 1.00449185342743e-06, + "loss": 0.6939, + "step": 15583 + }, + { + "epoch": 0.8009045122828656, + "grad_norm": 1.055928349494934, + "learning_rate": 1.0039915609936873e-06, + "loss": 0.6671, + "step": 15584 + }, + { + "epoch": 0.8009559050262103, + "grad_norm": 1.0562026500701904, + "learning_rate": 1.0034913792728473e-06, + "loss": 0.6894, + "step": 15585 + }, + { + "epoch": 0.8010072977695549, + "grad_norm": 1.091769814491272, + "learning_rate": 1.002991308278769e-06, + "loss": 0.7148, + "step": 15586 + }, + { + "epoch": 0.8010586905128996, + "grad_norm": 1.0034046173095703, + "learning_rate": 1.0024913480253052e-06, + "loss": 0.6782, + "step": 15587 + }, + { + "epoch": 0.8011100832562442, + "grad_norm": 0.6950538754463196, + "learning_rate": 1.0019914985263107e-06, + "loss": 0.6747, + "step": 15588 + }, + { + "epoch": 0.8011614759995889, + "grad_norm": 1.0451956987380981, + "learning_rate": 1.001491759795633e-06, + "loss": 0.6771, + "step": 15589 + }, + { + "epoch": 0.8012128687429335, + "grad_norm": 1.072151780128479, + "learning_rate": 1.0009921318471166e-06, + "loss": 0.6786, + "step": 15590 + }, + { + "epoch": 0.8012642614862782, + "grad_norm": 1.0466493368148804, + "learning_rate": 1.0004926146946082e-06, + "loss": 0.6758, + "step": 15591 + }, + { + "epoch": 0.8013156542296228, + "grad_norm": 1.0870583057403564, + "learning_rate": 9.999932083519414e-07, + "loss": 0.718, + "step": 15592 + }, + { + "epoch": 0.8013670469729675, + "grad_norm": 1.0378315448760986, + "learning_rate": 9.994939128329572e-07, + "loss": 0.7239, + "step": 15593 + }, + { + "epoch": 0.8014184397163121, + "grad_norm": 1.083591341972351, + "learning_rate": 9.989947281514872e-07, + "loss": 0.7308, + "step": 15594 + }, + { + "epoch": 0.8014698324596568, + "grad_norm": 1.0793935060501099, + "learning_rate": 9.984956543213626e-07, + "loss": 0.7228, + "step": 15595 + }, + { + "epoch": 0.8015212252030013, + "grad_norm": 1.0766280889511108, + "learning_rate": 9.979966913564088e-07, + "loss": 0.7099, + "step": 15596 + }, + { + "epoch": 0.8015726179463459, + "grad_norm": 0.7160735130310059, + "learning_rate": 9.974978392704526e-07, + "loss": 0.6255, + "step": 15597 + }, + { + "epoch": 0.8016240106896906, + "grad_norm": 1.1363067626953125, + "learning_rate": 9.969990980773143e-07, + "loss": 0.6699, + "step": 15598 + }, + { + "epoch": 0.8016754034330352, + "grad_norm": 1.161634922027588, + "learning_rate": 9.965004677908097e-07, + "loss": 0.6926, + "step": 15599 + }, + { + "epoch": 0.8017267961763799, + "grad_norm": 1.2038350105285645, + "learning_rate": 9.96001948424757e-07, + "loss": 0.7305, + "step": 15600 + }, + { + "epoch": 0.8017781889197245, + "grad_norm": 1.0605764389038086, + "learning_rate": 9.955035399929668e-07, + "loss": 0.6734, + "step": 15601 + }, + { + "epoch": 0.8018295816630692, + "grad_norm": 1.115917444229126, + "learning_rate": 9.950052425092482e-07, + "loss": 0.7073, + "step": 15602 + }, + { + "epoch": 0.8018809744064138, + "grad_norm": 1.080880880355835, + "learning_rate": 9.945070559874054e-07, + "loss": 0.6887, + "step": 15603 + }, + { + "epoch": 0.8019323671497585, + "grad_norm": 1.098900556564331, + "learning_rate": 9.94008980441245e-07, + "loss": 0.6543, + "step": 15604 + }, + { + "epoch": 0.8019837598931031, + "grad_norm": 1.1283313035964966, + "learning_rate": 9.935110158845613e-07, + "loss": 0.6558, + "step": 15605 + }, + { + "epoch": 0.8020351526364478, + "grad_norm": 0.6943267583847046, + "learning_rate": 9.930131623311545e-07, + "loss": 0.686, + "step": 15606 + }, + { + "epoch": 0.8020865453797924, + "grad_norm": 1.0808005332946777, + "learning_rate": 9.925154197948178e-07, + "loss": 0.7146, + "step": 15607 + }, + { + "epoch": 0.802137938123137, + "grad_norm": 1.1013894081115723, + "learning_rate": 9.92017788289339e-07, + "loss": 0.7331, + "step": 15608 + }, + { + "epoch": 0.8021893308664817, + "grad_norm": 1.070826530456543, + "learning_rate": 9.91520267828509e-07, + "loss": 0.6792, + "step": 15609 + }, + { + "epoch": 0.8022407236098263, + "grad_norm": 1.0613547563552856, + "learning_rate": 9.9102285842611e-07, + "loss": 0.687, + "step": 15610 + }, + { + "epoch": 0.8022921163531709, + "grad_norm": 0.7654101252555847, + "learning_rate": 9.90525560095924e-07, + "loss": 0.6429, + "step": 15611 + }, + { + "epoch": 0.8023435090965155, + "grad_norm": 1.1360700130462646, + "learning_rate": 9.900283728517268e-07, + "loss": 0.7617, + "step": 15612 + }, + { + "epoch": 0.8023949018398602, + "grad_norm": 1.096570372581482, + "learning_rate": 9.89531296707298e-07, + "loss": 0.6974, + "step": 15613 + }, + { + "epoch": 0.8024462945832048, + "grad_norm": 1.1864348649978638, + "learning_rate": 9.890343316764044e-07, + "loss": 0.7214, + "step": 15614 + }, + { + "epoch": 0.8024976873265495, + "grad_norm": 1.13390052318573, + "learning_rate": 9.885374777728179e-07, + "loss": 0.6595, + "step": 15615 + }, + { + "epoch": 0.8025490800698941, + "grad_norm": 1.0865827798843384, + "learning_rate": 9.880407350103026e-07, + "loss": 0.7085, + "step": 15616 + }, + { + "epoch": 0.8026004728132388, + "grad_norm": 1.1118522882461548, + "learning_rate": 9.87544103402624e-07, + "loss": 0.6965, + "step": 15617 + }, + { + "epoch": 0.8026518655565834, + "grad_norm": 1.0904381275177002, + "learning_rate": 9.87047582963539e-07, + "loss": 0.7246, + "step": 15618 + }, + { + "epoch": 0.8027032582999281, + "grad_norm": 1.0271846055984497, + "learning_rate": 9.865511737068056e-07, + "loss": 0.699, + "step": 15619 + }, + { + "epoch": 0.8027546510432727, + "grad_norm": 1.064740538597107, + "learning_rate": 9.860548756461763e-07, + "loss": 0.7583, + "step": 15620 + }, + { + "epoch": 0.8028060437866174, + "grad_norm": 1.0679099559783936, + "learning_rate": 9.855586887954006e-07, + "loss": 0.7152, + "step": 15621 + }, + { + "epoch": 0.802857436529962, + "grad_norm": 1.084946870803833, + "learning_rate": 9.850626131682283e-07, + "loss": 0.6279, + "step": 15622 + }, + { + "epoch": 0.8029088292733066, + "grad_norm": 1.075135588645935, + "learning_rate": 9.845666487784027e-07, + "loss": 0.652, + "step": 15623 + }, + { + "epoch": 0.8029602220166513, + "grad_norm": 1.0716438293457031, + "learning_rate": 9.84070795639664e-07, + "loss": 0.6568, + "step": 15624 + }, + { + "epoch": 0.8030116147599959, + "grad_norm": 1.0080878734588623, + "learning_rate": 9.835750537657496e-07, + "loss": 0.6888, + "step": 15625 + }, + { + "epoch": 0.8030630075033405, + "grad_norm": 1.2478488683700562, + "learning_rate": 9.830794231703972e-07, + "loss": 0.6563, + "step": 15626 + }, + { + "epoch": 0.8031144002466851, + "grad_norm": 1.0444831848144531, + "learning_rate": 9.825839038673368e-07, + "loss": 0.6629, + "step": 15627 + }, + { + "epoch": 0.8031657929900298, + "grad_norm": 1.0878138542175293, + "learning_rate": 9.820884958702982e-07, + "loss": 0.6598, + "step": 15628 + }, + { + "epoch": 0.8032171857333744, + "grad_norm": 0.7106992602348328, + "learning_rate": 9.815931991930043e-07, + "loss": 0.6209, + "step": 15629 + }, + { + "epoch": 0.8032685784767191, + "grad_norm": 1.0674991607666016, + "learning_rate": 9.810980138491816e-07, + "loss": 0.7091, + "step": 15630 + }, + { + "epoch": 0.8033199712200637, + "grad_norm": 1.1658207178115845, + "learning_rate": 9.80602939852548e-07, + "loss": 0.7056, + "step": 15631 + }, + { + "epoch": 0.8033713639634084, + "grad_norm": 1.1125903129577637, + "learning_rate": 9.801079772168182e-07, + "loss": 0.693, + "step": 15632 + }, + { + "epoch": 0.803422756706753, + "grad_norm": 1.257317066192627, + "learning_rate": 9.796131259557102e-07, + "loss": 0.7015, + "step": 15633 + }, + { + "epoch": 0.8034741494500977, + "grad_norm": 1.1985628604888916, + "learning_rate": 9.791183860829284e-07, + "loss": 0.7352, + "step": 15634 + }, + { + "epoch": 0.8035255421934423, + "grad_norm": 1.1209681034088135, + "learning_rate": 9.786237576121843e-07, + "loss": 0.6715, + "step": 15635 + }, + { + "epoch": 0.803576934936787, + "grad_norm": 1.0488269329071045, + "learning_rate": 9.781292405571808e-07, + "loss": 0.6897, + "step": 15636 + }, + { + "epoch": 0.8036283276801316, + "grad_norm": 1.1063227653503418, + "learning_rate": 9.776348349316188e-07, + "loss": 0.6677, + "step": 15637 + }, + { + "epoch": 0.8036797204234762, + "grad_norm": 1.105486512184143, + "learning_rate": 9.771405407491945e-07, + "loss": 0.6687, + "step": 15638 + }, + { + "epoch": 0.8037311131668209, + "grad_norm": 1.1207412481307983, + "learning_rate": 9.766463580236063e-07, + "loss": 0.7404, + "step": 15639 + }, + { + "epoch": 0.8037825059101655, + "grad_norm": 1.1348786354064941, + "learning_rate": 9.761522867685441e-07, + "loss": 0.6946, + "step": 15640 + }, + { + "epoch": 0.8038338986535101, + "grad_norm": 1.0985136032104492, + "learning_rate": 9.75658326997695e-07, + "loss": 0.6845, + "step": 15641 + }, + { + "epoch": 0.8038852913968547, + "grad_norm": 1.0935940742492676, + "learning_rate": 9.751644787247488e-07, + "loss": 0.6789, + "step": 15642 + }, + { + "epoch": 0.8039366841401994, + "grad_norm": 1.1621572971343994, + "learning_rate": 9.746707419633827e-07, + "loss": 0.6679, + "step": 15643 + }, + { + "epoch": 0.803988076883544, + "grad_norm": 1.0795621871948242, + "learning_rate": 9.741771167272802e-07, + "loss": 0.6454, + "step": 15644 + }, + { + "epoch": 0.8040394696268887, + "grad_norm": 0.7665844559669495, + "learning_rate": 9.73683603030115e-07, + "loss": 0.64, + "step": 15645 + }, + { + "epoch": 0.8040908623702333, + "grad_norm": 0.7586016058921814, + "learning_rate": 9.731902008855637e-07, + "loss": 0.6952, + "step": 15646 + }, + { + "epoch": 0.804142255113578, + "grad_norm": 1.0784357786178589, + "learning_rate": 9.72696910307292e-07, + "loss": 0.6533, + "step": 15647 + }, + { + "epoch": 0.8041936478569226, + "grad_norm": 1.0640716552734375, + "learning_rate": 9.722037313089706e-07, + "loss": 0.746, + "step": 15648 + }, + { + "epoch": 0.8042450406002672, + "grad_norm": 1.0395636558532715, + "learning_rate": 9.717106639042623e-07, + "loss": 0.6932, + "step": 15649 + }, + { + "epoch": 0.8042964333436119, + "grad_norm": 1.0173046588897705, + "learning_rate": 9.712177081068259e-07, + "loss": 0.6643, + "step": 15650 + }, + { + "epoch": 0.8043478260869565, + "grad_norm": 1.0855402946472168, + "learning_rate": 9.707248639303223e-07, + "loss": 0.662, + "step": 15651 + }, + { + "epoch": 0.8043992188303012, + "grad_norm": 1.06850004196167, + "learning_rate": 9.702321313884055e-07, + "loss": 0.6414, + "step": 15652 + }, + { + "epoch": 0.8044506115736458, + "grad_norm": 1.0983762741088867, + "learning_rate": 9.69739510494726e-07, + "loss": 0.7262, + "step": 15653 + }, + { + "epoch": 0.8045020043169905, + "grad_norm": 1.365220069885254, + "learning_rate": 9.692470012629318e-07, + "loss": 0.6667, + "step": 15654 + }, + { + "epoch": 0.8045533970603351, + "grad_norm": 1.0826640129089355, + "learning_rate": 9.687546037066714e-07, + "loss": 0.7195, + "step": 15655 + }, + { + "epoch": 0.8046047898036797, + "grad_norm": 0.7553123235702515, + "learning_rate": 9.682623178395828e-07, + "loss": 0.6178, + "step": 15656 + }, + { + "epoch": 0.8046561825470243, + "grad_norm": 1.1085318326950073, + "learning_rate": 9.677701436753083e-07, + "loss": 0.7208, + "step": 15657 + }, + { + "epoch": 0.804707575290369, + "grad_norm": 1.1513320207595825, + "learning_rate": 9.672780812274828e-07, + "loss": 0.7226, + "step": 15658 + }, + { + "epoch": 0.8047589680337136, + "grad_norm": 1.1253992319107056, + "learning_rate": 9.667861305097392e-07, + "loss": 0.6981, + "step": 15659 + }, + { + "epoch": 0.8048103607770583, + "grad_norm": 1.0486522912979126, + "learning_rate": 9.662942915357089e-07, + "loss": 0.6468, + "step": 15660 + }, + { + "epoch": 0.8048617535204029, + "grad_norm": 1.1129220724105835, + "learning_rate": 9.658025643190156e-07, + "loss": 0.6828, + "step": 15661 + }, + { + "epoch": 0.8049131462637475, + "grad_norm": 1.104549765586853, + "learning_rate": 9.653109488732886e-07, + "loss": 0.7193, + "step": 15662 + }, + { + "epoch": 0.8049645390070922, + "grad_norm": 1.1184258460998535, + "learning_rate": 9.648194452121417e-07, + "loss": 0.7119, + "step": 15663 + }, + { + "epoch": 0.8050159317504368, + "grad_norm": 1.0413025617599487, + "learning_rate": 9.643280533491978e-07, + "loss": 0.6775, + "step": 15664 + }, + { + "epoch": 0.8050673244937815, + "grad_norm": 1.085171103477478, + "learning_rate": 9.638367732980686e-07, + "loss": 0.7297, + "step": 15665 + }, + { + "epoch": 0.8051187172371261, + "grad_norm": 1.055050015449524, + "learning_rate": 9.633456050723666e-07, + "loss": 0.6911, + "step": 15666 + }, + { + "epoch": 0.8051701099804708, + "grad_norm": 0.7985503673553467, + "learning_rate": 9.628545486856984e-07, + "loss": 0.6405, + "step": 15667 + }, + { + "epoch": 0.8052215027238154, + "grad_norm": 1.0397311449050903, + "learning_rate": 9.62363604151671e-07, + "loss": 0.6581, + "step": 15668 + }, + { + "epoch": 0.8052728954671601, + "grad_norm": 1.1171287298202515, + "learning_rate": 9.618727714838865e-07, + "loss": 0.7058, + "step": 15669 + }, + { + "epoch": 0.8053242882105047, + "grad_norm": 1.140602707862854, + "learning_rate": 9.613820506959425e-07, + "loss": 0.6892, + "step": 15670 + }, + { + "epoch": 0.8053756809538494, + "grad_norm": 1.0671157836914062, + "learning_rate": 9.608914418014358e-07, + "loss": 0.6819, + "step": 15671 + }, + { + "epoch": 0.8054270736971939, + "grad_norm": 1.0495786666870117, + "learning_rate": 9.604009448139578e-07, + "loss": 0.684, + "step": 15672 + }, + { + "epoch": 0.8054784664405386, + "grad_norm": 1.1356549263000488, + "learning_rate": 9.599105597471004e-07, + "loss": 0.6342, + "step": 15673 + }, + { + "epoch": 0.8055298591838832, + "grad_norm": 1.1024576425552368, + "learning_rate": 9.594202866144487e-07, + "loss": 0.7053, + "step": 15674 + }, + { + "epoch": 0.8055812519272278, + "grad_norm": 1.5508676767349243, + "learning_rate": 9.589301254295863e-07, + "loss": 0.6786, + "step": 15675 + }, + { + "epoch": 0.8056326446705725, + "grad_norm": 1.115859031677246, + "learning_rate": 9.584400762060925e-07, + "loss": 0.6749, + "step": 15676 + }, + { + "epoch": 0.8056840374139171, + "grad_norm": 1.1650100946426392, + "learning_rate": 9.579501389575468e-07, + "loss": 0.7519, + "step": 15677 + }, + { + "epoch": 0.8057354301572618, + "grad_norm": 1.087889313697815, + "learning_rate": 9.574603136975224e-07, + "loss": 0.743, + "step": 15678 + }, + { + "epoch": 0.8057868229006064, + "grad_norm": 1.1158632040023804, + "learning_rate": 9.569706004395902e-07, + "loss": 0.7035, + "step": 15679 + }, + { + "epoch": 0.8058382156439511, + "grad_norm": 1.1405482292175293, + "learning_rate": 9.564809991973162e-07, + "loss": 0.7114, + "step": 15680 + }, + { + "epoch": 0.8058896083872957, + "grad_norm": 1.0613936185836792, + "learning_rate": 9.559915099842686e-07, + "loss": 0.7328, + "step": 15681 + }, + { + "epoch": 0.8059410011306404, + "grad_norm": 1.132988691329956, + "learning_rate": 9.555021328140079e-07, + "loss": 0.7375, + "step": 15682 + }, + { + "epoch": 0.805992393873985, + "grad_norm": 1.0173375606536865, + "learning_rate": 9.550128677000913e-07, + "loss": 0.6894, + "step": 15683 + }, + { + "epoch": 0.8060437866173297, + "grad_norm": 1.0887941122055054, + "learning_rate": 9.545237146560776e-07, + "loss": 0.7042, + "step": 15684 + }, + { + "epoch": 0.8060951793606743, + "grad_norm": 1.068013072013855, + "learning_rate": 9.540346736955146e-07, + "loss": 0.6867, + "step": 15685 + }, + { + "epoch": 0.806146572104019, + "grad_norm": 0.7514823079109192, + "learning_rate": 9.535457448319557e-07, + "loss": 0.6406, + "step": 15686 + }, + { + "epoch": 0.8061979648473635, + "grad_norm": 1.1355196237564087, + "learning_rate": 9.530569280789453e-07, + "loss": 0.6679, + "step": 15687 + }, + { + "epoch": 0.8062493575907081, + "grad_norm": 1.115376353263855, + "learning_rate": 9.525682234500266e-07, + "loss": 0.7105, + "step": 15688 + }, + { + "epoch": 0.8063007503340528, + "grad_norm": 1.0226393938064575, + "learning_rate": 9.520796309587382e-07, + "loss": 0.6378, + "step": 15689 + }, + { + "epoch": 0.8063521430773974, + "grad_norm": 1.0538554191589355, + "learning_rate": 9.515911506186198e-07, + "loss": 0.7031, + "step": 15690 + }, + { + "epoch": 0.8064035358207421, + "grad_norm": 1.3419560194015503, + "learning_rate": 9.51102782443204e-07, + "loss": 0.6946, + "step": 15691 + }, + { + "epoch": 0.8064549285640867, + "grad_norm": 1.055940866470337, + "learning_rate": 9.506145264460198e-07, + "loss": 0.7036, + "step": 15692 + }, + { + "epoch": 0.8065063213074314, + "grad_norm": 1.057896375656128, + "learning_rate": 9.501263826405977e-07, + "loss": 0.693, + "step": 15693 + }, + { + "epoch": 0.806557714050776, + "grad_norm": 0.7647779583930969, + "learning_rate": 9.496383510404605e-07, + "loss": 0.6237, + "step": 15694 + }, + { + "epoch": 0.8066091067941207, + "grad_norm": 1.1295554637908936, + "learning_rate": 9.491504316591305e-07, + "loss": 0.7537, + "step": 15695 + }, + { + "epoch": 0.8066604995374653, + "grad_norm": 1.0404016971588135, + "learning_rate": 9.486626245101227e-07, + "loss": 0.6972, + "step": 15696 + }, + { + "epoch": 0.80671189228081, + "grad_norm": 1.1245297193527222, + "learning_rate": 9.481749296069581e-07, + "loss": 0.7007, + "step": 15697 + }, + { + "epoch": 0.8067632850241546, + "grad_norm": 1.0916781425476074, + "learning_rate": 9.476873469631425e-07, + "loss": 0.6921, + "step": 15698 + }, + { + "epoch": 0.8068146777674993, + "grad_norm": 0.7407129406929016, + "learning_rate": 9.471998765921886e-07, + "loss": 0.5987, + "step": 15699 + }, + { + "epoch": 0.8068660705108439, + "grad_norm": 1.0746461153030396, + "learning_rate": 9.467125185076015e-07, + "loss": 0.6822, + "step": 15700 + }, + { + "epoch": 0.8069174632541886, + "grad_norm": 0.7065196633338928, + "learning_rate": 9.462252727228827e-07, + "loss": 0.701, + "step": 15701 + }, + { + "epoch": 0.8069688559975331, + "grad_norm": 1.111271858215332, + "learning_rate": 9.45738139251533e-07, + "loss": 0.7077, + "step": 15702 + }, + { + "epoch": 0.8070202487408777, + "grad_norm": 0.7323873043060303, + "learning_rate": 9.452511181070495e-07, + "loss": 0.6751, + "step": 15703 + }, + { + "epoch": 0.8070716414842224, + "grad_norm": 1.0814621448516846, + "learning_rate": 9.447642093029241e-07, + "loss": 0.6295, + "step": 15704 + }, + { + "epoch": 0.807123034227567, + "grad_norm": 0.7119811773300171, + "learning_rate": 9.44277412852646e-07, + "loss": 0.627, + "step": 15705 + }, + { + "epoch": 0.8071744269709117, + "grad_norm": 1.097100853919983, + "learning_rate": 9.437907287697063e-07, + "loss": 0.7065, + "step": 15706 + }, + { + "epoch": 0.8072258197142563, + "grad_norm": 1.092657446861267, + "learning_rate": 9.433041570675844e-07, + "loss": 0.7178, + "step": 15707 + }, + { + "epoch": 0.807277212457601, + "grad_norm": 1.057586669921875, + "learning_rate": 9.428176977597641e-07, + "loss": 0.6167, + "step": 15708 + }, + { + "epoch": 0.8073286052009456, + "grad_norm": 1.0280319452285767, + "learning_rate": 9.423313508597215e-07, + "loss": 0.6656, + "step": 15709 + }, + { + "epoch": 0.8073799979442903, + "grad_norm": 1.059216022491455, + "learning_rate": 9.418451163809334e-07, + "loss": 0.7224, + "step": 15710 + }, + { + "epoch": 0.8074313906876349, + "grad_norm": 1.1042299270629883, + "learning_rate": 9.413589943368701e-07, + "loss": 0.6767, + "step": 15711 + }, + { + "epoch": 0.8074827834309796, + "grad_norm": 1.123189926147461, + "learning_rate": 9.408729847409986e-07, + "loss": 0.686, + "step": 15712 + }, + { + "epoch": 0.8075341761743242, + "grad_norm": 1.057746171951294, + "learning_rate": 9.403870876067883e-07, + "loss": 0.7288, + "step": 15713 + }, + { + "epoch": 0.8075855689176689, + "grad_norm": 1.0662380456924438, + "learning_rate": 9.399013029476966e-07, + "loss": 0.7151, + "step": 15714 + }, + { + "epoch": 0.8076369616610135, + "grad_norm": 0.7433615922927856, + "learning_rate": 9.394156307771857e-07, + "loss": 0.6595, + "step": 15715 + }, + { + "epoch": 0.8076883544043582, + "grad_norm": 1.012619137763977, + "learning_rate": 9.389300711087107e-07, + "loss": 0.6692, + "step": 15716 + }, + { + "epoch": 0.8077397471477027, + "grad_norm": 0.6762338876724243, + "learning_rate": 9.384446239557243e-07, + "loss": 0.6169, + "step": 15717 + }, + { + "epoch": 0.8077911398910473, + "grad_norm": 1.132218360900879, + "learning_rate": 9.379592893316747e-07, + "loss": 0.6916, + "step": 15718 + }, + { + "epoch": 0.807842532634392, + "grad_norm": 1.0885467529296875, + "learning_rate": 9.374740672500121e-07, + "loss": 0.6729, + "step": 15719 + }, + { + "epoch": 0.8078939253777366, + "grad_norm": 1.108532190322876, + "learning_rate": 9.369889577241775e-07, + "loss": 0.707, + "step": 15720 + }, + { + "epoch": 0.8079453181210813, + "grad_norm": 1.0918254852294922, + "learning_rate": 9.365039607676119e-07, + "loss": 0.6692, + "step": 15721 + }, + { + "epoch": 0.8079967108644259, + "grad_norm": 1.087377905845642, + "learning_rate": 9.360190763937527e-07, + "loss": 0.674, + "step": 15722 + }, + { + "epoch": 0.8080481036077706, + "grad_norm": 1.094320297241211, + "learning_rate": 9.355343046160326e-07, + "loss": 0.67, + "step": 15723 + }, + { + "epoch": 0.8080994963511152, + "grad_norm": 1.0726280212402344, + "learning_rate": 9.350496454478852e-07, + "loss": 0.7016, + "step": 15724 + }, + { + "epoch": 0.8081508890944599, + "grad_norm": 1.114052414894104, + "learning_rate": 9.345650989027355e-07, + "loss": 0.756, + "step": 15725 + }, + { + "epoch": 0.8082022818378045, + "grad_norm": 1.0577454566955566, + "learning_rate": 9.340806649940126e-07, + "loss": 0.6422, + "step": 15726 + }, + { + "epoch": 0.8082536745811492, + "grad_norm": 1.0712518692016602, + "learning_rate": 9.335963437351325e-07, + "loss": 0.6679, + "step": 15727 + }, + { + "epoch": 0.8083050673244938, + "grad_norm": 1.0712239742279053, + "learning_rate": 9.331121351395184e-07, + "loss": 0.7428, + "step": 15728 + }, + { + "epoch": 0.8083564600678385, + "grad_norm": 1.047443151473999, + "learning_rate": 9.326280392205838e-07, + "loss": 0.6785, + "step": 15729 + }, + { + "epoch": 0.8084078528111831, + "grad_norm": 0.7112809419631958, + "learning_rate": 9.321440559917411e-07, + "loss": 0.6528, + "step": 15730 + }, + { + "epoch": 0.8084592455545278, + "grad_norm": 1.0286002159118652, + "learning_rate": 9.316601854663982e-07, + "loss": 0.6493, + "step": 15731 + }, + { + "epoch": 0.8085106382978723, + "grad_norm": 1.0858556032180786, + "learning_rate": 9.311764276579638e-07, + "loss": 0.7672, + "step": 15732 + }, + { + "epoch": 0.8085620310412169, + "grad_norm": 1.1062660217285156, + "learning_rate": 9.306927825798401e-07, + "loss": 0.7081, + "step": 15733 + }, + { + "epoch": 0.8086134237845616, + "grad_norm": 1.0216107368469238, + "learning_rate": 9.302092502454246e-07, + "loss": 0.6897, + "step": 15734 + }, + { + "epoch": 0.8086648165279062, + "grad_norm": 1.0044503211975098, + "learning_rate": 9.297258306681184e-07, + "loss": 0.6502, + "step": 15735 + }, + { + "epoch": 0.8087162092712509, + "grad_norm": 1.053539752960205, + "learning_rate": 9.292425238613095e-07, + "loss": 0.6859, + "step": 15736 + }, + { + "epoch": 0.8087676020145955, + "grad_norm": 1.121359944343567, + "learning_rate": 9.287593298383929e-07, + "loss": 0.7202, + "step": 15737 + }, + { + "epoch": 0.8088189947579402, + "grad_norm": 1.1422903537750244, + "learning_rate": 9.282762486127527e-07, + "loss": 0.6971, + "step": 15738 + }, + { + "epoch": 0.8088703875012848, + "grad_norm": 0.7986714243888855, + "learning_rate": 9.277932801977773e-07, + "loss": 0.6354, + "step": 15739 + }, + { + "epoch": 0.8089217802446295, + "grad_norm": 1.117967128753662, + "learning_rate": 9.273104246068426e-07, + "loss": 0.6674, + "step": 15740 + }, + { + "epoch": 0.8089731729879741, + "grad_norm": 1.1038676500320435, + "learning_rate": 9.268276818533306e-07, + "loss": 0.7255, + "step": 15741 + }, + { + "epoch": 0.8090245657313188, + "grad_norm": 1.1117148399353027, + "learning_rate": 9.263450519506146e-07, + "loss": 0.6595, + "step": 15742 + }, + { + "epoch": 0.8090759584746634, + "grad_norm": 1.1843831539154053, + "learning_rate": 9.258625349120643e-07, + "loss": 0.7194, + "step": 15743 + }, + { + "epoch": 0.809127351218008, + "grad_norm": 1.0743772983551025, + "learning_rate": 9.253801307510518e-07, + "loss": 0.7205, + "step": 15744 + }, + { + "epoch": 0.8091787439613527, + "grad_norm": 1.0779274702072144, + "learning_rate": 9.24897839480941e-07, + "loss": 0.7372, + "step": 15745 + }, + { + "epoch": 0.8092301367046973, + "grad_norm": 1.1169421672821045, + "learning_rate": 9.244156611150939e-07, + "loss": 0.6943, + "step": 15746 + }, + { + "epoch": 0.809281529448042, + "grad_norm": 1.1247409582138062, + "learning_rate": 9.239335956668688e-07, + "loss": 0.7663, + "step": 15747 + }, + { + "epoch": 0.8093329221913865, + "grad_norm": 1.0870590209960938, + "learning_rate": 9.234516431496255e-07, + "loss": 0.6561, + "step": 15748 + }, + { + "epoch": 0.8093843149347312, + "grad_norm": 1.01999032497406, + "learning_rate": 9.229698035767115e-07, + "loss": 0.689, + "step": 15749 + }, + { + "epoch": 0.8094357076780758, + "grad_norm": 1.1100109815597534, + "learning_rate": 9.224880769614802e-07, + "loss": 0.7131, + "step": 15750 + }, + { + "epoch": 0.8094871004214205, + "grad_norm": 1.0621927976608276, + "learning_rate": 9.220064633172782e-07, + "loss": 0.716, + "step": 15751 + }, + { + "epoch": 0.8095384931647651, + "grad_norm": 1.1482672691345215, + "learning_rate": 9.215249626574463e-07, + "loss": 0.7401, + "step": 15752 + }, + { + "epoch": 0.8095898859081098, + "grad_norm": 1.0536185503005981, + "learning_rate": 9.210435749953283e-07, + "loss": 0.7111, + "step": 15753 + }, + { + "epoch": 0.8096412786514544, + "grad_norm": 0.6769371032714844, + "learning_rate": 9.205623003442587e-07, + "loss": 0.6057, + "step": 15754 + }, + { + "epoch": 0.8096926713947991, + "grad_norm": 1.1154134273529053, + "learning_rate": 9.200811387175757e-07, + "loss": 0.7378, + "step": 15755 + }, + { + "epoch": 0.8097440641381437, + "grad_norm": 1.0264484882354736, + "learning_rate": 9.196000901286051e-07, + "loss": 0.7002, + "step": 15756 + }, + { + "epoch": 0.8097954568814884, + "grad_norm": 1.0683865547180176, + "learning_rate": 9.19119154590678e-07, + "loss": 0.7229, + "step": 15757 + }, + { + "epoch": 0.809846849624833, + "grad_norm": 0.7656886577606201, + "learning_rate": 9.186383321171183e-07, + "loss": 0.6523, + "step": 15758 + }, + { + "epoch": 0.8098982423681776, + "grad_norm": 1.0203948020935059, + "learning_rate": 9.18157622721248e-07, + "loss": 0.6423, + "step": 15759 + }, + { + "epoch": 0.8099496351115223, + "grad_norm": 1.1084619760513306, + "learning_rate": 9.176770264163837e-07, + "loss": 0.7276, + "step": 15760 + }, + { + "epoch": 0.8100010278548669, + "grad_norm": 1.1010531187057495, + "learning_rate": 9.171965432158436e-07, + "loss": 0.6365, + "step": 15761 + }, + { + "epoch": 0.8100524205982116, + "grad_norm": 1.0976752042770386, + "learning_rate": 9.167161731329383e-07, + "loss": 0.6359, + "step": 15762 + }, + { + "epoch": 0.8101038133415561, + "grad_norm": 1.0391898155212402, + "learning_rate": 9.162359161809759e-07, + "loss": 0.7008, + "step": 15763 + }, + { + "epoch": 0.8101552060849008, + "grad_norm": 1.2269951105117798, + "learning_rate": 9.157557723732663e-07, + "loss": 0.6584, + "step": 15764 + }, + { + "epoch": 0.8102065988282454, + "grad_norm": 1.0294278860092163, + "learning_rate": 9.152757417231073e-07, + "loss": 0.6456, + "step": 15765 + }, + { + "epoch": 0.8102579915715901, + "grad_norm": 1.0425649881362915, + "learning_rate": 9.147958242438015e-07, + "loss": 0.6254, + "step": 15766 + }, + { + "epoch": 0.8103093843149347, + "grad_norm": 1.0869574546813965, + "learning_rate": 9.143160199486451e-07, + "loss": 0.6795, + "step": 15767 + }, + { + "epoch": 0.8103607770582794, + "grad_norm": 1.1499159336090088, + "learning_rate": 9.138363288509311e-07, + "loss": 0.6874, + "step": 15768 + }, + { + "epoch": 0.810412169801624, + "grad_norm": 1.0265332460403442, + "learning_rate": 9.133567509639485e-07, + "loss": 0.6901, + "step": 15769 + }, + { + "epoch": 0.8104635625449687, + "grad_norm": 1.1268881559371948, + "learning_rate": 9.128772863009872e-07, + "loss": 0.7581, + "step": 15770 + }, + { + "epoch": 0.8105149552883133, + "grad_norm": 1.051356315612793, + "learning_rate": 9.123979348753298e-07, + "loss": 0.6763, + "step": 15771 + }, + { + "epoch": 0.810566348031658, + "grad_norm": 1.1271179914474487, + "learning_rate": 9.119186967002552e-07, + "loss": 0.6837, + "step": 15772 + }, + { + "epoch": 0.8106177407750026, + "grad_norm": 1.136104702949524, + "learning_rate": 9.114395717890451e-07, + "loss": 0.7132, + "step": 15773 + }, + { + "epoch": 0.8106691335183472, + "grad_norm": 1.0177066326141357, + "learning_rate": 9.109605601549715e-07, + "loss": 0.635, + "step": 15774 + }, + { + "epoch": 0.8107205262616919, + "grad_norm": 1.0858715772628784, + "learning_rate": 9.10481661811306e-07, + "loss": 0.6865, + "step": 15775 + }, + { + "epoch": 0.8107719190050365, + "grad_norm": 1.1957052946090698, + "learning_rate": 9.100028767713165e-07, + "loss": 0.7596, + "step": 15776 + }, + { + "epoch": 0.8108233117483812, + "grad_norm": 0.7590300440788269, + "learning_rate": 9.095242050482711e-07, + "loss": 0.6472, + "step": 15777 + }, + { + "epoch": 0.8108747044917257, + "grad_norm": 1.0864298343658447, + "learning_rate": 9.090456466554276e-07, + "loss": 0.6912, + "step": 15778 + }, + { + "epoch": 0.8109260972350704, + "grad_norm": 1.184112310409546, + "learning_rate": 9.085672016060476e-07, + "loss": 0.6964, + "step": 15779 + }, + { + "epoch": 0.810977489978415, + "grad_norm": 0.9929559230804443, + "learning_rate": 9.080888699133861e-07, + "loss": 0.6842, + "step": 15780 + }, + { + "epoch": 0.8110288827217597, + "grad_norm": 3.1788060665130615, + "learning_rate": 9.076106515906957e-07, + "loss": 0.6627, + "step": 15781 + }, + { + "epoch": 0.8110802754651043, + "grad_norm": 1.07465660572052, + "learning_rate": 9.071325466512248e-07, + "loss": 0.6892, + "step": 15782 + }, + { + "epoch": 0.811131668208449, + "grad_norm": 1.1318098306655884, + "learning_rate": 9.066545551082217e-07, + "loss": 0.7382, + "step": 15783 + }, + { + "epoch": 0.8111830609517936, + "grad_norm": 1.1264047622680664, + "learning_rate": 9.061766769749292e-07, + "loss": 0.7033, + "step": 15784 + }, + { + "epoch": 0.8112344536951382, + "grad_norm": 1.0931671857833862, + "learning_rate": 9.056989122645849e-07, + "loss": 0.6685, + "step": 15785 + }, + { + "epoch": 0.8112858464384829, + "grad_norm": 0.7591493129730225, + "learning_rate": 9.052212609904292e-07, + "loss": 0.6653, + "step": 15786 + }, + { + "epoch": 0.8113372391818275, + "grad_norm": 1.1107436418533325, + "learning_rate": 9.047437231656941e-07, + "loss": 0.7339, + "step": 15787 + }, + { + "epoch": 0.8113886319251722, + "grad_norm": 1.0172797441482544, + "learning_rate": 9.042662988036105e-07, + "loss": 0.6834, + "step": 15788 + }, + { + "epoch": 0.8114400246685168, + "grad_norm": 1.06940495967865, + "learning_rate": 9.037889879174039e-07, + "loss": 0.6598, + "step": 15789 + }, + { + "epoch": 0.8114914174118615, + "grad_norm": 1.105717658996582, + "learning_rate": 9.033117905203031e-07, + "loss": 0.6735, + "step": 15790 + }, + { + "epoch": 0.8115428101552061, + "grad_norm": 1.1460373401641846, + "learning_rate": 9.028347066255244e-07, + "loss": 0.7049, + "step": 15791 + }, + { + "epoch": 0.8115942028985508, + "grad_norm": 1.0101784467697144, + "learning_rate": 9.023577362462887e-07, + "loss": 0.6599, + "step": 15792 + }, + { + "epoch": 0.8116455956418953, + "grad_norm": 1.0904449224472046, + "learning_rate": 9.018808793958105e-07, + "loss": 0.7056, + "step": 15793 + }, + { + "epoch": 0.81169698838524, + "grad_norm": 1.0892186164855957, + "learning_rate": 9.014041360872999e-07, + "loss": 0.6919, + "step": 15794 + }, + { + "epoch": 0.8117483811285846, + "grad_norm": 1.088026523590088, + "learning_rate": 9.009275063339679e-07, + "loss": 0.6781, + "step": 15795 + }, + { + "epoch": 0.8117997738719293, + "grad_norm": 1.1358389854431152, + "learning_rate": 9.004509901490188e-07, + "loss": 0.6701, + "step": 15796 + }, + { + "epoch": 0.8118511666152739, + "grad_norm": 1.2061302661895752, + "learning_rate": 8.99974587545655e-07, + "loss": 0.7178, + "step": 15797 + }, + { + "epoch": 0.8119025593586185, + "grad_norm": 0.6747239232063293, + "learning_rate": 8.994982985370748e-07, + "loss": 0.6577, + "step": 15798 + }, + { + "epoch": 0.8119539521019632, + "grad_norm": 1.0582994222640991, + "learning_rate": 8.990221231364771e-07, + "loss": 0.6677, + "step": 15799 + }, + { + "epoch": 0.8120053448453078, + "grad_norm": 0.6579222083091736, + "learning_rate": 8.985460613570495e-07, + "loss": 0.6675, + "step": 15800 + }, + { + "epoch": 0.8120567375886525, + "grad_norm": 1.0619511604309082, + "learning_rate": 8.980701132119868e-07, + "loss": 0.7018, + "step": 15801 + }, + { + "epoch": 0.8121081303319971, + "grad_norm": 1.12665855884552, + "learning_rate": 8.975942787144726e-07, + "loss": 0.7361, + "step": 15802 + }, + { + "epoch": 0.8121595230753418, + "grad_norm": 1.1392865180969238, + "learning_rate": 8.97118557877692e-07, + "loss": 0.7145, + "step": 15803 + }, + { + "epoch": 0.8122109158186864, + "grad_norm": 1.1067801713943481, + "learning_rate": 8.966429507148244e-07, + "loss": 0.6579, + "step": 15804 + }, + { + "epoch": 0.8122623085620311, + "grad_norm": 1.0840297937393188, + "learning_rate": 8.96167457239046e-07, + "loss": 0.653, + "step": 15805 + }, + { + "epoch": 0.8123137013053757, + "grad_norm": 1.095035195350647, + "learning_rate": 8.956920774635347e-07, + "loss": 0.6193, + "step": 15806 + }, + { + "epoch": 0.8123650940487204, + "grad_norm": 1.1100565195083618, + "learning_rate": 8.952168114014558e-07, + "loss": 0.6164, + "step": 15807 + }, + { + "epoch": 0.8124164867920649, + "grad_norm": 1.1385915279388428, + "learning_rate": 8.947416590659808e-07, + "loss": 0.7084, + "step": 15808 + }, + { + "epoch": 0.8124678795354096, + "grad_norm": 0.7350379824638367, + "learning_rate": 8.942666204702732e-07, + "loss": 0.6476, + "step": 15809 + }, + { + "epoch": 0.8125192722787542, + "grad_norm": 1.1220670938491821, + "learning_rate": 8.937916956274939e-07, + "loss": 0.7247, + "step": 15810 + }, + { + "epoch": 0.8125706650220988, + "grad_norm": 1.195489525794983, + "learning_rate": 8.933168845508006e-07, + "loss": 0.7369, + "step": 15811 + }, + { + "epoch": 0.8126220577654435, + "grad_norm": 1.1213343143463135, + "learning_rate": 8.9284218725335e-07, + "loss": 0.679, + "step": 15812 + }, + { + "epoch": 0.8126734505087881, + "grad_norm": 1.129107117652893, + "learning_rate": 8.923676037482931e-07, + "loss": 0.6866, + "step": 15813 + }, + { + "epoch": 0.8127248432521328, + "grad_norm": 0.6698294281959534, + "learning_rate": 8.918931340487774e-07, + "loss": 0.6338, + "step": 15814 + }, + { + "epoch": 0.8127762359954774, + "grad_norm": 1.0568783283233643, + "learning_rate": 8.914187781679529e-07, + "loss": 0.6364, + "step": 15815 + }, + { + "epoch": 0.8128276287388221, + "grad_norm": 1.124681830406189, + "learning_rate": 8.909445361189556e-07, + "loss": 0.7724, + "step": 15816 + }, + { + "epoch": 0.8128790214821667, + "grad_norm": 1.0001587867736816, + "learning_rate": 8.904704079149302e-07, + "loss": 0.6339, + "step": 15817 + }, + { + "epoch": 0.8129304142255114, + "grad_norm": 1.0655757188796997, + "learning_rate": 8.899963935690087e-07, + "loss": 0.7274, + "step": 15818 + }, + { + "epoch": 0.812981806968856, + "grad_norm": 1.112015724182129, + "learning_rate": 8.895224930943292e-07, + "loss": 0.7039, + "step": 15819 + }, + { + "epoch": 0.8130331997122007, + "grad_norm": 1.1138441562652588, + "learning_rate": 8.89048706504016e-07, + "loss": 0.6476, + "step": 15820 + }, + { + "epoch": 0.8130845924555453, + "grad_norm": 1.0901150703430176, + "learning_rate": 8.885750338111992e-07, + "loss": 0.6919, + "step": 15821 + }, + { + "epoch": 0.81313598519889, + "grad_norm": 1.0973469018936157, + "learning_rate": 8.881014750290017e-07, + "loss": 0.7019, + "step": 15822 + }, + { + "epoch": 0.8131873779422345, + "grad_norm": 1.0800434350967407, + "learning_rate": 8.876280301705419e-07, + "loss": 0.6311, + "step": 15823 + }, + { + "epoch": 0.8132387706855791, + "grad_norm": 1.2064049243927002, + "learning_rate": 8.8715469924894e-07, + "loss": 0.7132, + "step": 15824 + }, + { + "epoch": 0.8132901634289238, + "grad_norm": 0.6733985543251038, + "learning_rate": 8.86681482277309e-07, + "loss": 0.6409, + "step": 15825 + }, + { + "epoch": 0.8133415561722684, + "grad_norm": 1.106088638305664, + "learning_rate": 8.862083792687592e-07, + "loss": 0.7264, + "step": 15826 + }, + { + "epoch": 0.8133929489156131, + "grad_norm": 0.6929330229759216, + "learning_rate": 8.857353902363975e-07, + "loss": 0.6303, + "step": 15827 + }, + { + "epoch": 0.8134443416589577, + "grad_norm": 0.731674075126648, + "learning_rate": 8.852625151933313e-07, + "loss": 0.6337, + "step": 15828 + }, + { + "epoch": 0.8134957344023024, + "grad_norm": 1.1046757698059082, + "learning_rate": 8.84789754152659e-07, + "loss": 0.7117, + "step": 15829 + }, + { + "epoch": 0.813547127145647, + "grad_norm": 1.1163442134857178, + "learning_rate": 8.843171071274803e-07, + "loss": 0.6577, + "step": 15830 + }, + { + "epoch": 0.8135985198889917, + "grad_norm": 1.0644265413284302, + "learning_rate": 8.83844574130891e-07, + "loss": 0.6822, + "step": 15831 + }, + { + "epoch": 0.8136499126323363, + "grad_norm": 1.1497644186019897, + "learning_rate": 8.833721551759817e-07, + "loss": 0.674, + "step": 15832 + }, + { + "epoch": 0.813701305375681, + "grad_norm": 1.060119390487671, + "learning_rate": 8.828998502758407e-07, + "loss": 0.638, + "step": 15833 + }, + { + "epoch": 0.8137526981190256, + "grad_norm": 0.7519299983978271, + "learning_rate": 8.824276594435554e-07, + "loss": 0.6374, + "step": 15834 + }, + { + "epoch": 0.8138040908623703, + "grad_norm": 1.2447428703308105, + "learning_rate": 8.819555826922077e-07, + "loss": 0.6984, + "step": 15835 + }, + { + "epoch": 0.8138554836057149, + "grad_norm": 1.0567222833633423, + "learning_rate": 8.814836200348753e-07, + "loss": 0.7035, + "step": 15836 + }, + { + "epoch": 0.8139068763490596, + "grad_norm": 1.0885140895843506, + "learning_rate": 8.810117714846373e-07, + "loss": 0.6285, + "step": 15837 + }, + { + "epoch": 0.8139582690924042, + "grad_norm": 0.6613754630088806, + "learning_rate": 8.805400370545647e-07, + "loss": 0.6542, + "step": 15838 + }, + { + "epoch": 0.8140096618357487, + "grad_norm": 1.100843906402588, + "learning_rate": 8.800684167577278e-07, + "loss": 0.6729, + "step": 15839 + }, + { + "epoch": 0.8140610545790934, + "grad_norm": 1.0903635025024414, + "learning_rate": 8.795969106071917e-07, + "loss": 0.7063, + "step": 15840 + }, + { + "epoch": 0.814112447322438, + "grad_norm": 1.142377495765686, + "learning_rate": 8.791255186160236e-07, + "loss": 0.7742, + "step": 15841 + }, + { + "epoch": 0.8141638400657827, + "grad_norm": 1.097847819328308, + "learning_rate": 8.786542407972793e-07, + "loss": 0.7238, + "step": 15842 + }, + { + "epoch": 0.8142152328091273, + "grad_norm": 1.040615200996399, + "learning_rate": 8.781830771640198e-07, + "loss": 0.7057, + "step": 15843 + }, + { + "epoch": 0.814266625552472, + "grad_norm": 1.0631991624832153, + "learning_rate": 8.777120277292972e-07, + "loss": 0.6258, + "step": 15844 + }, + { + "epoch": 0.8143180182958166, + "grad_norm": 1.0461479425430298, + "learning_rate": 8.772410925061614e-07, + "loss": 0.6607, + "step": 15845 + }, + { + "epoch": 0.8143694110391613, + "grad_norm": 1.0002843141555786, + "learning_rate": 8.767702715076626e-07, + "loss": 0.6086, + "step": 15846 + }, + { + "epoch": 0.8144208037825059, + "grad_norm": 1.0043505430221558, + "learning_rate": 8.762995647468425e-07, + "loss": 0.6784, + "step": 15847 + }, + { + "epoch": 0.8144721965258506, + "grad_norm": 1.0576022863388062, + "learning_rate": 8.758289722367463e-07, + "loss": 0.6626, + "step": 15848 + }, + { + "epoch": 0.8145235892691952, + "grad_norm": 1.1268022060394287, + "learning_rate": 8.753584939904081e-07, + "loss": 0.7438, + "step": 15849 + }, + { + "epoch": 0.8145749820125399, + "grad_norm": 1.1017508506774902, + "learning_rate": 8.748881300208651e-07, + "loss": 0.6866, + "step": 15850 + }, + { + "epoch": 0.8146263747558845, + "grad_norm": 0.8794753551483154, + "learning_rate": 8.744178803411491e-07, + "loss": 0.6672, + "step": 15851 + }, + { + "epoch": 0.8146777674992292, + "grad_norm": 1.0583040714263916, + "learning_rate": 8.739477449642885e-07, + "loss": 0.6832, + "step": 15852 + }, + { + "epoch": 0.8147291602425738, + "grad_norm": 1.0865466594696045, + "learning_rate": 8.734777239033071e-07, + "loss": 0.6752, + "step": 15853 + }, + { + "epoch": 0.8147805529859183, + "grad_norm": 1.0640757083892822, + "learning_rate": 8.730078171712303e-07, + "loss": 0.6987, + "step": 15854 + }, + { + "epoch": 0.814831945729263, + "grad_norm": 1.0666866302490234, + "learning_rate": 8.725380247810755e-07, + "loss": 0.7264, + "step": 15855 + }, + { + "epoch": 0.8148833384726076, + "grad_norm": 1.0499472618103027, + "learning_rate": 8.72068346745858e-07, + "loss": 0.6379, + "step": 15856 + }, + { + "epoch": 0.8149347312159523, + "grad_norm": 1.0980334281921387, + "learning_rate": 8.715987830785944e-07, + "loss": 0.704, + "step": 15857 + }, + { + "epoch": 0.8149861239592969, + "grad_norm": 1.1336294412612915, + "learning_rate": 8.711293337922883e-07, + "loss": 0.7608, + "step": 15858 + }, + { + "epoch": 0.8150375167026416, + "grad_norm": 1.1186856031417847, + "learning_rate": 8.706599988999515e-07, + "loss": 0.7171, + "step": 15859 + }, + { + "epoch": 0.8150889094459862, + "grad_norm": 1.075716495513916, + "learning_rate": 8.701907784145852e-07, + "loss": 0.7125, + "step": 15860 + }, + { + "epoch": 0.8151403021893309, + "grad_norm": 1.0665866136550903, + "learning_rate": 8.697216723491897e-07, + "loss": 0.6522, + "step": 15861 + }, + { + "epoch": 0.8151916949326755, + "grad_norm": 0.7025566697120667, + "learning_rate": 8.692526807167606e-07, + "loss": 0.6735, + "step": 15862 + }, + { + "epoch": 0.8152430876760202, + "grad_norm": 1.0913625955581665, + "learning_rate": 8.687838035302942e-07, + "loss": 0.6904, + "step": 15863 + }, + { + "epoch": 0.8152944804193648, + "grad_norm": 1.0596872568130493, + "learning_rate": 8.683150408027807e-07, + "loss": 0.6904, + "step": 15864 + }, + { + "epoch": 0.8153458731627095, + "grad_norm": 0.8113095760345459, + "learning_rate": 8.678463925472052e-07, + "loss": 0.6741, + "step": 15865 + }, + { + "epoch": 0.8153972659060541, + "grad_norm": 1.0824666023254395, + "learning_rate": 8.673778587765552e-07, + "loss": 0.6476, + "step": 15866 + }, + { + "epoch": 0.8154486586493987, + "grad_norm": 1.0351488590240479, + "learning_rate": 8.669094395038103e-07, + "loss": 0.6752, + "step": 15867 + }, + { + "epoch": 0.8155000513927434, + "grad_norm": 1.0663225650787354, + "learning_rate": 8.664411347419488e-07, + "loss": 0.6551, + "step": 15868 + }, + { + "epoch": 0.8155514441360879, + "grad_norm": 1.1081591844558716, + "learning_rate": 8.659729445039439e-07, + "loss": 0.6619, + "step": 15869 + }, + { + "epoch": 0.8156028368794326, + "grad_norm": 1.0211765766143799, + "learning_rate": 8.655048688027712e-07, + "loss": 0.6844, + "step": 15870 + }, + { + "epoch": 0.8156542296227772, + "grad_norm": 1.0937654972076416, + "learning_rate": 8.650369076513937e-07, + "loss": 0.7102, + "step": 15871 + }, + { + "epoch": 0.8157056223661219, + "grad_norm": 1.0783026218414307, + "learning_rate": 8.645690610627811e-07, + "loss": 0.721, + "step": 15872 + }, + { + "epoch": 0.8157570151094665, + "grad_norm": 1.101480484008789, + "learning_rate": 8.641013290498934e-07, + "loss": 0.7678, + "step": 15873 + }, + { + "epoch": 0.8158084078528112, + "grad_norm": 1.0206547975540161, + "learning_rate": 8.636337116256893e-07, + "loss": 0.6929, + "step": 15874 + }, + { + "epoch": 0.8158598005961558, + "grad_norm": 1.0886304378509521, + "learning_rate": 8.631662088031262e-07, + "loss": 0.667, + "step": 15875 + }, + { + "epoch": 0.8159111933395005, + "grad_norm": 0.7084832787513733, + "learning_rate": 8.626988205951558e-07, + "loss": 0.6301, + "step": 15876 + }, + { + "epoch": 0.8159625860828451, + "grad_norm": 1.1814095973968506, + "learning_rate": 8.62231547014728e-07, + "loss": 0.7283, + "step": 15877 + }, + { + "epoch": 0.8160139788261898, + "grad_norm": 1.0779844522476196, + "learning_rate": 8.617643880747867e-07, + "loss": 0.6825, + "step": 15878 + }, + { + "epoch": 0.8160653715695344, + "grad_norm": 1.0795459747314453, + "learning_rate": 8.612973437882777e-07, + "loss": 0.6855, + "step": 15879 + }, + { + "epoch": 0.816116764312879, + "grad_norm": 1.026180386543274, + "learning_rate": 8.608304141681406e-07, + "loss": 0.6884, + "step": 15880 + }, + { + "epoch": 0.8161681570562237, + "grad_norm": 1.0571925640106201, + "learning_rate": 8.603635992273108e-07, + "loss": 0.7251, + "step": 15881 + }, + { + "epoch": 0.8162195497995683, + "grad_norm": 1.054360270500183, + "learning_rate": 8.598968989787216e-07, + "loss": 0.663, + "step": 15882 + }, + { + "epoch": 0.816270942542913, + "grad_norm": 0.6769542694091797, + "learning_rate": 8.59430313435306e-07, + "loss": 0.6717, + "step": 15883 + }, + { + "epoch": 0.8163223352862575, + "grad_norm": 1.154556155204773, + "learning_rate": 8.589638426099873e-07, + "loss": 0.7038, + "step": 15884 + }, + { + "epoch": 0.8163737280296022, + "grad_norm": 1.064639687538147, + "learning_rate": 8.584974865156925e-07, + "loss": 0.679, + "step": 15885 + }, + { + "epoch": 0.8164251207729468, + "grad_norm": 1.133123755455017, + "learning_rate": 8.580312451653416e-07, + "loss": 0.6521, + "step": 15886 + }, + { + "epoch": 0.8164765135162915, + "grad_norm": 1.0538917779922485, + "learning_rate": 8.575651185718503e-07, + "loss": 0.6705, + "step": 15887 + }, + { + "epoch": 0.8165279062596361, + "grad_norm": 1.0894396305084229, + "learning_rate": 8.570991067481366e-07, + "loss": 0.6961, + "step": 15888 + }, + { + "epoch": 0.8165792990029808, + "grad_norm": 1.129911184310913, + "learning_rate": 8.566332097071095e-07, + "loss": 0.7178, + "step": 15889 + }, + { + "epoch": 0.8166306917463254, + "grad_norm": 1.0769083499908447, + "learning_rate": 8.561674274616777e-07, + "loss": 0.7147, + "step": 15890 + }, + { + "epoch": 0.8166820844896701, + "grad_norm": 1.1535837650299072, + "learning_rate": 8.557017600247447e-07, + "loss": 0.7175, + "step": 15891 + }, + { + "epoch": 0.8167334772330147, + "grad_norm": 1.0542861223220825, + "learning_rate": 8.552362074092157e-07, + "loss": 0.6972, + "step": 15892 + }, + { + "epoch": 0.8167848699763594, + "grad_norm": 1.0870426893234253, + "learning_rate": 8.547707696279844e-07, + "loss": 0.7268, + "step": 15893 + }, + { + "epoch": 0.816836262719704, + "grad_norm": 1.1196643114089966, + "learning_rate": 8.543054466939505e-07, + "loss": 0.6925, + "step": 15894 + }, + { + "epoch": 0.8168876554630486, + "grad_norm": 0.7005282640457153, + "learning_rate": 8.538402386200023e-07, + "loss": 0.6796, + "step": 15895 + }, + { + "epoch": 0.8169390482063933, + "grad_norm": 1.0782498121261597, + "learning_rate": 8.533751454190326e-07, + "loss": 0.6963, + "step": 15896 + }, + { + "epoch": 0.8169904409497379, + "grad_norm": 1.1897894144058228, + "learning_rate": 8.529101671039258e-07, + "loss": 0.7401, + "step": 15897 + }, + { + "epoch": 0.8170418336930826, + "grad_norm": 1.0737711191177368, + "learning_rate": 8.524453036875624e-07, + "loss": 0.6973, + "step": 15898 + }, + { + "epoch": 0.8170932264364271, + "grad_norm": 1.0845646858215332, + "learning_rate": 8.519805551828269e-07, + "loss": 0.691, + "step": 15899 + }, + { + "epoch": 0.8171446191797718, + "grad_norm": 0.7155020833015442, + "learning_rate": 8.515159216025893e-07, + "loss": 0.7, + "step": 15900 + }, + { + "epoch": 0.8171960119231164, + "grad_norm": 1.0626174211502075, + "learning_rate": 8.510514029597272e-07, + "loss": 0.6663, + "step": 15901 + }, + { + "epoch": 0.8172474046664611, + "grad_norm": 0.6911786794662476, + "learning_rate": 8.50586999267109e-07, + "loss": 0.6554, + "step": 15902 + }, + { + "epoch": 0.8172987974098057, + "grad_norm": 1.0124671459197998, + "learning_rate": 8.501227105376015e-07, + "loss": 0.6492, + "step": 15903 + }, + { + "epoch": 0.8173501901531504, + "grad_norm": 0.7251526117324829, + "learning_rate": 8.496585367840665e-07, + "loss": 0.6263, + "step": 15904 + }, + { + "epoch": 0.817401582896495, + "grad_norm": 1.0921012163162231, + "learning_rate": 8.491944780193679e-07, + "loss": 0.7155, + "step": 15905 + }, + { + "epoch": 0.8174529756398397, + "grad_norm": 1.0037120580673218, + "learning_rate": 8.487305342563601e-07, + "loss": 0.7249, + "step": 15906 + }, + { + "epoch": 0.8175043683831843, + "grad_norm": 1.0369433164596558, + "learning_rate": 8.482667055078975e-07, + "loss": 0.6621, + "step": 15907 + }, + { + "epoch": 0.817555761126529, + "grad_norm": 1.105870246887207, + "learning_rate": 8.478029917868336e-07, + "loss": 0.6747, + "step": 15908 + }, + { + "epoch": 0.8176071538698736, + "grad_norm": 1.1297708749771118, + "learning_rate": 8.473393931060109e-07, + "loss": 0.7091, + "step": 15909 + }, + { + "epoch": 0.8176585466132182, + "grad_norm": 1.0341790914535522, + "learning_rate": 8.468759094782781e-07, + "loss": 0.7428, + "step": 15910 + }, + { + "epoch": 0.8177099393565629, + "grad_norm": 1.0771733522415161, + "learning_rate": 8.464125409164736e-07, + "loss": 0.692, + "step": 15911 + }, + { + "epoch": 0.8177613320999075, + "grad_norm": 0.8159832954406738, + "learning_rate": 8.45949287433439e-07, + "loss": 0.6751, + "step": 15912 + }, + { + "epoch": 0.8178127248432522, + "grad_norm": 1.0718497037887573, + "learning_rate": 8.454861490420046e-07, + "loss": 0.6109, + "step": 15913 + }, + { + "epoch": 0.8178641175865967, + "grad_norm": 1.0749939680099487, + "learning_rate": 8.450231257550057e-07, + "loss": 0.6514, + "step": 15914 + }, + { + "epoch": 0.8179155103299414, + "grad_norm": 1.0642589330673218, + "learning_rate": 8.445602175852691e-07, + "loss": 0.7372, + "step": 15915 + }, + { + "epoch": 0.817966903073286, + "grad_norm": 0.7097607254981995, + "learning_rate": 8.440974245456196e-07, + "loss": 0.6364, + "step": 15916 + }, + { + "epoch": 0.8180182958166307, + "grad_norm": 1.0714733600616455, + "learning_rate": 8.436347466488809e-07, + "loss": 0.6899, + "step": 15917 + }, + { + "epoch": 0.8180696885599753, + "grad_norm": 1.0946086645126343, + "learning_rate": 8.43172183907871e-07, + "loss": 0.6588, + "step": 15918 + }, + { + "epoch": 0.81812108130332, + "grad_norm": 0.9855013489723206, + "learning_rate": 8.427097363354065e-07, + "loss": 0.6564, + "step": 15919 + }, + { + "epoch": 0.8181724740466646, + "grad_norm": 0.7948664426803589, + "learning_rate": 8.422474039442969e-07, + "loss": 0.656, + "step": 15920 + }, + { + "epoch": 0.8182238667900092, + "grad_norm": 1.0878255367279053, + "learning_rate": 8.417851867473564e-07, + "loss": 0.665, + "step": 15921 + }, + { + "epoch": 0.8182752595333539, + "grad_norm": 1.0695507526397705, + "learning_rate": 8.413230847573861e-07, + "loss": 0.6444, + "step": 15922 + }, + { + "epoch": 0.8183266522766985, + "grad_norm": 1.062950611114502, + "learning_rate": 8.408610979871928e-07, + "loss": 0.6869, + "step": 15923 + }, + { + "epoch": 0.8183780450200432, + "grad_norm": 1.1164363622665405, + "learning_rate": 8.403992264495742e-07, + "loss": 0.7104, + "step": 15924 + }, + { + "epoch": 0.8184294377633878, + "grad_norm": 1.0398523807525635, + "learning_rate": 8.399374701573265e-07, + "loss": 0.6347, + "step": 15925 + }, + { + "epoch": 0.8184808305067325, + "grad_norm": 1.0860638618469238, + "learning_rate": 8.394758291232446e-07, + "loss": 0.7073, + "step": 15926 + }, + { + "epoch": 0.8185322232500771, + "grad_norm": 1.0642421245574951, + "learning_rate": 8.39014303360119e-07, + "loss": 0.6874, + "step": 15927 + }, + { + "epoch": 0.8185836159934218, + "grad_norm": 1.1776020526885986, + "learning_rate": 8.385528928807346e-07, + "loss": 0.7039, + "step": 15928 + }, + { + "epoch": 0.8186350087367664, + "grad_norm": 1.1142851114273071, + "learning_rate": 8.380915976978759e-07, + "loss": 0.6707, + "step": 15929 + }, + { + "epoch": 0.818686401480111, + "grad_norm": 1.0520637035369873, + "learning_rate": 8.376304178243245e-07, + "loss": 0.6815, + "step": 15930 + }, + { + "epoch": 0.8187377942234556, + "grad_norm": 1.0598965883255005, + "learning_rate": 8.371693532728575e-07, + "loss": 0.6533, + "step": 15931 + }, + { + "epoch": 0.8187891869668003, + "grad_norm": 1.1046322584152222, + "learning_rate": 8.367084040562485e-07, + "loss": 0.6589, + "step": 15932 + }, + { + "epoch": 0.8188405797101449, + "grad_norm": 1.0862507820129395, + "learning_rate": 8.362475701872675e-07, + "loss": 0.6511, + "step": 15933 + }, + { + "epoch": 0.8188919724534895, + "grad_norm": 1.106633186340332, + "learning_rate": 8.357868516786861e-07, + "loss": 0.6607, + "step": 15934 + }, + { + "epoch": 0.8189433651968342, + "grad_norm": 1.1368271112442017, + "learning_rate": 8.35326248543264e-07, + "loss": 0.6783, + "step": 15935 + }, + { + "epoch": 0.8189947579401788, + "grad_norm": 1.1690280437469482, + "learning_rate": 8.34865760793766e-07, + "loss": 0.6623, + "step": 15936 + }, + { + "epoch": 0.8190461506835235, + "grad_norm": 1.1522434949874878, + "learning_rate": 8.344053884429493e-07, + "loss": 0.6536, + "step": 15937 + }, + { + "epoch": 0.8190975434268681, + "grad_norm": 1.1830905675888062, + "learning_rate": 8.33945131503568e-07, + "loss": 0.6672, + "step": 15938 + }, + { + "epoch": 0.8191489361702128, + "grad_norm": 1.0953369140625, + "learning_rate": 8.334849899883757e-07, + "loss": 0.6696, + "step": 15939 + }, + { + "epoch": 0.8192003289135574, + "grad_norm": 1.0722713470458984, + "learning_rate": 8.330249639101201e-07, + "loss": 0.729, + "step": 15940 + }, + { + "epoch": 0.8192517216569021, + "grad_norm": 1.1886411905288696, + "learning_rate": 8.325650532815466e-07, + "loss": 0.7251, + "step": 15941 + }, + { + "epoch": 0.8193031144002467, + "grad_norm": 0.78179532289505, + "learning_rate": 8.321052581153965e-07, + "loss": 0.6692, + "step": 15942 + }, + { + "epoch": 0.8193545071435914, + "grad_norm": 1.1280386447906494, + "learning_rate": 8.31645578424411e-07, + "loss": 0.6886, + "step": 15943 + }, + { + "epoch": 0.819405899886936, + "grad_norm": 1.090631365776062, + "learning_rate": 8.311860142213246e-07, + "loss": 0.6939, + "step": 15944 + }, + { + "epoch": 0.8194572926302806, + "grad_norm": 0.8102370500564575, + "learning_rate": 8.307265655188701e-07, + "loss": 0.6321, + "step": 15945 + }, + { + "epoch": 0.8195086853736252, + "grad_norm": 0.7136482000350952, + "learning_rate": 8.302672323297756e-07, + "loss": 0.6752, + "step": 15946 + }, + { + "epoch": 0.8195600781169698, + "grad_norm": 1.1100010871887207, + "learning_rate": 8.298080146667698e-07, + "loss": 0.6744, + "step": 15947 + }, + { + "epoch": 0.8196114708603145, + "grad_norm": 1.2398725748062134, + "learning_rate": 8.293489125425747e-07, + "loss": 0.6642, + "step": 15948 + }, + { + "epoch": 0.8196628636036591, + "grad_norm": 1.135555386543274, + "learning_rate": 8.28889925969909e-07, + "loss": 0.7059, + "step": 15949 + }, + { + "epoch": 0.8197142563470038, + "grad_norm": 1.1473207473754883, + "learning_rate": 8.284310549614922e-07, + "loss": 0.6538, + "step": 15950 + }, + { + "epoch": 0.8197656490903484, + "grad_norm": 1.1159553527832031, + "learning_rate": 8.279722995300338e-07, + "loss": 0.7, + "step": 15951 + }, + { + "epoch": 0.8198170418336931, + "grad_norm": 1.1276623010635376, + "learning_rate": 8.275136596882471e-07, + "loss": 0.6852, + "step": 15952 + }, + { + "epoch": 0.8198684345770377, + "grad_norm": 1.0680826902389526, + "learning_rate": 8.270551354488382e-07, + "loss": 0.74, + "step": 15953 + }, + { + "epoch": 0.8199198273203824, + "grad_norm": 1.0428073406219482, + "learning_rate": 8.265967268245106e-07, + "loss": 0.7024, + "step": 15954 + }, + { + "epoch": 0.819971220063727, + "grad_norm": 1.06885826587677, + "learning_rate": 8.261384338279638e-07, + "loss": 0.6511, + "step": 15955 + }, + { + "epoch": 0.8200226128070717, + "grad_norm": 0.8059698939323425, + "learning_rate": 8.25680256471898e-07, + "loss": 0.6535, + "step": 15956 + }, + { + "epoch": 0.8200740055504163, + "grad_norm": 0.7939055562019348, + "learning_rate": 8.252221947690053e-07, + "loss": 0.6495, + "step": 15957 + }, + { + "epoch": 0.820125398293761, + "grad_norm": 1.1296801567077637, + "learning_rate": 8.247642487319768e-07, + "loss": 0.6414, + "step": 15958 + }, + { + "epoch": 0.8201767910371056, + "grad_norm": 0.7933281064033508, + "learning_rate": 8.243064183735017e-07, + "loss": 0.6602, + "step": 15959 + }, + { + "epoch": 0.8202281837804501, + "grad_norm": 0.8116136193275452, + "learning_rate": 8.238487037062637e-07, + "loss": 0.6465, + "step": 15960 + }, + { + "epoch": 0.8202795765237948, + "grad_norm": 1.060389757156372, + "learning_rate": 8.233911047429438e-07, + "loss": 0.7281, + "step": 15961 + }, + { + "epoch": 0.8203309692671394, + "grad_norm": 1.1530861854553223, + "learning_rate": 8.229336214962197e-07, + "loss": 0.7058, + "step": 15962 + }, + { + "epoch": 0.8203823620104841, + "grad_norm": 1.0510258674621582, + "learning_rate": 8.224762539787701e-07, + "loss": 0.6691, + "step": 15963 + }, + { + "epoch": 0.8204337547538287, + "grad_norm": 1.144066333770752, + "learning_rate": 8.220190022032604e-07, + "loss": 0.762, + "step": 15964 + }, + { + "epoch": 0.8204851474971734, + "grad_norm": 0.7330953478813171, + "learning_rate": 8.215618661823649e-07, + "loss": 0.6832, + "step": 15965 + }, + { + "epoch": 0.820536540240518, + "grad_norm": 1.1114577054977417, + "learning_rate": 8.211048459287458e-07, + "loss": 0.6776, + "step": 15966 + }, + { + "epoch": 0.8205879329838627, + "grad_norm": 1.085736870765686, + "learning_rate": 8.206479414550656e-07, + "loss": 0.7289, + "step": 15967 + }, + { + "epoch": 0.8206393257272073, + "grad_norm": 0.6623334288597107, + "learning_rate": 8.201911527739847e-07, + "loss": 0.6637, + "step": 15968 + }, + { + "epoch": 0.820690718470552, + "grad_norm": 1.1326062679290771, + "learning_rate": 8.197344798981577e-07, + "loss": 0.692, + "step": 15969 + }, + { + "epoch": 0.8207421112138966, + "grad_norm": 1.131390929222107, + "learning_rate": 8.192779228402375e-07, + "loss": 0.7013, + "step": 15970 + }, + { + "epoch": 0.8207935039572413, + "grad_norm": 1.1198616027832031, + "learning_rate": 8.18821481612872e-07, + "loss": 0.682, + "step": 15971 + }, + { + "epoch": 0.8208448967005859, + "grad_norm": 1.0767366886138916, + "learning_rate": 8.183651562287098e-07, + "loss": 0.7275, + "step": 15972 + }, + { + "epoch": 0.8208962894439306, + "grad_norm": 0.6785878539085388, + "learning_rate": 8.179089467003926e-07, + "loss": 0.6398, + "step": 15973 + }, + { + "epoch": 0.8209476821872752, + "grad_norm": 1.1476516723632812, + "learning_rate": 8.174528530405602e-07, + "loss": 0.6802, + "step": 15974 + }, + { + "epoch": 0.8209990749306197, + "grad_norm": 0.7410844564437866, + "learning_rate": 8.169968752618474e-07, + "loss": 0.6358, + "step": 15975 + }, + { + "epoch": 0.8210504676739644, + "grad_norm": 1.1050662994384766, + "learning_rate": 8.165410133768897e-07, + "loss": 0.7511, + "step": 15976 + }, + { + "epoch": 0.821101860417309, + "grad_norm": 1.136380910873413, + "learning_rate": 8.160852673983172e-07, + "loss": 0.6957, + "step": 15977 + }, + { + "epoch": 0.8211532531606537, + "grad_norm": 1.0785022974014282, + "learning_rate": 8.156296373387557e-07, + "loss": 0.6639, + "step": 15978 + }, + { + "epoch": 0.8212046459039983, + "grad_norm": 0.7310556769371033, + "learning_rate": 8.15174123210829e-07, + "loss": 0.6271, + "step": 15979 + }, + { + "epoch": 0.821256038647343, + "grad_norm": 0.8285425901412964, + "learning_rate": 8.147187250271566e-07, + "loss": 0.6761, + "step": 15980 + }, + { + "epoch": 0.8213074313906876, + "grad_norm": 0.7083662152290344, + "learning_rate": 8.142634428003577e-07, + "loss": 0.6591, + "step": 15981 + }, + { + "epoch": 0.8213588241340323, + "grad_norm": 1.0761287212371826, + "learning_rate": 8.13808276543045e-07, + "loss": 0.7542, + "step": 15982 + }, + { + "epoch": 0.8214102168773769, + "grad_norm": 1.0235462188720703, + "learning_rate": 8.133532262678301e-07, + "loss": 0.682, + "step": 15983 + }, + { + "epoch": 0.8214616096207216, + "grad_norm": 1.121659517288208, + "learning_rate": 8.128982919873185e-07, + "loss": 0.7204, + "step": 15984 + }, + { + "epoch": 0.8215130023640662, + "grad_norm": 0.7196667790412903, + "learning_rate": 8.124434737141184e-07, + "loss": 0.6672, + "step": 15985 + }, + { + "epoch": 0.8215643951074109, + "grad_norm": 1.232259750366211, + "learning_rate": 8.119887714608265e-07, + "loss": 0.7338, + "step": 15986 + }, + { + "epoch": 0.8216157878507555, + "grad_norm": 0.8438336849212646, + "learning_rate": 8.115341852400437e-07, + "loss": 0.639, + "step": 15987 + }, + { + "epoch": 0.8216671805941002, + "grad_norm": 1.0620945692062378, + "learning_rate": 8.110797150643629e-07, + "loss": 0.7196, + "step": 15988 + }, + { + "epoch": 0.8217185733374448, + "grad_norm": 1.0905804634094238, + "learning_rate": 8.106253609463776e-07, + "loss": 0.7173, + "step": 15989 + }, + { + "epoch": 0.8217699660807893, + "grad_norm": 0.7311794757843018, + "learning_rate": 8.101711228986753e-07, + "loss": 0.6082, + "step": 15990 + }, + { + "epoch": 0.821821358824134, + "grad_norm": 1.1373704671859741, + "learning_rate": 8.097170009338395e-07, + "loss": 0.6917, + "step": 15991 + }, + { + "epoch": 0.8218727515674786, + "grad_norm": 1.0940972566604614, + "learning_rate": 8.092629950644553e-07, + "loss": 0.6892, + "step": 15992 + }, + { + "epoch": 0.8219241443108233, + "grad_norm": 1.1069122552871704, + "learning_rate": 8.088091053030972e-07, + "loss": 0.678, + "step": 15993 + }, + { + "epoch": 0.8219755370541679, + "grad_norm": 1.0717417001724243, + "learning_rate": 8.083553316623443e-07, + "loss": 0.7159, + "step": 15994 + }, + { + "epoch": 0.8220269297975126, + "grad_norm": 1.1317168474197388, + "learning_rate": 8.079016741547669e-07, + "loss": 0.7149, + "step": 15995 + }, + { + "epoch": 0.8220783225408572, + "grad_norm": 1.109566569328308, + "learning_rate": 8.074481327929345e-07, + "loss": 0.7168, + "step": 15996 + }, + { + "epoch": 0.8221297152842019, + "grad_norm": 1.0114786624908447, + "learning_rate": 8.069947075894113e-07, + "loss": 0.6269, + "step": 15997 + }, + { + "epoch": 0.8221811080275465, + "grad_norm": 0.742664098739624, + "learning_rate": 8.065413985567628e-07, + "loss": 0.6498, + "step": 15998 + }, + { + "epoch": 0.8222325007708912, + "grad_norm": 1.0992227792739868, + "learning_rate": 8.060882057075464e-07, + "loss": 0.6676, + "step": 15999 + }, + { + "epoch": 0.8222838935142358, + "grad_norm": 1.0877968072891235, + "learning_rate": 8.056351290543179e-07, + "loss": 0.6772, + "step": 16000 + }, + { + "epoch": 0.8223352862575805, + "grad_norm": 1.0807422399520874, + "learning_rate": 8.051821686096328e-07, + "loss": 0.6546, + "step": 16001 + }, + { + "epoch": 0.8223866790009251, + "grad_norm": 1.1446477174758911, + "learning_rate": 8.047293243860366e-07, + "loss": 0.6785, + "step": 16002 + }, + { + "epoch": 0.8224380717442697, + "grad_norm": 1.0511821508407593, + "learning_rate": 8.042765963960786e-07, + "loss": 0.7442, + "step": 16003 + }, + { + "epoch": 0.8224894644876144, + "grad_norm": 1.1064132452011108, + "learning_rate": 8.03823984652301e-07, + "loss": 0.717, + "step": 16004 + }, + { + "epoch": 0.822540857230959, + "grad_norm": 1.1460384130477905, + "learning_rate": 8.033714891672462e-07, + "loss": 0.6871, + "step": 16005 + }, + { + "epoch": 0.8225922499743036, + "grad_norm": 1.099399447441101, + "learning_rate": 8.029191099534467e-07, + "loss": 0.694, + "step": 16006 + }, + { + "epoch": 0.8226436427176482, + "grad_norm": 1.0820860862731934, + "learning_rate": 8.024668470234393e-07, + "loss": 0.6966, + "step": 16007 + }, + { + "epoch": 0.8226950354609929, + "grad_norm": 1.063926339149475, + "learning_rate": 8.020147003897533e-07, + "loss": 0.6927, + "step": 16008 + }, + { + "epoch": 0.8227464282043375, + "grad_norm": 0.8330867886543274, + "learning_rate": 8.015626700649148e-07, + "loss": 0.6903, + "step": 16009 + }, + { + "epoch": 0.8227978209476822, + "grad_norm": 1.0246433019638062, + "learning_rate": 8.0111075606145e-07, + "loss": 0.644, + "step": 16010 + }, + { + "epoch": 0.8228492136910268, + "grad_norm": 1.0607528686523438, + "learning_rate": 8.00658958391879e-07, + "loss": 0.67, + "step": 16011 + }, + { + "epoch": 0.8229006064343715, + "grad_norm": 1.104867935180664, + "learning_rate": 8.00207277068718e-07, + "loss": 0.7561, + "step": 16012 + }, + { + "epoch": 0.8229519991777161, + "grad_norm": 1.1388285160064697, + "learning_rate": 7.997557121044803e-07, + "loss": 0.7228, + "step": 16013 + }, + { + "epoch": 0.8230033919210608, + "grad_norm": 1.1482667922973633, + "learning_rate": 7.993042635116815e-07, + "loss": 0.7778, + "step": 16014 + }, + { + "epoch": 0.8230547846644054, + "grad_norm": 1.1118996143341064, + "learning_rate": 7.988529313028237e-07, + "loss": 0.7741, + "step": 16015 + }, + { + "epoch": 0.82310617740775, + "grad_norm": 0.7849704623222351, + "learning_rate": 7.984017154904151e-07, + "loss": 0.69, + "step": 16016 + }, + { + "epoch": 0.8231575701510947, + "grad_norm": 1.1126105785369873, + "learning_rate": 7.97950616086956e-07, + "loss": 0.678, + "step": 16017 + }, + { + "epoch": 0.8232089628944393, + "grad_norm": 1.1017298698425293, + "learning_rate": 7.974996331049434e-07, + "loss": 0.6851, + "step": 16018 + }, + { + "epoch": 0.823260355637784, + "grad_norm": 1.0966075658798218, + "learning_rate": 7.970487665568743e-07, + "loss": 0.6925, + "step": 16019 + }, + { + "epoch": 0.8233117483811286, + "grad_norm": 1.1526294946670532, + "learning_rate": 7.965980164552395e-07, + "loss": 0.7452, + "step": 16020 + }, + { + "epoch": 0.8233631411244732, + "grad_norm": 1.063724398612976, + "learning_rate": 7.961473828125271e-07, + "loss": 0.7166, + "step": 16021 + }, + { + "epoch": 0.8234145338678178, + "grad_norm": 1.1120411157608032, + "learning_rate": 7.956968656412217e-07, + "loss": 0.6839, + "step": 16022 + }, + { + "epoch": 0.8234659266111625, + "grad_norm": 1.0558279752731323, + "learning_rate": 7.952464649538067e-07, + "loss": 0.7003, + "step": 16023 + }, + { + "epoch": 0.8235173193545071, + "grad_norm": 1.1339508295059204, + "learning_rate": 7.947961807627602e-07, + "loss": 0.7116, + "step": 16024 + }, + { + "epoch": 0.8235687120978518, + "grad_norm": 0.7437736392021179, + "learning_rate": 7.94346013080558e-07, + "loss": 0.6388, + "step": 16025 + }, + { + "epoch": 0.8236201048411964, + "grad_norm": 0.6681776642799377, + "learning_rate": 7.938959619196707e-07, + "loss": 0.6323, + "step": 16026 + }, + { + "epoch": 0.8236714975845411, + "grad_norm": 1.0692873001098633, + "learning_rate": 7.934460272925698e-07, + "loss": 0.6977, + "step": 16027 + }, + { + "epoch": 0.8237228903278857, + "grad_norm": 0.9259945750236511, + "learning_rate": 7.929962092117205e-07, + "loss": 0.6605, + "step": 16028 + }, + { + "epoch": 0.8237742830712304, + "grad_norm": 1.094130516052246, + "learning_rate": 7.925465076895844e-07, + "loss": 0.677, + "step": 16029 + }, + { + "epoch": 0.823825675814575, + "grad_norm": 1.062848687171936, + "learning_rate": 7.920969227386216e-07, + "loss": 0.7362, + "step": 16030 + }, + { + "epoch": 0.8238770685579196, + "grad_norm": 1.1261876821517944, + "learning_rate": 7.916474543712871e-07, + "loss": 0.7126, + "step": 16031 + }, + { + "epoch": 0.8239284613012643, + "grad_norm": 1.08109450340271, + "learning_rate": 7.91198102600036e-07, + "loss": 0.7026, + "step": 16032 + }, + { + "epoch": 0.8239798540446089, + "grad_norm": 1.1750117540359497, + "learning_rate": 7.907488674373165e-07, + "loss": 0.6998, + "step": 16033 + }, + { + "epoch": 0.8240312467879536, + "grad_norm": 1.0774067640304565, + "learning_rate": 7.902997488955755e-07, + "loss": 0.6957, + "step": 16034 + }, + { + "epoch": 0.8240826395312982, + "grad_norm": 1.1907278299331665, + "learning_rate": 7.898507469872546e-07, + "loss": 0.68, + "step": 16035 + }, + { + "epoch": 0.8241340322746428, + "grad_norm": 1.1572355031967163, + "learning_rate": 7.894018617247968e-07, + "loss": 0.6576, + "step": 16036 + }, + { + "epoch": 0.8241854250179874, + "grad_norm": 1.0717113018035889, + "learning_rate": 7.889530931206368e-07, + "loss": 0.6763, + "step": 16037 + }, + { + "epoch": 0.8242368177613321, + "grad_norm": 1.1185137033462524, + "learning_rate": 7.88504441187209e-07, + "loss": 0.6822, + "step": 16038 + }, + { + "epoch": 0.8242882105046767, + "grad_norm": 1.1623841524124146, + "learning_rate": 7.880559059369425e-07, + "loss": 0.7189, + "step": 16039 + }, + { + "epoch": 0.8243396032480214, + "grad_norm": 1.1048818826675415, + "learning_rate": 7.876074873822659e-07, + "loss": 0.6854, + "step": 16040 + }, + { + "epoch": 0.824390995991366, + "grad_norm": 1.0829689502716064, + "learning_rate": 7.87159185535602e-07, + "loss": 0.7216, + "step": 16041 + }, + { + "epoch": 0.8244423887347107, + "grad_norm": 1.2525339126586914, + "learning_rate": 7.867110004093708e-07, + "loss": 0.6481, + "step": 16042 + }, + { + "epoch": 0.8244937814780553, + "grad_norm": 1.1360790729522705, + "learning_rate": 7.862629320159931e-07, + "loss": 0.685, + "step": 16043 + }, + { + "epoch": 0.8245451742214, + "grad_norm": 1.1168122291564941, + "learning_rate": 7.858149803678782e-07, + "loss": 0.686, + "step": 16044 + }, + { + "epoch": 0.8245965669647446, + "grad_norm": 1.1179600954055786, + "learning_rate": 7.853671454774404e-07, + "loss": 0.7085, + "step": 16045 + }, + { + "epoch": 0.8246479597080892, + "grad_norm": 1.1545031070709229, + "learning_rate": 7.84919427357086e-07, + "loss": 0.7036, + "step": 16046 + }, + { + "epoch": 0.8246993524514339, + "grad_norm": 0.673156201839447, + "learning_rate": 7.844718260192196e-07, + "loss": 0.6171, + "step": 16047 + }, + { + "epoch": 0.8247507451947785, + "grad_norm": 1.1194005012512207, + "learning_rate": 7.840243414762417e-07, + "loss": 0.7191, + "step": 16048 + }, + { + "epoch": 0.8248021379381232, + "grad_norm": 1.0752993822097778, + "learning_rate": 7.835769737405518e-07, + "loss": 0.7062, + "step": 16049 + }, + { + "epoch": 0.8248535306814678, + "grad_norm": 1.0822778940200806, + "learning_rate": 7.831297228245443e-07, + "loss": 0.7466, + "step": 16050 + }, + { + "epoch": 0.8249049234248124, + "grad_norm": 1.1792621612548828, + "learning_rate": 7.826825887406086e-07, + "loss": 0.694, + "step": 16051 + }, + { + "epoch": 0.824956316168157, + "grad_norm": 0.7651743292808533, + "learning_rate": 7.822355715011354e-07, + "loss": 0.6672, + "step": 16052 + }, + { + "epoch": 0.8250077089115017, + "grad_norm": 1.0082892179489136, + "learning_rate": 7.817886711185091e-07, + "loss": 0.6501, + "step": 16053 + }, + { + "epoch": 0.8250591016548463, + "grad_norm": 1.137579083442688, + "learning_rate": 7.813418876051115e-07, + "loss": 0.7425, + "step": 16054 + }, + { + "epoch": 0.825110494398191, + "grad_norm": 0.7235168218612671, + "learning_rate": 7.808952209733195e-07, + "loss": 0.6594, + "step": 16055 + }, + { + "epoch": 0.8251618871415356, + "grad_norm": 1.1250555515289307, + "learning_rate": 7.804486712355119e-07, + "loss": 0.7151, + "step": 16056 + }, + { + "epoch": 0.8252132798848802, + "grad_norm": 1.1263036727905273, + "learning_rate": 7.800022384040562e-07, + "loss": 0.7208, + "step": 16057 + }, + { + "epoch": 0.8252646726282249, + "grad_norm": 1.0906765460968018, + "learning_rate": 7.795559224913252e-07, + "loss": 0.732, + "step": 16058 + }, + { + "epoch": 0.8253160653715695, + "grad_norm": 1.1602904796600342, + "learning_rate": 7.791097235096823e-07, + "loss": 0.7041, + "step": 16059 + }, + { + "epoch": 0.8253674581149142, + "grad_norm": 1.016738772392273, + "learning_rate": 7.786636414714893e-07, + "loss": 0.6601, + "step": 16060 + }, + { + "epoch": 0.8254188508582588, + "grad_norm": 1.0749131441116333, + "learning_rate": 7.782176763891075e-07, + "loss": 0.7016, + "step": 16061 + }, + { + "epoch": 0.8254702436016035, + "grad_norm": 1.0796241760253906, + "learning_rate": 7.777718282748919e-07, + "loss": 0.6833, + "step": 16062 + }, + { + "epoch": 0.8255216363449481, + "grad_norm": 0.7335962057113647, + "learning_rate": 7.773260971411944e-07, + "loss": 0.647, + "step": 16063 + }, + { + "epoch": 0.8255730290882928, + "grad_norm": 1.0635814666748047, + "learning_rate": 7.76880483000364e-07, + "loss": 0.6934, + "step": 16064 + }, + { + "epoch": 0.8256244218316374, + "grad_norm": 1.1309388875961304, + "learning_rate": 7.764349858647496e-07, + "loss": 0.7013, + "step": 16065 + }, + { + "epoch": 0.825675814574982, + "grad_norm": 0.7266185879707336, + "learning_rate": 7.759896057466904e-07, + "loss": 0.6527, + "step": 16066 + }, + { + "epoch": 0.8257272073183266, + "grad_norm": 1.14070725440979, + "learning_rate": 7.755443426585286e-07, + "loss": 0.6868, + "step": 16067 + }, + { + "epoch": 0.8257786000616713, + "grad_norm": 1.0829790830612183, + "learning_rate": 7.750991966125987e-07, + "loss": 0.687, + "step": 16068 + }, + { + "epoch": 0.8258299928050159, + "grad_norm": 0.8859814405441284, + "learning_rate": 7.746541676212355e-07, + "loss": 0.6498, + "step": 16069 + }, + { + "epoch": 0.8258813855483605, + "grad_norm": 1.080967664718628, + "learning_rate": 7.742092556967689e-07, + "loss": 0.6482, + "step": 16070 + }, + { + "epoch": 0.8259327782917052, + "grad_norm": 1.0481303930282593, + "learning_rate": 7.737644608515238e-07, + "loss": 0.7033, + "step": 16071 + }, + { + "epoch": 0.8259841710350498, + "grad_norm": 1.061299443244934, + "learning_rate": 7.733197830978273e-07, + "loss": 0.6405, + "step": 16072 + }, + { + "epoch": 0.8260355637783945, + "grad_norm": 1.4374608993530273, + "learning_rate": 7.728752224479946e-07, + "loss": 0.6839, + "step": 16073 + }, + { + "epoch": 0.8260869565217391, + "grad_norm": 1.106293797492981, + "learning_rate": 7.724307789143465e-07, + "loss": 0.6805, + "step": 16074 + }, + { + "epoch": 0.8261383492650838, + "grad_norm": 1.0680798292160034, + "learning_rate": 7.719864525091952e-07, + "loss": 0.7044, + "step": 16075 + }, + { + "epoch": 0.8261897420084284, + "grad_norm": 1.1610217094421387, + "learning_rate": 7.71542243244851e-07, + "loss": 0.7416, + "step": 16076 + }, + { + "epoch": 0.8262411347517731, + "grad_norm": 1.1235777139663696, + "learning_rate": 7.710981511336207e-07, + "loss": 0.6604, + "step": 16077 + }, + { + "epoch": 0.8262925274951177, + "grad_norm": 1.005269169807434, + "learning_rate": 7.706541761878101e-07, + "loss": 0.6551, + "step": 16078 + }, + { + "epoch": 0.8263439202384624, + "grad_norm": 0.7603880763053894, + "learning_rate": 7.70210318419719e-07, + "loss": 0.633, + "step": 16079 + }, + { + "epoch": 0.826395312981807, + "grad_norm": 1.0602203607559204, + "learning_rate": 7.697665778416441e-07, + "loss": 0.678, + "step": 16080 + }, + { + "epoch": 0.8264467057251516, + "grad_norm": 0.7427306175231934, + "learning_rate": 7.693229544658798e-07, + "loss": 0.6284, + "step": 16081 + }, + { + "epoch": 0.8264980984684962, + "grad_norm": 1.0884521007537842, + "learning_rate": 7.688794483047179e-07, + "loss": 0.7479, + "step": 16082 + }, + { + "epoch": 0.8265494912118408, + "grad_norm": 0.9648956656455994, + "learning_rate": 7.68436059370446e-07, + "loss": 0.6634, + "step": 16083 + }, + { + "epoch": 0.8266008839551855, + "grad_norm": 1.006766676902771, + "learning_rate": 7.679927876753468e-07, + "loss": 0.633, + "step": 16084 + }, + { + "epoch": 0.8266522766985301, + "grad_norm": 1.0697578191757202, + "learning_rate": 7.675496332317057e-07, + "loss": 0.7172, + "step": 16085 + }, + { + "epoch": 0.8267036694418748, + "grad_norm": 1.0052945613861084, + "learning_rate": 7.671065960517954e-07, + "loss": 0.6364, + "step": 16086 + }, + { + "epoch": 0.8267550621852194, + "grad_norm": 1.4203604459762573, + "learning_rate": 7.666636761478947e-07, + "loss": 0.6864, + "step": 16087 + }, + { + "epoch": 0.8268064549285641, + "grad_norm": 1.027005910873413, + "learning_rate": 7.662208735322735e-07, + "loss": 0.7022, + "step": 16088 + }, + { + "epoch": 0.8268578476719087, + "grad_norm": 1.0732640027999878, + "learning_rate": 7.657781882172e-07, + "loss": 0.6407, + "step": 16089 + }, + { + "epoch": 0.8269092404152534, + "grad_norm": 1.0870028734207153, + "learning_rate": 7.653356202149381e-07, + "loss": 0.7223, + "step": 16090 + }, + { + "epoch": 0.826960633158598, + "grad_norm": 1.1163480281829834, + "learning_rate": 7.648931695377521e-07, + "loss": 0.6542, + "step": 16091 + }, + { + "epoch": 0.8270120259019427, + "grad_norm": 1.1286154985427856, + "learning_rate": 7.644508361978987e-07, + "loss": 0.7438, + "step": 16092 + }, + { + "epoch": 0.8270634186452873, + "grad_norm": 1.0654898881912231, + "learning_rate": 7.640086202076325e-07, + "loss": 0.7066, + "step": 16093 + }, + { + "epoch": 0.827114811388632, + "grad_norm": 1.0906801223754883, + "learning_rate": 7.635665215792093e-07, + "loss": 0.7085, + "step": 16094 + }, + { + "epoch": 0.8271662041319766, + "grad_norm": 1.1705752611160278, + "learning_rate": 7.631245403248722e-07, + "loss": 0.7286, + "step": 16095 + }, + { + "epoch": 0.8272175968753213, + "grad_norm": 1.196956753730774, + "learning_rate": 7.626826764568712e-07, + "loss": 0.6975, + "step": 16096 + }, + { + "epoch": 0.8272689896186658, + "grad_norm": 1.1648024320602417, + "learning_rate": 7.622409299874451e-07, + "loss": 0.6395, + "step": 16097 + }, + { + "epoch": 0.8273203823620104, + "grad_norm": 1.0711920261383057, + "learning_rate": 7.617993009288371e-07, + "loss": 0.6844, + "step": 16098 + }, + { + "epoch": 0.8273717751053551, + "grad_norm": 1.1348947286605835, + "learning_rate": 7.613577892932783e-07, + "loss": 0.7215, + "step": 16099 + }, + { + "epoch": 0.8274231678486997, + "grad_norm": 1.0739679336547852, + "learning_rate": 7.609163950930048e-07, + "loss": 0.6815, + "step": 16100 + }, + { + "epoch": 0.8274745605920444, + "grad_norm": 1.1137958765029907, + "learning_rate": 7.604751183402437e-07, + "loss": 0.6929, + "step": 16101 + }, + { + "epoch": 0.827525953335389, + "grad_norm": 1.099880337715149, + "learning_rate": 7.600339590472211e-07, + "loss": 0.7106, + "step": 16102 + }, + { + "epoch": 0.8275773460787337, + "grad_norm": 1.077462911605835, + "learning_rate": 7.595929172261607e-07, + "loss": 0.6746, + "step": 16103 + }, + { + "epoch": 0.8276287388220783, + "grad_norm": 1.2197293043136597, + "learning_rate": 7.591519928892816e-07, + "loss": 0.6516, + "step": 16104 + }, + { + "epoch": 0.827680131565423, + "grad_norm": 1.0328541994094849, + "learning_rate": 7.587111860488e-07, + "loss": 0.6424, + "step": 16105 + }, + { + "epoch": 0.8277315243087676, + "grad_norm": 1.0963376760482788, + "learning_rate": 7.582704967169274e-07, + "loss": 0.689, + "step": 16106 + }, + { + "epoch": 0.8277829170521123, + "grad_norm": 1.0226638317108154, + "learning_rate": 7.578299249058774e-07, + "loss": 0.6684, + "step": 16107 + }, + { + "epoch": 0.8278343097954569, + "grad_norm": 1.1614270210266113, + "learning_rate": 7.573894706278512e-07, + "loss": 0.7209, + "step": 16108 + }, + { + "epoch": 0.8278857025388016, + "grad_norm": 1.091646432876587, + "learning_rate": 7.569491338950557e-07, + "loss": 0.7003, + "step": 16109 + }, + { + "epoch": 0.8279370952821462, + "grad_norm": 1.0209165811538696, + "learning_rate": 7.565089147196897e-07, + "loss": 0.6775, + "step": 16110 + }, + { + "epoch": 0.8279884880254909, + "grad_norm": 1.0242887735366821, + "learning_rate": 7.560688131139482e-07, + "loss": 0.6827, + "step": 16111 + }, + { + "epoch": 0.8280398807688354, + "grad_norm": 1.1423033475875854, + "learning_rate": 7.556288290900283e-07, + "loss": 0.696, + "step": 16112 + }, + { + "epoch": 0.82809127351218, + "grad_norm": 1.103794813156128, + "learning_rate": 7.551889626601161e-07, + "loss": 0.6038, + "step": 16113 + }, + { + "epoch": 0.8281426662555247, + "grad_norm": 1.0342439413070679, + "learning_rate": 7.547492138364032e-07, + "loss": 0.6382, + "step": 16114 + }, + { + "epoch": 0.8281940589988693, + "grad_norm": 1.0883582830429077, + "learning_rate": 7.543095826310676e-07, + "loss": 0.6889, + "step": 16115 + }, + { + "epoch": 0.828245451742214, + "grad_norm": 1.073947787284851, + "learning_rate": 7.538700690562945e-07, + "loss": 0.6807, + "step": 16116 + }, + { + "epoch": 0.8282968444855586, + "grad_norm": 0.7490939497947693, + "learning_rate": 7.534306731242585e-07, + "loss": 0.6208, + "step": 16117 + }, + { + "epoch": 0.8283482372289033, + "grad_norm": 0.8126642107963562, + "learning_rate": 7.52991394847134e-07, + "loss": 0.6487, + "step": 16118 + }, + { + "epoch": 0.8283996299722479, + "grad_norm": 1.0209972858428955, + "learning_rate": 7.525522342370906e-07, + "loss": 0.6973, + "step": 16119 + }, + { + "epoch": 0.8284510227155926, + "grad_norm": 0.8093645572662354, + "learning_rate": 7.521131913062979e-07, + "loss": 0.6163, + "step": 16120 + }, + { + "epoch": 0.8285024154589372, + "grad_norm": 1.1898071765899658, + "learning_rate": 7.516742660669185e-07, + "loss": 0.6904, + "step": 16121 + }, + { + "epoch": 0.8285538082022819, + "grad_norm": 1.0881743431091309, + "learning_rate": 7.512354585311121e-07, + "loss": 0.707, + "step": 16122 + }, + { + "epoch": 0.8286052009456265, + "grad_norm": 1.1162421703338623, + "learning_rate": 7.507967687110401e-07, + "loss": 0.7229, + "step": 16123 + }, + { + "epoch": 0.8286565936889712, + "grad_norm": 1.0954328775405884, + "learning_rate": 7.503581966188517e-07, + "loss": 0.6772, + "step": 16124 + }, + { + "epoch": 0.8287079864323158, + "grad_norm": 1.0674210786819458, + "learning_rate": 7.499197422667015e-07, + "loss": 0.6896, + "step": 16125 + }, + { + "epoch": 0.8287593791756604, + "grad_norm": 0.7597302198410034, + "learning_rate": 7.494814056667366e-07, + "loss": 0.6557, + "step": 16126 + }, + { + "epoch": 0.828810771919005, + "grad_norm": 1.0989633798599243, + "learning_rate": 7.490431868311005e-07, + "loss": 0.6646, + "step": 16127 + }, + { + "epoch": 0.8288621646623496, + "grad_norm": 1.0887296199798584, + "learning_rate": 7.486050857719346e-07, + "loss": 0.667, + "step": 16128 + }, + { + "epoch": 0.8289135574056943, + "grad_norm": 1.1052231788635254, + "learning_rate": 7.481671025013776e-07, + "loss": 0.6669, + "step": 16129 + }, + { + "epoch": 0.8289649501490389, + "grad_norm": 1.1002193689346313, + "learning_rate": 7.477292370315647e-07, + "loss": 0.7092, + "step": 16130 + }, + { + "epoch": 0.8290163428923836, + "grad_norm": 1.110308289527893, + "learning_rate": 7.472914893746263e-07, + "loss": 0.692, + "step": 16131 + }, + { + "epoch": 0.8290677356357282, + "grad_norm": 1.0620559453964233, + "learning_rate": 7.468538595426894e-07, + "loss": 0.7053, + "step": 16132 + }, + { + "epoch": 0.8291191283790729, + "grad_norm": 0.6852643489837646, + "learning_rate": 7.464163475478819e-07, + "loss": 0.6557, + "step": 16133 + }, + { + "epoch": 0.8291705211224175, + "grad_norm": 0.7366395592689514, + "learning_rate": 7.459789534023237e-07, + "loss": 0.6525, + "step": 16134 + }, + { + "epoch": 0.8292219138657622, + "grad_norm": 1.0354663133621216, + "learning_rate": 7.455416771181323e-07, + "loss": 0.6428, + "step": 16135 + }, + { + "epoch": 0.8292733066091068, + "grad_norm": 1.0779176950454712, + "learning_rate": 7.451045187074263e-07, + "loss": 0.6637, + "step": 16136 + }, + { + "epoch": 0.8293246993524515, + "grad_norm": 0.8375317454338074, + "learning_rate": 7.446674781823126e-07, + "loss": 0.677, + "step": 16137 + }, + { + "epoch": 0.8293760920957961, + "grad_norm": 1.077837586402893, + "learning_rate": 7.442305555549034e-07, + "loss": 0.6783, + "step": 16138 + }, + { + "epoch": 0.8294274848391407, + "grad_norm": 1.0832537412643433, + "learning_rate": 7.437937508373034e-07, + "loss": 0.6302, + "step": 16139 + }, + { + "epoch": 0.8294788775824854, + "grad_norm": 1.1278173923492432, + "learning_rate": 7.433570640416144e-07, + "loss": 0.7134, + "step": 16140 + }, + { + "epoch": 0.82953027032583, + "grad_norm": 1.041581392288208, + "learning_rate": 7.429204951799334e-07, + "loss": 0.6818, + "step": 16141 + }, + { + "epoch": 0.8295816630691746, + "grad_norm": 1.1409856081008911, + "learning_rate": 7.424840442643588e-07, + "loss": 0.6551, + "step": 16142 + }, + { + "epoch": 0.8296330558125192, + "grad_norm": 1.043656587600708, + "learning_rate": 7.420477113069818e-07, + "loss": 0.7066, + "step": 16143 + }, + { + "epoch": 0.8296844485558639, + "grad_norm": 1.0693556070327759, + "learning_rate": 7.416114963198895e-07, + "loss": 0.6849, + "step": 16144 + }, + { + "epoch": 0.8297358412992085, + "grad_norm": 1.0682543516159058, + "learning_rate": 7.41175399315171e-07, + "loss": 0.7332, + "step": 16145 + }, + { + "epoch": 0.8297872340425532, + "grad_norm": 0.7439308762550354, + "learning_rate": 7.407394203049068e-07, + "loss": 0.6183, + "step": 16146 + }, + { + "epoch": 0.8298386267858978, + "grad_norm": 1.0198359489440918, + "learning_rate": 7.403035593011765e-07, + "loss": 0.7305, + "step": 16147 + }, + { + "epoch": 0.8298900195292425, + "grad_norm": 1.1578389406204224, + "learning_rate": 7.398678163160549e-07, + "loss": 0.7185, + "step": 16148 + }, + { + "epoch": 0.8299414122725871, + "grad_norm": 1.1866463422775269, + "learning_rate": 7.394321913616176e-07, + "loss": 0.7673, + "step": 16149 + }, + { + "epoch": 0.8299928050159318, + "grad_norm": 1.1783944368362427, + "learning_rate": 7.389966844499297e-07, + "loss": 0.6968, + "step": 16150 + }, + { + "epoch": 0.8300441977592764, + "grad_norm": 1.2017390727996826, + "learning_rate": 7.385612955930605e-07, + "loss": 0.7328, + "step": 16151 + }, + { + "epoch": 0.830095590502621, + "grad_norm": 0.6819837093353271, + "learning_rate": 7.38126024803072e-07, + "loss": 0.6178, + "step": 16152 + }, + { + "epoch": 0.8301469832459657, + "grad_norm": 1.1740351915359497, + "learning_rate": 7.376908720920228e-07, + "loss": 0.7542, + "step": 16153 + }, + { + "epoch": 0.8301983759893103, + "grad_norm": 1.1745269298553467, + "learning_rate": 7.372558374719707e-07, + "loss": 0.7291, + "step": 16154 + }, + { + "epoch": 0.830249768732655, + "grad_norm": 0.746801495552063, + "learning_rate": 7.368209209549682e-07, + "loss": 0.6578, + "step": 16155 + }, + { + "epoch": 0.8303011614759996, + "grad_norm": 1.080525517463684, + "learning_rate": 7.36386122553065e-07, + "loss": 0.6715, + "step": 16156 + }, + { + "epoch": 0.8303525542193442, + "grad_norm": 1.1389060020446777, + "learning_rate": 7.35951442278306e-07, + "loss": 0.7078, + "step": 16157 + }, + { + "epoch": 0.8304039469626888, + "grad_norm": 1.033244013786316, + "learning_rate": 7.355168801427387e-07, + "loss": 0.6639, + "step": 16158 + }, + { + "epoch": 0.8304553397060335, + "grad_norm": 1.1089292764663696, + "learning_rate": 7.350824361583975e-07, + "loss": 0.7096, + "step": 16159 + }, + { + "epoch": 0.8305067324493781, + "grad_norm": 1.0920753479003906, + "learning_rate": 7.346481103373227e-07, + "loss": 0.7368, + "step": 16160 + }, + { + "epoch": 0.8305581251927228, + "grad_norm": 1.0647251605987549, + "learning_rate": 7.342139026915457e-07, + "loss": 0.7333, + "step": 16161 + }, + { + "epoch": 0.8306095179360674, + "grad_norm": 0.9290080070495605, + "learning_rate": 7.337798132330992e-07, + "loss": 0.65, + "step": 16162 + }, + { + "epoch": 0.8306609106794121, + "grad_norm": 0.6671953797340393, + "learning_rate": 7.33345841974008e-07, + "loss": 0.635, + "step": 16163 + }, + { + "epoch": 0.8307123034227567, + "grad_norm": 1.1195402145385742, + "learning_rate": 7.329119889262948e-07, + "loss": 0.7318, + "step": 16164 + }, + { + "epoch": 0.8307636961661014, + "grad_norm": 1.1184241771697998, + "learning_rate": 7.324782541019837e-07, + "loss": 0.6835, + "step": 16165 + }, + { + "epoch": 0.830815088909446, + "grad_norm": 1.0903064012527466, + "learning_rate": 7.320446375130869e-07, + "loss": 0.6564, + "step": 16166 + }, + { + "epoch": 0.8308664816527906, + "grad_norm": 1.032010555267334, + "learning_rate": 7.316111391716213e-07, + "loss": 0.7193, + "step": 16167 + }, + { + "epoch": 0.8309178743961353, + "grad_norm": 1.075393795967102, + "learning_rate": 7.311777590895963e-07, + "loss": 0.6545, + "step": 16168 + }, + { + "epoch": 0.8309692671394799, + "grad_norm": 1.101967453956604, + "learning_rate": 7.307444972790195e-07, + "loss": 0.7002, + "step": 16169 + }, + { + "epoch": 0.8310206598828246, + "grad_norm": 0.7552602887153625, + "learning_rate": 7.30311353751893e-07, + "loss": 0.6425, + "step": 16170 + }, + { + "epoch": 0.8310720526261692, + "grad_norm": 1.045198678970337, + "learning_rate": 7.298783285202205e-07, + "loss": 0.7022, + "step": 16171 + }, + { + "epoch": 0.8311234453695138, + "grad_norm": 1.169352650642395, + "learning_rate": 7.294454215959979e-07, + "loss": 0.6902, + "step": 16172 + }, + { + "epoch": 0.8311748381128584, + "grad_norm": 1.1398063898086548, + "learning_rate": 7.290126329912172e-07, + "loss": 0.7111, + "step": 16173 + }, + { + "epoch": 0.8312262308562031, + "grad_norm": 1.03053879737854, + "learning_rate": 7.285799627178741e-07, + "loss": 0.6814, + "step": 16174 + }, + { + "epoch": 0.8312776235995477, + "grad_norm": 0.7771539688110352, + "learning_rate": 7.28147410787951e-07, + "loss": 0.6455, + "step": 16175 + }, + { + "epoch": 0.8313290163428924, + "grad_norm": 1.0525847673416138, + "learning_rate": 7.277149772134346e-07, + "loss": 0.6832, + "step": 16176 + }, + { + "epoch": 0.831380409086237, + "grad_norm": 1.0635712146759033, + "learning_rate": 7.27282662006305e-07, + "loss": 0.7151, + "step": 16177 + }, + { + "epoch": 0.8314318018295817, + "grad_norm": 1.0379291772842407, + "learning_rate": 7.268504651785424e-07, + "loss": 0.6594, + "step": 16178 + }, + { + "epoch": 0.8314831945729263, + "grad_norm": 1.095997929573059, + "learning_rate": 7.26418386742117e-07, + "loss": 0.721, + "step": 16179 + }, + { + "epoch": 0.831534587316271, + "grad_norm": 0.6822413802146912, + "learning_rate": 7.259864267090033e-07, + "loss": 0.6756, + "step": 16180 + }, + { + "epoch": 0.8315859800596156, + "grad_norm": 1.026462197303772, + "learning_rate": 7.255545850911677e-07, + "loss": 0.6399, + "step": 16181 + }, + { + "epoch": 0.8316373728029602, + "grad_norm": 1.073300838470459, + "learning_rate": 7.251228619005751e-07, + "loss": 0.6341, + "step": 16182 + }, + { + "epoch": 0.8316887655463049, + "grad_norm": 0.6554083824157715, + "learning_rate": 7.246912571491854e-07, + "loss": 0.6503, + "step": 16183 + }, + { + "epoch": 0.8317401582896495, + "grad_norm": 1.1766184568405151, + "learning_rate": 7.242597708489585e-07, + "loss": 0.6874, + "step": 16184 + }, + { + "epoch": 0.8317915510329942, + "grad_norm": 1.0305601358413696, + "learning_rate": 7.238284030118492e-07, + "loss": 0.6669, + "step": 16185 + }, + { + "epoch": 0.8318429437763388, + "grad_norm": 1.1265326738357544, + "learning_rate": 7.233971536498064e-07, + "loss": 0.6988, + "step": 16186 + }, + { + "epoch": 0.8318943365196835, + "grad_norm": 1.0956284999847412, + "learning_rate": 7.229660227747825e-07, + "loss": 0.7151, + "step": 16187 + }, + { + "epoch": 0.831945729263028, + "grad_norm": 0.8320324420928955, + "learning_rate": 7.225350103987178e-07, + "loss": 0.608, + "step": 16188 + }, + { + "epoch": 0.8319971220063727, + "grad_norm": 1.2274229526519775, + "learning_rate": 7.221041165335568e-07, + "loss": 0.6854, + "step": 16189 + }, + { + "epoch": 0.8320485147497173, + "grad_norm": 1.117397427558899, + "learning_rate": 7.216733411912369e-07, + "loss": 0.707, + "step": 16190 + }, + { + "epoch": 0.832099907493062, + "grad_norm": 1.0491365194320679, + "learning_rate": 7.212426843836928e-07, + "loss": 0.6797, + "step": 16191 + }, + { + "epoch": 0.8321513002364066, + "grad_norm": 0.6913713216781616, + "learning_rate": 7.208121461228556e-07, + "loss": 0.6848, + "step": 16192 + }, + { + "epoch": 0.8322026929797512, + "grad_norm": 1.117197871208191, + "learning_rate": 7.203817264206558e-07, + "loss": 0.7322, + "step": 16193 + }, + { + "epoch": 0.8322540857230959, + "grad_norm": 0.727449893951416, + "learning_rate": 7.199514252890178e-07, + "loss": 0.6134, + "step": 16194 + }, + { + "epoch": 0.8323054784664405, + "grad_norm": 1.123633861541748, + "learning_rate": 7.195212427398618e-07, + "loss": 0.7045, + "step": 16195 + }, + { + "epoch": 0.8323568712097852, + "grad_norm": 1.0169224739074707, + "learning_rate": 7.190911787851085e-07, + "loss": 0.6806, + "step": 16196 + }, + { + "epoch": 0.8324082639531298, + "grad_norm": 0.6878763437271118, + "learning_rate": 7.186612334366727e-07, + "loss": 0.6304, + "step": 16197 + }, + { + "epoch": 0.8324596566964745, + "grad_norm": 1.0516421794891357, + "learning_rate": 7.182314067064656e-07, + "loss": 0.7084, + "step": 16198 + }, + { + "epoch": 0.8325110494398191, + "grad_norm": 0.6842736601829529, + "learning_rate": 7.178016986063957e-07, + "loss": 0.6295, + "step": 16199 + }, + { + "epoch": 0.8325624421831638, + "grad_norm": 1.0728563070297241, + "learning_rate": 7.173721091483715e-07, + "loss": 0.6564, + "step": 16200 + }, + { + "epoch": 0.8326138349265084, + "grad_norm": 1.0831098556518555, + "learning_rate": 7.169426383442901e-07, + "loss": 0.7153, + "step": 16201 + }, + { + "epoch": 0.8326652276698531, + "grad_norm": 1.1927567720413208, + "learning_rate": 7.165132862060542e-07, + "loss": 0.7333, + "step": 16202 + }, + { + "epoch": 0.8327166204131976, + "grad_norm": 1.0496019124984741, + "learning_rate": 7.160840527455587e-07, + "loss": 0.6874, + "step": 16203 + }, + { + "epoch": 0.8327680131565423, + "grad_norm": 1.063751459121704, + "learning_rate": 7.156549379746941e-07, + "loss": 0.6742, + "step": 16204 + }, + { + "epoch": 0.8328194058998869, + "grad_norm": 0.6623212099075317, + "learning_rate": 7.152259419053514e-07, + "loss": 0.6529, + "step": 16205 + }, + { + "epoch": 0.8328707986432315, + "grad_norm": 1.016613483428955, + "learning_rate": 7.147970645494151e-07, + "loss": 0.6868, + "step": 16206 + }, + { + "epoch": 0.8329221913865762, + "grad_norm": 1.121164083480835, + "learning_rate": 7.143683059187701e-07, + "loss": 0.6552, + "step": 16207 + }, + { + "epoch": 0.8329735841299208, + "grad_norm": 0.7065137028694153, + "learning_rate": 7.139396660252917e-07, + "loss": 0.6633, + "step": 16208 + }, + { + "epoch": 0.8330249768732655, + "grad_norm": 1.082448959350586, + "learning_rate": 7.135111448808585e-07, + "loss": 0.6849, + "step": 16209 + }, + { + "epoch": 0.8330763696166101, + "grad_norm": 1.1236902475357056, + "learning_rate": 7.130827424973419e-07, + "loss": 0.7513, + "step": 16210 + }, + { + "epoch": 0.8331277623599548, + "grad_norm": 1.0635037422180176, + "learning_rate": 7.126544588866119e-07, + "loss": 0.7184, + "step": 16211 + }, + { + "epoch": 0.8331791551032994, + "grad_norm": 0.6885570883750916, + "learning_rate": 7.122262940605324e-07, + "loss": 0.6387, + "step": 16212 + }, + { + "epoch": 0.8332305478466441, + "grad_norm": 1.0762639045715332, + "learning_rate": 7.117982480309693e-07, + "loss": 0.6728, + "step": 16213 + }, + { + "epoch": 0.8332819405899887, + "grad_norm": 1.1587618589401245, + "learning_rate": 7.1137032080978e-07, + "loss": 0.6812, + "step": 16214 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 1.0824652910232544, + "learning_rate": 7.109425124088204e-07, + "loss": 0.6974, + "step": 16215 + }, + { + "epoch": 0.833384726076678, + "grad_norm": 1.0130155086517334, + "learning_rate": 7.105148228399455e-07, + "loss": 0.6715, + "step": 16216 + }, + { + "epoch": 0.8334361188200227, + "grad_norm": 1.1074610948562622, + "learning_rate": 7.100872521150009e-07, + "loss": 0.6995, + "step": 16217 + }, + { + "epoch": 0.8334875115633672, + "grad_norm": 1.0440499782562256, + "learning_rate": 7.096598002458372e-07, + "loss": 0.6908, + "step": 16218 + }, + { + "epoch": 0.8335389043067118, + "grad_norm": 1.0427745580673218, + "learning_rate": 7.092324672442941e-07, + "loss": 0.6806, + "step": 16219 + }, + { + "epoch": 0.8335902970500565, + "grad_norm": 1.0647588968276978, + "learning_rate": 7.088052531222134e-07, + "loss": 0.6443, + "step": 16220 + }, + { + "epoch": 0.8336416897934011, + "grad_norm": 1.069153904914856, + "learning_rate": 7.083781578914284e-07, + "loss": 0.6948, + "step": 16221 + }, + { + "epoch": 0.8336930825367458, + "grad_norm": 1.1501058340072632, + "learning_rate": 7.079511815637757e-07, + "loss": 0.7796, + "step": 16222 + }, + { + "epoch": 0.8337444752800904, + "grad_norm": 0.7508118748664856, + "learning_rate": 7.075243241510837e-07, + "loss": 0.6778, + "step": 16223 + }, + { + "epoch": 0.8337958680234351, + "grad_norm": 1.1611324548721313, + "learning_rate": 7.070975856651774e-07, + "loss": 0.7381, + "step": 16224 + }, + { + "epoch": 0.8338472607667797, + "grad_norm": 1.108817219734192, + "learning_rate": 7.066709661178822e-07, + "loss": 0.6526, + "step": 16225 + }, + { + "epoch": 0.8338986535101244, + "grad_norm": 1.1026725769042969, + "learning_rate": 7.062444655210171e-07, + "loss": 0.7125, + "step": 16226 + }, + { + "epoch": 0.833950046253469, + "grad_norm": 1.0851575136184692, + "learning_rate": 7.058180838863987e-07, + "loss": 0.7321, + "step": 16227 + }, + { + "epoch": 0.8340014389968137, + "grad_norm": 1.0817135572433472, + "learning_rate": 7.053918212258387e-07, + "loss": 0.7122, + "step": 16228 + }, + { + "epoch": 0.8340528317401583, + "grad_norm": 1.1189020872116089, + "learning_rate": 7.049656775511509e-07, + "loss": 0.6908, + "step": 16229 + }, + { + "epoch": 0.834104224483503, + "grad_norm": 0.6704785227775574, + "learning_rate": 7.045396528741377e-07, + "loss": 0.6625, + "step": 16230 + }, + { + "epoch": 0.8341556172268476, + "grad_norm": 1.001562237739563, + "learning_rate": 7.041137472066051e-07, + "loss": 0.6339, + "step": 16231 + }, + { + "epoch": 0.8342070099701923, + "grad_norm": 1.1632343530654907, + "learning_rate": 7.03687960560353e-07, + "loss": 0.678, + "step": 16232 + }, + { + "epoch": 0.8342584027135368, + "grad_norm": 1.0946893692016602, + "learning_rate": 7.032622929471771e-07, + "loss": 0.706, + "step": 16233 + }, + { + "epoch": 0.8343097954568814, + "grad_norm": 1.1696473360061646, + "learning_rate": 7.028367443788708e-07, + "loss": 0.6746, + "step": 16234 + }, + { + "epoch": 0.8343611882002261, + "grad_norm": 0.8556324243545532, + "learning_rate": 7.024113148672257e-07, + "loss": 0.6312, + "step": 16235 + }, + { + "epoch": 0.8344125809435707, + "grad_norm": 0.7275747060775757, + "learning_rate": 7.019860044240285e-07, + "loss": 0.6215, + "step": 16236 + }, + { + "epoch": 0.8344639736869154, + "grad_norm": 1.0374876260757446, + "learning_rate": 7.015608130610607e-07, + "loss": 0.684, + "step": 16237 + }, + { + "epoch": 0.83451536643026, + "grad_norm": 1.1674046516418457, + "learning_rate": 7.011357407901053e-07, + "loss": 0.6725, + "step": 16238 + }, + { + "epoch": 0.8345667591736047, + "grad_norm": 1.089556336402893, + "learning_rate": 7.007107876229385e-07, + "loss": 0.7047, + "step": 16239 + }, + { + "epoch": 0.8346181519169493, + "grad_norm": 1.1114647388458252, + "learning_rate": 7.00285953571333e-07, + "loss": 0.6829, + "step": 16240 + }, + { + "epoch": 0.834669544660294, + "grad_norm": 1.0776846408843994, + "learning_rate": 6.998612386470593e-07, + "loss": 0.7058, + "step": 16241 + }, + { + "epoch": 0.8347209374036386, + "grad_norm": 1.0587656497955322, + "learning_rate": 6.99436642861887e-07, + "loss": 0.6502, + "step": 16242 + }, + { + "epoch": 0.8347723301469833, + "grad_norm": 1.1687204837799072, + "learning_rate": 6.990121662275761e-07, + "loss": 0.6645, + "step": 16243 + }, + { + "epoch": 0.8348237228903279, + "grad_norm": 0.7828230261802673, + "learning_rate": 6.985878087558894e-07, + "loss": 0.6551, + "step": 16244 + }, + { + "epoch": 0.8348751156336726, + "grad_norm": 0.6792510747909546, + "learning_rate": 6.981635704585843e-07, + "loss": 0.6532, + "step": 16245 + }, + { + "epoch": 0.8349265083770172, + "grad_norm": 1.1054781675338745, + "learning_rate": 6.977394513474129e-07, + "loss": 0.703, + "step": 16246 + }, + { + "epoch": 0.8349779011203619, + "grad_norm": 0.7244647145271301, + "learning_rate": 6.973154514341279e-07, + "loss": 0.6849, + "step": 16247 + }, + { + "epoch": 0.8350292938637064, + "grad_norm": 1.0690271854400635, + "learning_rate": 6.968915707304751e-07, + "loss": 0.6911, + "step": 16248 + }, + { + "epoch": 0.835080686607051, + "grad_norm": 1.1193323135375977, + "learning_rate": 6.964678092481996e-07, + "loss": 0.6743, + "step": 16249 + }, + { + "epoch": 0.8351320793503957, + "grad_norm": 1.0752524137496948, + "learning_rate": 6.960441669990403e-07, + "loss": 0.6596, + "step": 16250 + }, + { + "epoch": 0.8351834720937403, + "grad_norm": 1.090964674949646, + "learning_rate": 6.956206439947377e-07, + "loss": 0.6928, + "step": 16251 + }, + { + "epoch": 0.835234864837085, + "grad_norm": 1.9304898977279663, + "learning_rate": 6.95197240247022e-07, + "loss": 0.7414, + "step": 16252 + }, + { + "epoch": 0.8352862575804296, + "grad_norm": 1.0531097650527954, + "learning_rate": 6.94773955767627e-07, + "loss": 0.669, + "step": 16253 + }, + { + "epoch": 0.8353376503237743, + "grad_norm": 1.1478848457336426, + "learning_rate": 6.943507905682772e-07, + "loss": 0.7156, + "step": 16254 + }, + { + "epoch": 0.8353890430671189, + "grad_norm": 1.0878046751022339, + "learning_rate": 6.939277446607007e-07, + "loss": 0.6616, + "step": 16255 + }, + { + "epoch": 0.8354404358104636, + "grad_norm": 1.080569863319397, + "learning_rate": 6.935048180566162e-07, + "loss": 0.6983, + "step": 16256 + }, + { + "epoch": 0.8354918285538082, + "grad_norm": 0.7603037357330322, + "learning_rate": 6.930820107677394e-07, + "loss": 0.6153, + "step": 16257 + }, + { + "epoch": 0.8355432212971529, + "grad_norm": 1.0902410745620728, + "learning_rate": 6.926593228057893e-07, + "loss": 0.6853, + "step": 16258 + }, + { + "epoch": 0.8355946140404975, + "grad_norm": 1.0827269554138184, + "learning_rate": 6.92236754182472e-07, + "loss": 0.7207, + "step": 16259 + }, + { + "epoch": 0.8356460067838422, + "grad_norm": 1.0454212427139282, + "learning_rate": 6.918143049094983e-07, + "loss": 0.7168, + "step": 16260 + }, + { + "epoch": 0.8356973995271868, + "grad_norm": 1.09187650680542, + "learning_rate": 6.913919749985709e-07, + "loss": 0.6864, + "step": 16261 + }, + { + "epoch": 0.8357487922705314, + "grad_norm": 1.0645188093185425, + "learning_rate": 6.909697644613916e-07, + "loss": 0.6779, + "step": 16262 + }, + { + "epoch": 0.8358001850138761, + "grad_norm": 1.1095867156982422, + "learning_rate": 6.905476733096566e-07, + "loss": 0.6902, + "step": 16263 + }, + { + "epoch": 0.8358515777572206, + "grad_norm": 1.1850234270095825, + "learning_rate": 6.901257015550627e-07, + "loss": 0.7136, + "step": 16264 + }, + { + "epoch": 0.8359029705005653, + "grad_norm": 1.1090548038482666, + "learning_rate": 6.897038492092994e-07, + "loss": 0.7023, + "step": 16265 + }, + { + "epoch": 0.8359543632439099, + "grad_norm": 1.0831363201141357, + "learning_rate": 6.89282116284054e-07, + "loss": 0.7088, + "step": 16266 + }, + { + "epoch": 0.8360057559872546, + "grad_norm": 1.1062325239181519, + "learning_rate": 6.888605027910145e-07, + "loss": 0.6641, + "step": 16267 + }, + { + "epoch": 0.8360571487305992, + "grad_norm": 1.0894643068313599, + "learning_rate": 6.884390087418569e-07, + "loss": 0.7337, + "step": 16268 + }, + { + "epoch": 0.8361085414739439, + "grad_norm": 1.1248421669006348, + "learning_rate": 6.880176341482625e-07, + "loss": 0.6244, + "step": 16269 + }, + { + "epoch": 0.8361599342172885, + "grad_norm": 1.091792345046997, + "learning_rate": 6.875963790219043e-07, + "loss": 0.682, + "step": 16270 + }, + { + "epoch": 0.8362113269606332, + "grad_norm": 1.0689754486083984, + "learning_rate": 6.87175243374456e-07, + "loss": 0.6837, + "step": 16271 + }, + { + "epoch": 0.8362627197039778, + "grad_norm": 1.037709355354309, + "learning_rate": 6.86754227217582e-07, + "loss": 0.6723, + "step": 16272 + }, + { + "epoch": 0.8363141124473225, + "grad_norm": 1.0764809846878052, + "learning_rate": 6.863333305629494e-07, + "loss": 0.6833, + "step": 16273 + }, + { + "epoch": 0.8363655051906671, + "grad_norm": 1.0703129768371582, + "learning_rate": 6.859125534222189e-07, + "loss": 0.6661, + "step": 16274 + }, + { + "epoch": 0.8364168979340117, + "grad_norm": 1.1416085958480835, + "learning_rate": 6.854918958070472e-07, + "loss": 0.6382, + "step": 16275 + }, + { + "epoch": 0.8364682906773564, + "grad_norm": 0.7774978280067444, + "learning_rate": 6.850713577290913e-07, + "loss": 0.6624, + "step": 16276 + }, + { + "epoch": 0.836519683420701, + "grad_norm": 1.1273070573806763, + "learning_rate": 6.846509392000011e-07, + "loss": 0.6531, + "step": 16277 + }, + { + "epoch": 0.8365710761640457, + "grad_norm": 1.182376503944397, + "learning_rate": 6.842306402314258e-07, + "loss": 0.662, + "step": 16278 + }, + { + "epoch": 0.8366224689073902, + "grad_norm": 0.6770730018615723, + "learning_rate": 6.838104608350077e-07, + "loss": 0.6126, + "step": 16279 + }, + { + "epoch": 0.8366738616507349, + "grad_norm": 1.0436102151870728, + "learning_rate": 6.833904010223919e-07, + "loss": 0.6845, + "step": 16280 + }, + { + "epoch": 0.8367252543940795, + "grad_norm": 1.0707000494003296, + "learning_rate": 6.829704608052123e-07, + "loss": 0.7269, + "step": 16281 + }, + { + "epoch": 0.8367766471374242, + "grad_norm": 1.0488498210906982, + "learning_rate": 6.825506401951071e-07, + "loss": 0.6341, + "step": 16282 + }, + { + "epoch": 0.8368280398807688, + "grad_norm": 1.0473618507385254, + "learning_rate": 6.821309392037068e-07, + "loss": 0.7156, + "step": 16283 + }, + { + "epoch": 0.8368794326241135, + "grad_norm": 1.1246974468231201, + "learning_rate": 6.817113578426393e-07, + "loss": 0.7223, + "step": 16284 + }, + { + "epoch": 0.8369308253674581, + "grad_norm": 1.1153661012649536, + "learning_rate": 6.812918961235282e-07, + "loss": 0.6496, + "step": 16285 + }, + { + "epoch": 0.8369822181108028, + "grad_norm": 1.1102979183197021, + "learning_rate": 6.808725540579975e-07, + "loss": 0.6941, + "step": 16286 + }, + { + "epoch": 0.8370336108541474, + "grad_norm": 1.0231292247772217, + "learning_rate": 6.804533316576644e-07, + "loss": 0.6468, + "step": 16287 + }, + { + "epoch": 0.837085003597492, + "grad_norm": 1.0611438751220703, + "learning_rate": 6.800342289341433e-07, + "loss": 0.6834, + "step": 16288 + }, + { + "epoch": 0.8371363963408367, + "grad_norm": 1.0873777866363525, + "learning_rate": 6.796152458990469e-07, + "loss": 0.7043, + "step": 16289 + }, + { + "epoch": 0.8371877890841813, + "grad_norm": 1.006508469581604, + "learning_rate": 6.791963825639825e-07, + "loss": 0.6857, + "step": 16290 + }, + { + "epoch": 0.837239181827526, + "grad_norm": 1.046751618385315, + "learning_rate": 6.787776389405559e-07, + "loss": 0.7373, + "step": 16291 + }, + { + "epoch": 0.8372905745708706, + "grad_norm": 1.0341377258300781, + "learning_rate": 6.783590150403669e-07, + "loss": 0.6417, + "step": 16292 + }, + { + "epoch": 0.8373419673142153, + "grad_norm": 1.1443153619766235, + "learning_rate": 6.779405108750175e-07, + "loss": 0.6744, + "step": 16293 + }, + { + "epoch": 0.8373933600575598, + "grad_norm": 1.1029880046844482, + "learning_rate": 6.775221264560983e-07, + "loss": 0.7115, + "step": 16294 + }, + { + "epoch": 0.8374447528009045, + "grad_norm": 1.034118413925171, + "learning_rate": 6.771038617952041e-07, + "loss": 0.7104, + "step": 16295 + }, + { + "epoch": 0.8374961455442491, + "grad_norm": 1.0693303346633911, + "learning_rate": 6.766857169039226e-07, + "loss": 0.6469, + "step": 16296 + }, + { + "epoch": 0.8375475382875938, + "grad_norm": 1.14186429977417, + "learning_rate": 6.762676917938377e-07, + "loss": 0.712, + "step": 16297 + }, + { + "epoch": 0.8375989310309384, + "grad_norm": 1.1504127979278564, + "learning_rate": 6.758497864765329e-07, + "loss": 0.7231, + "step": 16298 + }, + { + "epoch": 0.8376503237742831, + "grad_norm": 1.084836483001709, + "learning_rate": 6.754320009635856e-07, + "loss": 0.7136, + "step": 16299 + }, + { + "epoch": 0.8377017165176277, + "grad_norm": 1.0781267881393433, + "learning_rate": 6.750143352665717e-07, + "loss": 0.702, + "step": 16300 + }, + { + "epoch": 0.8377531092609724, + "grad_norm": 1.1036078929901123, + "learning_rate": 6.745967893970606e-07, + "loss": 0.7042, + "step": 16301 + }, + { + "epoch": 0.837804502004317, + "grad_norm": 1.1051130294799805, + "learning_rate": 6.741793633666244e-07, + "loss": 0.6755, + "step": 16302 + }, + { + "epoch": 0.8378558947476616, + "grad_norm": 1.036118745803833, + "learning_rate": 6.737620571868258e-07, + "loss": 0.6627, + "step": 16303 + }, + { + "epoch": 0.8379072874910063, + "grad_norm": 0.9984489679336548, + "learning_rate": 6.733448708692275e-07, + "loss": 0.6507, + "step": 16304 + }, + { + "epoch": 0.8379586802343509, + "grad_norm": 1.1033415794372559, + "learning_rate": 6.729278044253867e-07, + "loss": 0.7091, + "step": 16305 + }, + { + "epoch": 0.8380100729776956, + "grad_norm": 1.1419497728347778, + "learning_rate": 6.725108578668609e-07, + "loss": 0.6371, + "step": 16306 + }, + { + "epoch": 0.8380614657210402, + "grad_norm": 1.183470606803894, + "learning_rate": 6.720940312052004e-07, + "loss": 0.6731, + "step": 16307 + }, + { + "epoch": 0.8381128584643849, + "grad_norm": 1.1097166538238525, + "learning_rate": 6.716773244519531e-07, + "loss": 0.7235, + "step": 16308 + }, + { + "epoch": 0.8381642512077294, + "grad_norm": 1.1303751468658447, + "learning_rate": 6.712607376186675e-07, + "loss": 0.7271, + "step": 16309 + }, + { + "epoch": 0.8382156439510741, + "grad_norm": 1.2080334424972534, + "learning_rate": 6.708442707168811e-07, + "loss": 0.7088, + "step": 16310 + }, + { + "epoch": 0.8382670366944187, + "grad_norm": 1.0644322633743286, + "learning_rate": 6.704279237581357e-07, + "loss": 0.6901, + "step": 16311 + }, + { + "epoch": 0.8383184294377634, + "grad_norm": 0.7498881816864014, + "learning_rate": 6.700116967539655e-07, + "loss": 0.6282, + "step": 16312 + }, + { + "epoch": 0.838369822181108, + "grad_norm": 1.0880926847457886, + "learning_rate": 6.695955897159023e-07, + "loss": 0.7377, + "step": 16313 + }, + { + "epoch": 0.8384212149244527, + "grad_norm": 1.056373119354248, + "learning_rate": 6.691796026554731e-07, + "loss": 0.722, + "step": 16314 + }, + { + "epoch": 0.8384726076677973, + "grad_norm": 1.1985082626342773, + "learning_rate": 6.687637355842069e-07, + "loss": 0.6538, + "step": 16315 + }, + { + "epoch": 0.838524000411142, + "grad_norm": 1.159277319908142, + "learning_rate": 6.683479885136229e-07, + "loss": 0.6737, + "step": 16316 + }, + { + "epoch": 0.8385753931544866, + "grad_norm": 1.1270928382873535, + "learning_rate": 6.679323614552396e-07, + "loss": 0.735, + "step": 16317 + }, + { + "epoch": 0.8386267858978312, + "grad_norm": 0.7263604402542114, + "learning_rate": 6.675168544205745e-07, + "loss": 0.6337, + "step": 16318 + }, + { + "epoch": 0.8386781786411759, + "grad_norm": 1.1149317026138306, + "learning_rate": 6.671014674211379e-07, + "loss": 0.6978, + "step": 16319 + }, + { + "epoch": 0.8387295713845205, + "grad_norm": 0.7285401225090027, + "learning_rate": 6.666862004684393e-07, + "loss": 0.675, + "step": 16320 + }, + { + "epoch": 0.8387809641278652, + "grad_norm": 1.0955021381378174, + "learning_rate": 6.662710535739819e-07, + "loss": 0.7389, + "step": 16321 + }, + { + "epoch": 0.8388323568712098, + "grad_norm": 1.038291335105896, + "learning_rate": 6.65856026749272e-07, + "loss": 0.6909, + "step": 16322 + }, + { + "epoch": 0.8388837496145545, + "grad_norm": 1.1523593664169312, + "learning_rate": 6.654411200058037e-07, + "loss": 0.7083, + "step": 16323 + }, + { + "epoch": 0.838935142357899, + "grad_norm": 1.0337821245193481, + "learning_rate": 6.650263333550744e-07, + "loss": 0.6697, + "step": 16324 + }, + { + "epoch": 0.8389865351012437, + "grad_norm": 0.6926131844520569, + "learning_rate": 6.646116668085767e-07, + "loss": 0.6304, + "step": 16325 + }, + { + "epoch": 0.8390379278445883, + "grad_norm": 1.12691330909729, + "learning_rate": 6.641971203777975e-07, + "loss": 0.7095, + "step": 16326 + }, + { + "epoch": 0.839089320587933, + "grad_norm": 1.0844695568084717, + "learning_rate": 6.637826940742242e-07, + "loss": 0.6429, + "step": 16327 + }, + { + "epoch": 0.8391407133312776, + "grad_norm": 1.1112098693847656, + "learning_rate": 6.633683879093383e-07, + "loss": 0.6705, + "step": 16328 + }, + { + "epoch": 0.8391921060746222, + "grad_norm": 1.0747679471969604, + "learning_rate": 6.629542018946178e-07, + "loss": 0.7109, + "step": 16329 + }, + { + "epoch": 0.8392434988179669, + "grad_norm": 1.2787449359893799, + "learning_rate": 6.625401360415374e-07, + "loss": 0.6847, + "step": 16330 + }, + { + "epoch": 0.8392948915613115, + "grad_norm": 1.0589516162872314, + "learning_rate": 6.621261903615711e-07, + "loss": 0.6688, + "step": 16331 + }, + { + "epoch": 0.8393462843046562, + "grad_norm": 1.0881831645965576, + "learning_rate": 6.617123648661871e-07, + "loss": 0.7177, + "step": 16332 + }, + { + "epoch": 0.8393976770480008, + "grad_norm": 1.0508501529693604, + "learning_rate": 6.612986595668503e-07, + "loss": 0.6539, + "step": 16333 + }, + { + "epoch": 0.8394490697913455, + "grad_norm": 1.1027936935424805, + "learning_rate": 6.608850744750217e-07, + "loss": 0.7164, + "step": 16334 + }, + { + "epoch": 0.8395004625346901, + "grad_norm": 1.0460678339004517, + "learning_rate": 6.604716096021634e-07, + "loss": 0.6495, + "step": 16335 + }, + { + "epoch": 0.8395518552780348, + "grad_norm": 1.0753124952316284, + "learning_rate": 6.600582649597265e-07, + "loss": 0.696, + "step": 16336 + }, + { + "epoch": 0.8396032480213794, + "grad_norm": 0.7549222707748413, + "learning_rate": 6.596450405591665e-07, + "loss": 0.6546, + "step": 16337 + }, + { + "epoch": 0.8396546407647241, + "grad_norm": 1.0794966220855713, + "learning_rate": 6.592319364119304e-07, + "loss": 0.6884, + "step": 16338 + }, + { + "epoch": 0.8397060335080686, + "grad_norm": 1.1128748655319214, + "learning_rate": 6.58818952529463e-07, + "loss": 0.7199, + "step": 16339 + }, + { + "epoch": 0.8397574262514133, + "grad_norm": 1.1007788181304932, + "learning_rate": 6.584060889232091e-07, + "loss": 0.6986, + "step": 16340 + }, + { + "epoch": 0.8398088189947579, + "grad_norm": 1.101826786994934, + "learning_rate": 6.57993345604605e-07, + "loss": 0.6214, + "step": 16341 + }, + { + "epoch": 0.8398602117381025, + "grad_norm": 1.1578271389007568, + "learning_rate": 6.575807225850872e-07, + "loss": 0.6847, + "step": 16342 + }, + { + "epoch": 0.8399116044814472, + "grad_norm": 1.1246073246002197, + "learning_rate": 6.571682198760864e-07, + "loss": 0.7604, + "step": 16343 + }, + { + "epoch": 0.8399629972247918, + "grad_norm": 1.0500749349594116, + "learning_rate": 6.567558374890348e-07, + "loss": 0.6671, + "step": 16344 + }, + { + "epoch": 0.8400143899681365, + "grad_norm": 1.1606776714324951, + "learning_rate": 6.563435754353531e-07, + "loss": 0.6884, + "step": 16345 + }, + { + "epoch": 0.8400657827114811, + "grad_norm": 0.7513163089752197, + "learning_rate": 6.559314337264666e-07, + "loss": 0.654, + "step": 16346 + }, + { + "epoch": 0.8401171754548258, + "grad_norm": 1.069115400314331, + "learning_rate": 6.555194123737924e-07, + "loss": 0.7108, + "step": 16347 + }, + { + "epoch": 0.8401685681981704, + "grad_norm": 1.1407817602157593, + "learning_rate": 6.551075113887478e-07, + "loss": 0.7177, + "step": 16348 + }, + { + "epoch": 0.8402199609415151, + "grad_norm": 0.8228931427001953, + "learning_rate": 6.546957307827434e-07, + "loss": 0.6328, + "step": 16349 + }, + { + "epoch": 0.8402713536848597, + "grad_norm": 0.8667601346969604, + "learning_rate": 6.542840705671871e-07, + "loss": 0.6562, + "step": 16350 + }, + { + "epoch": 0.8403227464282044, + "grad_norm": 1.1828827857971191, + "learning_rate": 6.538725307534882e-07, + "loss": 0.7112, + "step": 16351 + }, + { + "epoch": 0.840374139171549, + "grad_norm": 1.0517867803573608, + "learning_rate": 6.534611113530437e-07, + "loss": 0.6732, + "step": 16352 + }, + { + "epoch": 0.8404255319148937, + "grad_norm": 0.772987961769104, + "learning_rate": 6.530498123772556e-07, + "loss": 0.6486, + "step": 16353 + }, + { + "epoch": 0.8404769246582383, + "grad_norm": 1.1312000751495361, + "learning_rate": 6.526386338375185e-07, + "loss": 0.658, + "step": 16354 + }, + { + "epoch": 0.8405283174015828, + "grad_norm": 1.168704867362976, + "learning_rate": 6.522275757452245e-07, + "loss": 0.6775, + "step": 16355 + }, + { + "epoch": 0.8405797101449275, + "grad_norm": 7.032253265380859, + "learning_rate": 6.518166381117608e-07, + "loss": 0.7372, + "step": 16356 + }, + { + "epoch": 0.8406311028882721, + "grad_norm": 1.097245454788208, + "learning_rate": 6.514058209485152e-07, + "loss": 0.7519, + "step": 16357 + }, + { + "epoch": 0.8406824956316168, + "grad_norm": 1.080039381980896, + "learning_rate": 6.509951242668688e-07, + "loss": 0.721, + "step": 16358 + }, + { + "epoch": 0.8407338883749614, + "grad_norm": 1.0440746545791626, + "learning_rate": 6.505845480781997e-07, + "loss": 0.6478, + "step": 16359 + }, + { + "epoch": 0.8407852811183061, + "grad_norm": 0.8059173822402954, + "learning_rate": 6.501740923938854e-07, + "loss": 0.6225, + "step": 16360 + }, + { + "epoch": 0.8408366738616507, + "grad_norm": 1.125105857849121, + "learning_rate": 6.497637572252941e-07, + "loss": 0.71, + "step": 16361 + }, + { + "epoch": 0.8408880666049954, + "grad_norm": 15.266189575195312, + "learning_rate": 6.493535425837982e-07, + "loss": 0.7837, + "step": 16362 + }, + { + "epoch": 0.84093945934834, + "grad_norm": 1.1267484426498413, + "learning_rate": 6.489434484807599e-07, + "loss": 0.7231, + "step": 16363 + }, + { + "epoch": 0.8409908520916847, + "grad_norm": 1.1437889337539673, + "learning_rate": 6.485334749275457e-07, + "loss": 0.6679, + "step": 16364 + }, + { + "epoch": 0.8410422448350293, + "grad_norm": 1.1569018363952637, + "learning_rate": 6.481236219355092e-07, + "loss": 0.7389, + "step": 16365 + }, + { + "epoch": 0.841093637578374, + "grad_norm": 1.107492208480835, + "learning_rate": 6.477138895160085e-07, + "loss": 0.7022, + "step": 16366 + }, + { + "epoch": 0.8411450303217186, + "grad_norm": 1.1194957494735718, + "learning_rate": 6.473042776803956e-07, + "loss": 0.7319, + "step": 16367 + }, + { + "epoch": 0.8411964230650633, + "grad_norm": 1.1241450309753418, + "learning_rate": 6.468947864400177e-07, + "loss": 0.6814, + "step": 16368 + }, + { + "epoch": 0.8412478158084079, + "grad_norm": 3.13297438621521, + "learning_rate": 6.464854158062217e-07, + "loss": 0.7357, + "step": 16369 + }, + { + "epoch": 0.8412992085517524, + "grad_norm": 0.7655166983604431, + "learning_rate": 6.460761657903492e-07, + "loss": 0.6447, + "step": 16370 + }, + { + "epoch": 0.8413506012950971, + "grad_norm": 1.1183664798736572, + "learning_rate": 6.456670364037388e-07, + "loss": 0.7154, + "step": 16371 + }, + { + "epoch": 0.8414019940384417, + "grad_norm": 1.0953044891357422, + "learning_rate": 6.452580276577236e-07, + "loss": 0.6693, + "step": 16372 + }, + { + "epoch": 0.8414533867817864, + "grad_norm": 1.1274372339248657, + "learning_rate": 6.448491395636403e-07, + "loss": 0.7206, + "step": 16373 + }, + { + "epoch": 0.841504779525131, + "grad_norm": 1.1264081001281738, + "learning_rate": 6.44440372132813e-07, + "loss": 0.7555, + "step": 16374 + }, + { + "epoch": 0.8415561722684757, + "grad_norm": 1.1442147493362427, + "learning_rate": 6.440317253765688e-07, + "loss": 0.6602, + "step": 16375 + }, + { + "epoch": 0.8416075650118203, + "grad_norm": 1.1056187152862549, + "learning_rate": 6.436231993062298e-07, + "loss": 0.6912, + "step": 16376 + }, + { + "epoch": 0.841658957755165, + "grad_norm": 1.1231343746185303, + "learning_rate": 6.432147939331134e-07, + "loss": 0.6844, + "step": 16377 + }, + { + "epoch": 0.8417103504985096, + "grad_norm": 1.0995243787765503, + "learning_rate": 6.428065092685365e-07, + "loss": 0.6904, + "step": 16378 + }, + { + "epoch": 0.8417617432418543, + "grad_norm": 1.0518836975097656, + "learning_rate": 6.423983453238103e-07, + "loss": 0.7049, + "step": 16379 + }, + { + "epoch": 0.8418131359851989, + "grad_norm": 0.8346656560897827, + "learning_rate": 6.419903021102431e-07, + "loss": 0.645, + "step": 16380 + }, + { + "epoch": 0.8418645287285436, + "grad_norm": 1.084513545036316, + "learning_rate": 6.41582379639139e-07, + "loss": 0.6827, + "step": 16381 + }, + { + "epoch": 0.8419159214718882, + "grad_norm": 1.0619189739227295, + "learning_rate": 6.411745779218026e-07, + "loss": 0.6575, + "step": 16382 + }, + { + "epoch": 0.8419673142152329, + "grad_norm": 1.150114893913269, + "learning_rate": 6.407668969695302e-07, + "loss": 0.694, + "step": 16383 + }, + { + "epoch": 0.8420187069585775, + "grad_norm": 0.7526631951332092, + "learning_rate": 6.403593367936178e-07, + "loss": 0.6828, + "step": 16384 + }, + { + "epoch": 0.842070099701922, + "grad_norm": 1.0531716346740723, + "learning_rate": 6.399518974053564e-07, + "loss": 0.6863, + "step": 16385 + }, + { + "epoch": 0.8421214924452667, + "grad_norm": 0.6946149468421936, + "learning_rate": 6.395445788160365e-07, + "loss": 0.6405, + "step": 16386 + }, + { + "epoch": 0.8421728851886113, + "grad_norm": 0.8326166868209839, + "learning_rate": 6.3913738103694e-07, + "loss": 0.6384, + "step": 16387 + }, + { + "epoch": 0.842224277931956, + "grad_norm": 0.793912947177887, + "learning_rate": 6.387303040793519e-07, + "loss": 0.6449, + "step": 16388 + }, + { + "epoch": 0.8422756706753006, + "grad_norm": 0.9851839542388916, + "learning_rate": 6.383233479545486e-07, + "loss": 0.6542, + "step": 16389 + }, + { + "epoch": 0.8423270634186453, + "grad_norm": 1.1075139045715332, + "learning_rate": 6.379165126738046e-07, + "loss": 0.7194, + "step": 16390 + }, + { + "epoch": 0.8423784561619899, + "grad_norm": 1.078636884689331, + "learning_rate": 6.375097982483941e-07, + "loss": 0.6857, + "step": 16391 + }, + { + "epoch": 0.8424298489053346, + "grad_norm": 1.0742202997207642, + "learning_rate": 6.371032046895842e-07, + "loss": 0.703, + "step": 16392 + }, + { + "epoch": 0.8424812416486792, + "grad_norm": 1.1089184284210205, + "learning_rate": 6.366967320086398e-07, + "loss": 0.6882, + "step": 16393 + }, + { + "epoch": 0.8425326343920239, + "grad_norm": 1.0909452438354492, + "learning_rate": 6.362903802168219e-07, + "loss": 0.695, + "step": 16394 + }, + { + "epoch": 0.8425840271353685, + "grad_norm": 1.1308059692382812, + "learning_rate": 6.358841493253904e-07, + "loss": 0.7211, + "step": 16395 + }, + { + "epoch": 0.8426354198787132, + "grad_norm": 1.1464941501617432, + "learning_rate": 6.354780393455995e-07, + "loss": 0.6763, + "step": 16396 + }, + { + "epoch": 0.8426868126220578, + "grad_norm": 1.0735931396484375, + "learning_rate": 6.350720502887004e-07, + "loss": 0.6413, + "step": 16397 + }, + { + "epoch": 0.8427382053654024, + "grad_norm": 1.1571218967437744, + "learning_rate": 6.346661821659411e-07, + "loss": 0.7242, + "step": 16398 + }, + { + "epoch": 0.8427895981087471, + "grad_norm": 1.7675156593322754, + "learning_rate": 6.342604349885684e-07, + "loss": 0.7324, + "step": 16399 + }, + { + "epoch": 0.8428409908520916, + "grad_norm": 1.088990569114685, + "learning_rate": 6.338548087678226e-07, + "loss": 0.6705, + "step": 16400 + }, + { + "epoch": 0.8428923835954363, + "grad_norm": 1.1002097129821777, + "learning_rate": 6.334493035149408e-07, + "loss": 0.7192, + "step": 16401 + }, + { + "epoch": 0.8429437763387809, + "grad_norm": 1.0985361337661743, + "learning_rate": 6.330439192411614e-07, + "loss": 0.7188, + "step": 16402 + }, + { + "epoch": 0.8429951690821256, + "grad_norm": 1.1096352338790894, + "learning_rate": 6.326386559577119e-07, + "loss": 0.7135, + "step": 16403 + }, + { + "epoch": 0.8430465618254702, + "grad_norm": 1.1229389905929565, + "learning_rate": 6.322335136758229e-07, + "loss": 0.7016, + "step": 16404 + }, + { + "epoch": 0.8430979545688149, + "grad_norm": 1.0732941627502441, + "learning_rate": 6.318284924067181e-07, + "loss": 0.6627, + "step": 16405 + }, + { + "epoch": 0.8431493473121595, + "grad_norm": 1.118199348449707, + "learning_rate": 6.314235921616202e-07, + "loss": 0.6382, + "step": 16406 + }, + { + "epoch": 0.8432007400555042, + "grad_norm": 1.1047911643981934, + "learning_rate": 6.310188129517447e-07, + "loss": 0.6734, + "step": 16407 + }, + { + "epoch": 0.8432521327988488, + "grad_norm": 1.0798594951629639, + "learning_rate": 6.306141547883099e-07, + "loss": 0.7037, + "step": 16408 + }, + { + "epoch": 0.8433035255421935, + "grad_norm": 1.058784008026123, + "learning_rate": 6.302096176825251e-07, + "loss": 0.6673, + "step": 16409 + }, + { + "epoch": 0.8433549182855381, + "grad_norm": 0.7257029414176941, + "learning_rate": 6.298052016455974e-07, + "loss": 0.6424, + "step": 16410 + }, + { + "epoch": 0.8434063110288827, + "grad_norm": 1.08196222782135, + "learning_rate": 6.294009066887346e-07, + "loss": 0.6476, + "step": 16411 + }, + { + "epoch": 0.8434577037722274, + "grad_norm": 1.0518192052841187, + "learning_rate": 6.289967328231355e-07, + "loss": 0.6606, + "step": 16412 + }, + { + "epoch": 0.843509096515572, + "grad_norm": 1.0450294017791748, + "learning_rate": 6.285926800599995e-07, + "loss": 0.7022, + "step": 16413 + }, + { + "epoch": 0.8435604892589167, + "grad_norm": 1.0903228521347046, + "learning_rate": 6.281887484105192e-07, + "loss": 0.7399, + "step": 16414 + }, + { + "epoch": 0.8436118820022612, + "grad_norm": 1.0687614679336548, + "learning_rate": 6.277849378858897e-07, + "loss": 0.7175, + "step": 16415 + }, + { + "epoch": 0.8436632747456059, + "grad_norm": 1.0549064874649048, + "learning_rate": 6.273812484972941e-07, + "loss": 0.6782, + "step": 16416 + }, + { + "epoch": 0.8437146674889505, + "grad_norm": 1.0292104482650757, + "learning_rate": 6.269776802559208e-07, + "loss": 0.6921, + "step": 16417 + }, + { + "epoch": 0.8437660602322952, + "grad_norm": 1.0841951370239258, + "learning_rate": 6.265742331729491e-07, + "loss": 0.6522, + "step": 16418 + }, + { + "epoch": 0.8438174529756398, + "grad_norm": 1.1700786352157593, + "learning_rate": 6.261709072595568e-07, + "loss": 0.6904, + "step": 16419 + }, + { + "epoch": 0.8438688457189845, + "grad_norm": 1.1372413635253906, + "learning_rate": 6.257677025269193e-07, + "loss": 0.7169, + "step": 16420 + }, + { + "epoch": 0.8439202384623291, + "grad_norm": 1.1404823064804077, + "learning_rate": 6.253646189862078e-07, + "loss": 0.7212, + "step": 16421 + }, + { + "epoch": 0.8439716312056738, + "grad_norm": 1.1127538681030273, + "learning_rate": 6.249616566485899e-07, + "loss": 0.6829, + "step": 16422 + }, + { + "epoch": 0.8440230239490184, + "grad_norm": 0.7741541266441345, + "learning_rate": 6.245588155252285e-07, + "loss": 0.6228, + "step": 16423 + }, + { + "epoch": 0.844074416692363, + "grad_norm": 1.3361481428146362, + "learning_rate": 6.24156095627288e-07, + "loss": 0.6653, + "step": 16424 + }, + { + "epoch": 0.8441258094357077, + "grad_norm": 1.226232886314392, + "learning_rate": 6.23753496965922e-07, + "loss": 0.5761, + "step": 16425 + }, + { + "epoch": 0.8441772021790523, + "grad_norm": 1.0449851751327515, + "learning_rate": 6.233510195522874e-07, + "loss": 0.6375, + "step": 16426 + }, + { + "epoch": 0.844228594922397, + "grad_norm": 1.0675405263900757, + "learning_rate": 6.229486633975346e-07, + "loss": 0.6994, + "step": 16427 + }, + { + "epoch": 0.8442799876657416, + "grad_norm": 2.415781021118164, + "learning_rate": 6.225464285128119e-07, + "loss": 0.7259, + "step": 16428 + }, + { + "epoch": 0.8443313804090863, + "grad_norm": 0.8723052740097046, + "learning_rate": 6.221443149092626e-07, + "loss": 0.6411, + "step": 16429 + }, + { + "epoch": 0.8443827731524309, + "grad_norm": 1.0396190881729126, + "learning_rate": 6.217423225980285e-07, + "loss": 0.6605, + "step": 16430 + }, + { + "epoch": 0.8444341658957755, + "grad_norm": 1.068182349205017, + "learning_rate": 6.213404515902466e-07, + "loss": 0.7565, + "step": 16431 + }, + { + "epoch": 0.8444855586391201, + "grad_norm": 1.012959361076355, + "learning_rate": 6.209387018970498e-07, + "loss": 0.6902, + "step": 16432 + }, + { + "epoch": 0.8445369513824648, + "grad_norm": 1.0755106210708618, + "learning_rate": 6.205370735295713e-07, + "loss": 0.71, + "step": 16433 + }, + { + "epoch": 0.8445883441258094, + "grad_norm": 0.8704649806022644, + "learning_rate": 6.201355664989378e-07, + "loss": 0.7052, + "step": 16434 + }, + { + "epoch": 0.8446397368691541, + "grad_norm": 1.0970100164413452, + "learning_rate": 6.197341808162732e-07, + "loss": 0.6276, + "step": 16435 + }, + { + "epoch": 0.8446911296124987, + "grad_norm": 1.0853278636932373, + "learning_rate": 6.193329164926964e-07, + "loss": 0.7041, + "step": 16436 + }, + { + "epoch": 0.8447425223558434, + "grad_norm": 0.7823596596717834, + "learning_rate": 6.189317735393297e-07, + "loss": 0.6432, + "step": 16437 + }, + { + "epoch": 0.844793915099188, + "grad_norm": 1.1115933656692505, + "learning_rate": 6.185307519672812e-07, + "loss": 0.6838, + "step": 16438 + }, + { + "epoch": 0.8448453078425326, + "grad_norm": 1.098714828491211, + "learning_rate": 6.181298517876655e-07, + "loss": 0.724, + "step": 16439 + }, + { + "epoch": 0.8448967005858773, + "grad_norm": 1.0852597951889038, + "learning_rate": 6.177290730115876e-07, + "loss": 0.7007, + "step": 16440 + }, + { + "epoch": 0.8449480933292219, + "grad_norm": 1.1243349313735962, + "learning_rate": 6.173284156501535e-07, + "loss": 0.6795, + "step": 16441 + }, + { + "epoch": 0.8449994860725666, + "grad_norm": 1.1116089820861816, + "learning_rate": 6.169278797144629e-07, + "loss": 0.6869, + "step": 16442 + }, + { + "epoch": 0.8450508788159112, + "grad_norm": 1.0651346445083618, + "learning_rate": 6.165274652156116e-07, + "loss": 0.6933, + "step": 16443 + }, + { + "epoch": 0.8451022715592559, + "grad_norm": 1.040443778038025, + "learning_rate": 6.161271721646972e-07, + "loss": 0.7091, + "step": 16444 + }, + { + "epoch": 0.8451536643026005, + "grad_norm": 0.7358945608139038, + "learning_rate": 6.15727000572805e-07, + "loss": 0.647, + "step": 16445 + }, + { + "epoch": 0.8452050570459451, + "grad_norm": 1.0403320789337158, + "learning_rate": 6.153269504510262e-07, + "loss": 0.719, + "step": 16446 + }, + { + "epoch": 0.8452564497892897, + "grad_norm": 1.0431816577911377, + "learning_rate": 6.149270218104436e-07, + "loss": 0.645, + "step": 16447 + }, + { + "epoch": 0.8453078425326344, + "grad_norm": 1.062342882156372, + "learning_rate": 6.145272146621367e-07, + "loss": 0.686, + "step": 16448 + }, + { + "epoch": 0.845359235275979, + "grad_norm": 1.0591074228286743, + "learning_rate": 6.141275290171816e-07, + "loss": 0.6523, + "step": 16449 + }, + { + "epoch": 0.8454106280193237, + "grad_norm": 0.7416768074035645, + "learning_rate": 6.137279648866546e-07, + "loss": 0.6724, + "step": 16450 + }, + { + "epoch": 0.8454620207626683, + "grad_norm": 1.0046964883804321, + "learning_rate": 6.133285222816243e-07, + "loss": 0.6232, + "step": 16451 + }, + { + "epoch": 0.845513413506013, + "grad_norm": 1.06511652469635, + "learning_rate": 6.129292012131571e-07, + "loss": 0.6533, + "step": 16452 + }, + { + "epoch": 0.8455648062493576, + "grad_norm": 0.7565569281578064, + "learning_rate": 6.125300016923197e-07, + "loss": 0.6444, + "step": 16453 + }, + { + "epoch": 0.8456161989927022, + "grad_norm": 0.7711178660392761, + "learning_rate": 6.121309237301676e-07, + "loss": 0.6965, + "step": 16454 + }, + { + "epoch": 0.8456675917360469, + "grad_norm": 1.035863995552063, + "learning_rate": 6.117319673377609e-07, + "loss": 0.6818, + "step": 16455 + }, + { + "epoch": 0.8457189844793915, + "grad_norm": 1.1163461208343506, + "learning_rate": 6.113331325261512e-07, + "loss": 0.6807, + "step": 16456 + }, + { + "epoch": 0.8457703772227362, + "grad_norm": 1.1405702829360962, + "learning_rate": 6.10934419306391e-07, + "loss": 0.6186, + "step": 16457 + }, + { + "epoch": 0.8458217699660808, + "grad_norm": 1.0459213256835938, + "learning_rate": 6.105358276895241e-07, + "loss": 0.6521, + "step": 16458 + }, + { + "epoch": 0.8458731627094255, + "grad_norm": 1.0791577100753784, + "learning_rate": 6.10137357686596e-07, + "loss": 0.6981, + "step": 16459 + }, + { + "epoch": 0.8459245554527701, + "grad_norm": 0.6540331244468689, + "learning_rate": 6.097390093086453e-07, + "loss": 0.6295, + "step": 16460 + }, + { + "epoch": 0.8459759481961147, + "grad_norm": 1.1189758777618408, + "learning_rate": 6.093407825667086e-07, + "loss": 0.669, + "step": 16461 + }, + { + "epoch": 0.8460273409394593, + "grad_norm": 1.04123854637146, + "learning_rate": 6.089426774718205e-07, + "loss": 0.6561, + "step": 16462 + }, + { + "epoch": 0.846078733682804, + "grad_norm": 1.1685868501663208, + "learning_rate": 6.085446940350104e-07, + "loss": 0.6897, + "step": 16463 + }, + { + "epoch": 0.8461301264261486, + "grad_norm": 1.245519757270813, + "learning_rate": 6.08146832267304e-07, + "loss": 0.7106, + "step": 16464 + }, + { + "epoch": 0.8461815191694932, + "grad_norm": 1.0681719779968262, + "learning_rate": 6.077490921797236e-07, + "loss": 0.7225, + "step": 16465 + }, + { + "epoch": 0.8462329119128379, + "grad_norm": 1.158044457435608, + "learning_rate": 6.07351473783292e-07, + "loss": 0.6879, + "step": 16466 + }, + { + "epoch": 0.8462843046561825, + "grad_norm": 1.0493100881576538, + "learning_rate": 6.06953977089022e-07, + "loss": 0.6946, + "step": 16467 + }, + { + "epoch": 0.8463356973995272, + "grad_norm": 0.7046316862106323, + "learning_rate": 6.065566021079294e-07, + "loss": 0.6065, + "step": 16468 + }, + { + "epoch": 0.8463870901428718, + "grad_norm": 1.0526148080825806, + "learning_rate": 6.061593488510226e-07, + "loss": 0.6351, + "step": 16469 + }, + { + "epoch": 0.8464384828862165, + "grad_norm": 1.16148042678833, + "learning_rate": 6.057622173293065e-07, + "loss": 0.7008, + "step": 16470 + }, + { + "epoch": 0.8464898756295611, + "grad_norm": 1.1463459730148315, + "learning_rate": 6.053652075537869e-07, + "loss": 0.7136, + "step": 16471 + }, + { + "epoch": 0.8465412683729058, + "grad_norm": 1.1279423236846924, + "learning_rate": 6.049683195354605e-07, + "loss": 0.697, + "step": 16472 + }, + { + "epoch": 0.8465926611162504, + "grad_norm": 1.0883005857467651, + "learning_rate": 6.045715532853269e-07, + "loss": 0.6773, + "step": 16473 + }, + { + "epoch": 0.8466440538595951, + "grad_norm": 1.1768378019332886, + "learning_rate": 6.041749088143744e-07, + "loss": 0.6551, + "step": 16474 + }, + { + "epoch": 0.8466954466029397, + "grad_norm": 0.79721599817276, + "learning_rate": 6.03778386133596e-07, + "loss": 0.6368, + "step": 16475 + }, + { + "epoch": 0.8467468393462843, + "grad_norm": 1.1473215818405151, + "learning_rate": 6.033819852539769e-07, + "loss": 0.6891, + "step": 16476 + }, + { + "epoch": 0.8467982320896289, + "grad_norm": 1.019971251487732, + "learning_rate": 6.029857061864986e-07, + "loss": 0.6982, + "step": 16477 + }, + { + "epoch": 0.8468496248329735, + "grad_norm": 1.1158809661865234, + "learning_rate": 6.025895489421397e-07, + "loss": 0.703, + "step": 16478 + }, + { + "epoch": 0.8469010175763182, + "grad_norm": 1.1332000494003296, + "learning_rate": 6.021935135318791e-07, + "loss": 0.6711, + "step": 16479 + }, + { + "epoch": 0.8469524103196628, + "grad_norm": 1.0930920839309692, + "learning_rate": 6.017975999666869e-07, + "loss": 0.6922, + "step": 16480 + }, + { + "epoch": 0.8470038030630075, + "grad_norm": 1.1375925540924072, + "learning_rate": 6.014018082575329e-07, + "loss": 0.7206, + "step": 16481 + }, + { + "epoch": 0.8470551958063521, + "grad_norm": 1.0558089017868042, + "learning_rate": 6.010061384153831e-07, + "loss": 0.6713, + "step": 16482 + }, + { + "epoch": 0.8471065885496968, + "grad_norm": 1.0715219974517822, + "learning_rate": 6.006105904511989e-07, + "loss": 0.6917, + "step": 16483 + }, + { + "epoch": 0.8471579812930414, + "grad_norm": 1.0560548305511475, + "learning_rate": 6.002151643759407e-07, + "loss": 0.7248, + "step": 16484 + }, + { + "epoch": 0.8472093740363861, + "grad_norm": 1.0935081243515015, + "learning_rate": 5.998198602005634e-07, + "loss": 0.6817, + "step": 16485 + }, + { + "epoch": 0.8472607667797307, + "grad_norm": 0.7103713750839233, + "learning_rate": 5.994246779360197e-07, + "loss": 0.6353, + "step": 16486 + }, + { + "epoch": 0.8473121595230754, + "grad_norm": 0.7412858009338379, + "learning_rate": 5.990296175932564e-07, + "loss": 0.6136, + "step": 16487 + }, + { + "epoch": 0.84736355226642, + "grad_norm": 0.7032665014266968, + "learning_rate": 5.98634679183222e-07, + "loss": 0.6573, + "step": 16488 + }, + { + "epoch": 0.8474149450097647, + "grad_norm": 0.7189478278160095, + "learning_rate": 5.982398627168579e-07, + "loss": 0.6413, + "step": 16489 + }, + { + "epoch": 0.8474663377531093, + "grad_norm": 1.0538734197616577, + "learning_rate": 5.978451682051012e-07, + "loss": 0.6247, + "step": 16490 + }, + { + "epoch": 0.8475177304964538, + "grad_norm": 0.7165653109550476, + "learning_rate": 5.974505956588877e-07, + "loss": 0.5821, + "step": 16491 + }, + { + "epoch": 0.8475691232397985, + "grad_norm": 1.0727893114089966, + "learning_rate": 5.97056145089151e-07, + "loss": 0.6604, + "step": 16492 + }, + { + "epoch": 0.8476205159831431, + "grad_norm": 1.101873755455017, + "learning_rate": 5.966618165068189e-07, + "loss": 0.7053, + "step": 16493 + }, + { + "epoch": 0.8476719087264878, + "grad_norm": 1.107740044593811, + "learning_rate": 5.962676099228148e-07, + "loss": 0.6712, + "step": 16494 + }, + { + "epoch": 0.8477233014698324, + "grad_norm": 1.0503225326538086, + "learning_rate": 5.958735253480647e-07, + "loss": 0.6514, + "step": 16495 + }, + { + "epoch": 0.8477746942131771, + "grad_norm": 1.180428147315979, + "learning_rate": 5.954795627934818e-07, + "loss": 0.696, + "step": 16496 + }, + { + "epoch": 0.8478260869565217, + "grad_norm": 0.7621245384216309, + "learning_rate": 5.950857222699852e-07, + "loss": 0.6549, + "step": 16497 + }, + { + "epoch": 0.8478774796998664, + "grad_norm": 1.0458861589431763, + "learning_rate": 5.946920037884851e-07, + "loss": 0.6516, + "step": 16498 + }, + { + "epoch": 0.847928872443211, + "grad_norm": 0.7500308156013489, + "learning_rate": 5.942984073598901e-07, + "loss": 0.6519, + "step": 16499 + }, + { + "epoch": 0.8479802651865557, + "grad_norm": 1.07485830783844, + "learning_rate": 5.939049329951041e-07, + "loss": 0.6915, + "step": 16500 + }, + { + "epoch": 0.8480316579299003, + "grad_norm": 1.1496615409851074, + "learning_rate": 5.935115807050301e-07, + "loss": 0.7269, + "step": 16501 + }, + { + "epoch": 0.848083050673245, + "grad_norm": 1.0851554870605469, + "learning_rate": 5.931183505005661e-07, + "loss": 0.6538, + "step": 16502 + }, + { + "epoch": 0.8481344434165896, + "grad_norm": 1.1087366342544556, + "learning_rate": 5.92725242392605e-07, + "loss": 0.7092, + "step": 16503 + }, + { + "epoch": 0.8481858361599343, + "grad_norm": 1.0834993124008179, + "learning_rate": 5.923322563920409e-07, + "loss": 0.6432, + "step": 16504 + }, + { + "epoch": 0.8482372289032789, + "grad_norm": 1.0779434442520142, + "learning_rate": 5.919393925097611e-07, + "loss": 0.6554, + "step": 16505 + }, + { + "epoch": 0.8482886216466234, + "grad_norm": 1.258431077003479, + "learning_rate": 5.915466507566492e-07, + "loss": 0.7716, + "step": 16506 + }, + { + "epoch": 0.8483400143899681, + "grad_norm": 0.6868913769721985, + "learning_rate": 5.911540311435859e-07, + "loss": 0.651, + "step": 16507 + }, + { + "epoch": 0.8483914071333127, + "grad_norm": 1.0884453058242798, + "learning_rate": 5.907615336814527e-07, + "loss": 0.668, + "step": 16508 + }, + { + "epoch": 0.8484427998766574, + "grad_norm": 1.068174958229065, + "learning_rate": 5.903691583811189e-07, + "loss": 0.6714, + "step": 16509 + }, + { + "epoch": 0.848494192620002, + "grad_norm": 1.0838615894317627, + "learning_rate": 5.899769052534598e-07, + "loss": 0.6706, + "step": 16510 + }, + { + "epoch": 0.8485455853633467, + "grad_norm": 1.1418522596359253, + "learning_rate": 5.895847743093414e-07, + "loss": 0.7165, + "step": 16511 + }, + { + "epoch": 0.8485969781066913, + "grad_norm": 1.1390329599380493, + "learning_rate": 5.891927655596269e-07, + "loss": 0.6505, + "step": 16512 + }, + { + "epoch": 0.848648370850036, + "grad_norm": 1.1139330863952637, + "learning_rate": 5.8880087901518e-07, + "loss": 0.6799, + "step": 16513 + }, + { + "epoch": 0.8486997635933806, + "grad_norm": 1.0549362897872925, + "learning_rate": 5.884091146868565e-07, + "loss": 0.6929, + "step": 16514 + }, + { + "epoch": 0.8487511563367253, + "grad_norm": 1.1215424537658691, + "learning_rate": 5.880174725855109e-07, + "loss": 0.7369, + "step": 16515 + }, + { + "epoch": 0.8488025490800699, + "grad_norm": 0.7168447971343994, + "learning_rate": 5.87625952721993e-07, + "loss": 0.6686, + "step": 16516 + }, + { + "epoch": 0.8488539418234146, + "grad_norm": 0.6985307931900024, + "learning_rate": 5.872345551071529e-07, + "loss": 0.637, + "step": 16517 + }, + { + "epoch": 0.8489053345667592, + "grad_norm": 1.109653115272522, + "learning_rate": 5.868432797518314e-07, + "loss": 0.7013, + "step": 16518 + }, + { + "epoch": 0.8489567273101039, + "grad_norm": 1.08949875831604, + "learning_rate": 5.86452126666871e-07, + "loss": 0.7005, + "step": 16519 + }, + { + "epoch": 0.8490081200534485, + "grad_norm": 1.080488681793213, + "learning_rate": 5.86061095863108e-07, + "loss": 0.6812, + "step": 16520 + }, + { + "epoch": 0.8490595127967931, + "grad_norm": 1.1085983514785767, + "learning_rate": 5.856701873513776e-07, + "loss": 0.6932, + "step": 16521 + }, + { + "epoch": 0.8491109055401377, + "grad_norm": 0.7504760026931763, + "learning_rate": 5.852794011425094e-07, + "loss": 0.6083, + "step": 16522 + }, + { + "epoch": 0.8491622982834823, + "grad_norm": 1.0299628973007202, + "learning_rate": 5.848887372473299e-07, + "loss": 0.6972, + "step": 16523 + }, + { + "epoch": 0.849213691026827, + "grad_norm": 1.0379050970077515, + "learning_rate": 5.844981956766649e-07, + "loss": 0.7466, + "step": 16524 + }, + { + "epoch": 0.8492650837701716, + "grad_norm": 1.0614036321640015, + "learning_rate": 5.841077764413317e-07, + "loss": 0.7266, + "step": 16525 + }, + { + "epoch": 0.8493164765135163, + "grad_norm": 0.8220763802528381, + "learning_rate": 5.837174795521494e-07, + "loss": 0.6375, + "step": 16526 + }, + { + "epoch": 0.8493678692568609, + "grad_norm": 1.0771716833114624, + "learning_rate": 5.833273050199306e-07, + "loss": 0.7028, + "step": 16527 + }, + { + "epoch": 0.8494192620002056, + "grad_norm": 1.1031699180603027, + "learning_rate": 5.829372528554861e-07, + "loss": 0.6758, + "step": 16528 + }, + { + "epoch": 0.8494706547435502, + "grad_norm": 0.7094926238059998, + "learning_rate": 5.82547323069621e-07, + "loss": 0.6222, + "step": 16529 + }, + { + "epoch": 0.8495220474868949, + "grad_norm": 1.102644681930542, + "learning_rate": 5.821575156731407e-07, + "loss": 0.6791, + "step": 16530 + }, + { + "epoch": 0.8495734402302395, + "grad_norm": 1.1244462728500366, + "learning_rate": 5.817678306768443e-07, + "loss": 0.6972, + "step": 16531 + }, + { + "epoch": 0.8496248329735842, + "grad_norm": 0.7482755780220032, + "learning_rate": 5.813782680915287e-07, + "loss": 0.641, + "step": 16532 + }, + { + "epoch": 0.8496762257169288, + "grad_norm": 1.0783021450042725, + "learning_rate": 5.809888279279863e-07, + "loss": 0.6847, + "step": 16533 + }, + { + "epoch": 0.8497276184602734, + "grad_norm": 1.063064455986023, + "learning_rate": 5.805995101970063e-07, + "loss": 0.6684, + "step": 16534 + }, + { + "epoch": 0.8497790112036181, + "grad_norm": 1.0792648792266846, + "learning_rate": 5.802103149093774e-07, + "loss": 0.6628, + "step": 16535 + }, + { + "epoch": 0.8498304039469627, + "grad_norm": 0.7009086608886719, + "learning_rate": 5.798212420758798e-07, + "loss": 0.7046, + "step": 16536 + }, + { + "epoch": 0.8498817966903073, + "grad_norm": 1.1129181385040283, + "learning_rate": 5.79432291707297e-07, + "loss": 0.6975, + "step": 16537 + }, + { + "epoch": 0.8499331894336519, + "grad_norm": 1.1756367683410645, + "learning_rate": 5.790434638144005e-07, + "loss": 0.6945, + "step": 16538 + }, + { + "epoch": 0.8499845821769966, + "grad_norm": 1.1293703317642212, + "learning_rate": 5.786547584079666e-07, + "loss": 0.6768, + "step": 16539 + }, + { + "epoch": 0.8500359749203412, + "grad_norm": 0.843086302280426, + "learning_rate": 5.782661754987634e-07, + "loss": 0.617, + "step": 16540 + }, + { + "epoch": 0.8500873676636859, + "grad_norm": 1.1602028608322144, + "learning_rate": 5.778777150975573e-07, + "loss": 0.6618, + "step": 16541 + }, + { + "epoch": 0.8501387604070305, + "grad_norm": 1.1112890243530273, + "learning_rate": 5.774893772151097e-07, + "loss": 0.7418, + "step": 16542 + }, + { + "epoch": 0.8501901531503752, + "grad_norm": 1.0591968297958374, + "learning_rate": 5.771011618621813e-07, + "loss": 0.6521, + "step": 16543 + }, + { + "epoch": 0.8502415458937198, + "grad_norm": 1.0433895587921143, + "learning_rate": 5.767130690495281e-07, + "loss": 0.6735, + "step": 16544 + }, + { + "epoch": 0.8502929386370645, + "grad_norm": 1.1210577487945557, + "learning_rate": 5.763250987879004e-07, + "loss": 0.6845, + "step": 16545 + }, + { + "epoch": 0.8503443313804091, + "grad_norm": 1.080849051475525, + "learning_rate": 5.759372510880512e-07, + "loss": 0.6186, + "step": 16546 + }, + { + "epoch": 0.8503957241237537, + "grad_norm": 1.1271902322769165, + "learning_rate": 5.755495259607219e-07, + "loss": 0.7142, + "step": 16547 + }, + { + "epoch": 0.8504471168670984, + "grad_norm": 1.0916759967803955, + "learning_rate": 5.751619234166573e-07, + "loss": 0.7257, + "step": 16548 + }, + { + "epoch": 0.850498509610443, + "grad_norm": 0.9473386406898499, + "learning_rate": 5.747744434665942e-07, + "loss": 0.6341, + "step": 16549 + }, + { + "epoch": 0.8505499023537877, + "grad_norm": 1.0855633020401, + "learning_rate": 5.743870861212719e-07, + "loss": 0.6276, + "step": 16550 + }, + { + "epoch": 0.8506012950971323, + "grad_norm": 1.021533489227295, + "learning_rate": 5.73999851391418e-07, + "loss": 0.6438, + "step": 16551 + }, + { + "epoch": 0.8506526878404769, + "grad_norm": 1.1036608219146729, + "learning_rate": 5.736127392877644e-07, + "loss": 0.7253, + "step": 16552 + }, + { + "epoch": 0.8507040805838215, + "grad_norm": 1.1576470136642456, + "learning_rate": 5.732257498210347e-07, + "loss": 0.6909, + "step": 16553 + }, + { + "epoch": 0.8507554733271662, + "grad_norm": 1.08005952835083, + "learning_rate": 5.728388830019505e-07, + "loss": 0.7271, + "step": 16554 + }, + { + "epoch": 0.8508068660705108, + "grad_norm": 1.0497759580612183, + "learning_rate": 5.724521388412319e-07, + "loss": 0.6959, + "step": 16555 + }, + { + "epoch": 0.8508582588138555, + "grad_norm": 1.0315431356430054, + "learning_rate": 5.720655173495931e-07, + "loss": 0.6368, + "step": 16556 + }, + { + "epoch": 0.8509096515572001, + "grad_norm": 1.0845293998718262, + "learning_rate": 5.716790185377464e-07, + "loss": 0.7169, + "step": 16557 + }, + { + "epoch": 0.8509610443005448, + "grad_norm": 0.82472163438797, + "learning_rate": 5.712926424163978e-07, + "loss": 0.6232, + "step": 16558 + }, + { + "epoch": 0.8510124370438894, + "grad_norm": 1.0753751993179321, + "learning_rate": 5.709063889962563e-07, + "loss": 0.7009, + "step": 16559 + }, + { + "epoch": 0.851063829787234, + "grad_norm": 1.1726080179214478, + "learning_rate": 5.705202582880187e-07, + "loss": 0.7508, + "step": 16560 + }, + { + "epoch": 0.8511152225305787, + "grad_norm": 1.0945407152175903, + "learning_rate": 5.701342503023865e-07, + "loss": 0.6906, + "step": 16561 + }, + { + "epoch": 0.8511666152739233, + "grad_norm": 1.2457739114761353, + "learning_rate": 5.697483650500529e-07, + "loss": 0.6486, + "step": 16562 + }, + { + "epoch": 0.851218008017268, + "grad_norm": 1.0417375564575195, + "learning_rate": 5.693626025417087e-07, + "loss": 0.6945, + "step": 16563 + }, + { + "epoch": 0.8512694007606126, + "grad_norm": 0.7327407002449036, + "learning_rate": 5.689769627880442e-07, + "loss": 0.6523, + "step": 16564 + }, + { + "epoch": 0.8513207935039573, + "grad_norm": 1.0250177383422852, + "learning_rate": 5.685914457997405e-07, + "loss": 0.7026, + "step": 16565 + }, + { + "epoch": 0.8513721862473019, + "grad_norm": 1.1260474920272827, + "learning_rate": 5.682060515874833e-07, + "loss": 0.7034, + "step": 16566 + }, + { + "epoch": 0.8514235789906465, + "grad_norm": 1.1305336952209473, + "learning_rate": 5.678207801619445e-07, + "loss": 0.7121, + "step": 16567 + }, + { + "epoch": 0.8514749717339911, + "grad_norm": 1.0871243476867676, + "learning_rate": 5.674356315338031e-07, + "loss": 0.6933, + "step": 16568 + }, + { + "epoch": 0.8515263644773358, + "grad_norm": 1.0837122201919556, + "learning_rate": 5.670506057137282e-07, + "loss": 0.6857, + "step": 16569 + }, + { + "epoch": 0.8515777572206804, + "grad_norm": 1.1311004161834717, + "learning_rate": 5.666657027123868e-07, + "loss": 0.7232, + "step": 16570 + }, + { + "epoch": 0.8516291499640251, + "grad_norm": 1.0921376943588257, + "learning_rate": 5.66280922540443e-07, + "loss": 0.7059, + "step": 16571 + }, + { + "epoch": 0.8516805427073697, + "grad_norm": 1.0684804916381836, + "learning_rate": 5.658962652085586e-07, + "loss": 0.7173, + "step": 16572 + }, + { + "epoch": 0.8517319354507144, + "grad_norm": 1.148685336112976, + "learning_rate": 5.655117307273905e-07, + "loss": 0.7143, + "step": 16573 + }, + { + "epoch": 0.851783328194059, + "grad_norm": 1.1506116390228271, + "learning_rate": 5.651273191075912e-07, + "loss": 0.7358, + "step": 16574 + }, + { + "epoch": 0.8518347209374036, + "grad_norm": 0.7286731600761414, + "learning_rate": 5.647430303598145e-07, + "loss": 0.6468, + "step": 16575 + }, + { + "epoch": 0.8518861136807483, + "grad_norm": 1.1082874536514282, + "learning_rate": 5.643588644947029e-07, + "loss": 0.7421, + "step": 16576 + }, + { + "epoch": 0.8519375064240929, + "grad_norm": 1.151232361793518, + "learning_rate": 5.639748215229035e-07, + "loss": 0.7636, + "step": 16577 + }, + { + "epoch": 0.8519888991674376, + "grad_norm": 0.7597556114196777, + "learning_rate": 5.63590901455055e-07, + "loss": 0.6994, + "step": 16578 + }, + { + "epoch": 0.8520402919107822, + "grad_norm": 1.140904426574707, + "learning_rate": 5.632071043017951e-07, + "loss": 0.7032, + "step": 16579 + }, + { + "epoch": 0.8520916846541269, + "grad_norm": 1.0783642530441284, + "learning_rate": 5.628234300737556e-07, + "loss": 0.6932, + "step": 16580 + }, + { + "epoch": 0.8521430773974715, + "grad_norm": 1.0700945854187012, + "learning_rate": 5.624398787815688e-07, + "loss": 0.6619, + "step": 16581 + }, + { + "epoch": 0.8521944701408161, + "grad_norm": 1.1012630462646484, + "learning_rate": 5.620564504358599e-07, + "loss": 0.739, + "step": 16582 + }, + { + "epoch": 0.8522458628841607, + "grad_norm": 1.0722440481185913, + "learning_rate": 5.61673145047253e-07, + "loss": 0.679, + "step": 16583 + }, + { + "epoch": 0.8522972556275054, + "grad_norm": 0.8380346894264221, + "learning_rate": 5.612899626263657e-07, + "loss": 0.6816, + "step": 16584 + }, + { + "epoch": 0.85234864837085, + "grad_norm": 1.1677442789077759, + "learning_rate": 5.609069031838176e-07, + "loss": 0.7221, + "step": 16585 + }, + { + "epoch": 0.8524000411141947, + "grad_norm": 1.1117154359817505, + "learning_rate": 5.605239667302198e-07, + "loss": 0.6751, + "step": 16586 + }, + { + "epoch": 0.8524514338575393, + "grad_norm": 1.1337196826934814, + "learning_rate": 5.601411532761814e-07, + "loss": 0.6848, + "step": 16587 + }, + { + "epoch": 0.852502826600884, + "grad_norm": 0.7420323491096497, + "learning_rate": 5.597584628323116e-07, + "loss": 0.6952, + "step": 16588 + }, + { + "epoch": 0.8525542193442286, + "grad_norm": 1.1757662296295166, + "learning_rate": 5.593758954092088e-07, + "loss": 0.6498, + "step": 16589 + }, + { + "epoch": 0.8526056120875732, + "grad_norm": 1.0438846349716187, + "learning_rate": 5.589934510174755e-07, + "loss": 0.6772, + "step": 16590 + }, + { + "epoch": 0.8526570048309179, + "grad_norm": 1.0811257362365723, + "learning_rate": 5.586111296677071e-07, + "loss": 0.7021, + "step": 16591 + }, + { + "epoch": 0.8527083975742625, + "grad_norm": 1.133449912071228, + "learning_rate": 5.582289313704953e-07, + "loss": 0.6771, + "step": 16592 + }, + { + "epoch": 0.8527597903176072, + "grad_norm": 1.09807550907135, + "learning_rate": 5.578468561364292e-07, + "loss": 0.7462, + "step": 16593 + }, + { + "epoch": 0.8528111830609518, + "grad_norm": 1.0715973377227783, + "learning_rate": 5.574649039760955e-07, + "loss": 0.7161, + "step": 16594 + }, + { + "epoch": 0.8528625758042965, + "grad_norm": 1.092837929725647, + "learning_rate": 5.570830749000766e-07, + "loss": 0.6549, + "step": 16595 + }, + { + "epoch": 0.8529139685476411, + "grad_norm": 1.0895129442214966, + "learning_rate": 5.567013689189493e-07, + "loss": 0.6543, + "step": 16596 + }, + { + "epoch": 0.8529653612909857, + "grad_norm": 1.0654332637786865, + "learning_rate": 5.563197860432917e-07, + "loss": 0.7447, + "step": 16597 + }, + { + "epoch": 0.8530167540343303, + "grad_norm": 1.063740611076355, + "learning_rate": 5.559383262836754e-07, + "loss": 0.7001, + "step": 16598 + }, + { + "epoch": 0.853068146777675, + "grad_norm": 1.0838167667388916, + "learning_rate": 5.555569896506679e-07, + "loss": 0.648, + "step": 16599 + }, + { + "epoch": 0.8531195395210196, + "grad_norm": 1.110093355178833, + "learning_rate": 5.551757761548343e-07, + "loss": 0.7094, + "step": 16600 + }, + { + "epoch": 0.8531709322643642, + "grad_norm": 1.1284714937210083, + "learning_rate": 5.547946858067393e-07, + "loss": 0.6962, + "step": 16601 + }, + { + "epoch": 0.8532223250077089, + "grad_norm": 1.098952293395996, + "learning_rate": 5.544137186169373e-07, + "loss": 0.6761, + "step": 16602 + }, + { + "epoch": 0.8532737177510535, + "grad_norm": 1.1246854066848755, + "learning_rate": 5.540328745959866e-07, + "loss": 0.6712, + "step": 16603 + }, + { + "epoch": 0.8533251104943982, + "grad_norm": 1.0508147478103638, + "learning_rate": 5.536521537544371e-07, + "loss": 0.7032, + "step": 16604 + }, + { + "epoch": 0.8533765032377428, + "grad_norm": 1.0919137001037598, + "learning_rate": 5.532715561028363e-07, + "loss": 0.7274, + "step": 16605 + }, + { + "epoch": 0.8534278959810875, + "grad_norm": 1.107042670249939, + "learning_rate": 5.528910816517314e-07, + "loss": 0.6867, + "step": 16606 + }, + { + "epoch": 0.8534792887244321, + "grad_norm": 0.813794732093811, + "learning_rate": 5.525107304116622e-07, + "loss": 0.671, + "step": 16607 + }, + { + "epoch": 0.8535306814677768, + "grad_norm": 1.0404741764068604, + "learning_rate": 5.521305023931673e-07, + "loss": 0.6457, + "step": 16608 + }, + { + "epoch": 0.8535820742111214, + "grad_norm": 0.6791060566902161, + "learning_rate": 5.517503976067801e-07, + "loss": 0.6318, + "step": 16609 + }, + { + "epoch": 0.8536334669544661, + "grad_norm": 1.0482901334762573, + "learning_rate": 5.513704160630339e-07, + "loss": 0.6512, + "step": 16610 + }, + { + "epoch": 0.8536848596978107, + "grad_norm": 1.09709632396698, + "learning_rate": 5.509905577724533e-07, + "loss": 0.672, + "step": 16611 + }, + { + "epoch": 0.8537362524411554, + "grad_norm": 1.1396543979644775, + "learning_rate": 5.506108227455659e-07, + "loss": 0.6814, + "step": 16612 + }, + { + "epoch": 0.8537876451844999, + "grad_norm": 1.0864802598953247, + "learning_rate": 5.502312109928892e-07, + "loss": 0.688, + "step": 16613 + }, + { + "epoch": 0.8538390379278445, + "grad_norm": 1.0504721403121948, + "learning_rate": 5.498517225249439e-07, + "loss": 0.7082, + "step": 16614 + }, + { + "epoch": 0.8538904306711892, + "grad_norm": 1.0559278726577759, + "learning_rate": 5.494723573522432e-07, + "loss": 0.6786, + "step": 16615 + }, + { + "epoch": 0.8539418234145338, + "grad_norm": 1.146001935005188, + "learning_rate": 5.490931154852952e-07, + "loss": 0.6969, + "step": 16616 + }, + { + "epoch": 0.8539932161578785, + "grad_norm": 1.1648839712142944, + "learning_rate": 5.487139969346122e-07, + "loss": 0.6706, + "step": 16617 + }, + { + "epoch": 0.8540446089012231, + "grad_norm": 1.0918409824371338, + "learning_rate": 5.483350017106925e-07, + "loss": 0.7036, + "step": 16618 + }, + { + "epoch": 0.8540960016445678, + "grad_norm": 1.14625084400177, + "learning_rate": 5.479561298240399e-07, + "loss": 0.7293, + "step": 16619 + }, + { + "epoch": 0.8541473943879124, + "grad_norm": 1.0785630941390991, + "learning_rate": 5.475773812851503e-07, + "loss": 0.6498, + "step": 16620 + }, + { + "epoch": 0.8541987871312571, + "grad_norm": 1.0916883945465088, + "learning_rate": 5.471987561045178e-07, + "loss": 0.6678, + "step": 16621 + }, + { + "epoch": 0.8542501798746017, + "grad_norm": 1.105644702911377, + "learning_rate": 5.468202542926309e-07, + "loss": 0.6719, + "step": 16622 + }, + { + "epoch": 0.8543015726179464, + "grad_norm": 0.7795040607452393, + "learning_rate": 5.464418758599787e-07, + "loss": 0.6497, + "step": 16623 + }, + { + "epoch": 0.854352965361291, + "grad_norm": 1.072161078453064, + "learning_rate": 5.460636208170433e-07, + "loss": 0.7423, + "step": 16624 + }, + { + "epoch": 0.8544043581046357, + "grad_norm": 1.0713341236114502, + "learning_rate": 5.456854891743035e-07, + "loss": 0.6957, + "step": 16625 + }, + { + "epoch": 0.8544557508479803, + "grad_norm": 1.0741219520568848, + "learning_rate": 5.453074809422393e-07, + "loss": 0.7021, + "step": 16626 + }, + { + "epoch": 0.854507143591325, + "grad_norm": 0.7816008925437927, + "learning_rate": 5.449295961313189e-07, + "loss": 0.6245, + "step": 16627 + }, + { + "epoch": 0.8545585363346695, + "grad_norm": 1.1115132570266724, + "learning_rate": 5.445518347520157e-07, + "loss": 0.7324, + "step": 16628 + }, + { + "epoch": 0.8546099290780141, + "grad_norm": 1.1087119579315186, + "learning_rate": 5.441741968147934e-07, + "loss": 0.6728, + "step": 16629 + }, + { + "epoch": 0.8546613218213588, + "grad_norm": 1.0497866868972778, + "learning_rate": 5.437966823301183e-07, + "loss": 0.6685, + "step": 16630 + }, + { + "epoch": 0.8547127145647034, + "grad_norm": 1.119650959968567, + "learning_rate": 5.434192913084452e-07, + "loss": 0.6763, + "step": 16631 + }, + { + "epoch": 0.8547641073080481, + "grad_norm": 1.067808985710144, + "learning_rate": 5.430420237602336e-07, + "loss": 0.6199, + "step": 16632 + }, + { + "epoch": 0.8548155000513927, + "grad_norm": 1.0712556838989258, + "learning_rate": 5.426648796959344e-07, + "loss": 0.659, + "step": 16633 + }, + { + "epoch": 0.8548668927947374, + "grad_norm": 0.8623567819595337, + "learning_rate": 5.422878591259972e-07, + "loss": 0.6347, + "step": 16634 + }, + { + "epoch": 0.854918285538082, + "grad_norm": 1.0654454231262207, + "learning_rate": 5.419109620608664e-07, + "loss": 0.674, + "step": 16635 + }, + { + "epoch": 0.8549696782814267, + "grad_norm": 0.6622124910354614, + "learning_rate": 5.415341885109864e-07, + "loss": 0.6255, + "step": 16636 + }, + { + "epoch": 0.8550210710247713, + "grad_norm": 0.7096377611160278, + "learning_rate": 5.411575384867957e-07, + "loss": 0.6116, + "step": 16637 + }, + { + "epoch": 0.855072463768116, + "grad_norm": 0.8410235047340393, + "learning_rate": 5.407810119987272e-07, + "loss": 0.6101, + "step": 16638 + }, + { + "epoch": 0.8551238565114606, + "grad_norm": 1.1687554121017456, + "learning_rate": 5.404046090572173e-07, + "loss": 0.718, + "step": 16639 + }, + { + "epoch": 0.8551752492548053, + "grad_norm": 0.7542539834976196, + "learning_rate": 5.400283296726899e-07, + "loss": 0.6332, + "step": 16640 + }, + { + "epoch": 0.8552266419981499, + "grad_norm": 1.1134101152420044, + "learning_rate": 5.396521738555732e-07, + "loss": 0.7243, + "step": 16641 + }, + { + "epoch": 0.8552780347414946, + "grad_norm": 1.123837947845459, + "learning_rate": 5.392761416162879e-07, + "loss": 0.6416, + "step": 16642 + }, + { + "epoch": 0.8553294274848391, + "grad_norm": 1.086321473121643, + "learning_rate": 5.389002329652521e-07, + "loss": 0.6779, + "step": 16643 + }, + { + "epoch": 0.8553808202281837, + "grad_norm": 1.0343292951583862, + "learning_rate": 5.385244479128804e-07, + "loss": 0.669, + "step": 16644 + }, + { + "epoch": 0.8554322129715284, + "grad_norm": 1.0483826398849487, + "learning_rate": 5.381487864695856e-07, + "loss": 0.7161, + "step": 16645 + }, + { + "epoch": 0.855483605714873, + "grad_norm": 1.0882412195205688, + "learning_rate": 5.377732486457754e-07, + "loss": 0.6996, + "step": 16646 + }, + { + "epoch": 0.8555349984582177, + "grad_norm": 1.2087448835372925, + "learning_rate": 5.373978344518527e-07, + "loss": 0.7191, + "step": 16647 + }, + { + "epoch": 0.8555863912015623, + "grad_norm": 1.1023222208023071, + "learning_rate": 5.370225438982208e-07, + "loss": 0.6971, + "step": 16648 + }, + { + "epoch": 0.855637783944907, + "grad_norm": 1.0749213695526123, + "learning_rate": 5.366473769952768e-07, + "loss": 0.6856, + "step": 16649 + }, + { + "epoch": 0.8556891766882516, + "grad_norm": 0.7783789038658142, + "learning_rate": 5.362723337534148e-07, + "loss": 0.6804, + "step": 16650 + }, + { + "epoch": 0.8557405694315963, + "grad_norm": 1.0635075569152832, + "learning_rate": 5.358974141830242e-07, + "loss": 0.6816, + "step": 16651 + }, + { + "epoch": 0.8557919621749409, + "grad_norm": 1.1363420486450195, + "learning_rate": 5.355226182944967e-07, + "loss": 0.7324, + "step": 16652 + }, + { + "epoch": 0.8558433549182856, + "grad_norm": 1.0363080501556396, + "learning_rate": 5.351479460982112e-07, + "loss": 0.6409, + "step": 16653 + }, + { + "epoch": 0.8558947476616302, + "grad_norm": 1.0236976146697998, + "learning_rate": 5.347733976045522e-07, + "loss": 0.6776, + "step": 16654 + }, + { + "epoch": 0.8559461404049749, + "grad_norm": 1.1086493730545044, + "learning_rate": 5.343989728238947e-07, + "loss": 0.6687, + "step": 16655 + }, + { + "epoch": 0.8559975331483195, + "grad_norm": 0.7606008648872375, + "learning_rate": 5.340246717666131e-07, + "loss": 0.662, + "step": 16656 + }, + { + "epoch": 0.8560489258916641, + "grad_norm": 1.1598230600357056, + "learning_rate": 5.336504944430781e-07, + "loss": 0.725, + "step": 16657 + }, + { + "epoch": 0.8561003186350087, + "grad_norm": 1.1804630756378174, + "learning_rate": 5.33276440863657e-07, + "loss": 0.6857, + "step": 16658 + }, + { + "epoch": 0.8561517113783533, + "grad_norm": 1.0253392457962036, + "learning_rate": 5.329025110387121e-07, + "loss": 0.6515, + "step": 16659 + }, + { + "epoch": 0.856203104121698, + "grad_norm": 1.1216741800308228, + "learning_rate": 5.325287049786032e-07, + "loss": 0.7206, + "step": 16660 + }, + { + "epoch": 0.8562544968650426, + "grad_norm": 1.1707128286361694, + "learning_rate": 5.321550226936889e-07, + "loss": 0.7118, + "step": 16661 + }, + { + "epoch": 0.8563058896083873, + "grad_norm": 1.0712249279022217, + "learning_rate": 5.317814641943208e-07, + "loss": 0.6598, + "step": 16662 + }, + { + "epoch": 0.8563572823517319, + "grad_norm": 1.0757219791412354, + "learning_rate": 5.314080294908497e-07, + "loss": 0.7312, + "step": 16663 + }, + { + "epoch": 0.8564086750950766, + "grad_norm": 1.1548324823379517, + "learning_rate": 5.310347185936199e-07, + "loss": 0.7441, + "step": 16664 + }, + { + "epoch": 0.8564600678384212, + "grad_norm": 1.1156595945358276, + "learning_rate": 5.306615315129765e-07, + "loss": 0.7264, + "step": 16665 + }, + { + "epoch": 0.8565114605817659, + "grad_norm": 1.0754835605621338, + "learning_rate": 5.302884682592585e-07, + "loss": 0.7197, + "step": 16666 + }, + { + "epoch": 0.8565628533251105, + "grad_norm": 1.0774306058883667, + "learning_rate": 5.299155288428004e-07, + "loss": 0.7065, + "step": 16667 + }, + { + "epoch": 0.8566142460684552, + "grad_norm": 1.128065586090088, + "learning_rate": 5.295427132739384e-07, + "loss": 0.7355, + "step": 16668 + }, + { + "epoch": 0.8566656388117998, + "grad_norm": 1.1027228832244873, + "learning_rate": 5.291700215629969e-07, + "loss": 0.6763, + "step": 16669 + }, + { + "epoch": 0.8567170315551444, + "grad_norm": 1.0568816661834717, + "learning_rate": 5.287974537203056e-07, + "loss": 0.6639, + "step": 16670 + }, + { + "epoch": 0.8567684242984891, + "grad_norm": 1.1418324708938599, + "learning_rate": 5.284250097561849e-07, + "loss": 0.6881, + "step": 16671 + }, + { + "epoch": 0.8568198170418337, + "grad_norm": 1.0666179656982422, + "learning_rate": 5.280526896809545e-07, + "loss": 0.7143, + "step": 16672 + }, + { + "epoch": 0.8568712097851783, + "grad_norm": 1.1284116506576538, + "learning_rate": 5.276804935049279e-07, + "loss": 0.7045, + "step": 16673 + }, + { + "epoch": 0.8569226025285229, + "grad_norm": 1.3307828903198242, + "learning_rate": 5.273084212384199e-07, + "loss": 0.6912, + "step": 16674 + }, + { + "epoch": 0.8569739952718676, + "grad_norm": 1.1961811780929565, + "learning_rate": 5.269364728917381e-07, + "loss": 0.6717, + "step": 16675 + }, + { + "epoch": 0.8570253880152122, + "grad_norm": 1.0898549556732178, + "learning_rate": 5.265646484751857e-07, + "loss": 0.6774, + "step": 16676 + }, + { + "epoch": 0.8570767807585569, + "grad_norm": 1.1135098934173584, + "learning_rate": 5.261929479990679e-07, + "loss": 0.7188, + "step": 16677 + }, + { + "epoch": 0.8571281735019015, + "grad_norm": 1.0735715627670288, + "learning_rate": 5.25821371473681e-07, + "loss": 0.6889, + "step": 16678 + }, + { + "epoch": 0.8571795662452462, + "grad_norm": 1.0151816606521606, + "learning_rate": 5.254499189093198e-07, + "loss": 0.6786, + "step": 16679 + }, + { + "epoch": 0.8572309589885908, + "grad_norm": 1.1660007238388062, + "learning_rate": 5.250785903162747e-07, + "loss": 0.7259, + "step": 16680 + }, + { + "epoch": 0.8572823517319355, + "grad_norm": 1.1349910497665405, + "learning_rate": 5.247073857048373e-07, + "loss": 0.6512, + "step": 16681 + }, + { + "epoch": 0.8573337444752801, + "grad_norm": 1.0447020530700684, + "learning_rate": 5.243363050852878e-07, + "loss": 0.6788, + "step": 16682 + }, + { + "epoch": 0.8573851372186247, + "grad_norm": 1.1052254438400269, + "learning_rate": 5.239653484679103e-07, + "loss": 0.6984, + "step": 16683 + }, + { + "epoch": 0.8574365299619694, + "grad_norm": 0.7357926368713379, + "learning_rate": 5.235945158629818e-07, + "loss": 0.6401, + "step": 16684 + }, + { + "epoch": 0.857487922705314, + "grad_norm": 1.1499524116516113, + "learning_rate": 5.232238072807761e-07, + "loss": 0.7138, + "step": 16685 + }, + { + "epoch": 0.8575393154486587, + "grad_norm": 1.1138731241226196, + "learning_rate": 5.228532227315625e-07, + "loss": 0.6717, + "step": 16686 + }, + { + "epoch": 0.8575907081920033, + "grad_norm": 1.1965906620025635, + "learning_rate": 5.224827622256118e-07, + "loss": 0.6934, + "step": 16687 + }, + { + "epoch": 0.857642100935348, + "grad_norm": 1.103754997253418, + "learning_rate": 5.221124257731858e-07, + "loss": 0.6486, + "step": 16688 + }, + { + "epoch": 0.8576934936786925, + "grad_norm": 1.1095292568206787, + "learning_rate": 5.21742213384544e-07, + "loss": 0.6947, + "step": 16689 + }, + { + "epoch": 0.8577448864220372, + "grad_norm": 1.1932556629180908, + "learning_rate": 5.213721250699466e-07, + "loss": 0.6911, + "step": 16690 + }, + { + "epoch": 0.8577962791653818, + "grad_norm": 1.0708427429199219, + "learning_rate": 5.210021608396449e-07, + "loss": 0.7525, + "step": 16691 + }, + { + "epoch": 0.8578476719087265, + "grad_norm": 0.7635583877563477, + "learning_rate": 5.206323207038893e-07, + "loss": 0.6695, + "step": 16692 + }, + { + "epoch": 0.8578990646520711, + "grad_norm": 1.1842479705810547, + "learning_rate": 5.202626046729265e-07, + "loss": 0.6687, + "step": 16693 + }, + { + "epoch": 0.8579504573954158, + "grad_norm": 1.0723521709442139, + "learning_rate": 5.198930127570018e-07, + "loss": 0.6747, + "step": 16694 + }, + { + "epoch": 0.8580018501387604, + "grad_norm": 1.1177728176116943, + "learning_rate": 5.19523544966351e-07, + "loss": 0.7236, + "step": 16695 + }, + { + "epoch": 0.858053242882105, + "grad_norm": 1.0427969694137573, + "learning_rate": 5.191542013112144e-07, + "loss": 0.6475, + "step": 16696 + }, + { + "epoch": 0.8581046356254497, + "grad_norm": 1.0824629068374634, + "learning_rate": 5.187849818018237e-07, + "loss": 0.7029, + "step": 16697 + }, + { + "epoch": 0.8581560283687943, + "grad_norm": 1.159592628479004, + "learning_rate": 5.184158864484068e-07, + "loss": 0.6888, + "step": 16698 + }, + { + "epoch": 0.858207421112139, + "grad_norm": 1.1469663381576538, + "learning_rate": 5.180469152611928e-07, + "loss": 0.7269, + "step": 16699 + }, + { + "epoch": 0.8582588138554836, + "grad_norm": 1.1179442405700684, + "learning_rate": 5.176780682504023e-07, + "loss": 0.7206, + "step": 16700 + }, + { + "epoch": 0.8583102065988283, + "grad_norm": 1.0898016691207886, + "learning_rate": 5.173093454262557e-07, + "loss": 0.6476, + "step": 16701 + }, + { + "epoch": 0.8583615993421729, + "grad_norm": 0.6671383380889893, + "learning_rate": 5.169407467989668e-07, + "loss": 0.6461, + "step": 16702 + }, + { + "epoch": 0.8584129920855176, + "grad_norm": 1.044257640838623, + "learning_rate": 5.165722723787514e-07, + "loss": 0.7014, + "step": 16703 + }, + { + "epoch": 0.8584643848288621, + "grad_norm": 1.1633086204528809, + "learning_rate": 5.162039221758141e-07, + "loss": 0.7519, + "step": 16704 + }, + { + "epoch": 0.8585157775722068, + "grad_norm": 1.120275616645813, + "learning_rate": 5.158356962003635e-07, + "loss": 0.7061, + "step": 16705 + }, + { + "epoch": 0.8585671703155514, + "grad_norm": 1.105878233909607, + "learning_rate": 5.154675944626003e-07, + "loss": 0.6624, + "step": 16706 + }, + { + "epoch": 0.8586185630588961, + "grad_norm": 1.1321300268173218, + "learning_rate": 5.15099616972724e-07, + "loss": 0.7271, + "step": 16707 + }, + { + "epoch": 0.8586699558022407, + "grad_norm": 1.064968228340149, + "learning_rate": 5.147317637409294e-07, + "loss": 0.671, + "step": 16708 + }, + { + "epoch": 0.8587213485455854, + "grad_norm": 0.7342528104782104, + "learning_rate": 5.14364034777407e-07, + "loss": 0.638, + "step": 16709 + }, + { + "epoch": 0.85877274128893, + "grad_norm": 1.0235021114349365, + "learning_rate": 5.139964300923478e-07, + "loss": 0.6852, + "step": 16710 + }, + { + "epoch": 0.8588241340322746, + "grad_norm": 1.071006417274475, + "learning_rate": 5.136289496959329e-07, + "loss": 0.6826, + "step": 16711 + }, + { + "epoch": 0.8588755267756193, + "grad_norm": 1.2732007503509521, + "learning_rate": 5.132615935983471e-07, + "loss": 0.7183, + "step": 16712 + }, + { + "epoch": 0.8589269195189639, + "grad_norm": 1.1122158765792847, + "learning_rate": 5.128943618097664e-07, + "loss": 0.6739, + "step": 16713 + }, + { + "epoch": 0.8589783122623086, + "grad_norm": 1.1001310348510742, + "learning_rate": 5.125272543403658e-07, + "loss": 0.6661, + "step": 16714 + }, + { + "epoch": 0.8590297050056532, + "grad_norm": 1.1717740297317505, + "learning_rate": 5.12160271200316e-07, + "loss": 0.7329, + "step": 16715 + }, + { + "epoch": 0.8590810977489979, + "grad_norm": 1.100318431854248, + "learning_rate": 5.117934123997853e-07, + "loss": 0.684, + "step": 16716 + }, + { + "epoch": 0.8591324904923425, + "grad_norm": 1.121384620666504, + "learning_rate": 5.114266779489375e-07, + "loss": 0.7575, + "step": 16717 + }, + { + "epoch": 0.8591838832356872, + "grad_norm": 1.1172490119934082, + "learning_rate": 5.11060067857932e-07, + "loss": 0.7201, + "step": 16718 + }, + { + "epoch": 0.8592352759790317, + "grad_norm": 1.1001996994018555, + "learning_rate": 5.106935821369291e-07, + "loss": 0.7118, + "step": 16719 + }, + { + "epoch": 0.8592866687223764, + "grad_norm": 1.110534429550171, + "learning_rate": 5.103272207960791e-07, + "loss": 0.6584, + "step": 16720 + }, + { + "epoch": 0.859338061465721, + "grad_norm": 1.066957712173462, + "learning_rate": 5.09960983845535e-07, + "loss": 0.65, + "step": 16721 + }, + { + "epoch": 0.8593894542090657, + "grad_norm": 0.7040334343910217, + "learning_rate": 5.095948712954418e-07, + "loss": 0.6257, + "step": 16722 + }, + { + "epoch": 0.8594408469524103, + "grad_norm": 1.029909372329712, + "learning_rate": 5.092288831559455e-07, + "loss": 0.66, + "step": 16723 + }, + { + "epoch": 0.859492239695755, + "grad_norm": 1.3412113189697266, + "learning_rate": 5.088630194371829e-07, + "loss": 0.6876, + "step": 16724 + }, + { + "epoch": 0.8595436324390996, + "grad_norm": 1.1116526126861572, + "learning_rate": 5.084972801492932e-07, + "loss": 0.6487, + "step": 16725 + }, + { + "epoch": 0.8595950251824442, + "grad_norm": 1.0862735509872437, + "learning_rate": 5.081316653024088e-07, + "loss": 0.6232, + "step": 16726 + }, + { + "epoch": 0.8596464179257889, + "grad_norm": 1.088112473487854, + "learning_rate": 5.077661749066575e-07, + "loss": 0.6921, + "step": 16727 + }, + { + "epoch": 0.8596978106691335, + "grad_norm": 1.1253446340560913, + "learning_rate": 5.074008089721683e-07, + "loss": 0.676, + "step": 16728 + }, + { + "epoch": 0.8597492034124782, + "grad_norm": 1.0455559492111206, + "learning_rate": 5.070355675090632e-07, + "loss": 0.6144, + "step": 16729 + }, + { + "epoch": 0.8598005961558228, + "grad_norm": 1.0786539316177368, + "learning_rate": 5.066704505274606e-07, + "loss": 0.659, + "step": 16730 + }, + { + "epoch": 0.8598519888991675, + "grad_norm": 1.0298736095428467, + "learning_rate": 5.063054580374766e-07, + "loss": 0.7549, + "step": 16731 + }, + { + "epoch": 0.8599033816425121, + "grad_norm": 1.0899608135223389, + "learning_rate": 5.059405900492254e-07, + "loss": 0.7428, + "step": 16732 + }, + { + "epoch": 0.8599547743858568, + "grad_norm": 1.1084903478622437, + "learning_rate": 5.055758465728128e-07, + "loss": 0.6971, + "step": 16733 + }, + { + "epoch": 0.8600061671292013, + "grad_norm": 1.1381936073303223, + "learning_rate": 5.052112276183469e-07, + "loss": 0.6827, + "step": 16734 + }, + { + "epoch": 0.860057559872546, + "grad_norm": 1.0742419958114624, + "learning_rate": 5.048467331959294e-07, + "loss": 0.7003, + "step": 16735 + }, + { + "epoch": 0.8601089526158906, + "grad_norm": 1.0268282890319824, + "learning_rate": 5.044823633156581e-07, + "loss": 0.6808, + "step": 16736 + }, + { + "epoch": 0.8601603453592352, + "grad_norm": 1.0812846422195435, + "learning_rate": 5.041181179876275e-07, + "loss": 0.7411, + "step": 16737 + }, + { + "epoch": 0.8602117381025799, + "grad_norm": 0.6776163578033447, + "learning_rate": 5.037539972219319e-07, + "loss": 0.6491, + "step": 16738 + }, + { + "epoch": 0.8602631308459245, + "grad_norm": 1.1219476461410522, + "learning_rate": 5.033900010286579e-07, + "loss": 0.6753, + "step": 16739 + }, + { + "epoch": 0.8603145235892692, + "grad_norm": 1.073449730873108, + "learning_rate": 5.030261294178895e-07, + "loss": 0.7098, + "step": 16740 + }, + { + "epoch": 0.8603659163326138, + "grad_norm": 1.0801972150802612, + "learning_rate": 5.026623823997107e-07, + "loss": 0.7147, + "step": 16741 + }, + { + "epoch": 0.8604173090759585, + "grad_norm": 0.7268709540367126, + "learning_rate": 5.022987599841972e-07, + "loss": 0.6277, + "step": 16742 + }, + { + "epoch": 0.8604687018193031, + "grad_norm": 1.1501014232635498, + "learning_rate": 5.019352621814244e-07, + "loss": 0.6576, + "step": 16743 + }, + { + "epoch": 0.8605200945626478, + "grad_norm": 1.0481343269348145, + "learning_rate": 5.015718890014621e-07, + "loss": 0.6887, + "step": 16744 + }, + { + "epoch": 0.8605714873059924, + "grad_norm": 1.1956849098205566, + "learning_rate": 5.012086404543809e-07, + "loss": 0.6988, + "step": 16745 + }, + { + "epoch": 0.8606228800493371, + "grad_norm": 1.1003162860870361, + "learning_rate": 5.008455165502407e-07, + "loss": 0.6321, + "step": 16746 + }, + { + "epoch": 0.8606742727926817, + "grad_norm": 1.1154745817184448, + "learning_rate": 5.004825172991051e-07, + "loss": 0.691, + "step": 16747 + }, + { + "epoch": 0.8607256655360264, + "grad_norm": 1.1095490455627441, + "learning_rate": 5.001196427110306e-07, + "loss": 0.7082, + "step": 16748 + }, + { + "epoch": 0.8607770582793709, + "grad_norm": 1.1589680910110474, + "learning_rate": 4.9975689279607e-07, + "loss": 0.7155, + "step": 16749 + }, + { + "epoch": 0.8608284510227155, + "grad_norm": 1.1479979753494263, + "learning_rate": 4.99394267564276e-07, + "loss": 0.6979, + "step": 16750 + }, + { + "epoch": 0.8608798437660602, + "grad_norm": 0.7034879326820374, + "learning_rate": 4.990317670256933e-07, + "loss": 0.5978, + "step": 16751 + }, + { + "epoch": 0.8609312365094048, + "grad_norm": 0.723787248134613, + "learning_rate": 4.986693911903661e-07, + "loss": 0.6805, + "step": 16752 + }, + { + "epoch": 0.8609826292527495, + "grad_norm": 1.1227329969406128, + "learning_rate": 4.983071400683331e-07, + "loss": 0.6883, + "step": 16753 + }, + { + "epoch": 0.8610340219960941, + "grad_norm": 1.050891637802124, + "learning_rate": 4.979450136696329e-07, + "loss": 0.6655, + "step": 16754 + }, + { + "epoch": 0.8610854147394388, + "grad_norm": 1.0858683586120605, + "learning_rate": 4.97583012004298e-07, + "loss": 0.6742, + "step": 16755 + }, + { + "epoch": 0.8611368074827834, + "grad_norm": 1.113417387008667, + "learning_rate": 4.972211350823569e-07, + "loss": 0.6589, + "step": 16756 + }, + { + "epoch": 0.8611882002261281, + "grad_norm": 1.0991747379302979, + "learning_rate": 4.968593829138352e-07, + "loss": 0.6365, + "step": 16757 + }, + { + "epoch": 0.8612395929694727, + "grad_norm": 1.1093100309371948, + "learning_rate": 4.96497755508758e-07, + "loss": 0.7659, + "step": 16758 + }, + { + "epoch": 0.8612909857128174, + "grad_norm": 0.6461854577064514, + "learning_rate": 4.96136252877143e-07, + "loss": 0.6095, + "step": 16759 + }, + { + "epoch": 0.861342378456162, + "grad_norm": 1.1379939317703247, + "learning_rate": 4.957748750290047e-07, + "loss": 0.6798, + "step": 16760 + }, + { + "epoch": 0.8613937711995067, + "grad_norm": 1.0954571962356567, + "learning_rate": 4.954136219743594e-07, + "loss": 0.684, + "step": 16761 + }, + { + "epoch": 0.8614451639428513, + "grad_norm": 1.130601406097412, + "learning_rate": 4.95052493723211e-07, + "loss": 0.6352, + "step": 16762 + }, + { + "epoch": 0.861496556686196, + "grad_norm": 1.0393316745758057, + "learning_rate": 4.946914902855682e-07, + "loss": 0.6575, + "step": 16763 + }, + { + "epoch": 0.8615479494295405, + "grad_norm": 1.0717134475708008, + "learning_rate": 4.94330611671432e-07, + "loss": 0.7174, + "step": 16764 + }, + { + "epoch": 0.8615993421728851, + "grad_norm": 0.9992402791976929, + "learning_rate": 4.939698578908003e-07, + "loss": 0.6666, + "step": 16765 + }, + { + "epoch": 0.8616507349162298, + "grad_norm": 1.1043227910995483, + "learning_rate": 4.936092289536682e-07, + "loss": 0.6913, + "step": 16766 + }, + { + "epoch": 0.8617021276595744, + "grad_norm": 1.1066546440124512, + "learning_rate": 4.932487248700279e-07, + "loss": 0.7, + "step": 16767 + }, + { + "epoch": 0.8617535204029191, + "grad_norm": 1.0850659608840942, + "learning_rate": 4.928883456498679e-07, + "loss": 0.6618, + "step": 16768 + }, + { + "epoch": 0.8618049131462637, + "grad_norm": 1.1021714210510254, + "learning_rate": 4.925280913031704e-07, + "loss": 0.7284, + "step": 16769 + }, + { + "epoch": 0.8618563058896084, + "grad_norm": 1.115631103515625, + "learning_rate": 4.921679618399199e-07, + "loss": 0.6663, + "step": 16770 + }, + { + "epoch": 0.861907698632953, + "grad_norm": 1.1310640573501587, + "learning_rate": 4.918079572700923e-07, + "loss": 0.6884, + "step": 16771 + }, + { + "epoch": 0.8619590913762977, + "grad_norm": 1.0832988023757935, + "learning_rate": 4.914480776036617e-07, + "loss": 0.6939, + "step": 16772 + }, + { + "epoch": 0.8620104841196423, + "grad_norm": 1.234472632408142, + "learning_rate": 4.91088322850598e-07, + "loss": 0.7179, + "step": 16773 + }, + { + "epoch": 0.862061876862987, + "grad_norm": 1.0742464065551758, + "learning_rate": 4.907286930208721e-07, + "loss": 0.6987, + "step": 16774 + }, + { + "epoch": 0.8621132696063316, + "grad_norm": 1.0897985696792603, + "learning_rate": 4.903691881244438e-07, + "loss": 0.7024, + "step": 16775 + }, + { + "epoch": 0.8621646623496763, + "grad_norm": 1.197669506072998, + "learning_rate": 4.900098081712756e-07, + "loss": 0.6767, + "step": 16776 + }, + { + "epoch": 0.8622160550930209, + "grad_norm": 0.6522785425186157, + "learning_rate": 4.896505531713241e-07, + "loss": 0.6178, + "step": 16777 + }, + { + "epoch": 0.8622674478363656, + "grad_norm": 1.1411634683609009, + "learning_rate": 4.892914231345414e-07, + "loss": 0.6359, + "step": 16778 + }, + { + "epoch": 0.8623188405797102, + "grad_norm": 1.2458677291870117, + "learning_rate": 4.889324180708799e-07, + "loss": 0.6808, + "step": 16779 + }, + { + "epoch": 0.8623702333230547, + "grad_norm": 1.1351290941238403, + "learning_rate": 4.885735379902856e-07, + "loss": 0.6967, + "step": 16780 + }, + { + "epoch": 0.8624216260663994, + "grad_norm": 0.7984334826469421, + "learning_rate": 4.882147829027e-07, + "loss": 0.6796, + "step": 16781 + }, + { + "epoch": 0.862473018809744, + "grad_norm": 1.198954701423645, + "learning_rate": 4.878561528180637e-07, + "loss": 0.7134, + "step": 16782 + }, + { + "epoch": 0.8625244115530887, + "grad_norm": 1.120751976966858, + "learning_rate": 4.874976477463134e-07, + "loss": 0.7647, + "step": 16783 + }, + { + "epoch": 0.8625758042964333, + "grad_norm": 1.118221402168274, + "learning_rate": 4.871392676973813e-07, + "loss": 0.643, + "step": 16784 + }, + { + "epoch": 0.862627197039778, + "grad_norm": 0.700772225856781, + "learning_rate": 4.867810126811967e-07, + "loss": 0.6841, + "step": 16785 + }, + { + "epoch": 0.8626785897831226, + "grad_norm": 1.156740665435791, + "learning_rate": 4.864228827076845e-07, + "loss": 0.7139, + "step": 16786 + }, + { + "epoch": 0.8627299825264673, + "grad_norm": 1.0679891109466553, + "learning_rate": 4.860648777867682e-07, + "loss": 0.6439, + "step": 16787 + }, + { + "epoch": 0.8627813752698119, + "grad_norm": 0.7223211526870728, + "learning_rate": 4.857069979283669e-07, + "loss": 0.662, + "step": 16788 + }, + { + "epoch": 0.8628327680131566, + "grad_norm": 1.1113128662109375, + "learning_rate": 4.853492431423945e-07, + "loss": 0.6702, + "step": 16789 + }, + { + "epoch": 0.8628841607565012, + "grad_norm": 0.7723284363746643, + "learning_rate": 4.849916134387644e-07, + "loss": 0.6584, + "step": 16790 + }, + { + "epoch": 0.8629355534998459, + "grad_norm": 1.112114667892456, + "learning_rate": 4.846341088273832e-07, + "loss": 0.6743, + "step": 16791 + }, + { + "epoch": 0.8629869462431905, + "grad_norm": 1.0344780683517456, + "learning_rate": 4.842767293181577e-07, + "loss": 0.7201, + "step": 16792 + }, + { + "epoch": 0.8630383389865351, + "grad_norm": 1.1592353582382202, + "learning_rate": 4.839194749209891e-07, + "loss": 0.7357, + "step": 16793 + }, + { + "epoch": 0.8630897317298798, + "grad_norm": 1.1069899797439575, + "learning_rate": 4.83562345645775e-07, + "loss": 0.6683, + "step": 16794 + }, + { + "epoch": 0.8631411244732243, + "grad_norm": 1.118985652923584, + "learning_rate": 4.83205341502409e-07, + "loss": 0.6904, + "step": 16795 + }, + { + "epoch": 0.863192517216569, + "grad_norm": 1.0612621307373047, + "learning_rate": 4.828484625007851e-07, + "loss": 0.69, + "step": 16796 + }, + { + "epoch": 0.8632439099599136, + "grad_norm": 1.110080599784851, + "learning_rate": 4.824917086507874e-07, + "loss": 0.7174, + "step": 16797 + }, + { + "epoch": 0.8632953027032583, + "grad_norm": 1.1500543355941772, + "learning_rate": 4.821350799623031e-07, + "loss": 0.6781, + "step": 16798 + }, + { + "epoch": 0.8633466954466029, + "grad_norm": 0.6933485865592957, + "learning_rate": 4.817785764452098e-07, + "loss": 0.624, + "step": 16799 + }, + { + "epoch": 0.8633980881899476, + "grad_norm": 1.131424903869629, + "learning_rate": 4.814221981093881e-07, + "loss": 0.6977, + "step": 16800 + }, + { + "epoch": 0.8634494809332922, + "grad_norm": 1.1123830080032349, + "learning_rate": 4.810659449647103e-07, + "loss": 0.6901, + "step": 16801 + }, + { + "epoch": 0.8635008736766369, + "grad_norm": 1.0635474920272827, + "learning_rate": 4.807098170210455e-07, + "loss": 0.664, + "step": 16802 + }, + { + "epoch": 0.8635522664199815, + "grad_norm": 0.886468768119812, + "learning_rate": 4.803538142882636e-07, + "loss": 0.6189, + "step": 16803 + }, + { + "epoch": 0.8636036591633262, + "grad_norm": 1.081760048866272, + "learning_rate": 4.799979367762236e-07, + "loss": 0.6632, + "step": 16804 + }, + { + "epoch": 0.8636550519066708, + "grad_norm": 1.0781593322753906, + "learning_rate": 4.796421844947896e-07, + "loss": 0.685, + "step": 16805 + }, + { + "epoch": 0.8637064446500154, + "grad_norm": 1.1444255113601685, + "learning_rate": 4.792865574538159e-07, + "loss": 0.7414, + "step": 16806 + }, + { + "epoch": 0.8637578373933601, + "grad_norm": 0.7110108733177185, + "learning_rate": 4.789310556631554e-07, + "loss": 0.7078, + "step": 16807 + }, + { + "epoch": 0.8638092301367047, + "grad_norm": 0.7261713147163391, + "learning_rate": 4.785756791326579e-07, + "loss": 0.6736, + "step": 16808 + }, + { + "epoch": 0.8638606228800494, + "grad_norm": 1.0720264911651611, + "learning_rate": 4.782204278721697e-07, + "loss": 0.693, + "step": 16809 + }, + { + "epoch": 0.8639120156233939, + "grad_norm": 0.8120423555374146, + "learning_rate": 4.778653018915335e-07, + "loss": 0.6489, + "step": 16810 + }, + { + "epoch": 0.8639634083667386, + "grad_norm": 1.1376157999038696, + "learning_rate": 4.775103012005871e-07, + "loss": 0.7053, + "step": 16811 + }, + { + "epoch": 0.8640148011100832, + "grad_norm": 1.0715097188949585, + "learning_rate": 4.771554258091687e-07, + "loss": 0.7039, + "step": 16812 + }, + { + "epoch": 0.8640661938534279, + "grad_norm": 1.1204653978347778, + "learning_rate": 4.768006757271071e-07, + "loss": 0.7578, + "step": 16813 + }, + { + "epoch": 0.8641175865967725, + "grad_norm": 1.0762317180633545, + "learning_rate": 4.764460509642338e-07, + "loss": 0.7354, + "step": 16814 + }, + { + "epoch": 0.8641689793401172, + "grad_norm": 0.7659726738929749, + "learning_rate": 4.7609155153037157e-07, + "loss": 0.6574, + "step": 16815 + }, + { + "epoch": 0.8642203720834618, + "grad_norm": 1.1349427700042725, + "learning_rate": 4.757371774353453e-07, + "loss": 0.7107, + "step": 16816 + }, + { + "epoch": 0.8642717648268065, + "grad_norm": 1.0839574337005615, + "learning_rate": 4.753829286889694e-07, + "loss": 0.7279, + "step": 16817 + }, + { + "epoch": 0.8643231575701511, + "grad_norm": 1.0780730247497559, + "learning_rate": 4.7502880530106155e-07, + "loss": 0.6423, + "step": 16818 + }, + { + "epoch": 0.8643745503134957, + "grad_norm": 1.0728671550750732, + "learning_rate": 4.746748072814322e-07, + "loss": 0.6332, + "step": 16819 + }, + { + "epoch": 0.8644259430568404, + "grad_norm": 1.0914570093154907, + "learning_rate": 4.7432093463988805e-07, + "loss": 0.6592, + "step": 16820 + }, + { + "epoch": 0.864477335800185, + "grad_norm": 1.0886261463165283, + "learning_rate": 4.73967187386235e-07, + "loss": 0.6487, + "step": 16821 + }, + { + "epoch": 0.8645287285435297, + "grad_norm": 1.1144911050796509, + "learning_rate": 4.7361356553027373e-07, + "loss": 0.6653, + "step": 16822 + }, + { + "epoch": 0.8645801212868743, + "grad_norm": 1.1381844282150269, + "learning_rate": 4.7326006908180123e-07, + "loss": 0.6586, + "step": 16823 + }, + { + "epoch": 0.864631514030219, + "grad_norm": 0.6732823848724365, + "learning_rate": 4.729066980506103e-07, + "loss": 0.6453, + "step": 16824 + }, + { + "epoch": 0.8646829067735635, + "grad_norm": 1.1333794593811035, + "learning_rate": 4.7255345244649474e-07, + "loss": 0.6407, + "step": 16825 + }, + { + "epoch": 0.8647342995169082, + "grad_norm": 0.7401777505874634, + "learning_rate": 4.722003322792373e-07, + "loss": 0.6517, + "step": 16826 + }, + { + "epoch": 0.8647856922602528, + "grad_norm": 1.072718858718872, + "learning_rate": 4.718473375586241e-07, + "loss": 0.737, + "step": 16827 + }, + { + "epoch": 0.8648370850035975, + "grad_norm": 1.0631109476089478, + "learning_rate": 4.71494468294435e-07, + "loss": 0.6742, + "step": 16828 + }, + { + "epoch": 0.8648884777469421, + "grad_norm": 1.1551052331924438, + "learning_rate": 4.7114172449644503e-07, + "loss": 0.6964, + "step": 16829 + }, + { + "epoch": 0.8649398704902868, + "grad_norm": 1.108849048614502, + "learning_rate": 4.7078910617442964e-07, + "loss": 0.7132, + "step": 16830 + }, + { + "epoch": 0.8649912632336314, + "grad_norm": 0.6628881096839905, + "learning_rate": 4.7043661333815717e-07, + "loss": 0.6132, + "step": 16831 + }, + { + "epoch": 0.865042655976976, + "grad_norm": 1.0856351852416992, + "learning_rate": 4.7008424599739366e-07, + "loss": 0.6759, + "step": 16832 + }, + { + "epoch": 0.8650940487203207, + "grad_norm": 1.041588544845581, + "learning_rate": 4.697320041619008e-07, + "loss": 0.6833, + "step": 16833 + }, + { + "epoch": 0.8651454414636653, + "grad_norm": 1.1799362897872925, + "learning_rate": 4.693798878414396e-07, + "loss": 0.7056, + "step": 16834 + }, + { + "epoch": 0.86519683420701, + "grad_norm": 1.0882309675216675, + "learning_rate": 4.6902789704576514e-07, + "loss": 0.6648, + "step": 16835 + }, + { + "epoch": 0.8652482269503546, + "grad_norm": 1.047500491142273, + "learning_rate": 4.6867603178462895e-07, + "loss": 0.6859, + "step": 16836 + }, + { + "epoch": 0.8652996196936993, + "grad_norm": 1.1451234817504883, + "learning_rate": 4.6832429206777995e-07, + "loss": 0.7493, + "step": 16837 + }, + { + "epoch": 0.8653510124370439, + "grad_norm": 1.0401136875152588, + "learning_rate": 4.679726779049643e-07, + "loss": 0.7051, + "step": 16838 + }, + { + "epoch": 0.8654024051803886, + "grad_norm": 0.6487236022949219, + "learning_rate": 4.676211893059235e-07, + "loss": 0.6387, + "step": 16839 + }, + { + "epoch": 0.8654537979237331, + "grad_norm": 1.1266751289367676, + "learning_rate": 4.6726982628039483e-07, + "loss": 0.6932, + "step": 16840 + }, + { + "epoch": 0.8655051906670778, + "grad_norm": 1.118712306022644, + "learning_rate": 4.669185888381145e-07, + "loss": 0.7436, + "step": 16841 + }, + { + "epoch": 0.8655565834104224, + "grad_norm": 0.6916739344596863, + "learning_rate": 4.665674769888118e-07, + "loss": 0.6631, + "step": 16842 + }, + { + "epoch": 0.8656079761537671, + "grad_norm": 1.1347112655639648, + "learning_rate": 4.662164907422173e-07, + "loss": 0.6974, + "step": 16843 + }, + { + "epoch": 0.8656593688971117, + "grad_norm": 1.0755008459091187, + "learning_rate": 4.658656301080539e-07, + "loss": 0.6884, + "step": 16844 + }, + { + "epoch": 0.8657107616404563, + "grad_norm": 1.0861234664916992, + "learning_rate": 4.6551489509604196e-07, + "loss": 0.7077, + "step": 16845 + }, + { + "epoch": 0.865762154383801, + "grad_norm": 1.0912024974822998, + "learning_rate": 4.6516428571589943e-07, + "loss": 0.694, + "step": 16846 + }, + { + "epoch": 0.8658135471271456, + "grad_norm": 1.0918269157409668, + "learning_rate": 4.6481380197734125e-07, + "loss": 0.7055, + "step": 16847 + }, + { + "epoch": 0.8658649398704903, + "grad_norm": 1.010430097579956, + "learning_rate": 4.6446344389007637e-07, + "loss": 0.6547, + "step": 16848 + }, + { + "epoch": 0.8659163326138349, + "grad_norm": 1.085803747177124, + "learning_rate": 4.6411321146381305e-07, + "loss": 0.7055, + "step": 16849 + }, + { + "epoch": 0.8659677253571796, + "grad_norm": 1.1148240566253662, + "learning_rate": 4.63763104708253e-07, + "loss": 0.6879, + "step": 16850 + }, + { + "epoch": 0.8660191181005242, + "grad_norm": 1.0023490190505981, + "learning_rate": 4.634131236330985e-07, + "loss": 0.686, + "step": 16851 + }, + { + "epoch": 0.8660705108438689, + "grad_norm": 0.8238533735275269, + "learning_rate": 4.630632682480446e-07, + "loss": 0.6476, + "step": 16852 + }, + { + "epoch": 0.8661219035872135, + "grad_norm": 1.116344928741455, + "learning_rate": 4.62713538562784e-07, + "loss": 0.7547, + "step": 16853 + }, + { + "epoch": 0.8661732963305582, + "grad_norm": 1.0539709329605103, + "learning_rate": 4.6236393458700844e-07, + "loss": 0.6905, + "step": 16854 + }, + { + "epoch": 0.8662246890739027, + "grad_norm": 1.033897876739502, + "learning_rate": 4.620144563304013e-07, + "loss": 0.7008, + "step": 16855 + }, + { + "epoch": 0.8662760818172474, + "grad_norm": 0.9878309965133667, + "learning_rate": 4.6166510380264705e-07, + "loss": 0.6707, + "step": 16856 + }, + { + "epoch": 0.866327474560592, + "grad_norm": 0.7867979407310486, + "learning_rate": 4.6131587701342407e-07, + "loss": 0.6497, + "step": 16857 + }, + { + "epoch": 0.8663788673039367, + "grad_norm": 1.0457791090011597, + "learning_rate": 4.609667759724079e-07, + "loss": 0.6508, + "step": 16858 + }, + { + "epoch": 0.8664302600472813, + "grad_norm": 1.085776925086975, + "learning_rate": 4.606178006892703e-07, + "loss": 0.6231, + "step": 16859 + }, + { + "epoch": 0.866481652790626, + "grad_norm": 1.0443991422653198, + "learning_rate": 4.6026895117368187e-07, + "loss": 0.7116, + "step": 16860 + }, + { + "epoch": 0.8665330455339706, + "grad_norm": 1.1229698657989502, + "learning_rate": 4.5992022743530594e-07, + "loss": 0.6647, + "step": 16861 + }, + { + "epoch": 0.8665844382773152, + "grad_norm": 1.1352931261062622, + "learning_rate": 4.5957162948380374e-07, + "loss": 0.7684, + "step": 16862 + }, + { + "epoch": 0.8666358310206599, + "grad_norm": 1.0604121685028076, + "learning_rate": 4.5922315732883526e-07, + "loss": 0.6671, + "step": 16863 + }, + { + "epoch": 0.8666872237640045, + "grad_norm": 1.0564987659454346, + "learning_rate": 4.588748109800545e-07, + "loss": 0.6579, + "step": 16864 + }, + { + "epoch": 0.8667386165073492, + "grad_norm": 1.0370519161224365, + "learning_rate": 4.5852659044711257e-07, + "loss": 0.7028, + "step": 16865 + }, + { + "epoch": 0.8667900092506938, + "grad_norm": 1.1641144752502441, + "learning_rate": 4.581784957396568e-07, + "loss": 0.6741, + "step": 16866 + }, + { + "epoch": 0.8668414019940385, + "grad_norm": 1.130568027496338, + "learning_rate": 4.5783052686733333e-07, + "loss": 0.6503, + "step": 16867 + }, + { + "epoch": 0.8668927947373831, + "grad_norm": 1.0574378967285156, + "learning_rate": 4.5748268383977947e-07, + "loss": 0.6943, + "step": 16868 + }, + { + "epoch": 0.8669441874807278, + "grad_norm": 1.090811014175415, + "learning_rate": 4.571349666666358e-07, + "loss": 0.621, + "step": 16869 + }, + { + "epoch": 0.8669955802240724, + "grad_norm": 0.7930713891983032, + "learning_rate": 4.5678737535753526e-07, + "loss": 0.6198, + "step": 16870 + }, + { + "epoch": 0.867046972967417, + "grad_norm": 1.0515788793563843, + "learning_rate": 4.564399099221062e-07, + "loss": 0.6566, + "step": 16871 + }, + { + "epoch": 0.8670983657107616, + "grad_norm": 1.1588231325149536, + "learning_rate": 4.5609257036997865e-07, + "loss": 0.6557, + "step": 16872 + }, + { + "epoch": 0.8671497584541062, + "grad_norm": 0.8916311860084534, + "learning_rate": 4.557453567107739e-07, + "loss": 0.6183, + "step": 16873 + }, + { + "epoch": 0.8672011511974509, + "grad_norm": 1.1547328233718872, + "learning_rate": 4.5539826895411255e-07, + "loss": 0.6691, + "step": 16874 + }, + { + "epoch": 0.8672525439407955, + "grad_norm": 1.139276385307312, + "learning_rate": 4.5505130710960963e-07, + "loss": 0.6564, + "step": 16875 + }, + { + "epoch": 0.8673039366841402, + "grad_norm": 0.8685891032218933, + "learning_rate": 4.5470447118688086e-07, + "loss": 0.6492, + "step": 16876 + }, + { + "epoch": 0.8673553294274848, + "grad_norm": 1.0801135301589966, + "learning_rate": 4.5435776119553245e-07, + "loss": 0.692, + "step": 16877 + }, + { + "epoch": 0.8674067221708295, + "grad_norm": 1.0958422422409058, + "learning_rate": 4.540111771451722e-07, + "loss": 0.7214, + "step": 16878 + }, + { + "epoch": 0.8674581149141741, + "grad_norm": 1.0810171365737915, + "learning_rate": 4.536647190454013e-07, + "loss": 0.7197, + "step": 16879 + }, + { + "epoch": 0.8675095076575188, + "grad_norm": 0.7447600364685059, + "learning_rate": 4.5331838690582053e-07, + "loss": 0.5945, + "step": 16880 + }, + { + "epoch": 0.8675609004008634, + "grad_norm": 1.0942082405090332, + "learning_rate": 4.529721807360238e-07, + "loss": 0.7102, + "step": 16881 + }, + { + "epoch": 0.8676122931442081, + "grad_norm": 1.0947860479354858, + "learning_rate": 4.526261005456034e-07, + "loss": 0.7141, + "step": 16882 + }, + { + "epoch": 0.8676636858875527, + "grad_norm": 1.1190485954284668, + "learning_rate": 4.522801463441484e-07, + "loss": 0.7156, + "step": 16883 + }, + { + "epoch": 0.8677150786308974, + "grad_norm": 1.0785343647003174, + "learning_rate": 4.519343181412422e-07, + "loss": 0.711, + "step": 16884 + }, + { + "epoch": 0.867766471374242, + "grad_norm": 1.2084046602249146, + "learning_rate": 4.515886159464677e-07, + "loss": 0.7145, + "step": 16885 + }, + { + "epoch": 0.8678178641175865, + "grad_norm": 1.1176583766937256, + "learning_rate": 4.512430397694029e-07, + "loss": 0.7545, + "step": 16886 + }, + { + "epoch": 0.8678692568609312, + "grad_norm": 1.2191412448883057, + "learning_rate": 4.508975896196216e-07, + "loss": 0.7322, + "step": 16887 + }, + { + "epoch": 0.8679206496042758, + "grad_norm": 1.145444631576538, + "learning_rate": 4.505522655066941e-07, + "loss": 0.7391, + "step": 16888 + }, + { + "epoch": 0.8679720423476205, + "grad_norm": 1.1362560987472534, + "learning_rate": 4.502070674401904e-07, + "loss": 0.7129, + "step": 16889 + }, + { + "epoch": 0.8680234350909651, + "grad_norm": 1.0449599027633667, + "learning_rate": 4.4986199542967236e-07, + "loss": 0.6966, + "step": 16890 + }, + { + "epoch": 0.8680748278343098, + "grad_norm": 1.0726850032806396, + "learning_rate": 4.495170494847012e-07, + "loss": 0.7247, + "step": 16891 + }, + { + "epoch": 0.8681262205776544, + "grad_norm": 1.1508533954620361, + "learning_rate": 4.4917222961483377e-07, + "loss": 0.7308, + "step": 16892 + }, + { + "epoch": 0.8681776133209991, + "grad_norm": 0.698868453502655, + "learning_rate": 4.488275358296229e-07, + "loss": 0.6552, + "step": 16893 + }, + { + "epoch": 0.8682290060643437, + "grad_norm": 1.1259434223175049, + "learning_rate": 4.4848296813862046e-07, + "loss": 0.6771, + "step": 16894 + }, + { + "epoch": 0.8682803988076884, + "grad_norm": 0.7530171871185303, + "learning_rate": 4.48138526551371e-07, + "loss": 0.6721, + "step": 16895 + }, + { + "epoch": 0.868331791551033, + "grad_norm": 1.0810846090316772, + "learning_rate": 4.477942110774203e-07, + "loss": 0.695, + "step": 16896 + }, + { + "epoch": 0.8683831842943777, + "grad_norm": 1.1041934490203857, + "learning_rate": 4.4745002172630446e-07, + "loss": 0.7008, + "step": 16897 + }, + { + "epoch": 0.8684345770377223, + "grad_norm": 1.0875623226165771, + "learning_rate": 4.471059585075621e-07, + "loss": 0.6957, + "step": 16898 + }, + { + "epoch": 0.868485969781067, + "grad_norm": 1.1131097078323364, + "learning_rate": 4.46762021430725e-07, + "loss": 0.6997, + "step": 16899 + }, + { + "epoch": 0.8685373625244116, + "grad_norm": 1.0392400026321411, + "learning_rate": 4.464182105053222e-07, + "loss": 0.6217, + "step": 16900 + }, + { + "epoch": 0.8685887552677561, + "grad_norm": 1.0921518802642822, + "learning_rate": 4.460745257408783e-07, + "loss": 0.7343, + "step": 16901 + }, + { + "epoch": 0.8686401480111008, + "grad_norm": 1.1028876304626465, + "learning_rate": 4.457309671469173e-07, + "loss": 0.6946, + "step": 16902 + }, + { + "epoch": 0.8686915407544454, + "grad_norm": 1.0970611572265625, + "learning_rate": 4.453875347329567e-07, + "loss": 0.6745, + "step": 16903 + }, + { + "epoch": 0.8687429334977901, + "grad_norm": 1.046669602394104, + "learning_rate": 4.4504422850851105e-07, + "loss": 0.6348, + "step": 16904 + }, + { + "epoch": 0.8687943262411347, + "grad_norm": 1.0449820756912231, + "learning_rate": 4.447010484830944e-07, + "loss": 0.7197, + "step": 16905 + }, + { + "epoch": 0.8688457189844794, + "grad_norm": 1.1876931190490723, + "learning_rate": 4.4435799466621134e-07, + "loss": 0.7236, + "step": 16906 + }, + { + "epoch": 0.868897111727824, + "grad_norm": 0.7819764018058777, + "learning_rate": 4.4401506706736874e-07, + "loss": 0.6557, + "step": 16907 + }, + { + "epoch": 0.8689485044711687, + "grad_norm": 1.081925630569458, + "learning_rate": 4.436722656960668e-07, + "loss": 0.6573, + "step": 16908 + }, + { + "epoch": 0.8689998972145133, + "grad_norm": 1.1064388751983643, + "learning_rate": 4.433295905618046e-07, + "loss": 0.7174, + "step": 16909 + }, + { + "epoch": 0.869051289957858, + "grad_norm": 1.175676941871643, + "learning_rate": 4.4298704167407394e-07, + "loss": 0.7226, + "step": 16910 + }, + { + "epoch": 0.8691026827012026, + "grad_norm": 0.7838454246520996, + "learning_rate": 4.4264461904236777e-07, + "loss": 0.6597, + "step": 16911 + }, + { + "epoch": 0.8691540754445473, + "grad_norm": 1.2905205488204956, + "learning_rate": 4.4230232267617136e-07, + "loss": 0.6507, + "step": 16912 + }, + { + "epoch": 0.8692054681878919, + "grad_norm": 0.6603265404701233, + "learning_rate": 4.419601525849687e-07, + "loss": 0.661, + "step": 16913 + }, + { + "epoch": 0.8692568609312366, + "grad_norm": 1.0421782732009888, + "learning_rate": 4.416181087782412e-07, + "loss": 0.7322, + "step": 16914 + }, + { + "epoch": 0.8693082536745812, + "grad_norm": 1.2059253454208374, + "learning_rate": 4.412761912654645e-07, + "loss": 0.7073, + "step": 16915 + }, + { + "epoch": 0.8693596464179257, + "grad_norm": 1.0222489833831787, + "learning_rate": 4.4093440005611164e-07, + "loss": 0.6582, + "step": 16916 + }, + { + "epoch": 0.8694110391612704, + "grad_norm": 1.096178412437439, + "learning_rate": 4.405927351596517e-07, + "loss": 0.6855, + "step": 16917 + }, + { + "epoch": 0.869462431904615, + "grad_norm": 1.0448393821716309, + "learning_rate": 4.402511965855533e-07, + "loss": 0.6482, + "step": 16918 + }, + { + "epoch": 0.8695138246479597, + "grad_norm": 1.0728740692138672, + "learning_rate": 4.399097843432754e-07, + "loss": 0.6711, + "step": 16919 + }, + { + "epoch": 0.8695652173913043, + "grad_norm": 1.1431784629821777, + "learning_rate": 4.395684984422799e-07, + "loss": 0.6921, + "step": 16920 + }, + { + "epoch": 0.869616610134649, + "grad_norm": 1.1259459257125854, + "learning_rate": 4.3922733889202207e-07, + "loss": 0.6434, + "step": 16921 + }, + { + "epoch": 0.8696680028779936, + "grad_norm": 1.0877840518951416, + "learning_rate": 4.388863057019516e-07, + "loss": 0.6958, + "step": 16922 + }, + { + "epoch": 0.8697193956213383, + "grad_norm": 1.0904501676559448, + "learning_rate": 4.385453988815208e-07, + "loss": 0.7547, + "step": 16923 + }, + { + "epoch": 0.8697707883646829, + "grad_norm": 0.7371160387992859, + "learning_rate": 4.3820461844017117e-07, + "loss": 0.6405, + "step": 16924 + }, + { + "epoch": 0.8698221811080276, + "grad_norm": 1.105760097503662, + "learning_rate": 4.3786396438734834e-07, + "loss": 0.7315, + "step": 16925 + }, + { + "epoch": 0.8698735738513722, + "grad_norm": 1.1106667518615723, + "learning_rate": 4.3752343673248655e-07, + "loss": 0.7158, + "step": 16926 + }, + { + "epoch": 0.8699249665947169, + "grad_norm": 1.1510581970214844, + "learning_rate": 4.371830354850232e-07, + "loss": 0.7538, + "step": 16927 + }, + { + "epoch": 0.8699763593380615, + "grad_norm": 1.1151307821273804, + "learning_rate": 4.368427606543879e-07, + "loss": 0.7084, + "step": 16928 + }, + { + "epoch": 0.8700277520814061, + "grad_norm": 1.1802327632904053, + "learning_rate": 4.365026122500088e-07, + "loss": 0.6855, + "step": 16929 + }, + { + "epoch": 0.8700791448247508, + "grad_norm": 1.0912760496139526, + "learning_rate": 4.3616259028130936e-07, + "loss": 0.7153, + "step": 16930 + }, + { + "epoch": 0.8701305375680953, + "grad_norm": 1.0497270822525024, + "learning_rate": 4.35822694757711e-07, + "loss": 0.6775, + "step": 16931 + }, + { + "epoch": 0.87018193031144, + "grad_norm": 1.1995489597320557, + "learning_rate": 4.354829256886306e-07, + "loss": 0.7457, + "step": 16932 + }, + { + "epoch": 0.8702333230547846, + "grad_norm": 1.0532625913619995, + "learning_rate": 4.3514328308348173e-07, + "loss": 0.7321, + "step": 16933 + }, + { + "epoch": 0.8702847157981293, + "grad_norm": 0.7766775488853455, + "learning_rate": 4.348037669516747e-07, + "loss": 0.6436, + "step": 16934 + }, + { + "epoch": 0.8703361085414739, + "grad_norm": 1.0565733909606934, + "learning_rate": 4.344643773026147e-07, + "loss": 0.721, + "step": 16935 + }, + { + "epoch": 0.8703875012848186, + "grad_norm": 1.1334953308105469, + "learning_rate": 4.34125114145707e-07, + "loss": 0.7056, + "step": 16936 + }, + { + "epoch": 0.8704388940281632, + "grad_norm": 1.0884066820144653, + "learning_rate": 4.337859774903502e-07, + "loss": 0.6608, + "step": 16937 + }, + { + "epoch": 0.8704902867715079, + "grad_norm": 0.7810848355293274, + "learning_rate": 4.334469673459402e-07, + "loss": 0.6304, + "step": 16938 + }, + { + "epoch": 0.8705416795148525, + "grad_norm": 0.7114217877388, + "learning_rate": 4.3310808372186877e-07, + "loss": 0.6161, + "step": 16939 + }, + { + "epoch": 0.8705930722581972, + "grad_norm": 1.1316041946411133, + "learning_rate": 4.327693266275268e-07, + "loss": 0.7404, + "step": 16940 + }, + { + "epoch": 0.8706444650015418, + "grad_norm": 1.0426186323165894, + "learning_rate": 4.32430696072299e-07, + "loss": 0.6963, + "step": 16941 + }, + { + "epoch": 0.8706958577448864, + "grad_norm": 1.0765613317489624, + "learning_rate": 4.3209219206556687e-07, + "loss": 0.6334, + "step": 16942 + }, + { + "epoch": 0.8707472504882311, + "grad_norm": 1.1478737592697144, + "learning_rate": 4.317538146167094e-07, + "loss": 0.7115, + "step": 16943 + }, + { + "epoch": 0.8707986432315757, + "grad_norm": 1.4248632192611694, + "learning_rate": 4.3141556373510197e-07, + "loss": 0.7735, + "step": 16944 + }, + { + "epoch": 0.8708500359749204, + "grad_norm": 1.1269899606704712, + "learning_rate": 4.310774394301159e-07, + "loss": 0.7027, + "step": 16945 + }, + { + "epoch": 0.870901428718265, + "grad_norm": 1.0911827087402344, + "learning_rate": 4.3073944171111824e-07, + "loss": 0.6712, + "step": 16946 + }, + { + "epoch": 0.8709528214616096, + "grad_norm": 1.0040462017059326, + "learning_rate": 4.3040157058747645e-07, + "loss": 0.6913, + "step": 16947 + }, + { + "epoch": 0.8710042142049542, + "grad_norm": 1.0728493928909302, + "learning_rate": 4.3006382606854745e-07, + "loss": 0.7461, + "step": 16948 + }, + { + "epoch": 0.8710556069482989, + "grad_norm": 1.1414399147033691, + "learning_rate": 4.2972620816369217e-07, + "loss": 0.7077, + "step": 16949 + }, + { + "epoch": 0.8711069996916435, + "grad_norm": 1.069277286529541, + "learning_rate": 4.29388716882263e-07, + "loss": 0.7157, + "step": 16950 + }, + { + "epoch": 0.8711583924349882, + "grad_norm": 1.1577059030532837, + "learning_rate": 4.290513522336104e-07, + "loss": 0.7125, + "step": 16951 + }, + { + "epoch": 0.8712097851783328, + "grad_norm": 1.2562531232833862, + "learning_rate": 4.287141142270812e-07, + "loss": 0.6602, + "step": 16952 + }, + { + "epoch": 0.8712611779216775, + "grad_norm": 1.2184945344924927, + "learning_rate": 4.283770028720202e-07, + "loss": 0.7121, + "step": 16953 + }, + { + "epoch": 0.8713125706650221, + "grad_norm": 0.6707696914672852, + "learning_rate": 4.280400181777661e-07, + "loss": 0.6451, + "step": 16954 + }, + { + "epoch": 0.8713639634083667, + "grad_norm": 1.0605617761611938, + "learning_rate": 4.277031601536552e-07, + "loss": 0.7154, + "step": 16955 + }, + { + "epoch": 0.8714153561517114, + "grad_norm": 0.7428040504455566, + "learning_rate": 4.2736642880902124e-07, + "loss": 0.665, + "step": 16956 + }, + { + "epoch": 0.871466748895056, + "grad_norm": 1.0502722263336182, + "learning_rate": 4.2702982415319393e-07, + "loss": 0.709, + "step": 16957 + }, + { + "epoch": 0.8715181416384007, + "grad_norm": 1.0288259983062744, + "learning_rate": 4.266933461954986e-07, + "loss": 0.6846, + "step": 16958 + }, + { + "epoch": 0.8715695343817453, + "grad_norm": 0.7152932286262512, + "learning_rate": 4.2635699494525676e-07, + "loss": 0.6152, + "step": 16959 + }, + { + "epoch": 0.87162092712509, + "grad_norm": 0.6775751709938049, + "learning_rate": 4.2602077041179024e-07, + "loss": 0.6399, + "step": 16960 + }, + { + "epoch": 0.8716723198684346, + "grad_norm": 1.0512100458145142, + "learning_rate": 4.256846726044106e-07, + "loss": 0.6679, + "step": 16961 + }, + { + "epoch": 0.8717237126117792, + "grad_norm": 1.1368736028671265, + "learning_rate": 4.2534870153243256e-07, + "loss": 0.6939, + "step": 16962 + }, + { + "epoch": 0.8717751053551238, + "grad_norm": 1.0631284713745117, + "learning_rate": 4.250128572051632e-07, + "loss": 0.6812, + "step": 16963 + }, + { + "epoch": 0.8718264980984685, + "grad_norm": 1.1007957458496094, + "learning_rate": 4.2467713963190713e-07, + "loss": 0.701, + "step": 16964 + }, + { + "epoch": 0.8718778908418131, + "grad_norm": 0.7270546555519104, + "learning_rate": 4.243415488219671e-07, + "loss": 0.6183, + "step": 16965 + }, + { + "epoch": 0.8719292835851578, + "grad_norm": 1.0154081583023071, + "learning_rate": 4.2400608478464e-07, + "loss": 0.7474, + "step": 16966 + }, + { + "epoch": 0.8719806763285024, + "grad_norm": 1.1003563404083252, + "learning_rate": 4.2367074752922067e-07, + "loss": 0.7346, + "step": 16967 + }, + { + "epoch": 0.872032069071847, + "grad_norm": 0.7726216316223145, + "learning_rate": 4.233355370649983e-07, + "loss": 0.6565, + "step": 16968 + }, + { + "epoch": 0.8720834618151917, + "grad_norm": 1.0713075399398804, + "learning_rate": 4.230004534012633e-07, + "loss": 0.6785, + "step": 16969 + }, + { + "epoch": 0.8721348545585363, + "grad_norm": 0.7042410969734192, + "learning_rate": 4.226654965472954e-07, + "loss": 0.5947, + "step": 16970 + }, + { + "epoch": 0.872186247301881, + "grad_norm": 1.1804038286209106, + "learning_rate": 4.223306665123783e-07, + "loss": 0.6305, + "step": 16971 + }, + { + "epoch": 0.8722376400452256, + "grad_norm": 1.0762962102890015, + "learning_rate": 4.2199596330578685e-07, + "loss": 0.7012, + "step": 16972 + }, + { + "epoch": 0.8722890327885703, + "grad_norm": 1.124481201171875, + "learning_rate": 4.216613869367953e-07, + "loss": 0.739, + "step": 16973 + }, + { + "epoch": 0.8723404255319149, + "grad_norm": 0.6997160315513611, + "learning_rate": 4.213269374146733e-07, + "loss": 0.6694, + "step": 16974 + }, + { + "epoch": 0.8723918182752596, + "grad_norm": 0.9837872385978699, + "learning_rate": 4.2099261474868593e-07, + "loss": 0.635, + "step": 16975 + }, + { + "epoch": 0.8724432110186042, + "grad_norm": 1.1050201654434204, + "learning_rate": 4.206584189480989e-07, + "loss": 0.7337, + "step": 16976 + }, + { + "epoch": 0.8724946037619488, + "grad_norm": 1.2221908569335938, + "learning_rate": 4.203243500221671e-07, + "loss": 0.7028, + "step": 16977 + }, + { + "epoch": 0.8725459965052934, + "grad_norm": 1.0891327857971191, + "learning_rate": 4.1999040798014924e-07, + "loss": 0.6645, + "step": 16978 + }, + { + "epoch": 0.8725973892486381, + "grad_norm": 1.0673365592956543, + "learning_rate": 4.1965659283129625e-07, + "loss": 0.6755, + "step": 16979 + }, + { + "epoch": 0.8726487819919827, + "grad_norm": 1.0438331365585327, + "learning_rate": 4.193229045848574e-07, + "loss": 0.7106, + "step": 16980 + }, + { + "epoch": 0.8727001747353273, + "grad_norm": 1.093877911567688, + "learning_rate": 4.189893432500769e-07, + "loss": 0.6243, + "step": 16981 + }, + { + "epoch": 0.872751567478672, + "grad_norm": 1.0681830644607544, + "learning_rate": 4.1865590883619743e-07, + "loss": 0.7061, + "step": 16982 + }, + { + "epoch": 0.8728029602220166, + "grad_norm": 0.8856194615364075, + "learning_rate": 4.1832260135245715e-07, + "loss": 0.6435, + "step": 16983 + }, + { + "epoch": 0.8728543529653613, + "grad_norm": 1.1188421249389648, + "learning_rate": 4.1798942080808977e-07, + "loss": 0.6262, + "step": 16984 + }, + { + "epoch": 0.8729057457087059, + "grad_norm": 1.0492881536483765, + "learning_rate": 4.176563672123268e-07, + "loss": 0.7, + "step": 16985 + }, + { + "epoch": 0.8729571384520506, + "grad_norm": 1.0606663227081299, + "learning_rate": 4.1732344057439477e-07, + "loss": 0.6981, + "step": 16986 + }, + { + "epoch": 0.8730085311953952, + "grad_norm": 1.2632490396499634, + "learning_rate": 4.169906409035196e-07, + "loss": 0.7753, + "step": 16987 + }, + { + "epoch": 0.8730599239387399, + "grad_norm": 1.1579920053482056, + "learning_rate": 4.1665796820891947e-07, + "loss": 0.6391, + "step": 16988 + }, + { + "epoch": 0.8731113166820845, + "grad_norm": 1.0685985088348389, + "learning_rate": 4.163254224998148e-07, + "loss": 0.7087, + "step": 16989 + }, + { + "epoch": 0.8731627094254292, + "grad_norm": 1.0190953016281128, + "learning_rate": 4.159930037854154e-07, + "loss": 0.6902, + "step": 16990 + }, + { + "epoch": 0.8732141021687738, + "grad_norm": 0.7726448178291321, + "learning_rate": 4.156607120749334e-07, + "loss": 0.5996, + "step": 16991 + }, + { + "epoch": 0.8732654949121184, + "grad_norm": 1.1112573146820068, + "learning_rate": 4.1532854737757475e-07, + "loss": 0.69, + "step": 16992 + }, + { + "epoch": 0.873316887655463, + "grad_norm": 1.0939804315567017, + "learning_rate": 4.1499650970254214e-07, + "loss": 0.6989, + "step": 16993 + }, + { + "epoch": 0.8733682803988077, + "grad_norm": 1.156049132347107, + "learning_rate": 4.146645990590342e-07, + "loss": 0.7641, + "step": 16994 + }, + { + "epoch": 0.8734196731421523, + "grad_norm": 1.0336335897445679, + "learning_rate": 4.143328154562487e-07, + "loss": 0.638, + "step": 16995 + }, + { + "epoch": 0.873471065885497, + "grad_norm": 1.1202205419540405, + "learning_rate": 4.1400115890337657e-07, + "loss": 0.7361, + "step": 16996 + }, + { + "epoch": 0.8735224586288416, + "grad_norm": 1.1035032272338867, + "learning_rate": 4.1366962940960655e-07, + "loss": 0.6466, + "step": 16997 + }, + { + "epoch": 0.8735738513721862, + "grad_norm": 1.1604799032211304, + "learning_rate": 4.1333822698412575e-07, + "loss": 0.7788, + "step": 16998 + }, + { + "epoch": 0.8736252441155309, + "grad_norm": 1.0190480947494507, + "learning_rate": 4.130069516361135e-07, + "loss": 0.6711, + "step": 16999 + }, + { + "epoch": 0.8736766368588755, + "grad_norm": 0.7498182058334351, + "learning_rate": 4.1267580337474965e-07, + "loss": 0.6106, + "step": 17000 + }, + { + "epoch": 0.8737280296022202, + "grad_norm": 1.0064923763275146, + "learning_rate": 4.123447822092086e-07, + "loss": 0.6719, + "step": 17001 + }, + { + "epoch": 0.8737794223455648, + "grad_norm": 0.7679840922355652, + "learning_rate": 4.120138881486613e-07, + "loss": 0.6541, + "step": 17002 + }, + { + "epoch": 0.8738308150889095, + "grad_norm": 0.8344942927360535, + "learning_rate": 4.1168312120227537e-07, + "loss": 0.6718, + "step": 17003 + }, + { + "epoch": 0.8738822078322541, + "grad_norm": 1.1132385730743408, + "learning_rate": 4.113524813792158e-07, + "loss": 0.6883, + "step": 17004 + }, + { + "epoch": 0.8739336005755988, + "grad_norm": 1.1151834726333618, + "learning_rate": 4.110219686886424e-07, + "loss": 0.6155, + "step": 17005 + }, + { + "epoch": 0.8739849933189434, + "grad_norm": 1.0761266946792603, + "learning_rate": 4.1069158313971237e-07, + "loss": 0.708, + "step": 17006 + }, + { + "epoch": 0.874036386062288, + "grad_norm": 1.1007834672927856, + "learning_rate": 4.1036132474157995e-07, + "loss": 0.7577, + "step": 17007 + }, + { + "epoch": 0.8740877788056326, + "grad_norm": 1.1064989566802979, + "learning_rate": 4.1003119350339513e-07, + "loss": 0.6493, + "step": 17008 + }, + { + "epoch": 0.8741391715489772, + "grad_norm": 1.1839499473571777, + "learning_rate": 4.097011894343045e-07, + "loss": 0.6702, + "step": 17009 + }, + { + "epoch": 0.8741905642923219, + "grad_norm": 0.6931377649307251, + "learning_rate": 4.093713125434501e-07, + "loss": 0.6321, + "step": 17010 + }, + { + "epoch": 0.8742419570356665, + "grad_norm": 1.1268229484558105, + "learning_rate": 4.0904156283997353e-07, + "loss": 0.6781, + "step": 17011 + }, + { + "epoch": 0.8742933497790112, + "grad_norm": 1.1244171857833862, + "learning_rate": 4.087119403330075e-07, + "loss": 0.717, + "step": 17012 + }, + { + "epoch": 0.8743447425223558, + "grad_norm": 1.1495572328567505, + "learning_rate": 4.083824450316881e-07, + "loss": 0.7065, + "step": 17013 + }, + { + "epoch": 0.8743961352657005, + "grad_norm": 1.1147764921188354, + "learning_rate": 4.080530769451424e-07, + "loss": 0.7236, + "step": 17014 + }, + { + "epoch": 0.8744475280090451, + "grad_norm": 0.7313366532325745, + "learning_rate": 4.0772383608249475e-07, + "loss": 0.68, + "step": 17015 + }, + { + "epoch": 0.8744989207523898, + "grad_norm": 1.0453277826309204, + "learning_rate": 4.073947224528696e-07, + "loss": 0.6273, + "step": 17016 + }, + { + "epoch": 0.8745503134957344, + "grad_norm": 1.080526351928711, + "learning_rate": 4.070657360653835e-07, + "loss": 0.6397, + "step": 17017 + }, + { + "epoch": 0.8746017062390791, + "grad_norm": 0.7586373686790466, + "learning_rate": 4.0673687692915353e-07, + "loss": 0.6023, + "step": 17018 + }, + { + "epoch": 0.8746530989824237, + "grad_norm": 1.0428462028503418, + "learning_rate": 4.064081450532875e-07, + "loss": 0.7343, + "step": 17019 + }, + { + "epoch": 0.8747044917257684, + "grad_norm": 1.0922504663467407, + "learning_rate": 4.060795404468959e-07, + "loss": 0.6437, + "step": 17020 + }, + { + "epoch": 0.874755884469113, + "grad_norm": 1.1428388357162476, + "learning_rate": 4.0575106311908254e-07, + "loss": 0.6635, + "step": 17021 + }, + { + "epoch": 0.8748072772124575, + "grad_norm": 1.1208579540252686, + "learning_rate": 4.0542271307894785e-07, + "loss": 0.6959, + "step": 17022 + }, + { + "epoch": 0.8748586699558022, + "grad_norm": 1.1612337827682495, + "learning_rate": 4.05094490335588e-07, + "loss": 0.7422, + "step": 17023 + }, + { + "epoch": 0.8749100626991468, + "grad_norm": 0.7855663299560547, + "learning_rate": 4.04766394898099e-07, + "loss": 0.6204, + "step": 17024 + }, + { + "epoch": 0.8749614554424915, + "grad_norm": 1.0562723875045776, + "learning_rate": 4.0443842677556967e-07, + "loss": 0.6721, + "step": 17025 + }, + { + "epoch": 0.8750128481858361, + "grad_norm": 0.7155593037605286, + "learning_rate": 4.0411058597708553e-07, + "loss": 0.6404, + "step": 17026 + }, + { + "epoch": 0.8750642409291808, + "grad_norm": 1.0214122533798218, + "learning_rate": 4.0378287251173323e-07, + "loss": 0.6278, + "step": 17027 + }, + { + "epoch": 0.8751156336725254, + "grad_norm": 0.999601423740387, + "learning_rate": 4.034552863885877e-07, + "loss": 0.6726, + "step": 17028 + }, + { + "epoch": 0.8751670264158701, + "grad_norm": 0.7710046768188477, + "learning_rate": 4.0312782761672886e-07, + "loss": 0.6891, + "step": 17029 + }, + { + "epoch": 0.8752184191592147, + "grad_norm": 1.0801796913146973, + "learning_rate": 4.0280049620522733e-07, + "loss": 0.6855, + "step": 17030 + }, + { + "epoch": 0.8752698119025594, + "grad_norm": 0.7587679028511047, + "learning_rate": 4.02473292163153e-07, + "loss": 0.663, + "step": 17031 + }, + { + "epoch": 0.875321204645904, + "grad_norm": 1.1141160726547241, + "learning_rate": 4.0214621549956925e-07, + "loss": 0.7661, + "step": 17032 + }, + { + "epoch": 0.8753725973892487, + "grad_norm": 1.1480093002319336, + "learning_rate": 4.0181926622354096e-07, + "loss": 0.6645, + "step": 17033 + }, + { + "epoch": 0.8754239901325933, + "grad_norm": 1.0717296600341797, + "learning_rate": 4.014924443441254e-07, + "loss": 0.6562, + "step": 17034 + }, + { + "epoch": 0.875475382875938, + "grad_norm": 1.123856544494629, + "learning_rate": 4.0116574987037693e-07, + "loss": 0.7454, + "step": 17035 + }, + { + "epoch": 0.8755267756192826, + "grad_norm": 1.0959911346435547, + "learning_rate": 4.008391828113467e-07, + "loss": 0.7102, + "step": 17036 + }, + { + "epoch": 0.8755781683626273, + "grad_norm": 1.1420667171478271, + "learning_rate": 4.0051274317608357e-07, + "loss": 0.6927, + "step": 17037 + }, + { + "epoch": 0.8756295611059718, + "grad_norm": 1.1462041139602661, + "learning_rate": 4.00186430973632e-07, + "loss": 0.65, + "step": 17038 + }, + { + "epoch": 0.8756809538493164, + "grad_norm": 1.1185624599456787, + "learning_rate": 3.9986024621303077e-07, + "loss": 0.6293, + "step": 17039 + }, + { + "epoch": 0.8757323465926611, + "grad_norm": 1.210189938545227, + "learning_rate": 3.9953418890331996e-07, + "loss": 0.6595, + "step": 17040 + }, + { + "epoch": 0.8757837393360057, + "grad_norm": 1.047241449356079, + "learning_rate": 3.9920825905353065e-07, + "loss": 0.6026, + "step": 17041 + }, + { + "epoch": 0.8758351320793504, + "grad_norm": 0.7243605256080627, + "learning_rate": 3.9888245667269456e-07, + "loss": 0.6618, + "step": 17042 + }, + { + "epoch": 0.875886524822695, + "grad_norm": 0.8229589462280273, + "learning_rate": 3.9855678176983824e-07, + "loss": 0.6564, + "step": 17043 + }, + { + "epoch": 0.8759379175660397, + "grad_norm": 1.052469253540039, + "learning_rate": 3.9823123435398403e-07, + "loss": 0.6739, + "step": 17044 + }, + { + "epoch": 0.8759893103093843, + "grad_norm": 0.6812047362327576, + "learning_rate": 3.9790581443415135e-07, + "loss": 0.653, + "step": 17045 + }, + { + "epoch": 0.876040703052729, + "grad_norm": 1.0542981624603271, + "learning_rate": 3.975805220193579e-07, + "loss": 0.6129, + "step": 17046 + }, + { + "epoch": 0.8760920957960736, + "grad_norm": 1.0855557918548584, + "learning_rate": 3.9725535711861494e-07, + "loss": 0.6861, + "step": 17047 + }, + { + "epoch": 0.8761434885394183, + "grad_norm": 0.7398030757904053, + "learning_rate": 3.969303197409308e-07, + "loss": 0.646, + "step": 17048 + }, + { + "epoch": 0.8761948812827629, + "grad_norm": 1.0615806579589844, + "learning_rate": 3.966054098953126e-07, + "loss": 0.6651, + "step": 17049 + }, + { + "epoch": 0.8762462740261076, + "grad_norm": 1.1380072832107544, + "learning_rate": 3.96280627590761e-07, + "loss": 0.7179, + "step": 17050 + }, + { + "epoch": 0.8762976667694522, + "grad_norm": 0.731192946434021, + "learning_rate": 3.9595597283627553e-07, + "loss": 0.6292, + "step": 17051 + }, + { + "epoch": 0.8763490595127968, + "grad_norm": 1.2247822284698486, + "learning_rate": 3.956314456408489e-07, + "loss": 0.6566, + "step": 17052 + }, + { + "epoch": 0.8764004522561414, + "grad_norm": 1.4047714471817017, + "learning_rate": 3.953070460134756e-07, + "loss": 0.7413, + "step": 17053 + }, + { + "epoch": 0.876451844999486, + "grad_norm": 0.7880280613899231, + "learning_rate": 3.949827739631401e-07, + "loss": 0.6214, + "step": 17054 + }, + { + "epoch": 0.8765032377428307, + "grad_norm": 1.0637768507003784, + "learning_rate": 3.946586294988286e-07, + "loss": 0.7104, + "step": 17055 + }, + { + "epoch": 0.8765546304861753, + "grad_norm": 1.141261100769043, + "learning_rate": 3.943346126295217e-07, + "loss": 0.7002, + "step": 17056 + }, + { + "epoch": 0.87660602322952, + "grad_norm": 1.0902400016784668, + "learning_rate": 3.940107233641949e-07, + "loss": 0.7139, + "step": 17057 + }, + { + "epoch": 0.8766574159728646, + "grad_norm": 1.0471253395080566, + "learning_rate": 3.9368696171182443e-07, + "loss": 0.6483, + "step": 17058 + }, + { + "epoch": 0.8767088087162093, + "grad_norm": 1.1976720094680786, + "learning_rate": 3.933633276813792e-07, + "loss": 0.6956, + "step": 17059 + }, + { + "epoch": 0.8767602014595539, + "grad_norm": 0.7958855032920837, + "learning_rate": 3.930398212818254e-07, + "loss": 0.6231, + "step": 17060 + }, + { + "epoch": 0.8768115942028986, + "grad_norm": 0.8885971307754517, + "learning_rate": 3.927164425221258e-07, + "loss": 0.6294, + "step": 17061 + }, + { + "epoch": 0.8768629869462432, + "grad_norm": 1.0475728511810303, + "learning_rate": 3.923931914112422e-07, + "loss": 0.6967, + "step": 17062 + }, + { + "epoch": 0.8769143796895879, + "grad_norm": 1.1210602521896362, + "learning_rate": 3.9207006795812684e-07, + "loss": 0.711, + "step": 17063 + }, + { + "epoch": 0.8769657724329325, + "grad_norm": 1.095068335533142, + "learning_rate": 3.917470721717348e-07, + "loss": 0.6779, + "step": 17064 + }, + { + "epoch": 0.8770171651762771, + "grad_norm": 1.1076130867004395, + "learning_rate": 3.914242040610139e-07, + "loss": 0.6994, + "step": 17065 + }, + { + "epoch": 0.8770685579196218, + "grad_norm": 0.6902804970741272, + "learning_rate": 3.911014636349103e-07, + "loss": 0.6896, + "step": 17066 + }, + { + "epoch": 0.8771199506629664, + "grad_norm": 1.0327762365341187, + "learning_rate": 3.9077885090236524e-07, + "loss": 0.637, + "step": 17067 + }, + { + "epoch": 0.877171343406311, + "grad_norm": 1.0222896337509155, + "learning_rate": 3.904563658723165e-07, + "loss": 0.6979, + "step": 17068 + }, + { + "epoch": 0.8772227361496556, + "grad_norm": 1.09306800365448, + "learning_rate": 3.901340085537009e-07, + "loss": 0.6501, + "step": 17069 + }, + { + "epoch": 0.8772741288930003, + "grad_norm": 1.106996774673462, + "learning_rate": 3.8981177895544677e-07, + "loss": 0.7223, + "step": 17070 + }, + { + "epoch": 0.8773255216363449, + "grad_norm": 1.091869592666626, + "learning_rate": 3.894896770864837e-07, + "loss": 0.6865, + "step": 17071 + }, + { + "epoch": 0.8773769143796896, + "grad_norm": 1.0750385522842407, + "learning_rate": 3.8916770295573503e-07, + "loss": 0.63, + "step": 17072 + }, + { + "epoch": 0.8774283071230342, + "grad_norm": 1.1212286949157715, + "learning_rate": 3.8884585657212205e-07, + "loss": 0.6388, + "step": 17073 + }, + { + "epoch": 0.8774796998663789, + "grad_norm": 0.7513415217399597, + "learning_rate": 3.8852413794456033e-07, + "loss": 0.6426, + "step": 17074 + }, + { + "epoch": 0.8775310926097235, + "grad_norm": 1.1841797828674316, + "learning_rate": 3.88202547081965e-07, + "loss": 0.6688, + "step": 17075 + }, + { + "epoch": 0.8775824853530682, + "grad_norm": 1.114106297492981, + "learning_rate": 3.878810839932451e-07, + "loss": 0.7129, + "step": 17076 + }, + { + "epoch": 0.8776338780964128, + "grad_norm": 1.1150649785995483, + "learning_rate": 3.875597486873067e-07, + "loss": 0.6795, + "step": 17077 + }, + { + "epoch": 0.8776852708397574, + "grad_norm": 1.0625505447387695, + "learning_rate": 3.8723854117305505e-07, + "loss": 0.6587, + "step": 17078 + }, + { + "epoch": 0.8777366635831021, + "grad_norm": 1.066200852394104, + "learning_rate": 3.869174614593857e-07, + "loss": 0.638, + "step": 17079 + }, + { + "epoch": 0.8777880563264467, + "grad_norm": 1.0165842771530151, + "learning_rate": 3.8659650955519714e-07, + "loss": 0.6605, + "step": 17080 + }, + { + "epoch": 0.8778394490697914, + "grad_norm": 0.8071630001068115, + "learning_rate": 3.8627568546938056e-07, + "loss": 0.6659, + "step": 17081 + }, + { + "epoch": 0.877890841813136, + "grad_norm": 1.0759340524673462, + "learning_rate": 3.859549892108261e-07, + "loss": 0.7469, + "step": 17082 + }, + { + "epoch": 0.8779422345564806, + "grad_norm": 0.786113440990448, + "learning_rate": 3.856344207884166e-07, + "loss": 0.6353, + "step": 17083 + }, + { + "epoch": 0.8779936272998252, + "grad_norm": 1.109815239906311, + "learning_rate": 3.853139802110356e-07, + "loss": 0.6829, + "step": 17084 + }, + { + "epoch": 0.8780450200431699, + "grad_norm": 1.0783814191818237, + "learning_rate": 3.84993667487561e-07, + "loss": 0.6328, + "step": 17085 + }, + { + "epoch": 0.8780964127865145, + "grad_norm": 1.0265936851501465, + "learning_rate": 3.846734826268661e-07, + "loss": 0.7089, + "step": 17086 + }, + { + "epoch": 0.8781478055298592, + "grad_norm": 1.0987348556518555, + "learning_rate": 3.843534256378223e-07, + "loss": 0.6818, + "step": 17087 + }, + { + "epoch": 0.8781991982732038, + "grad_norm": 1.0983035564422607, + "learning_rate": 3.8403349652929856e-07, + "loss": 0.6716, + "step": 17088 + }, + { + "epoch": 0.8782505910165485, + "grad_norm": 1.1450324058532715, + "learning_rate": 3.8371369531015726e-07, + "loss": 0.7073, + "step": 17089 + }, + { + "epoch": 0.8783019837598931, + "grad_norm": 1.1366676092147827, + "learning_rate": 3.833940219892579e-07, + "loss": 0.7047, + "step": 17090 + }, + { + "epoch": 0.8783533765032377, + "grad_norm": 1.061814785003662, + "learning_rate": 3.8307447657546125e-07, + "loss": 0.7501, + "step": 17091 + }, + { + "epoch": 0.8784047692465824, + "grad_norm": 1.2162518501281738, + "learning_rate": 3.827550590776152e-07, + "loss": 0.7238, + "step": 17092 + }, + { + "epoch": 0.878456161989927, + "grad_norm": 1.1522462368011475, + "learning_rate": 3.8243576950457385e-07, + "loss": 0.7244, + "step": 17093 + }, + { + "epoch": 0.8785075547332717, + "grad_norm": 1.204479455947876, + "learning_rate": 3.821166078651817e-07, + "loss": 0.6709, + "step": 17094 + }, + { + "epoch": 0.8785589474766163, + "grad_norm": 1.0347235202789307, + "learning_rate": 3.817975741682811e-07, + "loss": 0.6616, + "step": 17095 + }, + { + "epoch": 0.878610340219961, + "grad_norm": 1.080794095993042, + "learning_rate": 3.8147866842271066e-07, + "loss": 0.7197, + "step": 17096 + }, + { + "epoch": 0.8786617329633056, + "grad_norm": 0.790163516998291, + "learning_rate": 3.811598906373082e-07, + "loss": 0.5996, + "step": 17097 + }, + { + "epoch": 0.8787131257066502, + "grad_norm": 1.1208338737487793, + "learning_rate": 3.808412408209039e-07, + "loss": 0.6863, + "step": 17098 + }, + { + "epoch": 0.8787645184499948, + "grad_norm": 0.8032639622688293, + "learning_rate": 3.8052271898232574e-07, + "loss": 0.6662, + "step": 17099 + }, + { + "epoch": 0.8788159111933395, + "grad_norm": 1.1396254301071167, + "learning_rate": 3.802043251304005e-07, + "loss": 0.6935, + "step": 17100 + }, + { + "epoch": 0.8788673039366841, + "grad_norm": 1.0788861513137817, + "learning_rate": 3.798860592739489e-07, + "loss": 0.7188, + "step": 17101 + }, + { + "epoch": 0.8789186966800288, + "grad_norm": 0.6574702262878418, + "learning_rate": 3.7956792142178845e-07, + "loss": 0.6552, + "step": 17102 + }, + { + "epoch": 0.8789700894233734, + "grad_norm": 1.0810402631759644, + "learning_rate": 3.792499115827325e-07, + "loss": 0.7173, + "step": 17103 + }, + { + "epoch": 0.879021482166718, + "grad_norm": 1.0911890268325806, + "learning_rate": 3.789320297655941e-07, + "loss": 0.6813, + "step": 17104 + }, + { + "epoch": 0.8790728749100627, + "grad_norm": 1.0754843950271606, + "learning_rate": 3.786142759791783e-07, + "loss": 0.683, + "step": 17105 + }, + { + "epoch": 0.8791242676534073, + "grad_norm": 1.038679599761963, + "learning_rate": 3.782966502322899e-07, + "loss": 0.6868, + "step": 17106 + }, + { + "epoch": 0.879175660396752, + "grad_norm": 1.1028778553009033, + "learning_rate": 3.779791525337284e-07, + "loss": 0.7308, + "step": 17107 + }, + { + "epoch": 0.8792270531400966, + "grad_norm": 1.2103615999221802, + "learning_rate": 3.7766178289229014e-07, + "loss": 0.7268, + "step": 17108 + }, + { + "epoch": 0.8792784458834413, + "grad_norm": 1.0694783926010132, + "learning_rate": 3.773445413167698e-07, + "loss": 0.711, + "step": 17109 + }, + { + "epoch": 0.8793298386267859, + "grad_norm": 1.1376357078552246, + "learning_rate": 3.770274278159553e-07, + "loss": 0.6296, + "step": 17110 + }, + { + "epoch": 0.8793812313701306, + "grad_norm": 1.0509940385818481, + "learning_rate": 3.767104423986329e-07, + "loss": 0.72, + "step": 17111 + }, + { + "epoch": 0.8794326241134752, + "grad_norm": 1.1283636093139648, + "learning_rate": 3.763935850735839e-07, + "loss": 0.7131, + "step": 17112 + }, + { + "epoch": 0.8794840168568199, + "grad_norm": 1.086683988571167, + "learning_rate": 3.760768558495892e-07, + "loss": 0.7486, + "step": 17113 + }, + { + "epoch": 0.8795354096001644, + "grad_norm": 0.722305178642273, + "learning_rate": 3.757602547354233e-07, + "loss": 0.6162, + "step": 17114 + }, + { + "epoch": 0.879586802343509, + "grad_norm": 0.7169036269187927, + "learning_rate": 3.7544378173985706e-07, + "loss": 0.6856, + "step": 17115 + }, + { + "epoch": 0.8796381950868537, + "grad_norm": 1.0900222063064575, + "learning_rate": 3.7512743687165896e-07, + "loss": 0.6827, + "step": 17116 + }, + { + "epoch": 0.8796895878301983, + "grad_norm": 1.1443791389465332, + "learning_rate": 3.7481122013959424e-07, + "loss": 0.6717, + "step": 17117 + }, + { + "epoch": 0.879740980573543, + "grad_norm": 0.9293333888053894, + "learning_rate": 3.7449513155242366e-07, + "loss": 0.6604, + "step": 17118 + }, + { + "epoch": 0.8797923733168876, + "grad_norm": 1.1026860475540161, + "learning_rate": 3.741791711189041e-07, + "loss": 0.695, + "step": 17119 + }, + { + "epoch": 0.8798437660602323, + "grad_norm": 1.3580210208892822, + "learning_rate": 3.738633388477919e-07, + "loss": 0.6824, + "step": 17120 + }, + { + "epoch": 0.8798951588035769, + "grad_norm": 1.1297781467437744, + "learning_rate": 3.735476347478334e-07, + "loss": 0.7158, + "step": 17121 + }, + { + "epoch": 0.8799465515469216, + "grad_norm": 1.098528504371643, + "learning_rate": 3.732320588277788e-07, + "loss": 0.6784, + "step": 17122 + }, + { + "epoch": 0.8799979442902662, + "grad_norm": 1.134292721748352, + "learning_rate": 3.7291661109637003e-07, + "loss": 0.668, + "step": 17123 + }, + { + "epoch": 0.8800493370336109, + "grad_norm": 1.2878371477127075, + "learning_rate": 3.7260129156234783e-07, + "loss": 0.7027, + "step": 17124 + }, + { + "epoch": 0.8801007297769555, + "grad_norm": 0.738251268863678, + "learning_rate": 3.7228610023444633e-07, + "loss": 0.6305, + "step": 17125 + }, + { + "epoch": 0.8801521225203002, + "grad_norm": 1.0267661809921265, + "learning_rate": 3.719710371214003e-07, + "loss": 0.6491, + "step": 17126 + }, + { + "epoch": 0.8802035152636448, + "grad_norm": 1.0673110485076904, + "learning_rate": 3.7165610223193814e-07, + "loss": 0.6799, + "step": 17127 + }, + { + "epoch": 0.8802549080069895, + "grad_norm": 1.0275362730026245, + "learning_rate": 3.7134129557478473e-07, + "loss": 0.6619, + "step": 17128 + }, + { + "epoch": 0.880306300750334, + "grad_norm": 1.0604251623153687, + "learning_rate": 3.7102661715866353e-07, + "loss": 0.6816, + "step": 17129 + }, + { + "epoch": 0.8803576934936787, + "grad_norm": 1.0553553104400635, + "learning_rate": 3.7071206699229147e-07, + "loss": 0.6749, + "step": 17130 + }, + { + "epoch": 0.8804090862370233, + "grad_norm": 0.7612324357032776, + "learning_rate": 3.7039764508438493e-07, + "loss": 0.6552, + "step": 17131 + }, + { + "epoch": 0.8804604789803679, + "grad_norm": 1.1975078582763672, + "learning_rate": 3.7008335144365306e-07, + "loss": 0.7274, + "step": 17132 + }, + { + "epoch": 0.8805118717237126, + "grad_norm": 1.1076902151107788, + "learning_rate": 3.6976918607880664e-07, + "loss": 0.7138, + "step": 17133 + }, + { + "epoch": 0.8805632644670572, + "grad_norm": 1.0978312492370605, + "learning_rate": 3.694551489985471e-07, + "loss": 0.6985, + "step": 17134 + }, + { + "epoch": 0.8806146572104019, + "grad_norm": 1.1310263872146606, + "learning_rate": 3.6914124021157685e-07, + "loss": 0.6929, + "step": 17135 + }, + { + "epoch": 0.8806660499537465, + "grad_norm": 0.6573857665061951, + "learning_rate": 3.6882745972659227e-07, + "loss": 0.6299, + "step": 17136 + }, + { + "epoch": 0.8807174426970912, + "grad_norm": 0.7280827760696411, + "learning_rate": 3.685138075522859e-07, + "loss": 0.6414, + "step": 17137 + }, + { + "epoch": 0.8807688354404358, + "grad_norm": 1.081909418106079, + "learning_rate": 3.6820028369735026e-07, + "loss": 0.7212, + "step": 17138 + }, + { + "epoch": 0.8808202281837805, + "grad_norm": 1.086043357849121, + "learning_rate": 3.6788688817047e-07, + "loss": 0.7434, + "step": 17139 + }, + { + "epoch": 0.8808716209271251, + "grad_norm": 1.0470410585403442, + "learning_rate": 3.675736209803288e-07, + "loss": 0.6161, + "step": 17140 + }, + { + "epoch": 0.8809230136704698, + "grad_norm": 1.09235417842865, + "learning_rate": 3.672604821356052e-07, + "loss": 0.6776, + "step": 17141 + }, + { + "epoch": 0.8809744064138144, + "grad_norm": 1.1200886964797974, + "learning_rate": 3.669474716449756e-07, + "loss": 0.7197, + "step": 17142 + }, + { + "epoch": 0.8810257991571591, + "grad_norm": 1.022282361984253, + "learning_rate": 3.666345895171125e-07, + "loss": 0.6691, + "step": 17143 + }, + { + "epoch": 0.8810771919005036, + "grad_norm": 0.8025686740875244, + "learning_rate": 3.66321835760684e-07, + "loss": 0.6521, + "step": 17144 + }, + { + "epoch": 0.8811285846438482, + "grad_norm": 1.1165648698806763, + "learning_rate": 3.6600921038435435e-07, + "loss": 0.6811, + "step": 17145 + }, + { + "epoch": 0.8811799773871929, + "grad_norm": 1.0491769313812256, + "learning_rate": 3.656967133967881e-07, + "loss": 0.668, + "step": 17146 + }, + { + "epoch": 0.8812313701305375, + "grad_norm": 0.7947109937667847, + "learning_rate": 3.6538434480663963e-07, + "loss": 0.6447, + "step": 17147 + }, + { + "epoch": 0.8812827628738822, + "grad_norm": 1.084948182106018, + "learning_rate": 3.650721046225658e-07, + "loss": 0.7042, + "step": 17148 + }, + { + "epoch": 0.8813341556172268, + "grad_norm": 1.0844550132751465, + "learning_rate": 3.647599928532164e-07, + "loss": 0.7248, + "step": 17149 + }, + { + "epoch": 0.8813855483605715, + "grad_norm": 0.7221179008483887, + "learning_rate": 3.6444800950723836e-07, + "loss": 0.6842, + "step": 17150 + }, + { + "epoch": 0.8814369411039161, + "grad_norm": 1.0798547267913818, + "learning_rate": 3.6413615459327755e-07, + "loss": 0.687, + "step": 17151 + }, + { + "epoch": 0.8814883338472608, + "grad_norm": 1.0676544904708862, + "learning_rate": 3.638244281199721e-07, + "loss": 0.7005, + "step": 17152 + }, + { + "epoch": 0.8815397265906054, + "grad_norm": 1.1150600910186768, + "learning_rate": 3.6351283009596004e-07, + "loss": 0.6879, + "step": 17153 + }, + { + "epoch": 0.8815911193339501, + "grad_norm": 1.0744106769561768, + "learning_rate": 3.632013605298723e-07, + "loss": 0.6762, + "step": 17154 + }, + { + "epoch": 0.8816425120772947, + "grad_norm": 0.7658979296684265, + "learning_rate": 3.62890019430342e-07, + "loss": 0.6196, + "step": 17155 + }, + { + "epoch": 0.8816939048206394, + "grad_norm": 1.108672022819519, + "learning_rate": 3.6257880680599157e-07, + "loss": 0.6874, + "step": 17156 + }, + { + "epoch": 0.881745297563984, + "grad_norm": 0.760303258895874, + "learning_rate": 3.622677226654453e-07, + "loss": 0.6609, + "step": 17157 + }, + { + "epoch": 0.8817966903073287, + "grad_norm": 1.0185706615447998, + "learning_rate": 3.619567670173213e-07, + "loss": 0.7192, + "step": 17158 + }, + { + "epoch": 0.8818480830506732, + "grad_norm": 1.014692783355713, + "learning_rate": 3.6164593987023545e-07, + "loss": 0.6503, + "step": 17159 + }, + { + "epoch": 0.8818994757940178, + "grad_norm": 1.0535719394683838, + "learning_rate": 3.613352412327997e-07, + "loss": 0.6994, + "step": 17160 + }, + { + "epoch": 0.8819508685373625, + "grad_norm": 1.0766398906707764, + "learning_rate": 3.610246711136206e-07, + "loss": 0.7083, + "step": 17161 + }, + { + "epoch": 0.8820022612807071, + "grad_norm": 1.1554217338562012, + "learning_rate": 3.6071422952130563e-07, + "loss": 0.6962, + "step": 17162 + }, + { + "epoch": 0.8820536540240518, + "grad_norm": 1.1262308359146118, + "learning_rate": 3.6040391646445293e-07, + "loss": 0.7428, + "step": 17163 + }, + { + "epoch": 0.8821050467673964, + "grad_norm": 1.0671658515930176, + "learning_rate": 3.6009373195166176e-07, + "loss": 0.7132, + "step": 17164 + }, + { + "epoch": 0.8821564395107411, + "grad_norm": 1.0519405603408813, + "learning_rate": 3.5978367599152576e-07, + "loss": 0.6301, + "step": 17165 + }, + { + "epoch": 0.8822078322540857, + "grad_norm": 1.252974033355713, + "learning_rate": 3.594737485926347e-07, + "loss": 0.7072, + "step": 17166 + }, + { + "epoch": 0.8822592249974304, + "grad_norm": 1.0198771953582764, + "learning_rate": 3.5916394976357513e-07, + "loss": 0.6498, + "step": 17167 + }, + { + "epoch": 0.882310617740775, + "grad_norm": 0.8625896573066711, + "learning_rate": 3.5885427951293125e-07, + "loss": 0.6274, + "step": 17168 + }, + { + "epoch": 0.8823620104841197, + "grad_norm": 1.2059311866760254, + "learning_rate": 3.585447378492829e-07, + "loss": 0.688, + "step": 17169 + }, + { + "epoch": 0.8824134032274643, + "grad_norm": 0.7967343926429749, + "learning_rate": 3.582353247812048e-07, + "loss": 0.6173, + "step": 17170 + }, + { + "epoch": 0.882464795970809, + "grad_norm": 1.1352158784866333, + "learning_rate": 3.579260403172724e-07, + "loss": 0.7154, + "step": 17171 + }, + { + "epoch": 0.8825161887141536, + "grad_norm": 1.1393189430236816, + "learning_rate": 3.5761688446605047e-07, + "loss": 0.6907, + "step": 17172 + }, + { + "epoch": 0.8825675814574983, + "grad_norm": 1.172644853591919, + "learning_rate": 3.573078572361077e-07, + "loss": 0.6973, + "step": 17173 + }, + { + "epoch": 0.8826189742008428, + "grad_norm": 1.0227291584014893, + "learning_rate": 3.5699895863600455e-07, + "loss": 0.6784, + "step": 17174 + }, + { + "epoch": 0.8826703669441874, + "grad_norm": 1.035005807876587, + "learning_rate": 3.5669018867430074e-07, + "loss": 0.6739, + "step": 17175 + }, + { + "epoch": 0.8827217596875321, + "grad_norm": 0.857428252696991, + "learning_rate": 3.563815473595489e-07, + "loss": 0.592, + "step": 17176 + }, + { + "epoch": 0.8827731524308767, + "grad_norm": 1.0636605024337769, + "learning_rate": 3.560730347003016e-07, + "loss": 0.6919, + "step": 17177 + }, + { + "epoch": 0.8828245451742214, + "grad_norm": 0.8680917024612427, + "learning_rate": 3.557646507051066e-07, + "loss": 0.6863, + "step": 17178 + }, + { + "epoch": 0.882875937917566, + "grad_norm": 1.1427152156829834, + "learning_rate": 3.554563953825063e-07, + "loss": 0.7739, + "step": 17179 + }, + { + "epoch": 0.8829273306609107, + "grad_norm": 1.0131632089614868, + "learning_rate": 3.5514826874104347e-07, + "loss": 0.6803, + "step": 17180 + }, + { + "epoch": 0.8829787234042553, + "grad_norm": 1.1011669635772705, + "learning_rate": 3.5484027078925343e-07, + "loss": 0.6962, + "step": 17181 + }, + { + "epoch": 0.8830301161476, + "grad_norm": 0.9995465874671936, + "learning_rate": 3.545324015356705e-07, + "loss": 0.6699, + "step": 17182 + }, + { + "epoch": 0.8830815088909446, + "grad_norm": 0.7657961845397949, + "learning_rate": 3.5422466098882337e-07, + "loss": 0.612, + "step": 17183 + }, + { + "epoch": 0.8831329016342893, + "grad_norm": 1.0281312465667725, + "learning_rate": 3.5391704915724026e-07, + "loss": 0.7438, + "step": 17184 + }, + { + "epoch": 0.8831842943776339, + "grad_norm": 0.9848723411560059, + "learning_rate": 3.5360956604944096e-07, + "loss": 0.6659, + "step": 17185 + }, + { + "epoch": 0.8832356871209786, + "grad_norm": 0.8695343732833862, + "learning_rate": 3.533022116739465e-07, + "loss": 0.6334, + "step": 17186 + }, + { + "epoch": 0.8832870798643232, + "grad_norm": 0.7787792682647705, + "learning_rate": 3.529949860392723e-07, + "loss": 0.6347, + "step": 17187 + }, + { + "epoch": 0.8833384726076678, + "grad_norm": 1.1888320446014404, + "learning_rate": 3.5268788915392927e-07, + "loss": 0.6613, + "step": 17188 + }, + { + "epoch": 0.8833898653510124, + "grad_norm": 1.0413286685943604, + "learning_rate": 3.523809210264273e-07, + "loss": 0.6318, + "step": 17189 + }, + { + "epoch": 0.883441258094357, + "grad_norm": 1.09832763671875, + "learning_rate": 3.5207408166527014e-07, + "loss": 0.6938, + "step": 17190 + }, + { + "epoch": 0.8834926508377017, + "grad_norm": 1.105941891670227, + "learning_rate": 3.5176737107895985e-07, + "loss": 0.7184, + "step": 17191 + }, + { + "epoch": 0.8835440435810463, + "grad_norm": 1.061026930809021, + "learning_rate": 3.5146078927599245e-07, + "loss": 0.7236, + "step": 17192 + }, + { + "epoch": 0.883595436324391, + "grad_norm": 1.2754851579666138, + "learning_rate": 3.5115433626486394e-07, + "loss": 0.7179, + "step": 17193 + }, + { + "epoch": 0.8836468290677356, + "grad_norm": 1.118636965751648, + "learning_rate": 3.5084801205406414e-07, + "loss": 0.697, + "step": 17194 + }, + { + "epoch": 0.8836982218110803, + "grad_norm": 1.045110821723938, + "learning_rate": 3.5054181665207967e-07, + "loss": 0.6629, + "step": 17195 + }, + { + "epoch": 0.8837496145544249, + "grad_norm": 1.0930958986282349, + "learning_rate": 3.502357500673942e-07, + "loss": 0.7012, + "step": 17196 + }, + { + "epoch": 0.8838010072977696, + "grad_norm": 1.1034013032913208, + "learning_rate": 3.499298123084888e-07, + "loss": 0.6638, + "step": 17197 + }, + { + "epoch": 0.8838524000411142, + "grad_norm": 1.1060434579849243, + "learning_rate": 3.496240033838366e-07, + "loss": 0.6397, + "step": 17198 + }, + { + "epoch": 0.8839037927844589, + "grad_norm": 1.0753295421600342, + "learning_rate": 3.4931832330191374e-07, + "loss": 0.6864, + "step": 17199 + }, + { + "epoch": 0.8839551855278035, + "grad_norm": 0.7311631441116333, + "learning_rate": 3.490127720711878e-07, + "loss": 0.6148, + "step": 17200 + }, + { + "epoch": 0.8840065782711481, + "grad_norm": 0.7730428576469421, + "learning_rate": 3.4870734970012363e-07, + "loss": 0.6601, + "step": 17201 + }, + { + "epoch": 0.8840579710144928, + "grad_norm": 1.1114429235458374, + "learning_rate": 3.484020561971846e-07, + "loss": 0.7105, + "step": 17202 + }, + { + "epoch": 0.8841093637578374, + "grad_norm": 1.1501654386520386, + "learning_rate": 3.4809689157082884e-07, + "loss": 0.6854, + "step": 17203 + }, + { + "epoch": 0.8841607565011821, + "grad_norm": 1.0526442527770996, + "learning_rate": 3.4779185582951125e-07, + "loss": 0.6718, + "step": 17204 + }, + { + "epoch": 0.8842121492445266, + "grad_norm": 1.0376602411270142, + "learning_rate": 3.474869489816812e-07, + "loss": 0.6595, + "step": 17205 + }, + { + "epoch": 0.8842635419878713, + "grad_norm": 0.7162024974822998, + "learning_rate": 3.471821710357892e-07, + "loss": 0.6387, + "step": 17206 + }, + { + "epoch": 0.8843149347312159, + "grad_norm": 1.1031944751739502, + "learning_rate": 3.4687752200027847e-07, + "loss": 0.6319, + "step": 17207 + }, + { + "epoch": 0.8843663274745606, + "grad_norm": 1.0819947719573975, + "learning_rate": 3.4657300188358887e-07, + "loss": 0.7434, + "step": 17208 + }, + { + "epoch": 0.8844177202179052, + "grad_norm": 1.099614143371582, + "learning_rate": 3.462686106941576e-07, + "loss": 0.7059, + "step": 17209 + }, + { + "epoch": 0.8844691129612499, + "grad_norm": 0.7581198811531067, + "learning_rate": 3.459643484404185e-07, + "loss": 0.6451, + "step": 17210 + }, + { + "epoch": 0.8845205057045945, + "grad_norm": 1.0846989154815674, + "learning_rate": 3.456602151308014e-07, + "loss": 0.7023, + "step": 17211 + }, + { + "epoch": 0.8845718984479392, + "grad_norm": 0.7723881006240845, + "learning_rate": 3.453562107737318e-07, + "loss": 0.6233, + "step": 17212 + }, + { + "epoch": 0.8846232911912838, + "grad_norm": 1.0163217782974243, + "learning_rate": 3.450523353776347e-07, + "loss": 0.6981, + "step": 17213 + }, + { + "epoch": 0.8846746839346284, + "grad_norm": 1.166298270225525, + "learning_rate": 3.4474858895092554e-07, + "loss": 0.7331, + "step": 17214 + }, + { + "epoch": 0.8847260766779731, + "grad_norm": 1.1289334297180176, + "learning_rate": 3.4444497150202315e-07, + "loss": 0.6679, + "step": 17215 + }, + { + "epoch": 0.8847774694213177, + "grad_norm": 0.7483891248703003, + "learning_rate": 3.4414148303933805e-07, + "loss": 0.6432, + "step": 17216 + }, + { + "epoch": 0.8848288621646624, + "grad_norm": 0.6482483744621277, + "learning_rate": 3.438381235712784e-07, + "loss": 0.6072, + "step": 17217 + }, + { + "epoch": 0.884880254908007, + "grad_norm": 0.7421467304229736, + "learning_rate": 3.4353489310624935e-07, + "loss": 0.6693, + "step": 17218 + }, + { + "epoch": 0.8849316476513517, + "grad_norm": 1.106765627861023, + "learning_rate": 3.4323179165265283e-07, + "loss": 0.6869, + "step": 17219 + }, + { + "epoch": 0.8849830403946962, + "grad_norm": 1.1078455448150635, + "learning_rate": 3.429288192188862e-07, + "loss": 0.7216, + "step": 17220 + }, + { + "epoch": 0.8850344331380409, + "grad_norm": 1.230933666229248, + "learning_rate": 3.426259758133427e-07, + "loss": 0.6991, + "step": 17221 + }, + { + "epoch": 0.8850858258813855, + "grad_norm": 1.0159106254577637, + "learning_rate": 3.4232326144441387e-07, + "loss": 0.6507, + "step": 17222 + }, + { + "epoch": 0.8851372186247302, + "grad_norm": 1.1438149213790894, + "learning_rate": 3.420206761204864e-07, + "loss": 0.7181, + "step": 17223 + }, + { + "epoch": 0.8851886113680748, + "grad_norm": 1.0809037685394287, + "learning_rate": 3.417182198499441e-07, + "loss": 0.6712, + "step": 17224 + }, + { + "epoch": 0.8852400041114195, + "grad_norm": 1.1030441522598267, + "learning_rate": 3.4141589264116537e-07, + "loss": 0.7274, + "step": 17225 + }, + { + "epoch": 0.8852913968547641, + "grad_norm": 1.121673345565796, + "learning_rate": 3.4111369450252895e-07, + "loss": 0.704, + "step": 17226 + }, + { + "epoch": 0.8853427895981087, + "grad_norm": 1.067434549331665, + "learning_rate": 3.408116254424043e-07, + "loss": 0.6818, + "step": 17227 + }, + { + "epoch": 0.8853941823414534, + "grad_norm": 0.679084062576294, + "learning_rate": 3.4050968546916307e-07, + "loss": 0.5973, + "step": 17228 + }, + { + "epoch": 0.885445575084798, + "grad_norm": 1.0908101797103882, + "learning_rate": 3.4020787459116967e-07, + "loss": 0.7169, + "step": 17229 + }, + { + "epoch": 0.8854969678281427, + "grad_norm": 1.1170778274536133, + "learning_rate": 3.399061928167857e-07, + "loss": 0.6537, + "step": 17230 + }, + { + "epoch": 0.8855483605714873, + "grad_norm": 1.1773847341537476, + "learning_rate": 3.396046401543712e-07, + "loss": 0.704, + "step": 17231 + }, + { + "epoch": 0.885599753314832, + "grad_norm": 0.824786901473999, + "learning_rate": 3.393032166122789e-07, + "loss": 0.6336, + "step": 17232 + }, + { + "epoch": 0.8856511460581766, + "grad_norm": 1.1812467575073242, + "learning_rate": 3.390019221988616e-07, + "loss": 0.6963, + "step": 17233 + }, + { + "epoch": 0.8857025388015213, + "grad_norm": 1.030355453491211, + "learning_rate": 3.387007569224654e-07, + "loss": 0.6843, + "step": 17234 + }, + { + "epoch": 0.8857539315448658, + "grad_norm": 1.1622542142868042, + "learning_rate": 3.3839972079143635e-07, + "loss": 0.6844, + "step": 17235 + }, + { + "epoch": 0.8858053242882105, + "grad_norm": 1.0862395763397217, + "learning_rate": 3.3809881381411224e-07, + "loss": 0.6833, + "step": 17236 + }, + { + "epoch": 0.8858567170315551, + "grad_norm": 1.1335065364837646, + "learning_rate": 3.3779803599883257e-07, + "loss": 0.6996, + "step": 17237 + }, + { + "epoch": 0.8859081097748998, + "grad_norm": 1.078650712966919, + "learning_rate": 3.374973873539289e-07, + "loss": 0.7087, + "step": 17238 + }, + { + "epoch": 0.8859595025182444, + "grad_norm": 1.1388357877731323, + "learning_rate": 3.3719686788773187e-07, + "loss": 0.6844, + "step": 17239 + }, + { + "epoch": 0.886010895261589, + "grad_norm": 1.078320860862732, + "learning_rate": 3.368964776085676e-07, + "loss": 0.6191, + "step": 17240 + }, + { + "epoch": 0.8860622880049337, + "grad_norm": 1.1209315061569214, + "learning_rate": 3.3659621652475885e-07, + "loss": 0.6476, + "step": 17241 + }, + { + "epoch": 0.8861136807482783, + "grad_norm": 1.1069517135620117, + "learning_rate": 3.3629608464462336e-07, + "loss": 0.6787, + "step": 17242 + }, + { + "epoch": 0.886165073491623, + "grad_norm": 1.0445693731307983, + "learning_rate": 3.359960819764774e-07, + "loss": 0.7287, + "step": 17243 + }, + { + "epoch": 0.8862164662349676, + "grad_norm": 1.06179678440094, + "learning_rate": 3.3569620852863303e-07, + "loss": 0.6473, + "step": 17244 + }, + { + "epoch": 0.8862678589783123, + "grad_norm": 1.0277901887893677, + "learning_rate": 3.353964643093988e-07, + "loss": 0.6769, + "step": 17245 + }, + { + "epoch": 0.8863192517216569, + "grad_norm": 1.227080225944519, + "learning_rate": 3.350968493270784e-07, + "loss": 0.6961, + "step": 17246 + }, + { + "epoch": 0.8863706444650016, + "grad_norm": 0.7237336039543152, + "learning_rate": 3.347973635899726e-07, + "loss": 0.645, + "step": 17247 + }, + { + "epoch": 0.8864220372083462, + "grad_norm": 1.1391007900238037, + "learning_rate": 3.344980071063819e-07, + "loss": 0.6654, + "step": 17248 + }, + { + "epoch": 0.8864734299516909, + "grad_norm": 1.0825445652008057, + "learning_rate": 3.341987798845958e-07, + "loss": 0.7326, + "step": 17249 + }, + { + "epoch": 0.8865248226950354, + "grad_norm": 1.0735896825790405, + "learning_rate": 3.338996819329077e-07, + "loss": 0.6699, + "step": 17250 + }, + { + "epoch": 0.88657621543838, + "grad_norm": 1.126123070716858, + "learning_rate": 3.3360071325960307e-07, + "loss": 0.6431, + "step": 17251 + }, + { + "epoch": 0.8866276081817247, + "grad_norm": 0.6752283573150635, + "learning_rate": 3.333018738729665e-07, + "loss": 0.6071, + "step": 17252 + }, + { + "epoch": 0.8866790009250693, + "grad_norm": 1.0911318063735962, + "learning_rate": 3.330031637812764e-07, + "loss": 0.6596, + "step": 17253 + }, + { + "epoch": 0.886730393668414, + "grad_norm": 1.1691069602966309, + "learning_rate": 3.327045829928083e-07, + "loss": 0.7034, + "step": 17254 + }, + { + "epoch": 0.8867817864117586, + "grad_norm": 1.122514247894287, + "learning_rate": 3.3240613151583665e-07, + "loss": 0.7618, + "step": 17255 + }, + { + "epoch": 0.8868331791551033, + "grad_norm": 1.0930514335632324, + "learning_rate": 3.3210780935862773e-07, + "loss": 0.7227, + "step": 17256 + }, + { + "epoch": 0.8868845718984479, + "grad_norm": 0.7268034815788269, + "learning_rate": 3.318096165294493e-07, + "loss": 0.5955, + "step": 17257 + }, + { + "epoch": 0.8869359646417926, + "grad_norm": 1.1669716835021973, + "learning_rate": 3.3151155303656144e-07, + "loss": 0.706, + "step": 17258 + }, + { + "epoch": 0.8869873573851372, + "grad_norm": 1.1718838214874268, + "learning_rate": 3.312136188882231e-07, + "loss": 0.6574, + "step": 17259 + }, + { + "epoch": 0.8870387501284819, + "grad_norm": 0.831355094909668, + "learning_rate": 3.309158140926877e-07, + "loss": 0.6441, + "step": 17260 + }, + { + "epoch": 0.8870901428718265, + "grad_norm": 1.1457546949386597, + "learning_rate": 3.3061813865820744e-07, + "loss": 0.6401, + "step": 17261 + }, + { + "epoch": 0.8871415356151712, + "grad_norm": 0.7980967164039612, + "learning_rate": 3.303205925930292e-07, + "loss": 0.656, + "step": 17262 + }, + { + "epoch": 0.8871929283585158, + "grad_norm": 1.0373868942260742, + "learning_rate": 3.3002317590539623e-07, + "loss": 0.6995, + "step": 17263 + }, + { + "epoch": 0.8872443211018605, + "grad_norm": 1.0957270860671997, + "learning_rate": 3.2972588860355035e-07, + "loss": 0.6401, + "step": 17264 + }, + { + "epoch": 0.887295713845205, + "grad_norm": 1.0539743900299072, + "learning_rate": 3.2942873069572555e-07, + "loss": 0.7054, + "step": 17265 + }, + { + "epoch": 0.8873471065885497, + "grad_norm": 0.7700062394142151, + "learning_rate": 3.291317021901569e-07, + "loss": 0.6568, + "step": 17266 + }, + { + "epoch": 0.8873984993318943, + "grad_norm": 1.053623080253601, + "learning_rate": 3.2883480309507274e-07, + "loss": 0.6993, + "step": 17267 + }, + { + "epoch": 0.8874498920752389, + "grad_norm": 1.094169020652771, + "learning_rate": 3.2853803341870105e-07, + "loss": 0.7444, + "step": 17268 + }, + { + "epoch": 0.8875012848185836, + "grad_norm": 0.8515596985816956, + "learning_rate": 3.2824139316926074e-07, + "loss": 0.6505, + "step": 17269 + }, + { + "epoch": 0.8875526775619282, + "grad_norm": 1.090950846672058, + "learning_rate": 3.27944882354973e-07, + "loss": 0.6659, + "step": 17270 + }, + { + "epoch": 0.8876040703052729, + "grad_norm": 1.0787317752838135, + "learning_rate": 3.2764850098405245e-07, + "loss": 0.6642, + "step": 17271 + }, + { + "epoch": 0.8876554630486175, + "grad_norm": 0.7060444355010986, + "learning_rate": 3.2735224906470906e-07, + "loss": 0.6242, + "step": 17272 + }, + { + "epoch": 0.8877068557919622, + "grad_norm": 1.141175389289856, + "learning_rate": 3.2705612660515305e-07, + "loss": 0.6913, + "step": 17273 + }, + { + "epoch": 0.8877582485353068, + "grad_norm": 1.135288953781128, + "learning_rate": 3.267601336135878e-07, + "loss": 0.7261, + "step": 17274 + }, + { + "epoch": 0.8878096412786515, + "grad_norm": 1.1385952234268188, + "learning_rate": 3.264642700982135e-07, + "loss": 0.664, + "step": 17275 + }, + { + "epoch": 0.8878610340219961, + "grad_norm": 1.092997670173645, + "learning_rate": 3.261685360672273e-07, + "loss": 0.7433, + "step": 17276 + }, + { + "epoch": 0.8879124267653408, + "grad_norm": 1.1415539979934692, + "learning_rate": 3.2587293152882446e-07, + "loss": 0.7035, + "step": 17277 + }, + { + "epoch": 0.8879638195086854, + "grad_norm": 1.068906307220459, + "learning_rate": 3.255774564911923e-07, + "loss": 0.7129, + "step": 17278 + }, + { + "epoch": 0.8880152122520301, + "grad_norm": 1.0996875762939453, + "learning_rate": 3.2528211096251926e-07, + "loss": 0.665, + "step": 17279 + }, + { + "epoch": 0.8880666049953746, + "grad_norm": 1.0697276592254639, + "learning_rate": 3.2498689495098767e-07, + "loss": 0.6782, + "step": 17280 + }, + { + "epoch": 0.8881179977387192, + "grad_norm": 0.8283963203430176, + "learning_rate": 3.24691808464776e-07, + "loss": 0.6436, + "step": 17281 + }, + { + "epoch": 0.8881693904820639, + "grad_norm": 1.0669564008712769, + "learning_rate": 3.2439685151206104e-07, + "loss": 0.6697, + "step": 17282 + }, + { + "epoch": 0.8882207832254085, + "grad_norm": 0.6826446056365967, + "learning_rate": 3.24102024101014e-07, + "loss": 0.6768, + "step": 17283 + }, + { + "epoch": 0.8882721759687532, + "grad_norm": 0.661725640296936, + "learning_rate": 3.2380732623980395e-07, + "loss": 0.6234, + "step": 17284 + }, + { + "epoch": 0.8883235687120978, + "grad_norm": 1.1046956777572632, + "learning_rate": 3.235127579365943e-07, + "loss": 0.6978, + "step": 17285 + }, + { + "epoch": 0.8883749614554425, + "grad_norm": 1.2081642150878906, + "learning_rate": 3.232183191995486e-07, + "loss": 0.6554, + "step": 17286 + }, + { + "epoch": 0.8884263541987871, + "grad_norm": 1.044550895690918, + "learning_rate": 3.2292401003682303e-07, + "loss": 0.6809, + "step": 17287 + }, + { + "epoch": 0.8884777469421318, + "grad_norm": 1.076706886291504, + "learning_rate": 3.226298304565717e-07, + "loss": 0.6951, + "step": 17288 + }, + { + "epoch": 0.8885291396854764, + "grad_norm": 1.12276029586792, + "learning_rate": 3.2233578046694523e-07, + "loss": 0.6735, + "step": 17289 + }, + { + "epoch": 0.8885805324288211, + "grad_norm": 1.0970485210418701, + "learning_rate": 3.2204186007609105e-07, + "loss": 0.6897, + "step": 17290 + }, + { + "epoch": 0.8886319251721657, + "grad_norm": 1.118424654006958, + "learning_rate": 3.2174806929215206e-07, + "loss": 0.7407, + "step": 17291 + }, + { + "epoch": 0.8886833179155104, + "grad_norm": 1.053068995475769, + "learning_rate": 3.2145440812326845e-07, + "loss": 0.6691, + "step": 17292 + }, + { + "epoch": 0.888734710658855, + "grad_norm": 1.1214486360549927, + "learning_rate": 3.211608765775759e-07, + "loss": 0.7023, + "step": 17293 + }, + { + "epoch": 0.8887861034021997, + "grad_norm": 1.0410770177841187, + "learning_rate": 3.2086747466320624e-07, + "loss": 0.6647, + "step": 17294 + }, + { + "epoch": 0.8888374961455443, + "grad_norm": 1.104561448097229, + "learning_rate": 3.205742023882896e-07, + "loss": 0.6612, + "step": 17295 + }, + { + "epoch": 0.8888888888888888, + "grad_norm": 0.693846583366394, + "learning_rate": 3.2028105976095125e-07, + "loss": 0.6653, + "step": 17296 + }, + { + "epoch": 0.8889402816322335, + "grad_norm": 1.1310362815856934, + "learning_rate": 3.1998804678931295e-07, + "loss": 0.6702, + "step": 17297 + }, + { + "epoch": 0.8889916743755781, + "grad_norm": 1.1292986869812012, + "learning_rate": 3.1969516348149155e-07, + "loss": 0.7311, + "step": 17298 + }, + { + "epoch": 0.8890430671189228, + "grad_norm": 1.0828626155853271, + "learning_rate": 3.1940240984560333e-07, + "loss": 0.6821, + "step": 17299 + }, + { + "epoch": 0.8890944598622674, + "grad_norm": 0.7424675226211548, + "learning_rate": 3.1910978588975905e-07, + "loss": 0.6195, + "step": 17300 + }, + { + "epoch": 0.8891458526056121, + "grad_norm": 1.0057932138442993, + "learning_rate": 3.1881729162206556e-07, + "loss": 0.6125, + "step": 17301 + }, + { + "epoch": 0.8891972453489567, + "grad_norm": 1.1064280271530151, + "learning_rate": 3.185249270506258e-07, + "loss": 0.768, + "step": 17302 + }, + { + "epoch": 0.8892486380923014, + "grad_norm": 1.0534429550170898, + "learning_rate": 3.182326921835421e-07, + "loss": 0.6472, + "step": 17303 + }, + { + "epoch": 0.889300030835646, + "grad_norm": 1.0426263809204102, + "learning_rate": 3.1794058702890983e-07, + "loss": 0.7162, + "step": 17304 + }, + { + "epoch": 0.8893514235789907, + "grad_norm": 1.062767744064331, + "learning_rate": 3.176486115948213e-07, + "loss": 0.6931, + "step": 17305 + }, + { + "epoch": 0.8894028163223353, + "grad_norm": 1.0462026596069336, + "learning_rate": 3.173567658893684e-07, + "loss": 0.6355, + "step": 17306 + }, + { + "epoch": 0.88945420906568, + "grad_norm": 1.1213829517364502, + "learning_rate": 3.1706504992063403e-07, + "loss": 0.7193, + "step": 17307 + }, + { + "epoch": 0.8895056018090246, + "grad_norm": 1.0517857074737549, + "learning_rate": 3.1677346369670235e-07, + "loss": 0.673, + "step": 17308 + }, + { + "epoch": 0.8895569945523693, + "grad_norm": 0.8106634020805359, + "learning_rate": 3.164820072256514e-07, + "loss": 0.6134, + "step": 17309 + }, + { + "epoch": 0.8896083872957139, + "grad_norm": 1.0553170442581177, + "learning_rate": 3.1619068051555624e-07, + "loss": 0.709, + "step": 17310 + }, + { + "epoch": 0.8896597800390584, + "grad_norm": 1.1086674928665161, + "learning_rate": 3.1589948357448776e-07, + "loss": 0.69, + "step": 17311 + }, + { + "epoch": 0.8897111727824031, + "grad_norm": 1.1421409845352173, + "learning_rate": 3.1560841641051555e-07, + "loss": 0.6936, + "step": 17312 + }, + { + "epoch": 0.8897625655257477, + "grad_norm": 1.3569501638412476, + "learning_rate": 3.153174790317026e-07, + "loss": 0.6742, + "step": 17313 + }, + { + "epoch": 0.8898139582690924, + "grad_norm": 1.2307621240615845, + "learning_rate": 3.1502667144610867e-07, + "loss": 0.6787, + "step": 17314 + }, + { + "epoch": 0.889865351012437, + "grad_norm": 0.6956360340118408, + "learning_rate": 3.1473599366179277e-07, + "loss": 0.64, + "step": 17315 + }, + { + "epoch": 0.8899167437557817, + "grad_norm": 1.0923476219177246, + "learning_rate": 3.144454456868079e-07, + "loss": 0.6824, + "step": 17316 + }, + { + "epoch": 0.8899681364991263, + "grad_norm": 0.9958836436271667, + "learning_rate": 3.141550275292032e-07, + "loss": 0.7347, + "step": 17317 + }, + { + "epoch": 0.890019529242471, + "grad_norm": 1.0987982749938965, + "learning_rate": 3.1386473919702507e-07, + "loss": 0.7168, + "step": 17318 + }, + { + "epoch": 0.8900709219858156, + "grad_norm": 1.1015421152114868, + "learning_rate": 3.135745806983176e-07, + "loss": 0.6392, + "step": 17319 + }, + { + "epoch": 0.8901223147291603, + "grad_norm": 1.020914077758789, + "learning_rate": 3.1328455204111705e-07, + "loss": 0.7061, + "step": 17320 + }, + { + "epoch": 0.8901737074725049, + "grad_norm": 1.1055606603622437, + "learning_rate": 3.129946532334621e-07, + "loss": 0.708, + "step": 17321 + }, + { + "epoch": 0.8902251002158496, + "grad_norm": 1.0513299703598022, + "learning_rate": 3.127048842833824e-07, + "loss": 0.7093, + "step": 17322 + }, + { + "epoch": 0.8902764929591942, + "grad_norm": 1.0631810426712036, + "learning_rate": 3.124152451989071e-07, + "loss": 0.7007, + "step": 17323 + }, + { + "epoch": 0.8903278857025388, + "grad_norm": 1.0524544715881348, + "learning_rate": 3.121257359880608e-07, + "loss": 0.7452, + "step": 17324 + }, + { + "epoch": 0.8903792784458835, + "grad_norm": 0.6523012518882751, + "learning_rate": 3.1183635665886504e-07, + "loss": 0.6578, + "step": 17325 + }, + { + "epoch": 0.890430671189228, + "grad_norm": 1.1447899341583252, + "learning_rate": 3.115471072193366e-07, + "loss": 0.7014, + "step": 17326 + }, + { + "epoch": 0.8904820639325727, + "grad_norm": 1.0926249027252197, + "learning_rate": 3.112579876774896e-07, + "loss": 0.644, + "step": 17327 + }, + { + "epoch": 0.8905334566759173, + "grad_norm": 1.05857253074646, + "learning_rate": 3.1096899804133553e-07, + "loss": 0.7275, + "step": 17328 + }, + { + "epoch": 0.890584849419262, + "grad_norm": 1.0685769319534302, + "learning_rate": 3.1068013831887847e-07, + "loss": 0.6488, + "step": 17329 + }, + { + "epoch": 0.8906362421626066, + "grad_norm": 0.7496011257171631, + "learning_rate": 3.103914085181242e-07, + "loss": 0.6485, + "step": 17330 + }, + { + "epoch": 0.8906876349059513, + "grad_norm": 1.159683108329773, + "learning_rate": 3.101028086470698e-07, + "loss": 0.7643, + "step": 17331 + }, + { + "epoch": 0.8907390276492959, + "grad_norm": 1.2198076248168945, + "learning_rate": 3.0981433871371367e-07, + "loss": 0.7089, + "step": 17332 + }, + { + "epoch": 0.8907904203926406, + "grad_norm": 1.1778459548950195, + "learning_rate": 3.095259987260468e-07, + "loss": 0.7314, + "step": 17333 + }, + { + "epoch": 0.8908418131359852, + "grad_norm": 1.0457491874694824, + "learning_rate": 3.092377886920583e-07, + "loss": 0.6493, + "step": 17334 + }, + { + "epoch": 0.8908932058793299, + "grad_norm": 1.1541686058044434, + "learning_rate": 3.0894970861973295e-07, + "loss": 0.6408, + "step": 17335 + }, + { + "epoch": 0.8909445986226745, + "grad_norm": 0.7138638496398926, + "learning_rate": 3.086617585170515e-07, + "loss": 0.6332, + "step": 17336 + }, + { + "epoch": 0.8909959913660191, + "grad_norm": 1.0447943210601807, + "learning_rate": 3.083739383919937e-07, + "loss": 0.6963, + "step": 17337 + }, + { + "epoch": 0.8910473841093638, + "grad_norm": 1.1010397672653198, + "learning_rate": 3.0808624825253265e-07, + "loss": 0.7216, + "step": 17338 + }, + { + "epoch": 0.8910987768527084, + "grad_norm": 0.7465928196907043, + "learning_rate": 3.0779868810663917e-07, + "loss": 0.6351, + "step": 17339 + }, + { + "epoch": 0.8911501695960531, + "grad_norm": 0.7380722761154175, + "learning_rate": 3.0751125796227965e-07, + "loss": 0.6614, + "step": 17340 + }, + { + "epoch": 0.8912015623393976, + "grad_norm": 1.1245673894882202, + "learning_rate": 3.0722395782741945e-07, + "loss": 0.7219, + "step": 17341 + }, + { + "epoch": 0.8912529550827423, + "grad_norm": 0.7626135945320129, + "learning_rate": 3.0693678771001765e-07, + "loss": 0.6608, + "step": 17342 + }, + { + "epoch": 0.8913043478260869, + "grad_norm": 0.9172635078430176, + "learning_rate": 3.066497476180297e-07, + "loss": 0.6449, + "step": 17343 + }, + { + "epoch": 0.8913557405694316, + "grad_norm": 0.771536111831665, + "learning_rate": 3.063628375594091e-07, + "loss": 0.6464, + "step": 17344 + }, + { + "epoch": 0.8914071333127762, + "grad_norm": 0.7993407249450684, + "learning_rate": 3.0607605754210457e-07, + "loss": 0.6547, + "step": 17345 + }, + { + "epoch": 0.8914585260561209, + "grad_norm": 1.0910483598709106, + "learning_rate": 3.057894075740619e-07, + "loss": 0.6606, + "step": 17346 + }, + { + "epoch": 0.8915099187994655, + "grad_norm": 1.0440376996994019, + "learning_rate": 3.055028876632227e-07, + "loss": 0.6993, + "step": 17347 + }, + { + "epoch": 0.8915613115428102, + "grad_norm": 1.0500766038894653, + "learning_rate": 3.052164978175265e-07, + "loss": 0.6839, + "step": 17348 + }, + { + "epoch": 0.8916127042861548, + "grad_norm": 0.6825932264328003, + "learning_rate": 3.049302380449054e-07, + "loss": 0.664, + "step": 17349 + }, + { + "epoch": 0.8916640970294994, + "grad_norm": 1.0863875150680542, + "learning_rate": 3.046441083532931e-07, + "loss": 0.6512, + "step": 17350 + }, + { + "epoch": 0.8917154897728441, + "grad_norm": 1.0442588329315186, + "learning_rate": 3.043581087506159e-07, + "loss": 0.6741, + "step": 17351 + }, + { + "epoch": 0.8917668825161887, + "grad_norm": 1.3062458038330078, + "learning_rate": 3.0407223924479757e-07, + "loss": 0.631, + "step": 17352 + }, + { + "epoch": 0.8918182752595334, + "grad_norm": 0.7802101373672485, + "learning_rate": 3.0378649984375784e-07, + "loss": 0.7097, + "step": 17353 + }, + { + "epoch": 0.891869668002878, + "grad_norm": 1.0873173475265503, + "learning_rate": 3.035008905554149e-07, + "loss": 0.6628, + "step": 17354 + }, + { + "epoch": 0.8919210607462227, + "grad_norm": 1.1318784952163696, + "learning_rate": 3.0321541138768064e-07, + "loss": 0.6697, + "step": 17355 + }, + { + "epoch": 0.8919724534895672, + "grad_norm": 1.121634840965271, + "learning_rate": 3.029300623484643e-07, + "loss": 0.799, + "step": 17356 + }, + { + "epoch": 0.8920238462329119, + "grad_norm": 1.04374361038208, + "learning_rate": 3.026448434456741e-07, + "loss": 0.6791, + "step": 17357 + }, + { + "epoch": 0.8920752389762565, + "grad_norm": 1.0634995698928833, + "learning_rate": 3.023597546872087e-07, + "loss": 0.7081, + "step": 17358 + }, + { + "epoch": 0.8921266317196012, + "grad_norm": 1.1587491035461426, + "learning_rate": 3.020747960809689e-07, + "loss": 0.7061, + "step": 17359 + }, + { + "epoch": 0.8921780244629458, + "grad_norm": 1.088513970375061, + "learning_rate": 3.017899676348496e-07, + "loss": 0.6553, + "step": 17360 + }, + { + "epoch": 0.8922294172062905, + "grad_norm": 1.0593944787979126, + "learning_rate": 3.0150526935674216e-07, + "loss": 0.7001, + "step": 17361 + }, + { + "epoch": 0.8922808099496351, + "grad_norm": 1.0292344093322754, + "learning_rate": 3.012207012545332e-07, + "loss": 0.6967, + "step": 17362 + }, + { + "epoch": 0.8923322026929797, + "grad_norm": 0.760574460029602, + "learning_rate": 3.009362633361085e-07, + "loss": 0.6814, + "step": 17363 + }, + { + "epoch": 0.8923835954363244, + "grad_norm": 0.6671168208122253, + "learning_rate": 3.006519556093479e-07, + "loss": 0.5869, + "step": 17364 + }, + { + "epoch": 0.892434988179669, + "grad_norm": 1.077892780303955, + "learning_rate": 3.003677780821285e-07, + "loss": 0.6401, + "step": 17365 + }, + { + "epoch": 0.8924863809230137, + "grad_norm": 1.105648398399353, + "learning_rate": 3.000837307623239e-07, + "loss": 0.6991, + "step": 17366 + }, + { + "epoch": 0.8925377736663583, + "grad_norm": 1.10355806350708, + "learning_rate": 2.9979981365780397e-07, + "loss": 0.6626, + "step": 17367 + }, + { + "epoch": 0.892589166409703, + "grad_norm": 1.225762128829956, + "learning_rate": 2.995160267764341e-07, + "loss": 0.6537, + "step": 17368 + }, + { + "epoch": 0.8926405591530476, + "grad_norm": 1.0736891031265259, + "learning_rate": 2.992323701260774e-07, + "loss": 0.6986, + "step": 17369 + }, + { + "epoch": 0.8926919518963923, + "grad_norm": 1.0879831314086914, + "learning_rate": 2.989488437145938e-07, + "loss": 0.6776, + "step": 17370 + }, + { + "epoch": 0.8927433446397369, + "grad_norm": 1.0839269161224365, + "learning_rate": 2.9866544754983626e-07, + "loss": 0.6477, + "step": 17371 + }, + { + "epoch": 0.8927947373830815, + "grad_norm": 1.1312848329544067, + "learning_rate": 2.983821816396587e-07, + "loss": 0.7612, + "step": 17372 + }, + { + "epoch": 0.8928461301264261, + "grad_norm": 1.0883126258850098, + "learning_rate": 2.9809904599190865e-07, + "loss": 0.7205, + "step": 17373 + }, + { + "epoch": 0.8928975228697708, + "grad_norm": 1.0878690481185913, + "learning_rate": 2.9781604061442927e-07, + "loss": 0.6763, + "step": 17374 + }, + { + "epoch": 0.8929489156131154, + "grad_norm": 1.1662545204162598, + "learning_rate": 2.975331655150637e-07, + "loss": 0.6928, + "step": 17375 + }, + { + "epoch": 0.89300030835646, + "grad_norm": 1.0824042558670044, + "learning_rate": 2.9725042070164745e-07, + "loss": 0.7007, + "step": 17376 + }, + { + "epoch": 0.8930517010998047, + "grad_norm": 0.6959069967269897, + "learning_rate": 2.969678061820164e-07, + "loss": 0.6076, + "step": 17377 + }, + { + "epoch": 0.8931030938431493, + "grad_norm": 1.0717687606811523, + "learning_rate": 2.9668532196399704e-07, + "loss": 0.6654, + "step": 17378 + }, + { + "epoch": 0.893154486586494, + "grad_norm": 1.1564409732818604, + "learning_rate": 2.9640296805541925e-07, + "loss": 0.7075, + "step": 17379 + }, + { + "epoch": 0.8932058793298386, + "grad_norm": 1.1828687191009521, + "learning_rate": 2.961207444641051e-07, + "loss": 0.7335, + "step": 17380 + }, + { + "epoch": 0.8932572720731833, + "grad_norm": 1.1136672496795654, + "learning_rate": 2.958386511978728e-07, + "loss": 0.6683, + "step": 17381 + }, + { + "epoch": 0.8933086648165279, + "grad_norm": 1.1237658262252808, + "learning_rate": 2.9555668826453774e-07, + "loss": 0.688, + "step": 17382 + }, + { + "epoch": 0.8933600575598726, + "grad_norm": 1.0470094680786133, + "learning_rate": 2.952748556719137e-07, + "loss": 0.7095, + "step": 17383 + }, + { + "epoch": 0.8934114503032172, + "grad_norm": 0.6859115958213806, + "learning_rate": 2.949931534278083e-07, + "loss": 0.6434, + "step": 17384 + }, + { + "epoch": 0.8934628430465619, + "grad_norm": 1.0506435632705688, + "learning_rate": 2.947115815400259e-07, + "loss": 0.6776, + "step": 17385 + }, + { + "epoch": 0.8935142357899065, + "grad_norm": 1.1310999393463135, + "learning_rate": 2.9443014001636796e-07, + "loss": 0.7016, + "step": 17386 + }, + { + "epoch": 0.893565628533251, + "grad_norm": 1.1167365312576294, + "learning_rate": 2.941488288646316e-07, + "loss": 0.6769, + "step": 17387 + }, + { + "epoch": 0.8936170212765957, + "grad_norm": 1.1728516817092896, + "learning_rate": 2.938676480926117e-07, + "loss": 0.6583, + "step": 17388 + }, + { + "epoch": 0.8936684140199403, + "grad_norm": 1.078352451324463, + "learning_rate": 2.935865977080987e-07, + "loss": 0.6753, + "step": 17389 + }, + { + "epoch": 0.893719806763285, + "grad_norm": 1.1132880449295044, + "learning_rate": 2.933056777188786e-07, + "loss": 0.7001, + "step": 17390 + }, + { + "epoch": 0.8937711995066296, + "grad_norm": 1.0806270837783813, + "learning_rate": 2.930248881327341e-07, + "loss": 0.6396, + "step": 17391 + }, + { + "epoch": 0.8938225922499743, + "grad_norm": 1.035416603088379, + "learning_rate": 2.927442289574461e-07, + "loss": 0.679, + "step": 17392 + }, + { + "epoch": 0.8938739849933189, + "grad_norm": 1.0838271379470825, + "learning_rate": 2.924637002007896e-07, + "loss": 0.6865, + "step": 17393 + }, + { + "epoch": 0.8939253777366636, + "grad_norm": 0.7409743666648865, + "learning_rate": 2.9218330187053723e-07, + "loss": 0.6674, + "step": 17394 + }, + { + "epoch": 0.8939767704800082, + "grad_norm": 1.0827436447143555, + "learning_rate": 2.919030339744572e-07, + "loss": 0.7165, + "step": 17395 + }, + { + "epoch": 0.8940281632233529, + "grad_norm": 0.7001832723617554, + "learning_rate": 2.916228965203155e-07, + "loss": 0.6598, + "step": 17396 + }, + { + "epoch": 0.8940795559666975, + "grad_norm": 1.1202679872512817, + "learning_rate": 2.913428895158726e-07, + "loss": 0.7093, + "step": 17397 + }, + { + "epoch": 0.8941309487100422, + "grad_norm": 1.038336992263794, + "learning_rate": 2.910630129688863e-07, + "loss": 0.736, + "step": 17398 + }, + { + "epoch": 0.8941823414533868, + "grad_norm": 1.0931763648986816, + "learning_rate": 2.90783266887113e-07, + "loss": 0.7558, + "step": 17399 + }, + { + "epoch": 0.8942337341967315, + "grad_norm": 1.1437512636184692, + "learning_rate": 2.9050365127829995e-07, + "loss": 0.6804, + "step": 17400 + }, + { + "epoch": 0.8942851269400761, + "grad_norm": 1.1102626323699951, + "learning_rate": 2.9022416615019645e-07, + "loss": 0.6726, + "step": 17401 + }, + { + "epoch": 0.8943365196834206, + "grad_norm": 0.6824184656143188, + "learning_rate": 2.899448115105452e-07, + "loss": 0.6311, + "step": 17402 + }, + { + "epoch": 0.8943879124267653, + "grad_norm": 0.7354761958122253, + "learning_rate": 2.896655873670856e-07, + "loss": 0.6558, + "step": 17403 + }, + { + "epoch": 0.8944393051701099, + "grad_norm": 1.1177719831466675, + "learning_rate": 2.8938649372755425e-07, + "loss": 0.7151, + "step": 17404 + }, + { + "epoch": 0.8944906979134546, + "grad_norm": 1.1368799209594727, + "learning_rate": 2.891075305996838e-07, + "loss": 0.7442, + "step": 17405 + }, + { + "epoch": 0.8945420906567992, + "grad_norm": 1.176924228668213, + "learning_rate": 2.8882869799120305e-07, + "loss": 0.6836, + "step": 17406 + }, + { + "epoch": 0.8945934834001439, + "grad_norm": 1.1300816535949707, + "learning_rate": 2.885499959098365e-07, + "loss": 0.6808, + "step": 17407 + }, + { + "epoch": 0.8946448761434885, + "grad_norm": 1.0469435453414917, + "learning_rate": 2.8827142436330726e-07, + "loss": 0.7138, + "step": 17408 + }, + { + "epoch": 0.8946962688868332, + "grad_norm": 1.0598068237304688, + "learning_rate": 2.8799298335933255e-07, + "loss": 0.6859, + "step": 17409 + }, + { + "epoch": 0.8947476616301778, + "grad_norm": 1.0993692874908447, + "learning_rate": 2.877146729056274e-07, + "loss": 0.6955, + "step": 17410 + }, + { + "epoch": 0.8947990543735225, + "grad_norm": 1.1401753425598145, + "learning_rate": 2.8743649300990104e-07, + "loss": 0.6953, + "step": 17411 + }, + { + "epoch": 0.8948504471168671, + "grad_norm": 1.0517836809158325, + "learning_rate": 2.871584436798636e-07, + "loss": 0.7002, + "step": 17412 + }, + { + "epoch": 0.8949018398602118, + "grad_norm": 1.127001166343689, + "learning_rate": 2.868805249232154e-07, + "loss": 0.682, + "step": 17413 + }, + { + "epoch": 0.8949532326035564, + "grad_norm": 1.0674580335617065, + "learning_rate": 2.866027367476587e-07, + "loss": 0.651, + "step": 17414 + }, + { + "epoch": 0.8950046253469011, + "grad_norm": 1.0983985662460327, + "learning_rate": 2.86325079160889e-07, + "loss": 0.7076, + "step": 17415 + }, + { + "epoch": 0.8950560180902457, + "grad_norm": 1.1478139162063599, + "learning_rate": 2.8604755217059853e-07, + "loss": 0.7278, + "step": 17416 + }, + { + "epoch": 0.8951074108335902, + "grad_norm": 0.7941725254058838, + "learning_rate": 2.8577015578447774e-07, + "loss": 0.6176, + "step": 17417 + }, + { + "epoch": 0.8951588035769349, + "grad_norm": 0.7267408967018127, + "learning_rate": 2.854928900102116e-07, + "loss": 0.6702, + "step": 17418 + }, + { + "epoch": 0.8952101963202795, + "grad_norm": 0.7024105191230774, + "learning_rate": 2.852157548554818e-07, + "loss": 0.6089, + "step": 17419 + }, + { + "epoch": 0.8952615890636242, + "grad_norm": 1.1136268377304077, + "learning_rate": 2.8493875032796547e-07, + "loss": 0.7164, + "step": 17420 + }, + { + "epoch": 0.8953129818069688, + "grad_norm": 1.101357340812683, + "learning_rate": 2.846618764353404e-07, + "loss": 0.7242, + "step": 17421 + }, + { + "epoch": 0.8953643745503135, + "grad_norm": 1.1436948776245117, + "learning_rate": 2.8438513318527426e-07, + "loss": 0.6694, + "step": 17422 + }, + { + "epoch": 0.8954157672936581, + "grad_norm": 1.0445762872695923, + "learning_rate": 2.8410852058543656e-07, + "loss": 0.674, + "step": 17423 + }, + { + "epoch": 0.8954671600370028, + "grad_norm": 1.0948631763458252, + "learning_rate": 2.838320386434895e-07, + "loss": 0.6276, + "step": 17424 + }, + { + "epoch": 0.8955185527803474, + "grad_norm": 1.0978718996047974, + "learning_rate": 2.8355568736709524e-07, + "loss": 0.7145, + "step": 17425 + }, + { + "epoch": 0.8955699455236921, + "grad_norm": 1.0621562004089355, + "learning_rate": 2.8327946676390884e-07, + "loss": 0.7117, + "step": 17426 + }, + { + "epoch": 0.8956213382670367, + "grad_norm": 1.1699554920196533, + "learning_rate": 2.830033768415835e-07, + "loss": 0.6862, + "step": 17427 + }, + { + "epoch": 0.8956727310103814, + "grad_norm": 0.6954039335250854, + "learning_rate": 2.827274176077699e-07, + "loss": 0.6233, + "step": 17428 + }, + { + "epoch": 0.895724123753726, + "grad_norm": 1.0798120498657227, + "learning_rate": 2.824515890701107e-07, + "loss": 0.6781, + "step": 17429 + }, + { + "epoch": 0.8957755164970707, + "grad_norm": 0.7310761213302612, + "learning_rate": 2.82175891236251e-07, + "loss": 0.6765, + "step": 17430 + }, + { + "epoch": 0.8958269092404153, + "grad_norm": 1.0690207481384277, + "learning_rate": 2.8190032411382795e-07, + "loss": 0.7012, + "step": 17431 + }, + { + "epoch": 0.8958783019837598, + "grad_norm": 1.1398894786834717, + "learning_rate": 2.8162488771047604e-07, + "loss": 0.6625, + "step": 17432 + }, + { + "epoch": 0.8959296947271045, + "grad_norm": 1.1216741800308228, + "learning_rate": 2.813495820338269e-07, + "loss": 0.7135, + "step": 17433 + }, + { + "epoch": 0.8959810874704491, + "grad_norm": 0.895706295967102, + "learning_rate": 2.8107440709150837e-07, + "loss": 0.6564, + "step": 17434 + }, + { + "epoch": 0.8960324802137938, + "grad_norm": 1.2029927968978882, + "learning_rate": 2.807993628911443e-07, + "loss": 0.7077, + "step": 17435 + }, + { + "epoch": 0.8960838729571384, + "grad_norm": 1.173804521560669, + "learning_rate": 2.805244494403542e-07, + "loss": 0.6985, + "step": 17436 + }, + { + "epoch": 0.8961352657004831, + "grad_norm": 1.109657883644104, + "learning_rate": 2.8024966674675636e-07, + "loss": 0.6731, + "step": 17437 + }, + { + "epoch": 0.8961866584438277, + "grad_norm": 1.0929486751556396, + "learning_rate": 2.7997501481796253e-07, + "loss": 0.7252, + "step": 17438 + }, + { + "epoch": 0.8962380511871724, + "grad_norm": 1.126557469367981, + "learning_rate": 2.797004936615827e-07, + "loss": 0.6925, + "step": 17439 + }, + { + "epoch": 0.896289443930517, + "grad_norm": 1.048191785812378, + "learning_rate": 2.794261032852219e-07, + "loss": 0.6795, + "step": 17440 + }, + { + "epoch": 0.8963408366738617, + "grad_norm": 1.1220759153366089, + "learning_rate": 2.791518436964846e-07, + "loss": 0.6437, + "step": 17441 + }, + { + "epoch": 0.8963922294172063, + "grad_norm": 1.0785547494888306, + "learning_rate": 2.78877714902967e-07, + "loss": 0.6876, + "step": 17442 + }, + { + "epoch": 0.896443622160551, + "grad_norm": 0.664884626865387, + "learning_rate": 2.786037169122652e-07, + "loss": 0.6377, + "step": 17443 + }, + { + "epoch": 0.8964950149038956, + "grad_norm": 1.0685147047042847, + "learning_rate": 2.7832984973197084e-07, + "loss": 0.6792, + "step": 17444 + }, + { + "epoch": 0.8965464076472403, + "grad_norm": 1.152539610862732, + "learning_rate": 2.7805611336967077e-07, + "loss": 0.7028, + "step": 17445 + }, + { + "epoch": 0.8965978003905849, + "grad_norm": 1.1342029571533203, + "learning_rate": 2.777825078329488e-07, + "loss": 0.6706, + "step": 17446 + }, + { + "epoch": 0.8966491931339294, + "grad_norm": 1.1150037050247192, + "learning_rate": 2.775090331293867e-07, + "loss": 0.6691, + "step": 17447 + }, + { + "epoch": 0.8967005858772741, + "grad_norm": 1.0736795663833618, + "learning_rate": 2.772356892665612e-07, + "loss": 0.6861, + "step": 17448 + }, + { + "epoch": 0.8967519786206187, + "grad_norm": 1.1482558250427246, + "learning_rate": 2.7696247625204397e-07, + "loss": 0.6929, + "step": 17449 + }, + { + "epoch": 0.8968033713639634, + "grad_norm": 0.6963222622871399, + "learning_rate": 2.7668939409340677e-07, + "loss": 0.6597, + "step": 17450 + }, + { + "epoch": 0.896854764107308, + "grad_norm": 1.0902416706085205, + "learning_rate": 2.7641644279821356e-07, + "loss": 0.6753, + "step": 17451 + }, + { + "epoch": 0.8969061568506527, + "grad_norm": 1.0697299242019653, + "learning_rate": 2.7614362237402825e-07, + "loss": 0.6434, + "step": 17452 + }, + { + "epoch": 0.8969575495939973, + "grad_norm": 1.2497276067733765, + "learning_rate": 2.758709328284093e-07, + "loss": 0.6828, + "step": 17453 + }, + { + "epoch": 0.897008942337342, + "grad_norm": 0.7118691205978394, + "learning_rate": 2.755983741689111e-07, + "loss": 0.6071, + "step": 17454 + }, + { + "epoch": 0.8970603350806866, + "grad_norm": 1.0583820343017578, + "learning_rate": 2.7532594640308496e-07, + "loss": 0.6873, + "step": 17455 + }, + { + "epoch": 0.8971117278240313, + "grad_norm": 1.100954294204712, + "learning_rate": 2.750536495384798e-07, + "loss": 0.6892, + "step": 17456 + }, + { + "epoch": 0.8971631205673759, + "grad_norm": 1.1245986223220825, + "learning_rate": 2.7478148358263956e-07, + "loss": 0.7124, + "step": 17457 + }, + { + "epoch": 0.8972145133107206, + "grad_norm": 0.8061022758483887, + "learning_rate": 2.7450944854310323e-07, + "loss": 0.6273, + "step": 17458 + }, + { + "epoch": 0.8972659060540652, + "grad_norm": 1.0604676008224487, + "learning_rate": 2.742375444274104e-07, + "loss": 0.6666, + "step": 17459 + }, + { + "epoch": 0.8973172987974098, + "grad_norm": 1.0862524509429932, + "learning_rate": 2.739657712430932e-07, + "loss": 0.681, + "step": 17460 + }, + { + "epoch": 0.8973686915407545, + "grad_norm": 0.732409656047821, + "learning_rate": 2.736941289976813e-07, + "loss": 0.6236, + "step": 17461 + }, + { + "epoch": 0.8974200842840991, + "grad_norm": 1.0655211210250854, + "learning_rate": 2.7342261769869984e-07, + "loss": 0.6367, + "step": 17462 + }, + { + "epoch": 0.8974714770274437, + "grad_norm": 1.0893183946609497, + "learning_rate": 2.7315123735367376e-07, + "loss": 0.7372, + "step": 17463 + }, + { + "epoch": 0.8975228697707883, + "grad_norm": 1.0969533920288086, + "learning_rate": 2.7287998797011885e-07, + "loss": 0.6763, + "step": 17464 + }, + { + "epoch": 0.897574262514133, + "grad_norm": 1.0983387231826782, + "learning_rate": 2.726088695555523e-07, + "loss": 0.7383, + "step": 17465 + }, + { + "epoch": 0.8976256552574776, + "grad_norm": 1.101660132408142, + "learning_rate": 2.723378821174855e-07, + "loss": 0.6732, + "step": 17466 + }, + { + "epoch": 0.8976770480008223, + "grad_norm": 1.1684496402740479, + "learning_rate": 2.7206702566342504e-07, + "loss": 0.6641, + "step": 17467 + }, + { + "epoch": 0.8977284407441669, + "grad_norm": 0.637290894985199, + "learning_rate": 2.717963002008772e-07, + "loss": 0.6731, + "step": 17468 + }, + { + "epoch": 0.8977798334875116, + "grad_norm": 1.0875515937805176, + "learning_rate": 2.715257057373416e-07, + "loss": 0.7013, + "step": 17469 + }, + { + "epoch": 0.8978312262308562, + "grad_norm": 0.6846639513969421, + "learning_rate": 2.712552422803161e-07, + "loss": 0.6243, + "step": 17470 + }, + { + "epoch": 0.8978826189742009, + "grad_norm": 0.7346695065498352, + "learning_rate": 2.709849098372919e-07, + "loss": 0.6454, + "step": 17471 + }, + { + "epoch": 0.8979340117175455, + "grad_norm": 1.1032160520553589, + "learning_rate": 2.707147084157613e-07, + "loss": 0.6511, + "step": 17472 + }, + { + "epoch": 0.8979854044608901, + "grad_norm": 1.036848783493042, + "learning_rate": 2.704446380232095e-07, + "loss": 0.6478, + "step": 17473 + }, + { + "epoch": 0.8980367972042348, + "grad_norm": 1.024643063545227, + "learning_rate": 2.7017469866711943e-07, + "loss": 0.6535, + "step": 17474 + }, + { + "epoch": 0.8980881899475794, + "grad_norm": 1.10590660572052, + "learning_rate": 2.699048903549684e-07, + "loss": 0.7101, + "step": 17475 + }, + { + "epoch": 0.8981395826909241, + "grad_norm": 1.0675209760665894, + "learning_rate": 2.6963521309423424e-07, + "loss": 0.6177, + "step": 17476 + }, + { + "epoch": 0.8981909754342687, + "grad_norm": 1.039973497390747, + "learning_rate": 2.693656668923866e-07, + "loss": 0.6704, + "step": 17477 + }, + { + "epoch": 0.8982423681776133, + "grad_norm": 1.1084675788879395, + "learning_rate": 2.6909625175689403e-07, + "loss": 0.6838, + "step": 17478 + }, + { + "epoch": 0.8982937609209579, + "grad_norm": 0.7855969071388245, + "learning_rate": 2.688269676952221e-07, + "loss": 0.6471, + "step": 17479 + }, + { + "epoch": 0.8983451536643026, + "grad_norm": 1.2746654748916626, + "learning_rate": 2.6855781471482876e-07, + "loss": 0.6899, + "step": 17480 + }, + { + "epoch": 0.8983965464076472, + "grad_norm": 1.088821530342102, + "learning_rate": 2.6828879282317424e-07, + "loss": 0.6392, + "step": 17481 + }, + { + "epoch": 0.8984479391509919, + "grad_norm": 1.2113670110702515, + "learning_rate": 2.680199020277102e-07, + "loss": 0.776, + "step": 17482 + }, + { + "epoch": 0.8984993318943365, + "grad_norm": 1.0478318929672241, + "learning_rate": 2.6775114233588696e-07, + "loss": 0.7157, + "step": 17483 + }, + { + "epoch": 0.8985507246376812, + "grad_norm": 0.7268092036247253, + "learning_rate": 2.6748251375515065e-07, + "loss": 0.6763, + "step": 17484 + }, + { + "epoch": 0.8986021173810258, + "grad_norm": 0.7095730900764465, + "learning_rate": 2.6721401629294373e-07, + "loss": 0.6687, + "step": 17485 + }, + { + "epoch": 0.8986535101243704, + "grad_norm": 0.8008632659912109, + "learning_rate": 2.669456499567058e-07, + "loss": 0.6591, + "step": 17486 + }, + { + "epoch": 0.8987049028677151, + "grad_norm": 1.0375851392745972, + "learning_rate": 2.6667741475387144e-07, + "loss": 0.6689, + "step": 17487 + }, + { + "epoch": 0.8987562956110597, + "grad_norm": 0.7210838794708252, + "learning_rate": 2.664093106918725e-07, + "loss": 0.6735, + "step": 17488 + }, + { + "epoch": 0.8988076883544044, + "grad_norm": 1.0483851432800293, + "learning_rate": 2.66141337778138e-07, + "loss": 0.7108, + "step": 17489 + }, + { + "epoch": 0.898859081097749, + "grad_norm": 1.1259170770645142, + "learning_rate": 2.6587349602009095e-07, + "loss": 0.7455, + "step": 17490 + }, + { + "epoch": 0.8989104738410937, + "grad_norm": 1.042736291885376, + "learning_rate": 2.656057854251526e-07, + "loss": 0.6843, + "step": 17491 + }, + { + "epoch": 0.8989618665844383, + "grad_norm": 1.0363774299621582, + "learning_rate": 2.653382060007409e-07, + "loss": 0.7176, + "step": 17492 + }, + { + "epoch": 0.8990132593277829, + "grad_norm": 0.7217842936515808, + "learning_rate": 2.650707577542677e-07, + "loss": 0.5943, + "step": 17493 + }, + { + "epoch": 0.8990646520711275, + "grad_norm": 1.0505690574645996, + "learning_rate": 2.648034406931449e-07, + "loss": 0.7204, + "step": 17494 + }, + { + "epoch": 0.8991160448144722, + "grad_norm": 1.1260002851486206, + "learning_rate": 2.645362548247776e-07, + "loss": 0.7355, + "step": 17495 + }, + { + "epoch": 0.8991674375578168, + "grad_norm": 0.8638566732406616, + "learning_rate": 2.642692001565683e-07, + "loss": 0.6529, + "step": 17496 + }, + { + "epoch": 0.8992188303011615, + "grad_norm": 1.1522737741470337, + "learning_rate": 2.6400227669591547e-07, + "loss": 0.7245, + "step": 17497 + }, + { + "epoch": 0.8992702230445061, + "grad_norm": 1.0978188514709473, + "learning_rate": 2.637354844502166e-07, + "loss": 0.7045, + "step": 17498 + }, + { + "epoch": 0.8993216157878507, + "grad_norm": 1.102148413658142, + "learning_rate": 2.634688234268612e-07, + "loss": 0.6682, + "step": 17499 + }, + { + "epoch": 0.8993730085311954, + "grad_norm": 1.1625529527664185, + "learning_rate": 2.632022936332385e-07, + "loss": 0.703, + "step": 17500 + }, + { + "epoch": 0.89942440127454, + "grad_norm": 1.1003016233444214, + "learning_rate": 2.6293589507673243e-07, + "loss": 0.7322, + "step": 17501 + }, + { + "epoch": 0.8994757940178847, + "grad_norm": 0.7062330842018127, + "learning_rate": 2.626696277647245e-07, + "loss": 0.6137, + "step": 17502 + }, + { + "epoch": 0.8995271867612293, + "grad_norm": 0.6347520351409912, + "learning_rate": 2.624034917045909e-07, + "loss": 0.7068, + "step": 17503 + }, + { + "epoch": 0.899578579504574, + "grad_norm": 0.6669344902038574, + "learning_rate": 2.621374869037052e-07, + "loss": 0.6828, + "step": 17504 + }, + { + "epoch": 0.8996299722479186, + "grad_norm": 1.1745636463165283, + "learning_rate": 2.618716133694393e-07, + "loss": 0.6928, + "step": 17505 + }, + { + "epoch": 0.8996813649912633, + "grad_norm": 1.1013671159744263, + "learning_rate": 2.6160587110915626e-07, + "loss": 0.6903, + "step": 17506 + }, + { + "epoch": 0.8997327577346079, + "grad_norm": 1.1720566749572754, + "learning_rate": 2.613402601302212e-07, + "loss": 0.6909, + "step": 17507 + }, + { + "epoch": 0.8997841504779525, + "grad_norm": 1.0640180110931396, + "learning_rate": 2.6107478043999213e-07, + "loss": 0.6217, + "step": 17508 + }, + { + "epoch": 0.8998355432212971, + "grad_norm": 1.1407326459884644, + "learning_rate": 2.608094320458243e-07, + "loss": 0.6271, + "step": 17509 + }, + { + "epoch": 0.8998869359646418, + "grad_norm": 1.2122441530227661, + "learning_rate": 2.6054421495506963e-07, + "loss": 0.6891, + "step": 17510 + }, + { + "epoch": 0.8999383287079864, + "grad_norm": 1.0725260972976685, + "learning_rate": 2.602791291750767e-07, + "loss": 0.7182, + "step": 17511 + }, + { + "epoch": 0.899989721451331, + "grad_norm": 0.7054963111877441, + "learning_rate": 2.60014174713189e-07, + "loss": 0.6274, + "step": 17512 + }, + { + "epoch": 0.9000411141946757, + "grad_norm": 1.1932792663574219, + "learning_rate": 2.597493515767474e-07, + "loss": 0.6956, + "step": 17513 + }, + { + "epoch": 0.9000925069380203, + "grad_norm": 1.0281811952590942, + "learning_rate": 2.5948465977309047e-07, + "loss": 0.6684, + "step": 17514 + }, + { + "epoch": 0.900143899681365, + "grad_norm": 1.1068369150161743, + "learning_rate": 2.592200993095495e-07, + "loss": 0.7083, + "step": 17515 + }, + { + "epoch": 0.9001952924247096, + "grad_norm": 1.0526407957077026, + "learning_rate": 2.589556701934559e-07, + "loss": 0.7109, + "step": 17516 + }, + { + "epoch": 0.9002466851680543, + "grad_norm": 1.0962176322937012, + "learning_rate": 2.586913724321349e-07, + "loss": 0.6545, + "step": 17517 + }, + { + "epoch": 0.9002980779113989, + "grad_norm": 1.1656428575515747, + "learning_rate": 2.584272060329107e-07, + "loss": 0.7366, + "step": 17518 + }, + { + "epoch": 0.9003494706547436, + "grad_norm": 1.1307579278945923, + "learning_rate": 2.581631710031013e-07, + "loss": 0.6816, + "step": 17519 + }, + { + "epoch": 0.9004008633980882, + "grad_norm": 0.9736624956130981, + "learning_rate": 2.5789926735002137e-07, + "loss": 0.6494, + "step": 17520 + }, + { + "epoch": 0.9004522561414329, + "grad_norm": 1.0942718982696533, + "learning_rate": 2.576354950809845e-07, + "loss": 0.7304, + "step": 17521 + }, + { + "epoch": 0.9005036488847775, + "grad_norm": 1.0813865661621094, + "learning_rate": 2.5737185420329604e-07, + "loss": 0.7095, + "step": 17522 + }, + { + "epoch": 0.900555041628122, + "grad_norm": 1.0433142185211182, + "learning_rate": 2.571083447242628e-07, + "loss": 0.6947, + "step": 17523 + }, + { + "epoch": 0.9006064343714667, + "grad_norm": 1.2292811870574951, + "learning_rate": 2.568449666511846e-07, + "loss": 0.7057, + "step": 17524 + }, + { + "epoch": 0.9006578271148113, + "grad_norm": 1.1177713871002197, + "learning_rate": 2.565817199913584e-07, + "loss": 0.6524, + "step": 17525 + }, + { + "epoch": 0.900709219858156, + "grad_norm": 1.1463268995285034, + "learning_rate": 2.563186047520766e-07, + "loss": 0.6729, + "step": 17526 + }, + { + "epoch": 0.9007606126015006, + "grad_norm": 1.084991693496704, + "learning_rate": 2.5605562094063175e-07, + "loss": 0.6614, + "step": 17527 + }, + { + "epoch": 0.9008120053448453, + "grad_norm": 1.038216233253479, + "learning_rate": 2.5579276856430804e-07, + "loss": 0.6621, + "step": 17528 + }, + { + "epoch": 0.9008633980881899, + "grad_norm": 1.148944616317749, + "learning_rate": 2.5553004763038794e-07, + "loss": 0.72, + "step": 17529 + }, + { + "epoch": 0.9009147908315346, + "grad_norm": 1.0615544319152832, + "learning_rate": 2.552674581461523e-07, + "loss": 0.7131, + "step": 17530 + }, + { + "epoch": 0.9009661835748792, + "grad_norm": 1.047399878501892, + "learning_rate": 2.5500500011887364e-07, + "loss": 0.6985, + "step": 17531 + }, + { + "epoch": 0.9010175763182239, + "grad_norm": 0.6706374883651733, + "learning_rate": 2.547426735558256e-07, + "loss": 0.6488, + "step": 17532 + }, + { + "epoch": 0.9010689690615685, + "grad_norm": 1.1104978322982788, + "learning_rate": 2.5448047846427514e-07, + "loss": 0.6666, + "step": 17533 + }, + { + "epoch": 0.9011203618049132, + "grad_norm": 1.0795434713363647, + "learning_rate": 2.542184148514881e-07, + "loss": 0.7017, + "step": 17534 + }, + { + "epoch": 0.9011717545482578, + "grad_norm": 1.070583462715149, + "learning_rate": 2.5395648272472317e-07, + "loss": 0.7094, + "step": 17535 + }, + { + "epoch": 0.9012231472916025, + "grad_norm": 1.2641292810440063, + "learning_rate": 2.536946820912384e-07, + "loss": 0.6791, + "step": 17536 + }, + { + "epoch": 0.9012745400349471, + "grad_norm": 1.1463603973388672, + "learning_rate": 2.5343301295828795e-07, + "loss": 0.7449, + "step": 17537 + }, + { + "epoch": 0.9013259327782918, + "grad_norm": 1.107149600982666, + "learning_rate": 2.5317147533311936e-07, + "loss": 0.7314, + "step": 17538 + }, + { + "epoch": 0.9013773255216363, + "grad_norm": 1.133347511291504, + "learning_rate": 2.5291006922298136e-07, + "loss": 0.6955, + "step": 17539 + }, + { + "epoch": 0.9014287182649809, + "grad_norm": 1.1084579229354858, + "learning_rate": 2.526487946351147e-07, + "loss": 0.7171, + "step": 17540 + }, + { + "epoch": 0.9014801110083256, + "grad_norm": 1.060705542564392, + "learning_rate": 2.5238765157675924e-07, + "loss": 0.7278, + "step": 17541 + }, + { + "epoch": 0.9015315037516702, + "grad_norm": 1.055263876914978, + "learning_rate": 2.521266400551492e-07, + "loss": 0.6789, + "step": 17542 + }, + { + "epoch": 0.9015828964950149, + "grad_norm": 1.0538804531097412, + "learning_rate": 2.518657600775176e-07, + "loss": 0.7049, + "step": 17543 + }, + { + "epoch": 0.9016342892383595, + "grad_norm": 0.8037257790565491, + "learning_rate": 2.5160501165108984e-07, + "loss": 0.6265, + "step": 17544 + }, + { + "epoch": 0.9016856819817042, + "grad_norm": 0.7324722409248352, + "learning_rate": 2.513443947830929e-07, + "loss": 0.6477, + "step": 17545 + }, + { + "epoch": 0.9017370747250488, + "grad_norm": 1.1232225894927979, + "learning_rate": 2.5108390948074603e-07, + "loss": 0.6465, + "step": 17546 + }, + { + "epoch": 0.9017884674683935, + "grad_norm": 1.0749430656433105, + "learning_rate": 2.508235557512667e-07, + "loss": 0.6529, + "step": 17547 + }, + { + "epoch": 0.9018398602117381, + "grad_norm": 1.0457696914672852, + "learning_rate": 2.50563333601867e-07, + "loss": 0.6793, + "step": 17548 + }, + { + "epoch": 0.9018912529550828, + "grad_norm": 1.1819281578063965, + "learning_rate": 2.5030324303975784e-07, + "loss": 0.7068, + "step": 17549 + }, + { + "epoch": 0.9019426456984274, + "grad_norm": 1.0802580118179321, + "learning_rate": 2.5004328407214564e-07, + "loss": 0.6806, + "step": 17550 + }, + { + "epoch": 0.9019940384417721, + "grad_norm": 1.0565345287322998, + "learning_rate": 2.497834567062313e-07, + "loss": 0.6683, + "step": 17551 + }, + { + "epoch": 0.9020454311851167, + "grad_norm": 1.0929385423660278, + "learning_rate": 2.495237609492146e-07, + "loss": 0.6901, + "step": 17552 + }, + { + "epoch": 0.9020968239284614, + "grad_norm": 1.1595039367675781, + "learning_rate": 2.4926419680829095e-07, + "loss": 0.6942, + "step": 17553 + }, + { + "epoch": 0.9021482166718059, + "grad_norm": 1.161083459854126, + "learning_rate": 2.4900476429065125e-07, + "loss": 0.7011, + "step": 17554 + }, + { + "epoch": 0.9021996094151505, + "grad_norm": 0.7919915318489075, + "learning_rate": 2.487454634034825e-07, + "loss": 0.6477, + "step": 17555 + }, + { + "epoch": 0.9022510021584952, + "grad_norm": 1.0947983264923096, + "learning_rate": 2.484862941539712e-07, + "loss": 0.6962, + "step": 17556 + }, + { + "epoch": 0.9023023949018398, + "grad_norm": 1.1308001279830933, + "learning_rate": 2.4822725654929493e-07, + "loss": 0.7396, + "step": 17557 + }, + { + "epoch": 0.9023537876451845, + "grad_norm": 1.1335115432739258, + "learning_rate": 2.479683505966324e-07, + "loss": 0.6828, + "step": 17558 + }, + { + "epoch": 0.9024051803885291, + "grad_norm": 0.7448621392250061, + "learning_rate": 2.4770957630315673e-07, + "loss": 0.6477, + "step": 17559 + }, + { + "epoch": 0.9024565731318738, + "grad_norm": 1.0525184869766235, + "learning_rate": 2.474509336760361e-07, + "loss": 0.6839, + "step": 17560 + }, + { + "epoch": 0.9025079658752184, + "grad_norm": 1.0095527172088623, + "learning_rate": 2.4719242272243814e-07, + "loss": 0.6964, + "step": 17561 + }, + { + "epoch": 0.9025593586185631, + "grad_norm": 1.0868803262710571, + "learning_rate": 2.469340434495249e-07, + "loss": 0.7057, + "step": 17562 + }, + { + "epoch": 0.9026107513619077, + "grad_norm": 1.071757435798645, + "learning_rate": 2.4667579586445443e-07, + "loss": 0.6829, + "step": 17563 + }, + { + "epoch": 0.9026621441052524, + "grad_norm": 1.1874746084213257, + "learning_rate": 2.464176799743812e-07, + "loss": 0.6933, + "step": 17564 + }, + { + "epoch": 0.902713536848597, + "grad_norm": 1.072497844696045, + "learning_rate": 2.461596957864576e-07, + "loss": 0.6324, + "step": 17565 + }, + { + "epoch": 0.9027649295919417, + "grad_norm": 1.4241782426834106, + "learning_rate": 2.459018433078314e-07, + "loss": 0.6884, + "step": 17566 + }, + { + "epoch": 0.9028163223352863, + "grad_norm": 1.0346876382827759, + "learning_rate": 2.4564412254564575e-07, + "loss": 0.7117, + "step": 17567 + }, + { + "epoch": 0.902867715078631, + "grad_norm": 1.1888883113861084, + "learning_rate": 2.4538653350704046e-07, + "loss": 0.6793, + "step": 17568 + }, + { + "epoch": 0.9029191078219755, + "grad_norm": 0.7513110041618347, + "learning_rate": 2.451290761991543e-07, + "loss": 0.6316, + "step": 17569 + }, + { + "epoch": 0.9029705005653201, + "grad_norm": 0.7497759461402893, + "learning_rate": 2.4487175062911926e-07, + "loss": 0.6597, + "step": 17570 + }, + { + "epoch": 0.9030218933086648, + "grad_norm": 1.0527058839797974, + "learning_rate": 2.4461455680406363e-07, + "loss": 0.6738, + "step": 17571 + }, + { + "epoch": 0.9030732860520094, + "grad_norm": 1.0991442203521729, + "learning_rate": 2.4435749473111613e-07, + "loss": 0.6607, + "step": 17572 + }, + { + "epoch": 0.9031246787953541, + "grad_norm": 1.0166374444961548, + "learning_rate": 2.441005644173955e-07, + "loss": 0.6571, + "step": 17573 + }, + { + "epoch": 0.9031760715386987, + "grad_norm": 1.0188210010528564, + "learning_rate": 2.438437658700227e-07, + "loss": 0.6701, + "step": 17574 + }, + { + "epoch": 0.9032274642820434, + "grad_norm": 1.0827404260635376, + "learning_rate": 2.4358709909611144e-07, + "loss": 0.701, + "step": 17575 + }, + { + "epoch": 0.903278857025388, + "grad_norm": 1.0952612161636353, + "learning_rate": 2.4333056410277276e-07, + "loss": 0.6859, + "step": 17576 + }, + { + "epoch": 0.9033302497687327, + "grad_norm": 1.0821459293365479, + "learning_rate": 2.430741608971143e-07, + "loss": 0.7334, + "step": 17577 + }, + { + "epoch": 0.9033816425120773, + "grad_norm": 1.0059535503387451, + "learning_rate": 2.42817889486241e-07, + "loss": 0.7098, + "step": 17578 + }, + { + "epoch": 0.903433035255422, + "grad_norm": 0.711694061756134, + "learning_rate": 2.4256174987725203e-07, + "loss": 0.648, + "step": 17579 + }, + { + "epoch": 0.9034844279987666, + "grad_norm": 1.06101393699646, + "learning_rate": 2.4230574207724344e-07, + "loss": 0.7155, + "step": 17580 + }, + { + "epoch": 0.9035358207421113, + "grad_norm": 1.069606900215149, + "learning_rate": 2.420498660933096e-07, + "loss": 0.6617, + "step": 17581 + }, + { + "epoch": 0.9035872134854559, + "grad_norm": 1.1617414951324463, + "learning_rate": 2.4179412193253925e-07, + "loss": 0.7332, + "step": 17582 + }, + { + "epoch": 0.9036386062288005, + "grad_norm": 1.1796826124191284, + "learning_rate": 2.415385096020173e-07, + "loss": 0.6709, + "step": 17583 + }, + { + "epoch": 0.9036899989721451, + "grad_norm": 1.1939054727554321, + "learning_rate": 2.412830291088264e-07, + "loss": 0.6892, + "step": 17584 + }, + { + "epoch": 0.9037413917154897, + "grad_norm": 1.090199589729309, + "learning_rate": 2.4102768046004534e-07, + "loss": 0.6956, + "step": 17585 + }, + { + "epoch": 0.9037927844588344, + "grad_norm": 1.2293330430984497, + "learning_rate": 2.407724636627473e-07, + "loss": 0.7121, + "step": 17586 + }, + { + "epoch": 0.903844177202179, + "grad_norm": 0.6924625039100647, + "learning_rate": 2.405173787240045e-07, + "loss": 0.6323, + "step": 17587 + }, + { + "epoch": 0.9038955699455237, + "grad_norm": 1.0404231548309326, + "learning_rate": 2.4026242565088396e-07, + "loss": 0.7147, + "step": 17588 + }, + { + "epoch": 0.9039469626888683, + "grad_norm": 1.1004329919815063, + "learning_rate": 2.4000760445044845e-07, + "loss": 0.6618, + "step": 17589 + }, + { + "epoch": 0.903998355432213, + "grad_norm": 1.0404647588729858, + "learning_rate": 2.397529151297595e-07, + "loss": 0.6641, + "step": 17590 + }, + { + "epoch": 0.9040497481755576, + "grad_norm": 0.7179667353630066, + "learning_rate": 2.3949835769587316e-07, + "loss": 0.6424, + "step": 17591 + }, + { + "epoch": 0.9041011409189023, + "grad_norm": 1.1021227836608887, + "learning_rate": 2.392439321558421e-07, + "loss": 0.7239, + "step": 17592 + }, + { + "epoch": 0.9041525336622469, + "grad_norm": 1.0678948163986206, + "learning_rate": 2.389896385167145e-07, + "loss": 0.7369, + "step": 17593 + }, + { + "epoch": 0.9042039264055916, + "grad_norm": 1.1009504795074463, + "learning_rate": 2.3873547678553654e-07, + "loss": 0.6711, + "step": 17594 + }, + { + "epoch": 0.9042553191489362, + "grad_norm": 1.0780911445617676, + "learning_rate": 2.384814469693508e-07, + "loss": 0.7129, + "step": 17595 + }, + { + "epoch": 0.9043067118922808, + "grad_norm": 3.6993801593780518, + "learning_rate": 2.3822754907519396e-07, + "loss": 0.6512, + "step": 17596 + }, + { + "epoch": 0.9043581046356255, + "grad_norm": 1.0272951126098633, + "learning_rate": 2.3797378311010032e-07, + "loss": 0.686, + "step": 17597 + }, + { + "epoch": 0.9044094973789701, + "grad_norm": 1.0487034320831299, + "learning_rate": 2.377201490811032e-07, + "loss": 0.7008, + "step": 17598 + }, + { + "epoch": 0.9044608901223147, + "grad_norm": 1.1117587089538574, + "learning_rate": 2.3746664699522637e-07, + "loss": 0.6846, + "step": 17599 + }, + { + "epoch": 0.9045122828656593, + "grad_norm": 1.1480002403259277, + "learning_rate": 2.3721327685949536e-07, + "loss": 0.7135, + "step": 17600 + }, + { + "epoch": 0.904563675609004, + "grad_norm": 1.1156461238861084, + "learning_rate": 2.3696003868093008e-07, + "loss": 0.6864, + "step": 17601 + }, + { + "epoch": 0.9046150683523486, + "grad_norm": 1.243971586227417, + "learning_rate": 2.3670693246654553e-07, + "loss": 0.6934, + "step": 17602 + }, + { + "epoch": 0.9046664610956933, + "grad_norm": 1.1333811283111572, + "learning_rate": 2.3645395822335548e-07, + "loss": 0.6973, + "step": 17603 + }, + { + "epoch": 0.9047178538390379, + "grad_norm": 1.0605882406234741, + "learning_rate": 2.3620111595836826e-07, + "loss": 0.6887, + "step": 17604 + }, + { + "epoch": 0.9047692465823826, + "grad_norm": 1.0705245733261108, + "learning_rate": 2.3594840567858878e-07, + "loss": 0.6629, + "step": 17605 + }, + { + "epoch": 0.9048206393257272, + "grad_norm": 0.739646315574646, + "learning_rate": 2.356958273910187e-07, + "loss": 0.6576, + "step": 17606 + }, + { + "epoch": 0.9048720320690719, + "grad_norm": 1.1915605068206787, + "learning_rate": 2.3544338110265685e-07, + "loss": 0.7338, + "step": 17607 + }, + { + "epoch": 0.9049234248124165, + "grad_norm": 1.0127440690994263, + "learning_rate": 2.3519106682049597e-07, + "loss": 0.6327, + "step": 17608 + }, + { + "epoch": 0.9049748175557611, + "grad_norm": 1.1401029825210571, + "learning_rate": 2.349388845515277e-07, + "loss": 0.7122, + "step": 17609 + }, + { + "epoch": 0.9050262102991058, + "grad_norm": 0.7195321917533875, + "learning_rate": 2.3468683430273865e-07, + "loss": 0.6326, + "step": 17610 + }, + { + "epoch": 0.9050776030424504, + "grad_norm": 1.0024667978286743, + "learning_rate": 2.3443491608111212e-07, + "loss": 0.6751, + "step": 17611 + }, + { + "epoch": 0.9051289957857951, + "grad_norm": 1.102150797843933, + "learning_rate": 2.3418312989362812e-07, + "loss": 0.6674, + "step": 17612 + }, + { + "epoch": 0.9051803885291397, + "grad_norm": 1.0790152549743652, + "learning_rate": 2.339314757472616e-07, + "loss": 0.6958, + "step": 17613 + }, + { + "epoch": 0.9052317812724843, + "grad_norm": 1.0946900844573975, + "learning_rate": 2.3367995364898643e-07, + "loss": 0.7233, + "step": 17614 + }, + { + "epoch": 0.9052831740158289, + "grad_norm": 1.199942708015442, + "learning_rate": 2.3342856360576927e-07, + "loss": 0.7061, + "step": 17615 + }, + { + "epoch": 0.9053345667591736, + "grad_norm": 1.0425904989242554, + "learning_rate": 2.3317730562457676e-07, + "loss": 0.6991, + "step": 17616 + }, + { + "epoch": 0.9053859595025182, + "grad_norm": 1.1337605714797974, + "learning_rate": 2.3292617971236942e-07, + "loss": 0.7351, + "step": 17617 + }, + { + "epoch": 0.9054373522458629, + "grad_norm": 1.0822337865829468, + "learning_rate": 2.3267518587610504e-07, + "loss": 0.675, + "step": 17618 + }, + { + "epoch": 0.9054887449892075, + "grad_norm": 1.1378329992294312, + "learning_rate": 2.324243241227375e-07, + "loss": 0.7095, + "step": 17619 + }, + { + "epoch": 0.9055401377325522, + "grad_norm": 1.0523250102996826, + "learning_rate": 2.3217359445921738e-07, + "loss": 0.6914, + "step": 17620 + }, + { + "epoch": 0.9055915304758968, + "grad_norm": 1.1309000253677368, + "learning_rate": 2.3192299689249187e-07, + "loss": 0.6765, + "step": 17621 + }, + { + "epoch": 0.9056429232192414, + "grad_norm": 0.7675410509109497, + "learning_rate": 2.3167253142950265e-07, + "loss": 0.6302, + "step": 17622 + }, + { + "epoch": 0.9056943159625861, + "grad_norm": 0.6988844871520996, + "learning_rate": 2.314221980771908e-07, + "loss": 0.6648, + "step": 17623 + }, + { + "epoch": 0.9057457087059307, + "grad_norm": 1.072287678718567, + "learning_rate": 2.3117199684249024e-07, + "loss": 0.7482, + "step": 17624 + }, + { + "epoch": 0.9057971014492754, + "grad_norm": 1.0469870567321777, + "learning_rate": 2.309219277323338e-07, + "loss": 0.7341, + "step": 17625 + }, + { + "epoch": 0.90584849419262, + "grad_norm": 1.1441148519515991, + "learning_rate": 2.3067199075364977e-07, + "loss": 0.7115, + "step": 17626 + }, + { + "epoch": 0.9058998869359647, + "grad_norm": 1.1056292057037354, + "learning_rate": 2.304221859133643e-07, + "loss": 0.728, + "step": 17627 + }, + { + "epoch": 0.9059512796793093, + "grad_norm": 1.1033676862716675, + "learning_rate": 2.3017251321839573e-07, + "loss": 0.7147, + "step": 17628 + }, + { + "epoch": 0.906002672422654, + "grad_norm": 1.1317777633666992, + "learning_rate": 2.299229726756641e-07, + "loss": 0.7327, + "step": 17629 + }, + { + "epoch": 0.9060540651659985, + "grad_norm": 0.8893811106681824, + "learning_rate": 2.296735642920811e-07, + "loss": 0.6015, + "step": 17630 + }, + { + "epoch": 0.9061054579093432, + "grad_norm": 1.0364857912063599, + "learning_rate": 2.2942428807455787e-07, + "loss": 0.6612, + "step": 17631 + }, + { + "epoch": 0.9061568506526878, + "grad_norm": 0.8596569299697876, + "learning_rate": 2.291751440300005e-07, + "loss": 0.6318, + "step": 17632 + }, + { + "epoch": 0.9062082433960325, + "grad_norm": 1.11715567111969, + "learning_rate": 2.2892613216531245e-07, + "loss": 0.7318, + "step": 17633 + }, + { + "epoch": 0.9062596361393771, + "grad_norm": 1.2381598949432373, + "learning_rate": 2.2867725248739204e-07, + "loss": 0.6975, + "step": 17634 + }, + { + "epoch": 0.9063110288827217, + "grad_norm": 1.1477817296981812, + "learning_rate": 2.2842850500313375e-07, + "loss": 0.6779, + "step": 17635 + }, + { + "epoch": 0.9063624216260664, + "grad_norm": 1.0620248317718506, + "learning_rate": 2.281798897194326e-07, + "loss": 0.6825, + "step": 17636 + }, + { + "epoch": 0.906413814369411, + "grad_norm": 1.1184896230697632, + "learning_rate": 2.279314066431726e-07, + "loss": 0.721, + "step": 17637 + }, + { + "epoch": 0.9064652071127557, + "grad_norm": 1.133842945098877, + "learning_rate": 2.2768305578124094e-07, + "loss": 0.6855, + "step": 17638 + }, + { + "epoch": 0.9065165998561003, + "grad_norm": 1.1051045656204224, + "learning_rate": 2.2743483714051773e-07, + "loss": 0.6454, + "step": 17639 + }, + { + "epoch": 0.906567992599445, + "grad_norm": 1.0730048418045044, + "learning_rate": 2.271867507278791e-07, + "loss": 0.6709, + "step": 17640 + }, + { + "epoch": 0.9066193853427896, + "grad_norm": 1.114850640296936, + "learning_rate": 2.2693879655020067e-07, + "loss": 0.7345, + "step": 17641 + }, + { + "epoch": 0.9066707780861343, + "grad_norm": 1.1023399829864502, + "learning_rate": 2.266909746143503e-07, + "loss": 0.6861, + "step": 17642 + }, + { + "epoch": 0.9067221708294789, + "grad_norm": 1.1586685180664062, + "learning_rate": 2.264432849271947e-07, + "loss": 0.7853, + "step": 17643 + }, + { + "epoch": 0.9067735635728236, + "grad_norm": 0.6670803427696228, + "learning_rate": 2.2619572749559616e-07, + "loss": 0.5806, + "step": 17644 + }, + { + "epoch": 0.9068249563161681, + "grad_norm": 1.0671591758728027, + "learning_rate": 2.259483023264142e-07, + "loss": 0.7052, + "step": 17645 + }, + { + "epoch": 0.9068763490595128, + "grad_norm": 1.0895864963531494, + "learning_rate": 2.257010094265033e-07, + "loss": 0.699, + "step": 17646 + }, + { + "epoch": 0.9069277418028574, + "grad_norm": 1.0388092994689941, + "learning_rate": 2.2545384880271526e-07, + "loss": 0.6396, + "step": 17647 + }, + { + "epoch": 0.906979134546202, + "grad_norm": 1.1465235948562622, + "learning_rate": 2.252068204618968e-07, + "loss": 0.6874, + "step": 17648 + }, + { + "epoch": 0.9070305272895467, + "grad_norm": 1.1031323671340942, + "learning_rate": 2.2495992441089408e-07, + "loss": 0.739, + "step": 17649 + }, + { + "epoch": 0.9070819200328913, + "grad_norm": 1.0973830223083496, + "learning_rate": 2.247131606565456e-07, + "loss": 0.7616, + "step": 17650 + }, + { + "epoch": 0.907133312776236, + "grad_norm": 1.1110271215438843, + "learning_rate": 2.2446652920568912e-07, + "loss": 0.7284, + "step": 17651 + }, + { + "epoch": 0.9071847055195806, + "grad_norm": 0.7484662532806396, + "learning_rate": 2.2422003006515812e-07, + "loss": 0.6462, + "step": 17652 + }, + { + "epoch": 0.9072360982629253, + "grad_norm": 0.7035232186317444, + "learning_rate": 2.2397366324178049e-07, + "loss": 0.6542, + "step": 17653 + }, + { + "epoch": 0.9072874910062699, + "grad_norm": 1.0518049001693726, + "learning_rate": 2.2372742874238406e-07, + "loss": 0.7262, + "step": 17654 + }, + { + "epoch": 0.9073388837496146, + "grad_norm": 0.7808499932289124, + "learning_rate": 2.2348132657379008e-07, + "loss": 0.6693, + "step": 17655 + }, + { + "epoch": 0.9073902764929592, + "grad_norm": 1.089228630065918, + "learning_rate": 2.2323535674281748e-07, + "loss": 0.7012, + "step": 17656 + }, + { + "epoch": 0.9074416692363039, + "grad_norm": 1.0767018795013428, + "learning_rate": 2.229895192562792e-07, + "loss": 0.6955, + "step": 17657 + }, + { + "epoch": 0.9074930619796485, + "grad_norm": 1.1553523540496826, + "learning_rate": 2.2274381412098866e-07, + "loss": 0.7309, + "step": 17658 + }, + { + "epoch": 0.9075444547229932, + "grad_norm": 1.2027223110198975, + "learning_rate": 2.2249824134375264e-07, + "loss": 0.681, + "step": 17659 + }, + { + "epoch": 0.9075958474663377, + "grad_norm": 1.060681939125061, + "learning_rate": 2.2225280093137457e-07, + "loss": 0.7304, + "step": 17660 + }, + { + "epoch": 0.9076472402096823, + "grad_norm": 0.7268868684768677, + "learning_rate": 2.2200749289065405e-07, + "loss": 0.6632, + "step": 17661 + }, + { + "epoch": 0.907698632953027, + "grad_norm": 1.1161484718322754, + "learning_rate": 2.2176231722838947e-07, + "loss": 0.6604, + "step": 17662 + }, + { + "epoch": 0.9077500256963716, + "grad_norm": 0.7811411023139954, + "learning_rate": 2.2151727395137213e-07, + "loss": 0.6444, + "step": 17663 + }, + { + "epoch": 0.9078014184397163, + "grad_norm": 1.0133854150772095, + "learning_rate": 2.2127236306639043e-07, + "loss": 0.6589, + "step": 17664 + }, + { + "epoch": 0.9078528111830609, + "grad_norm": 1.115313172340393, + "learning_rate": 2.2102758458023288e-07, + "loss": 0.7179, + "step": 17665 + }, + { + "epoch": 0.9079042039264056, + "grad_norm": 0.7578287124633789, + "learning_rate": 2.207829384996779e-07, + "loss": 0.6047, + "step": 17666 + }, + { + "epoch": 0.9079555966697502, + "grad_norm": 0.7083166837692261, + "learning_rate": 2.2053842483150513e-07, + "loss": 0.6708, + "step": 17667 + }, + { + "epoch": 0.9080069894130949, + "grad_norm": 1.0738131999969482, + "learning_rate": 2.2029404358248963e-07, + "loss": 0.6785, + "step": 17668 + }, + { + "epoch": 0.9080583821564395, + "grad_norm": 1.0604991912841797, + "learning_rate": 2.2004979475940102e-07, + "loss": 0.7095, + "step": 17669 + }, + { + "epoch": 0.9081097748997842, + "grad_norm": 1.0235544443130493, + "learning_rate": 2.1980567836900612e-07, + "loss": 0.6533, + "step": 17670 + }, + { + "epoch": 0.9081611676431288, + "grad_norm": 1.0896077156066895, + "learning_rate": 2.1956169441807007e-07, + "loss": 0.685, + "step": 17671 + }, + { + "epoch": 0.9082125603864735, + "grad_norm": 1.1579216718673706, + "learning_rate": 2.193178429133519e-07, + "loss": 0.7168, + "step": 17672 + }, + { + "epoch": 0.9082639531298181, + "grad_norm": 1.1074789762496948, + "learning_rate": 2.1907412386160675e-07, + "loss": 0.7014, + "step": 17673 + }, + { + "epoch": 0.9083153458731628, + "grad_norm": 1.0487475395202637, + "learning_rate": 2.188305372695887e-07, + "loss": 0.6941, + "step": 17674 + }, + { + "epoch": 0.9083667386165073, + "grad_norm": 1.0870540142059326, + "learning_rate": 2.1858708314404564e-07, + "loss": 0.7055, + "step": 17675 + }, + { + "epoch": 0.9084181313598519, + "grad_norm": 1.1733434200286865, + "learning_rate": 2.1834376149172332e-07, + "loss": 0.7008, + "step": 17676 + }, + { + "epoch": 0.9084695241031966, + "grad_norm": 1.145087480545044, + "learning_rate": 2.1810057231936132e-07, + "loss": 0.6924, + "step": 17677 + }, + { + "epoch": 0.9085209168465412, + "grad_norm": 1.0450141429901123, + "learning_rate": 2.178575156337004e-07, + "loss": 0.6385, + "step": 17678 + }, + { + "epoch": 0.9085723095898859, + "grad_norm": 1.0399049520492554, + "learning_rate": 2.1761459144147122e-07, + "loss": 0.6529, + "step": 17679 + }, + { + "epoch": 0.9086237023332305, + "grad_norm": 1.1304844617843628, + "learning_rate": 2.173717997494068e-07, + "loss": 0.7142, + "step": 17680 + }, + { + "epoch": 0.9086750950765752, + "grad_norm": 1.0297542810440063, + "learning_rate": 2.1712914056423339e-07, + "loss": 0.6957, + "step": 17681 + }, + { + "epoch": 0.9087264878199198, + "grad_norm": 1.1754237413406372, + "learning_rate": 2.1688661389267286e-07, + "loss": 0.7333, + "step": 17682 + }, + { + "epoch": 0.9087778805632645, + "grad_norm": 1.07144033908844, + "learning_rate": 2.166442197414459e-07, + "loss": 0.6882, + "step": 17683 + }, + { + "epoch": 0.9088292733066091, + "grad_norm": 1.0926289558410645, + "learning_rate": 2.1640195811726828e-07, + "loss": 0.7352, + "step": 17684 + }, + { + "epoch": 0.9088806660499538, + "grad_norm": 1.04472815990448, + "learning_rate": 2.1615982902685183e-07, + "loss": 0.6658, + "step": 17685 + }, + { + "epoch": 0.9089320587932984, + "grad_norm": 1.1104344129562378, + "learning_rate": 2.1591783247690347e-07, + "loss": 0.6567, + "step": 17686 + }, + { + "epoch": 0.9089834515366431, + "grad_norm": 0.9975858926773071, + "learning_rate": 2.1567596847413108e-07, + "loss": 0.6493, + "step": 17687 + }, + { + "epoch": 0.9090348442799877, + "grad_norm": 1.1546063423156738, + "learning_rate": 2.1543423702523214e-07, + "loss": 0.7152, + "step": 17688 + }, + { + "epoch": 0.9090862370233324, + "grad_norm": 1.0524946451187134, + "learning_rate": 2.1519263813690683e-07, + "loss": 0.7139, + "step": 17689 + }, + { + "epoch": 0.9091376297666769, + "grad_norm": 1.0565636157989502, + "learning_rate": 2.14951171815847e-07, + "loss": 0.7233, + "step": 17690 + }, + { + "epoch": 0.9091890225100215, + "grad_norm": 1.0292373895645142, + "learning_rate": 2.1470983806874402e-07, + "loss": 0.703, + "step": 17691 + }, + { + "epoch": 0.9092404152533662, + "grad_norm": 1.064986228942871, + "learning_rate": 2.144686369022836e-07, + "loss": 0.69, + "step": 17692 + }, + { + "epoch": 0.9092918079967108, + "grad_norm": 0.7384403347969055, + "learning_rate": 2.1422756832314873e-07, + "loss": 0.6815, + "step": 17693 + }, + { + "epoch": 0.9093432007400555, + "grad_norm": 1.149746298789978, + "learning_rate": 2.1398663233801798e-07, + "loss": 0.6969, + "step": 17694 + }, + { + "epoch": 0.9093945934834001, + "grad_norm": 1.0494534969329834, + "learning_rate": 2.1374582895356656e-07, + "loss": 0.6655, + "step": 17695 + }, + { + "epoch": 0.9094459862267448, + "grad_norm": 1.1233456134796143, + "learning_rate": 2.135051581764669e-07, + "loss": 0.7122, + "step": 17696 + }, + { + "epoch": 0.9094973789700894, + "grad_norm": 1.1172221899032593, + "learning_rate": 2.1326462001338698e-07, + "loss": 0.7224, + "step": 17697 + }, + { + "epoch": 0.9095487717134341, + "grad_norm": 1.131014108657837, + "learning_rate": 2.1302421447099042e-07, + "loss": 0.6785, + "step": 17698 + }, + { + "epoch": 0.9096001644567787, + "grad_norm": 0.7532368302345276, + "learning_rate": 2.1278394155593796e-07, + "loss": 0.6683, + "step": 17699 + }, + { + "epoch": 0.9096515572001234, + "grad_norm": 1.1580088138580322, + "learning_rate": 2.125438012748876e-07, + "loss": 0.6971, + "step": 17700 + }, + { + "epoch": 0.909702949943468, + "grad_norm": 0.7308835387229919, + "learning_rate": 2.123037936344907e-07, + "loss": 0.671, + "step": 17701 + }, + { + "epoch": 0.9097543426868127, + "grad_norm": 1.0930366516113281, + "learning_rate": 2.1206391864139863e-07, + "loss": 0.6377, + "step": 17702 + }, + { + "epoch": 0.9098057354301573, + "grad_norm": 1.067945957183838, + "learning_rate": 2.1182417630225715e-07, + "loss": 0.7089, + "step": 17703 + }, + { + "epoch": 0.909857128173502, + "grad_norm": 1.0786175727844238, + "learning_rate": 2.1158456662370708e-07, + "loss": 0.6979, + "step": 17704 + }, + { + "epoch": 0.9099085209168465, + "grad_norm": 0.9157930612564087, + "learning_rate": 2.1134508961238864e-07, + "loss": 0.6594, + "step": 17705 + }, + { + "epoch": 0.9099599136601911, + "grad_norm": 1.1287634372711182, + "learning_rate": 2.11105745274936e-07, + "loss": 0.6862, + "step": 17706 + }, + { + "epoch": 0.9100113064035358, + "grad_norm": 1.0697829723358154, + "learning_rate": 2.1086653361798103e-07, + "loss": 0.7419, + "step": 17707 + }, + { + "epoch": 0.9100626991468804, + "grad_norm": 1.0793765783309937, + "learning_rate": 2.1062745464815014e-07, + "loss": 0.6835, + "step": 17708 + }, + { + "epoch": 0.9101140918902251, + "grad_norm": 1.0769258737564087, + "learning_rate": 2.103885083720686e-07, + "loss": 0.6873, + "step": 17709 + }, + { + "epoch": 0.9101654846335697, + "grad_norm": 1.1262171268463135, + "learning_rate": 2.101496947963555e-07, + "loss": 0.7213, + "step": 17710 + }, + { + "epoch": 0.9102168773769144, + "grad_norm": 1.0951290130615234, + "learning_rate": 2.0991101392762781e-07, + "loss": 0.6967, + "step": 17711 + }, + { + "epoch": 0.910268270120259, + "grad_norm": 1.0736819505691528, + "learning_rate": 2.09672465772498e-07, + "loss": 0.6898, + "step": 17712 + }, + { + "epoch": 0.9103196628636037, + "grad_norm": 1.1122201681137085, + "learning_rate": 2.0943405033757634e-07, + "loss": 0.6705, + "step": 17713 + }, + { + "epoch": 0.9103710556069483, + "grad_norm": 1.1191297769546509, + "learning_rate": 2.0919576762946758e-07, + "loss": 0.7433, + "step": 17714 + }, + { + "epoch": 0.910422448350293, + "grad_norm": 1.0796895027160645, + "learning_rate": 2.0895761765477306e-07, + "loss": 0.6825, + "step": 17715 + }, + { + "epoch": 0.9104738410936376, + "grad_norm": 1.08818781375885, + "learning_rate": 2.0871960042009254e-07, + "loss": 0.7101, + "step": 17716 + }, + { + "epoch": 0.9105252338369823, + "grad_norm": 1.1995338201522827, + "learning_rate": 2.084817159320185e-07, + "loss": 0.6683, + "step": 17717 + }, + { + "epoch": 0.9105766265803269, + "grad_norm": 1.1802960634231567, + "learning_rate": 2.0824396419714287e-07, + "loss": 0.7788, + "step": 17718 + }, + { + "epoch": 0.9106280193236715, + "grad_norm": 1.0783082246780396, + "learning_rate": 2.0800634522205265e-07, + "loss": 0.6783, + "step": 17719 + }, + { + "epoch": 0.9106794120670162, + "grad_norm": 1.0899839401245117, + "learning_rate": 2.0776885901333198e-07, + "loss": 0.7086, + "step": 17720 + }, + { + "epoch": 0.9107308048103607, + "grad_norm": 1.0576839447021484, + "learning_rate": 2.075315055775584e-07, + "loss": 0.7077, + "step": 17721 + }, + { + "epoch": 0.9107821975537054, + "grad_norm": 0.774390459060669, + "learning_rate": 2.0729428492131054e-07, + "loss": 0.6612, + "step": 17722 + }, + { + "epoch": 0.91083359029705, + "grad_norm": 1.103029489517212, + "learning_rate": 2.0705719705115923e-07, + "loss": 0.6882, + "step": 17723 + }, + { + "epoch": 0.9108849830403947, + "grad_norm": 1.1453486680984497, + "learning_rate": 2.0682024197367369e-07, + "loss": 0.7754, + "step": 17724 + }, + { + "epoch": 0.9109363757837393, + "grad_norm": 1.0678218603134155, + "learning_rate": 2.065834196954192e-07, + "loss": 0.6912, + "step": 17725 + }, + { + "epoch": 0.910987768527084, + "grad_norm": 1.1938998699188232, + "learning_rate": 2.0634673022295658e-07, + "loss": 0.6873, + "step": 17726 + }, + { + "epoch": 0.9110391612704286, + "grad_norm": 1.1752475500106812, + "learning_rate": 2.0611017356284458e-07, + "loss": 0.7194, + "step": 17727 + }, + { + "epoch": 0.9110905540137733, + "grad_norm": 1.0744637250900269, + "learning_rate": 2.058737497216351e-07, + "loss": 0.7082, + "step": 17728 + }, + { + "epoch": 0.9111419467571179, + "grad_norm": 1.134577989578247, + "learning_rate": 2.0563745870588126e-07, + "loss": 0.7246, + "step": 17729 + }, + { + "epoch": 0.9111933395004626, + "grad_norm": 1.091447353363037, + "learning_rate": 2.0540130052212726e-07, + "loss": 0.7129, + "step": 17730 + }, + { + "epoch": 0.9112447322438072, + "grad_norm": 1.077867031097412, + "learning_rate": 2.0516527517691786e-07, + "loss": 0.7092, + "step": 17731 + }, + { + "epoch": 0.9112961249871518, + "grad_norm": 0.7471120953559875, + "learning_rate": 2.049293826767912e-07, + "loss": 0.6073, + "step": 17732 + }, + { + "epoch": 0.9113475177304965, + "grad_norm": 1.0988645553588867, + "learning_rate": 2.0469362302828254e-07, + "loss": 0.7275, + "step": 17733 + }, + { + "epoch": 0.9113989104738411, + "grad_norm": 1.1255877017974854, + "learning_rate": 2.0445799623792563e-07, + "loss": 0.714, + "step": 17734 + }, + { + "epoch": 0.9114503032171858, + "grad_norm": 1.0762730836868286, + "learning_rate": 2.0422250231224626e-07, + "loss": 0.7668, + "step": 17735 + }, + { + "epoch": 0.9115016959605303, + "grad_norm": 0.6903653740882874, + "learning_rate": 2.0398714125777208e-07, + "loss": 0.6411, + "step": 17736 + }, + { + "epoch": 0.911553088703875, + "grad_norm": 1.15397047996521, + "learning_rate": 2.0375191308102115e-07, + "loss": 0.6814, + "step": 17737 + }, + { + "epoch": 0.9116044814472196, + "grad_norm": 0.996845006942749, + "learning_rate": 2.035168177885122e-07, + "loss": 0.6779, + "step": 17738 + }, + { + "epoch": 0.9116558741905643, + "grad_norm": 1.0848451852798462, + "learning_rate": 2.0328185538675827e-07, + "loss": 0.7232, + "step": 17739 + }, + { + "epoch": 0.9117072669339089, + "grad_norm": 1.0763123035430908, + "learning_rate": 2.0304702588226922e-07, + "loss": 0.6744, + "step": 17740 + }, + { + "epoch": 0.9117586596772536, + "grad_norm": 1.039921760559082, + "learning_rate": 2.0281232928155092e-07, + "loss": 0.6636, + "step": 17741 + }, + { + "epoch": 0.9118100524205982, + "grad_norm": 1.1046186685562134, + "learning_rate": 2.0257776559110655e-07, + "loss": 0.666, + "step": 17742 + }, + { + "epoch": 0.9118614451639429, + "grad_norm": 0.7246026992797852, + "learning_rate": 2.0234333481743417e-07, + "loss": 0.7017, + "step": 17743 + }, + { + "epoch": 0.9119128379072875, + "grad_norm": 1.0738811492919922, + "learning_rate": 2.0210903696702977e-07, + "loss": 0.6968, + "step": 17744 + }, + { + "epoch": 0.9119642306506321, + "grad_norm": 1.1218420267105103, + "learning_rate": 2.0187487204638367e-07, + "loss": 0.7006, + "step": 17745 + }, + { + "epoch": 0.9120156233939768, + "grad_norm": 1.013877511024475, + "learning_rate": 2.0164084006198405e-07, + "loss": 0.6611, + "step": 17746 + }, + { + "epoch": 0.9120670161373214, + "grad_norm": 1.1758811473846436, + "learning_rate": 2.014069410203151e-07, + "loss": 0.7399, + "step": 17747 + }, + { + "epoch": 0.9121184088806661, + "grad_norm": 1.1796398162841797, + "learning_rate": 2.0117317492785783e-07, + "loss": 0.6768, + "step": 17748 + }, + { + "epoch": 0.9121698016240107, + "grad_norm": 1.0896128416061401, + "learning_rate": 2.009395417910881e-07, + "loss": 0.6752, + "step": 17749 + }, + { + "epoch": 0.9122211943673554, + "grad_norm": 1.1138525009155273, + "learning_rate": 2.0070604161647856e-07, + "loss": 0.7005, + "step": 17750 + }, + { + "epoch": 0.9122725871106999, + "grad_norm": 1.0697894096374512, + "learning_rate": 2.0047267441049957e-07, + "loss": 0.7039, + "step": 17751 + }, + { + "epoch": 0.9123239798540446, + "grad_norm": 1.1171438694000244, + "learning_rate": 2.0023944017961594e-07, + "loss": 0.7016, + "step": 17752 + }, + { + "epoch": 0.9123753725973892, + "grad_norm": 1.0748374462127686, + "learning_rate": 2.0000633893029087e-07, + "loss": 0.6646, + "step": 17753 + }, + { + "epoch": 0.9124267653407339, + "grad_norm": 0.6407703757286072, + "learning_rate": 1.9977337066898029e-07, + "loss": 0.6729, + "step": 17754 + }, + { + "epoch": 0.9124781580840785, + "grad_norm": 1.0660828351974487, + "learning_rate": 1.9954053540214123e-07, + "loss": 0.7267, + "step": 17755 + }, + { + "epoch": 0.9125295508274232, + "grad_norm": 1.1067396402359009, + "learning_rate": 1.993078331362236e-07, + "loss": 0.6487, + "step": 17756 + }, + { + "epoch": 0.9125809435707678, + "grad_norm": 1.0695616006851196, + "learning_rate": 1.9907526387767384e-07, + "loss": 0.6973, + "step": 17757 + }, + { + "epoch": 0.9126323363141124, + "grad_norm": 0.8864750862121582, + "learning_rate": 1.9884282763293794e-07, + "loss": 0.6197, + "step": 17758 + }, + { + "epoch": 0.9126837290574571, + "grad_norm": 1.024201512336731, + "learning_rate": 1.9861052440845241e-07, + "loss": 0.6704, + "step": 17759 + }, + { + "epoch": 0.9127351218008017, + "grad_norm": 1.0804156064987183, + "learning_rate": 1.98378354210656e-07, + "loss": 0.6893, + "step": 17760 + }, + { + "epoch": 0.9127865145441464, + "grad_norm": 1.0635980367660522, + "learning_rate": 1.981463170459802e-07, + "loss": 0.6679, + "step": 17761 + }, + { + "epoch": 0.912837907287491, + "grad_norm": 0.8467568755149841, + "learning_rate": 1.979144129208538e-07, + "loss": 0.6619, + "step": 17762 + }, + { + "epoch": 0.9128893000308357, + "grad_norm": 1.0786676406860352, + "learning_rate": 1.9768264184170104e-07, + "loss": 0.661, + "step": 17763 + }, + { + "epoch": 0.9129406927741803, + "grad_norm": 1.092750906944275, + "learning_rate": 1.974510038149452e-07, + "loss": 0.7409, + "step": 17764 + }, + { + "epoch": 0.912992085517525, + "grad_norm": 0.672427773475647, + "learning_rate": 1.9721949884700332e-07, + "loss": 0.6193, + "step": 17765 + }, + { + "epoch": 0.9130434782608695, + "grad_norm": 1.0358028411865234, + "learning_rate": 1.9698812694428805e-07, + "loss": 0.5805, + "step": 17766 + }, + { + "epoch": 0.9130948710042142, + "grad_norm": 1.2200502157211304, + "learning_rate": 1.9675688811321203e-07, + "loss": 0.6835, + "step": 17767 + }, + { + "epoch": 0.9131462637475588, + "grad_norm": 1.1212867498397827, + "learning_rate": 1.9652578236018072e-07, + "loss": 0.6879, + "step": 17768 + }, + { + "epoch": 0.9131976564909035, + "grad_norm": 1.145961046218872, + "learning_rate": 1.962948096915973e-07, + "loss": 0.7041, + "step": 17769 + }, + { + "epoch": 0.9132490492342481, + "grad_norm": 0.8149420619010925, + "learning_rate": 1.9606397011386002e-07, + "loss": 0.7071, + "step": 17770 + }, + { + "epoch": 0.9133004419775927, + "grad_norm": 1.0562644004821777, + "learning_rate": 1.9583326363336707e-07, + "loss": 0.6844, + "step": 17771 + }, + { + "epoch": 0.9133518347209374, + "grad_norm": 1.100762963294983, + "learning_rate": 1.9560269025650725e-07, + "loss": 0.6844, + "step": 17772 + }, + { + "epoch": 0.913403227464282, + "grad_norm": 1.0478250980377197, + "learning_rate": 1.9537224998967098e-07, + "loss": 0.6432, + "step": 17773 + }, + { + "epoch": 0.9134546202076267, + "grad_norm": 1.1058193445205688, + "learning_rate": 1.9514194283924203e-07, + "loss": 0.6993, + "step": 17774 + }, + { + "epoch": 0.9135060129509713, + "grad_norm": 1.1286722421646118, + "learning_rate": 1.949117688116009e-07, + "loss": 0.6624, + "step": 17775 + }, + { + "epoch": 0.913557405694316, + "grad_norm": 1.1158396005630493, + "learning_rate": 1.9468172791312633e-07, + "loss": 0.7058, + "step": 17776 + }, + { + "epoch": 0.9136087984376606, + "grad_norm": 1.0591977834701538, + "learning_rate": 1.9445182015018994e-07, + "loss": 0.6493, + "step": 17777 + }, + { + "epoch": 0.9136601911810053, + "grad_norm": 1.0822550058364868, + "learning_rate": 1.942220455291627e-07, + "loss": 0.6697, + "step": 17778 + }, + { + "epoch": 0.9137115839243499, + "grad_norm": 1.115945816040039, + "learning_rate": 1.939924040564095e-07, + "loss": 0.6911, + "step": 17779 + }, + { + "epoch": 0.9137629766676946, + "grad_norm": 1.1579450368881226, + "learning_rate": 1.9376289573829478e-07, + "loss": 0.7013, + "step": 17780 + }, + { + "epoch": 0.9138143694110391, + "grad_norm": 1.08558988571167, + "learning_rate": 1.9353352058117503e-07, + "loss": 0.6466, + "step": 17781 + }, + { + "epoch": 0.9138657621543838, + "grad_norm": 1.0792367458343506, + "learning_rate": 1.9330427859140687e-07, + "loss": 0.6893, + "step": 17782 + }, + { + "epoch": 0.9139171548977284, + "grad_norm": 0.7175288200378418, + "learning_rate": 1.9307516977534024e-07, + "loss": 0.681, + "step": 17783 + }, + { + "epoch": 0.913968547641073, + "grad_norm": 1.1783541440963745, + "learning_rate": 1.9284619413932447e-07, + "loss": 0.6724, + "step": 17784 + }, + { + "epoch": 0.9140199403844177, + "grad_norm": 1.3228797912597656, + "learning_rate": 1.926173516897023e-07, + "loss": 0.6928, + "step": 17785 + }, + { + "epoch": 0.9140713331277623, + "grad_norm": 1.1209787130355835, + "learning_rate": 1.9238864243281418e-07, + "loss": 0.7191, + "step": 17786 + }, + { + "epoch": 0.914122725871107, + "grad_norm": 0.998951256275177, + "learning_rate": 1.921600663749984e-07, + "loss": 0.6556, + "step": 17787 + }, + { + "epoch": 0.9141741186144516, + "grad_norm": 1.0374977588653564, + "learning_rate": 1.9193162352258487e-07, + "loss": 0.639, + "step": 17788 + }, + { + "epoch": 0.9142255113577963, + "grad_norm": 1.1528420448303223, + "learning_rate": 1.9170331388190466e-07, + "loss": 0.7109, + "step": 17789 + }, + { + "epoch": 0.9142769041011409, + "grad_norm": 1.1000696420669556, + "learning_rate": 1.9147513745928382e-07, + "loss": 0.6808, + "step": 17790 + }, + { + "epoch": 0.9143282968444856, + "grad_norm": 0.7669256925582886, + "learning_rate": 1.9124709426104226e-07, + "loss": 0.6307, + "step": 17791 + }, + { + "epoch": 0.9143796895878302, + "grad_norm": 0.8601362109184265, + "learning_rate": 1.910191842934994e-07, + "loss": 0.6311, + "step": 17792 + }, + { + "epoch": 0.9144310823311749, + "grad_norm": 0.9897601008415222, + "learning_rate": 1.9079140756296965e-07, + "loss": 0.6982, + "step": 17793 + }, + { + "epoch": 0.9144824750745195, + "grad_norm": 1.0738475322723389, + "learning_rate": 1.905637640757635e-07, + "loss": 0.6841, + "step": 17794 + }, + { + "epoch": 0.9145338678178642, + "grad_norm": 1.1342777013778687, + "learning_rate": 1.903362538381881e-07, + "loss": 0.713, + "step": 17795 + }, + { + "epoch": 0.9145852605612088, + "grad_norm": 1.0233083963394165, + "learning_rate": 1.9010887685654677e-07, + "loss": 0.6551, + "step": 17796 + }, + { + "epoch": 0.9146366533045533, + "grad_norm": 1.0669301748275757, + "learning_rate": 1.8988163313713892e-07, + "loss": 0.6697, + "step": 17797 + }, + { + "epoch": 0.914688046047898, + "grad_norm": 1.1149855852127075, + "learning_rate": 1.8965452268626117e-07, + "loss": 0.6729, + "step": 17798 + }, + { + "epoch": 0.9147394387912426, + "grad_norm": 1.126332402229309, + "learning_rate": 1.8942754551020515e-07, + "loss": 0.712, + "step": 17799 + }, + { + "epoch": 0.9147908315345873, + "grad_norm": 1.0495896339416504, + "learning_rate": 1.8920070161526083e-07, + "loss": 0.6657, + "step": 17800 + }, + { + "epoch": 0.9148422242779319, + "grad_norm": 1.0570639371871948, + "learning_rate": 1.8897399100771097e-07, + "loss": 0.6632, + "step": 17801 + }, + { + "epoch": 0.9148936170212766, + "grad_norm": 1.1035386323928833, + "learning_rate": 1.887474136938383e-07, + "loss": 0.6965, + "step": 17802 + }, + { + "epoch": 0.9149450097646212, + "grad_norm": 1.0673751831054688, + "learning_rate": 1.885209696799195e-07, + "loss": 0.6602, + "step": 17803 + }, + { + "epoch": 0.9149964025079659, + "grad_norm": 0.7812763452529907, + "learning_rate": 1.8829465897222954e-07, + "loss": 0.6384, + "step": 17804 + }, + { + "epoch": 0.9150477952513105, + "grad_norm": 0.7205644845962524, + "learning_rate": 1.880684815770373e-07, + "loss": 0.644, + "step": 17805 + }, + { + "epoch": 0.9150991879946552, + "grad_norm": 1.0499277114868164, + "learning_rate": 1.8784243750060994e-07, + "loss": 0.7078, + "step": 17806 + }, + { + "epoch": 0.9151505807379998, + "grad_norm": 1.0734301805496216, + "learning_rate": 1.8761652674921028e-07, + "loss": 0.6693, + "step": 17807 + }, + { + "epoch": 0.9152019734813445, + "grad_norm": 1.072252631187439, + "learning_rate": 1.8739074932909607e-07, + "loss": 0.689, + "step": 17808 + }, + { + "epoch": 0.9152533662246891, + "grad_norm": 1.0846703052520752, + "learning_rate": 1.8716510524652508e-07, + "loss": 0.7349, + "step": 17809 + }, + { + "epoch": 0.9153047589680338, + "grad_norm": 1.0556458234786987, + "learning_rate": 1.869395945077468e-07, + "loss": 0.6902, + "step": 17810 + }, + { + "epoch": 0.9153561517113784, + "grad_norm": 1.0917582511901855, + "learning_rate": 1.867142171190106e-07, + "loss": 0.6932, + "step": 17811 + }, + { + "epoch": 0.9154075444547229, + "grad_norm": 1.06710946559906, + "learning_rate": 1.864889730865599e-07, + "loss": 0.6904, + "step": 17812 + }, + { + "epoch": 0.9154589371980676, + "grad_norm": 1.252770185470581, + "learning_rate": 1.8626386241663574e-07, + "loss": 0.6539, + "step": 17813 + }, + { + "epoch": 0.9155103299414122, + "grad_norm": 1.099575161933899, + "learning_rate": 1.860388851154743e-07, + "loss": 0.7279, + "step": 17814 + }, + { + "epoch": 0.9155617226847569, + "grad_norm": 0.753142774105072, + "learning_rate": 1.8581404118931e-07, + "loss": 0.6547, + "step": 17815 + }, + { + "epoch": 0.9156131154281015, + "grad_norm": 1.179952621459961, + "learning_rate": 1.8558933064437122e-07, + "loss": 0.6646, + "step": 17816 + }, + { + "epoch": 0.9156645081714462, + "grad_norm": 1.1514180898666382, + "learning_rate": 1.8536475348688353e-07, + "loss": 0.7113, + "step": 17817 + }, + { + "epoch": 0.9157159009147908, + "grad_norm": 1.0712791681289673, + "learning_rate": 1.8514030972307085e-07, + "loss": 0.6942, + "step": 17818 + }, + { + "epoch": 0.9157672936581355, + "grad_norm": 1.1599644422531128, + "learning_rate": 1.849159993591504e-07, + "loss": 0.6463, + "step": 17819 + }, + { + "epoch": 0.9158186864014801, + "grad_norm": 1.1334662437438965, + "learning_rate": 1.8469182240133666e-07, + "loss": 0.7299, + "step": 17820 + }, + { + "epoch": 0.9158700791448248, + "grad_norm": 1.1012070178985596, + "learning_rate": 1.844677788558402e-07, + "loss": 0.6718, + "step": 17821 + }, + { + "epoch": 0.9159214718881694, + "grad_norm": 1.053802490234375, + "learning_rate": 1.8424386872887057e-07, + "loss": 0.6836, + "step": 17822 + }, + { + "epoch": 0.9159728646315141, + "grad_norm": 1.1249608993530273, + "learning_rate": 1.840200920266283e-07, + "loss": 0.7243, + "step": 17823 + }, + { + "epoch": 0.9160242573748587, + "grad_norm": 1.0726464986801147, + "learning_rate": 1.8379644875531566e-07, + "loss": 0.662, + "step": 17824 + }, + { + "epoch": 0.9160756501182034, + "grad_norm": 1.080886721611023, + "learning_rate": 1.8357293892112825e-07, + "loss": 0.7304, + "step": 17825 + }, + { + "epoch": 0.916127042861548, + "grad_norm": 0.9601016640663147, + "learning_rate": 1.8334956253025783e-07, + "loss": 0.5948, + "step": 17826 + }, + { + "epoch": 0.9161784356048925, + "grad_norm": 0.9973931312561035, + "learning_rate": 1.8312631958889438e-07, + "loss": 0.6503, + "step": 17827 + }, + { + "epoch": 0.9162298283482372, + "grad_norm": 1.097583532333374, + "learning_rate": 1.8290321010322243e-07, + "loss": 0.6965, + "step": 17828 + }, + { + "epoch": 0.9162812210915818, + "grad_norm": 1.0854599475860596, + "learning_rate": 1.8268023407942426e-07, + "loss": 0.6956, + "step": 17829 + }, + { + "epoch": 0.9163326138349265, + "grad_norm": 1.0833860635757446, + "learning_rate": 1.8245739152367602e-07, + "loss": 0.674, + "step": 17830 + }, + { + "epoch": 0.9163840065782711, + "grad_norm": 1.1195471286773682, + "learning_rate": 1.8223468244215281e-07, + "loss": 0.6629, + "step": 17831 + }, + { + "epoch": 0.9164353993216158, + "grad_norm": 0.7234694361686707, + "learning_rate": 1.8201210684102522e-07, + "loss": 0.6559, + "step": 17832 + }, + { + "epoch": 0.9164867920649604, + "grad_norm": 1.0424590110778809, + "learning_rate": 1.8178966472645888e-07, + "loss": 0.6896, + "step": 17833 + }, + { + "epoch": 0.9165381848083051, + "grad_norm": 1.1470211744308472, + "learning_rate": 1.8156735610461717e-07, + "loss": 0.7357, + "step": 17834 + }, + { + "epoch": 0.9165895775516497, + "grad_norm": 1.0699808597564697, + "learning_rate": 1.813451809816602e-07, + "loss": 0.6889, + "step": 17835 + }, + { + "epoch": 0.9166409702949944, + "grad_norm": 1.0896185636520386, + "learning_rate": 1.8112313936374303e-07, + "loss": 0.7216, + "step": 17836 + }, + { + "epoch": 0.916692363038339, + "grad_norm": 0.7181626558303833, + "learning_rate": 1.809012312570163e-07, + "loss": 0.6245, + "step": 17837 + }, + { + "epoch": 0.9167437557816837, + "grad_norm": 1.095384120941162, + "learning_rate": 1.8067945666763064e-07, + "loss": 0.7207, + "step": 17838 + }, + { + "epoch": 0.9167951485250283, + "grad_norm": 1.0941030979156494, + "learning_rate": 1.8045781560172782e-07, + "loss": 0.711, + "step": 17839 + }, + { + "epoch": 0.916846541268373, + "grad_norm": 1.152273178100586, + "learning_rate": 1.8023630806545066e-07, + "loss": 0.7366, + "step": 17840 + }, + { + "epoch": 0.9168979340117176, + "grad_norm": 1.1516669988632202, + "learning_rate": 1.8001493406493486e-07, + "loss": 0.6904, + "step": 17841 + }, + { + "epoch": 0.9169493267550621, + "grad_norm": 1.1944884061813354, + "learning_rate": 1.7979369360631495e-07, + "loss": 0.7475, + "step": 17842 + }, + { + "epoch": 0.9170007194984068, + "grad_norm": 1.1919951438903809, + "learning_rate": 1.7957258669571877e-07, + "loss": 0.6788, + "step": 17843 + }, + { + "epoch": 0.9170521122417514, + "grad_norm": 1.0706806182861328, + "learning_rate": 1.7935161333927476e-07, + "loss": 0.7046, + "step": 17844 + }, + { + "epoch": 0.9171035049850961, + "grad_norm": 1.1335201263427734, + "learning_rate": 1.791307735431036e-07, + "loss": 0.6717, + "step": 17845 + }, + { + "epoch": 0.9171548977284407, + "grad_norm": 0.7956576943397522, + "learning_rate": 1.7891006731332372e-07, + "loss": 0.65, + "step": 17846 + }, + { + "epoch": 0.9172062904717854, + "grad_norm": 1.0746384859085083, + "learning_rate": 1.7868949465605024e-07, + "loss": 0.6858, + "step": 17847 + }, + { + "epoch": 0.91725768321513, + "grad_norm": 1.1434177160263062, + "learning_rate": 1.784690555773949e-07, + "loss": 0.6993, + "step": 17848 + }, + { + "epoch": 0.9173090759584747, + "grad_norm": 1.1466773748397827, + "learning_rate": 1.7824875008346508e-07, + "loss": 0.6809, + "step": 17849 + }, + { + "epoch": 0.9173604687018193, + "grad_norm": 1.2224323749542236, + "learning_rate": 1.7802857818036367e-07, + "loss": 0.6858, + "step": 17850 + }, + { + "epoch": 0.917411861445164, + "grad_norm": 1.1147270202636719, + "learning_rate": 1.778085398741919e-07, + "loss": 0.7083, + "step": 17851 + }, + { + "epoch": 0.9174632541885086, + "grad_norm": 1.0163531303405762, + "learning_rate": 1.7758863517104485e-07, + "loss": 0.6154, + "step": 17852 + }, + { + "epoch": 0.9175146469318533, + "grad_norm": 0.787189781665802, + "learning_rate": 1.7736886407701604e-07, + "loss": 0.6313, + "step": 17853 + }, + { + "epoch": 0.9175660396751979, + "grad_norm": 1.0340983867645264, + "learning_rate": 1.7714922659819443e-07, + "loss": 0.6594, + "step": 17854 + }, + { + "epoch": 0.9176174324185425, + "grad_norm": 1.2157496213912964, + "learning_rate": 1.7692972274066467e-07, + "loss": 0.7109, + "step": 17855 + }, + { + "epoch": 0.9176688251618872, + "grad_norm": 1.1568785905838013, + "learning_rate": 1.767103525105085e-07, + "loss": 0.6675, + "step": 17856 + }, + { + "epoch": 0.9177202179052317, + "grad_norm": 1.1193177700042725, + "learning_rate": 1.7649111591380385e-07, + "loss": 0.698, + "step": 17857 + }, + { + "epoch": 0.9177716106485764, + "grad_norm": 0.686129629611969, + "learning_rate": 1.7627201295662532e-07, + "loss": 0.6321, + "step": 17858 + }, + { + "epoch": 0.917823003391921, + "grad_norm": 1.2017323970794678, + "learning_rate": 1.760530436450425e-07, + "loss": 0.6788, + "step": 17859 + }, + { + "epoch": 0.9178743961352657, + "grad_norm": 1.0828529596328735, + "learning_rate": 1.758342079851233e-07, + "loss": 0.6632, + "step": 17860 + }, + { + "epoch": 0.9179257888786103, + "grad_norm": 1.1641581058502197, + "learning_rate": 1.7561550598292954e-07, + "loss": 0.7203, + "step": 17861 + }, + { + "epoch": 0.917977181621955, + "grad_norm": 1.0762056112289429, + "learning_rate": 1.7539693764452137e-07, + "loss": 0.6676, + "step": 17862 + }, + { + "epoch": 0.9180285743652996, + "grad_norm": 1.129097580909729, + "learning_rate": 1.751785029759534e-07, + "loss": 0.7054, + "step": 17863 + }, + { + "epoch": 0.9180799671086443, + "grad_norm": 1.044602632522583, + "learning_rate": 1.7496020198327967e-07, + "loss": 0.6067, + "step": 17864 + }, + { + "epoch": 0.9181313598519889, + "grad_norm": 0.7199539542198181, + "learning_rate": 1.7474203467254592e-07, + "loss": 0.6171, + "step": 17865 + }, + { + "epoch": 0.9181827525953336, + "grad_norm": 1.0825364589691162, + "learning_rate": 1.7452400104979782e-07, + "loss": 0.6215, + "step": 17866 + }, + { + "epoch": 0.9182341453386782, + "grad_norm": 1.0826314687728882, + "learning_rate": 1.7430610112107616e-07, + "loss": 0.7204, + "step": 17867 + }, + { + "epoch": 0.9182855380820228, + "grad_norm": 1.1075942516326904, + "learning_rate": 1.7408833489241772e-07, + "loss": 0.6439, + "step": 17868 + }, + { + "epoch": 0.9183369308253675, + "grad_norm": 1.0964702367782593, + "learning_rate": 1.7387070236985658e-07, + "loss": 0.6887, + "step": 17869 + }, + { + "epoch": 0.9183883235687121, + "grad_norm": 0.8237515687942505, + "learning_rate": 1.7365320355942185e-07, + "loss": 0.6221, + "step": 17870 + }, + { + "epoch": 0.9184397163120568, + "grad_norm": 1.3141134977340698, + "learning_rate": 1.7343583846713974e-07, + "loss": 0.6506, + "step": 17871 + }, + { + "epoch": 0.9184911090554013, + "grad_norm": 1.1173369884490967, + "learning_rate": 1.7321860709903214e-07, + "loss": 0.667, + "step": 17872 + }, + { + "epoch": 0.918542501798746, + "grad_norm": 1.0614476203918457, + "learning_rate": 1.7300150946111872e-07, + "loss": 0.6661, + "step": 17873 + }, + { + "epoch": 0.9185938945420906, + "grad_norm": 1.0272514820098877, + "learning_rate": 1.7278454555941294e-07, + "loss": 0.6641, + "step": 17874 + }, + { + "epoch": 0.9186452872854353, + "grad_norm": 1.1299598217010498, + "learning_rate": 1.7256771539992723e-07, + "loss": 0.703, + "step": 17875 + }, + { + "epoch": 0.9186966800287799, + "grad_norm": 1.131804347038269, + "learning_rate": 1.7235101898866736e-07, + "loss": 0.7107, + "step": 17876 + }, + { + "epoch": 0.9187480727721246, + "grad_norm": 1.0820996761322021, + "learning_rate": 1.7213445633163961e-07, + "loss": 0.6868, + "step": 17877 + }, + { + "epoch": 0.9187994655154692, + "grad_norm": 1.128631591796875, + "learning_rate": 1.7191802743484198e-07, + "loss": 0.7497, + "step": 17878 + }, + { + "epoch": 0.9188508582588139, + "grad_norm": 1.10720956325531, + "learning_rate": 1.7170173230427135e-07, + "loss": 0.7093, + "step": 17879 + }, + { + "epoch": 0.9189022510021585, + "grad_norm": 1.1000370979309082, + "learning_rate": 1.7148557094592122e-07, + "loss": 0.7232, + "step": 17880 + }, + { + "epoch": 0.9189536437455031, + "grad_norm": 0.7628276348114014, + "learning_rate": 1.7126954336577906e-07, + "loss": 0.6514, + "step": 17881 + }, + { + "epoch": 0.9190050364888478, + "grad_norm": 1.0870556831359863, + "learning_rate": 1.7105364956983116e-07, + "loss": 0.7081, + "step": 17882 + }, + { + "epoch": 0.9190564292321924, + "grad_norm": 1.016682744026184, + "learning_rate": 1.7083788956405889e-07, + "loss": 0.711, + "step": 17883 + }, + { + "epoch": 0.9191078219755371, + "grad_norm": 0.6881650686264038, + "learning_rate": 1.7062226335443964e-07, + "loss": 0.6305, + "step": 17884 + }, + { + "epoch": 0.9191592147188817, + "grad_norm": 1.153055191040039, + "learning_rate": 1.7040677094694756e-07, + "loss": 0.6684, + "step": 17885 + }, + { + "epoch": 0.9192106074622264, + "grad_norm": 0.6730453968048096, + "learning_rate": 1.7019141234755398e-07, + "loss": 0.668, + "step": 17886 + }, + { + "epoch": 0.919262000205571, + "grad_norm": 1.1237142086029053, + "learning_rate": 1.6997618756222466e-07, + "loss": 0.6778, + "step": 17887 + }, + { + "epoch": 0.9193133929489156, + "grad_norm": 1.0704950094223022, + "learning_rate": 1.6976109659692263e-07, + "loss": 0.6483, + "step": 17888 + }, + { + "epoch": 0.9193647856922602, + "grad_norm": 1.0592703819274902, + "learning_rate": 1.6954613945760812e-07, + "loss": 0.7091, + "step": 17889 + }, + { + "epoch": 0.9194161784356049, + "grad_norm": 1.1600102186203003, + "learning_rate": 1.693313161502347e-07, + "loss": 0.6939, + "step": 17890 + }, + { + "epoch": 0.9194675711789495, + "grad_norm": 1.1618740558624268, + "learning_rate": 1.6911662668075645e-07, + "loss": 0.6894, + "step": 17891 + }, + { + "epoch": 0.9195189639222942, + "grad_norm": 1.0890191793441772, + "learning_rate": 1.689020710551198e-07, + "loss": 0.6423, + "step": 17892 + }, + { + "epoch": 0.9195703566656388, + "grad_norm": 1.0456568002700806, + "learning_rate": 1.686876492792716e-07, + "loss": 0.6654, + "step": 17893 + }, + { + "epoch": 0.9196217494089834, + "grad_norm": 1.165205717086792, + "learning_rate": 1.6847336135914937e-07, + "loss": 0.6929, + "step": 17894 + }, + { + "epoch": 0.9196731421523281, + "grad_norm": 1.1483205556869507, + "learning_rate": 1.6825920730069279e-07, + "loss": 0.7267, + "step": 17895 + }, + { + "epoch": 0.9197245348956727, + "grad_norm": 1.3313127756118774, + "learning_rate": 1.6804518710983374e-07, + "loss": 0.7284, + "step": 17896 + }, + { + "epoch": 0.9197759276390174, + "grad_norm": 1.0919408798217773, + "learning_rate": 1.6783130079250255e-07, + "loss": 0.6587, + "step": 17897 + }, + { + "epoch": 0.919827320382362, + "grad_norm": 1.1168464422225952, + "learning_rate": 1.6761754835462386e-07, + "loss": 0.7168, + "step": 17898 + }, + { + "epoch": 0.9198787131257067, + "grad_norm": 1.1493359804153442, + "learning_rate": 1.6740392980212185e-07, + "loss": 0.7368, + "step": 17899 + }, + { + "epoch": 0.9199301058690513, + "grad_norm": 0.9786962866783142, + "learning_rate": 1.6719044514091344e-07, + "loss": 0.6042, + "step": 17900 + }, + { + "epoch": 0.919981498612396, + "grad_norm": 0.7389718294143677, + "learning_rate": 1.6697709437691388e-07, + "loss": 0.6495, + "step": 17901 + }, + { + "epoch": 0.9200328913557406, + "grad_norm": 0.6559603214263916, + "learning_rate": 1.6676387751603518e-07, + "loss": 0.6308, + "step": 17902 + }, + { + "epoch": 0.9200842840990852, + "grad_norm": 1.1020094156265259, + "learning_rate": 1.6655079456418312e-07, + "loss": 0.6671, + "step": 17903 + }, + { + "epoch": 0.9201356768424298, + "grad_norm": 1.1517366170883179, + "learning_rate": 1.663378455272624e-07, + "loss": 0.7296, + "step": 17904 + }, + { + "epoch": 0.9201870695857745, + "grad_norm": 1.1258994340896606, + "learning_rate": 1.6612503041117278e-07, + "loss": 0.7193, + "step": 17905 + }, + { + "epoch": 0.9202384623291191, + "grad_norm": 1.096066951751709, + "learning_rate": 1.659123492218101e-07, + "loss": 0.6918, + "step": 17906 + }, + { + "epoch": 0.9202898550724637, + "grad_norm": 1.0259110927581787, + "learning_rate": 1.656998019650663e-07, + "loss": 0.665, + "step": 17907 + }, + { + "epoch": 0.9203412478158084, + "grad_norm": 1.0766032934188843, + "learning_rate": 1.6548738864683222e-07, + "loss": 0.6748, + "step": 17908 + }, + { + "epoch": 0.920392640559153, + "grad_norm": 0.6675497889518738, + "learning_rate": 1.6527510927299152e-07, + "loss": 0.6452, + "step": 17909 + }, + { + "epoch": 0.9204440333024977, + "grad_norm": 1.140841007232666, + "learning_rate": 1.65062963849425e-07, + "loss": 0.729, + "step": 17910 + }, + { + "epoch": 0.9204954260458423, + "grad_norm": 1.0372449159622192, + "learning_rate": 1.6485095238201187e-07, + "loss": 0.7559, + "step": 17911 + }, + { + "epoch": 0.920546818789187, + "grad_norm": 1.134925127029419, + "learning_rate": 1.6463907487662523e-07, + "loss": 0.6825, + "step": 17912 + }, + { + "epoch": 0.9205982115325316, + "grad_norm": 1.0917657613754272, + "learning_rate": 1.644273313391359e-07, + "loss": 0.7089, + "step": 17913 + }, + { + "epoch": 0.9206496042758763, + "grad_norm": 1.1895254850387573, + "learning_rate": 1.6421572177540867e-07, + "loss": 0.7135, + "step": 17914 + }, + { + "epoch": 0.9207009970192209, + "grad_norm": 1.102960467338562, + "learning_rate": 1.640042461913094e-07, + "loss": 0.697, + "step": 17915 + }, + { + "epoch": 0.9207523897625656, + "grad_norm": 1.1201682090759277, + "learning_rate": 1.6379290459269448e-07, + "loss": 0.7418, + "step": 17916 + }, + { + "epoch": 0.9208037825059102, + "grad_norm": 1.084162712097168, + "learning_rate": 1.6358169698542037e-07, + "loss": 0.6873, + "step": 17917 + }, + { + "epoch": 0.9208551752492548, + "grad_norm": 1.1272410154342651, + "learning_rate": 1.6337062337533904e-07, + "loss": 0.7226, + "step": 17918 + }, + { + "epoch": 0.9209065679925994, + "grad_norm": 1.1469653844833374, + "learning_rate": 1.631596837682975e-07, + "loss": 0.6975, + "step": 17919 + }, + { + "epoch": 0.920957960735944, + "grad_norm": 0.6749574542045593, + "learning_rate": 1.629488781701416e-07, + "loss": 0.6311, + "step": 17920 + }, + { + "epoch": 0.9210093534792887, + "grad_norm": 1.1012417078018188, + "learning_rate": 1.6273820658671057e-07, + "loss": 0.7211, + "step": 17921 + }, + { + "epoch": 0.9210607462226333, + "grad_norm": 1.129227638244629, + "learning_rate": 1.6252766902384144e-07, + "loss": 0.7202, + "step": 17922 + }, + { + "epoch": 0.921112138965978, + "grad_norm": 1.0793527364730835, + "learning_rate": 1.6231726548736727e-07, + "loss": 0.7049, + "step": 17923 + }, + { + "epoch": 0.9211635317093226, + "grad_norm": 1.0719245672225952, + "learning_rate": 1.6210699598311842e-07, + "loss": 0.6267, + "step": 17924 + }, + { + "epoch": 0.9212149244526673, + "grad_norm": 1.088513731956482, + "learning_rate": 1.6189686051691968e-07, + "loss": 0.6345, + "step": 17925 + }, + { + "epoch": 0.9212663171960119, + "grad_norm": 1.0609462261199951, + "learning_rate": 1.6168685909459304e-07, + "loss": 0.6734, + "step": 17926 + }, + { + "epoch": 0.9213177099393566, + "grad_norm": 1.1078190803527832, + "learning_rate": 1.6147699172195718e-07, + "loss": 0.6983, + "step": 17927 + }, + { + "epoch": 0.9213691026827012, + "grad_norm": 1.1390665769577026, + "learning_rate": 1.6126725840482637e-07, + "loss": 0.7211, + "step": 17928 + }, + { + "epoch": 0.9214204954260459, + "grad_norm": 0.9897744655609131, + "learning_rate": 1.610576591490115e-07, + "loss": 0.6412, + "step": 17929 + }, + { + "epoch": 0.9214718881693905, + "grad_norm": 1.0611399412155151, + "learning_rate": 1.6084819396031958e-07, + "loss": 0.6866, + "step": 17930 + }, + { + "epoch": 0.9215232809127352, + "grad_norm": 1.1009806394577026, + "learning_rate": 1.6063886284455544e-07, + "loss": 0.6719, + "step": 17931 + }, + { + "epoch": 0.9215746736560798, + "grad_norm": 1.0061864852905273, + "learning_rate": 1.6042966580751607e-07, + "loss": 0.653, + "step": 17932 + }, + { + "epoch": 0.9216260663994243, + "grad_norm": 1.1049132347106934, + "learning_rate": 1.6022060285499964e-07, + "loss": 0.7158, + "step": 17933 + }, + { + "epoch": 0.921677459142769, + "grad_norm": 0.7581444382667542, + "learning_rate": 1.600116739927976e-07, + "loss": 0.607, + "step": 17934 + }, + { + "epoch": 0.9217288518861136, + "grad_norm": 1.1605666875839233, + "learning_rate": 1.5980287922669868e-07, + "loss": 0.6627, + "step": 17935 + }, + { + "epoch": 0.9217802446294583, + "grad_norm": 1.0792490243911743, + "learning_rate": 1.5959421856248715e-07, + "loss": 0.6769, + "step": 17936 + }, + { + "epoch": 0.9218316373728029, + "grad_norm": 0.876400351524353, + "learning_rate": 1.59385692005945e-07, + "loss": 0.6524, + "step": 17937 + }, + { + "epoch": 0.9218830301161476, + "grad_norm": 1.0118316411972046, + "learning_rate": 1.5917729956284934e-07, + "loss": 0.676, + "step": 17938 + }, + { + "epoch": 0.9219344228594922, + "grad_norm": 0.8122013211250305, + "learning_rate": 1.589690412389733e-07, + "loss": 0.6534, + "step": 17939 + }, + { + "epoch": 0.9219858156028369, + "grad_norm": 1.1455602645874023, + "learning_rate": 1.5876091704008834e-07, + "loss": 0.6792, + "step": 17940 + }, + { + "epoch": 0.9220372083461815, + "grad_norm": 1.1031770706176758, + "learning_rate": 1.5855292697195935e-07, + "loss": 0.6814, + "step": 17941 + }, + { + "epoch": 0.9220886010895262, + "grad_norm": 1.0946985483169556, + "learning_rate": 1.5834507104034947e-07, + "loss": 0.6588, + "step": 17942 + }, + { + "epoch": 0.9221399938328708, + "grad_norm": 1.110788345336914, + "learning_rate": 1.581373492510163e-07, + "loss": 0.6937, + "step": 17943 + }, + { + "epoch": 0.9221913865762155, + "grad_norm": 1.1252504587173462, + "learning_rate": 1.5792976160971752e-07, + "loss": 0.7146, + "step": 17944 + }, + { + "epoch": 0.9222427793195601, + "grad_norm": 1.187690019607544, + "learning_rate": 1.577223081222018e-07, + "loss": 0.7248, + "step": 17945 + }, + { + "epoch": 0.9222941720629048, + "grad_norm": 1.0790342092514038, + "learning_rate": 1.5751498879421901e-07, + "loss": 0.6883, + "step": 17946 + }, + { + "epoch": 0.9223455648062494, + "grad_norm": 1.1112626791000366, + "learning_rate": 1.5730780363151176e-07, + "loss": 0.6861, + "step": 17947 + }, + { + "epoch": 0.9223969575495939, + "grad_norm": 1.1326247453689575, + "learning_rate": 1.571007526398205e-07, + "loss": 0.6877, + "step": 17948 + }, + { + "epoch": 0.9224483502929386, + "grad_norm": 1.1459630727767944, + "learning_rate": 1.5689383582488117e-07, + "loss": 0.7348, + "step": 17949 + }, + { + "epoch": 0.9224997430362832, + "grad_norm": 1.1149672269821167, + "learning_rate": 1.566870531924286e-07, + "loss": 0.7281, + "step": 17950 + }, + { + "epoch": 0.9225511357796279, + "grad_norm": 1.1064398288726807, + "learning_rate": 1.5648040474818994e-07, + "loss": 0.7009, + "step": 17951 + }, + { + "epoch": 0.9226025285229725, + "grad_norm": 1.0870492458343506, + "learning_rate": 1.562738904978911e-07, + "loss": 0.6913, + "step": 17952 + }, + { + "epoch": 0.9226539212663172, + "grad_norm": 1.108618140220642, + "learning_rate": 1.5606751044725422e-07, + "loss": 0.6749, + "step": 17953 + }, + { + "epoch": 0.9227053140096618, + "grad_norm": 0.8157503008842468, + "learning_rate": 1.558612646019969e-07, + "loss": 0.611, + "step": 17954 + }, + { + "epoch": 0.9227567067530065, + "grad_norm": 1.049039602279663, + "learning_rate": 1.556551529678335e-07, + "loss": 0.6776, + "step": 17955 + }, + { + "epoch": 0.9228080994963511, + "grad_norm": 1.0380388498306274, + "learning_rate": 1.554491755504739e-07, + "loss": 0.6338, + "step": 17956 + }, + { + "epoch": 0.9228594922396958, + "grad_norm": 1.132527232170105, + "learning_rate": 1.5524333235562628e-07, + "loss": 0.6685, + "step": 17957 + }, + { + "epoch": 0.9229108849830404, + "grad_norm": 1.0159029960632324, + "learning_rate": 1.5503762338899165e-07, + "loss": 0.6518, + "step": 17958 + }, + { + "epoch": 0.9229622777263851, + "grad_norm": 1.0918611288070679, + "learning_rate": 1.54832048656271e-07, + "loss": 0.7168, + "step": 17959 + }, + { + "epoch": 0.9230136704697297, + "grad_norm": 1.1056890487670898, + "learning_rate": 1.5462660816315978e-07, + "loss": 0.6672, + "step": 17960 + }, + { + "epoch": 0.9230650632130744, + "grad_norm": 1.0868240594863892, + "learning_rate": 1.54421301915349e-07, + "loss": 0.6946, + "step": 17961 + }, + { + "epoch": 0.923116455956419, + "grad_norm": 1.0969133377075195, + "learning_rate": 1.5421612991852743e-07, + "loss": 0.6758, + "step": 17962 + }, + { + "epoch": 0.9231678486997635, + "grad_norm": 1.0977437496185303, + "learning_rate": 1.5401109217837996e-07, + "loss": 0.7188, + "step": 17963 + }, + { + "epoch": 0.9232192414431082, + "grad_norm": 1.1663697957992554, + "learning_rate": 1.5380618870058705e-07, + "loss": 0.7497, + "step": 17964 + }, + { + "epoch": 0.9232706341864528, + "grad_norm": 1.1673861742019653, + "learning_rate": 1.5360141949082474e-07, + "loss": 0.6828, + "step": 17965 + }, + { + "epoch": 0.9233220269297975, + "grad_norm": 1.1102330684661865, + "learning_rate": 1.5339678455476848e-07, + "loss": 0.6548, + "step": 17966 + }, + { + "epoch": 0.9233734196731421, + "grad_norm": 1.0657141208648682, + "learning_rate": 1.5319228389808538e-07, + "loss": 0.6564, + "step": 17967 + }, + { + "epoch": 0.9234248124164868, + "grad_norm": 1.0036215782165527, + "learning_rate": 1.5298791752644316e-07, + "loss": 0.6803, + "step": 17968 + }, + { + "epoch": 0.9234762051598314, + "grad_norm": 1.0851986408233643, + "learning_rate": 1.5278368544550282e-07, + "loss": 0.7094, + "step": 17969 + }, + { + "epoch": 0.9235275979031761, + "grad_norm": 1.0764050483703613, + "learning_rate": 1.5257958766092318e-07, + "loss": 0.6776, + "step": 17970 + }, + { + "epoch": 0.9235789906465207, + "grad_norm": 0.7904413342475891, + "learning_rate": 1.5237562417835973e-07, + "loss": 0.6367, + "step": 17971 + }, + { + "epoch": 0.9236303833898654, + "grad_norm": 1.0781728029251099, + "learning_rate": 1.521717950034618e-07, + "loss": 0.7381, + "step": 17972 + }, + { + "epoch": 0.92368177613321, + "grad_norm": 1.1759557723999023, + "learning_rate": 1.5196810014187936e-07, + "loss": 0.7618, + "step": 17973 + }, + { + "epoch": 0.9237331688765547, + "grad_norm": 1.092697262763977, + "learning_rate": 1.517645395992523e-07, + "loss": 0.6808, + "step": 17974 + }, + { + "epoch": 0.9237845616198993, + "grad_norm": 1.0965087413787842, + "learning_rate": 1.5156111338122338e-07, + "loss": 0.737, + "step": 17975 + }, + { + "epoch": 0.923835954363244, + "grad_norm": 1.1602504253387451, + "learning_rate": 1.5135782149342749e-07, + "loss": 0.6723, + "step": 17976 + }, + { + "epoch": 0.9238873471065886, + "grad_norm": 1.0647333860397339, + "learning_rate": 1.5115466394149737e-07, + "loss": 0.6961, + "step": 17977 + }, + { + "epoch": 0.9239387398499332, + "grad_norm": 1.0834171772003174, + "learning_rate": 1.5095164073106128e-07, + "loss": 0.6798, + "step": 17978 + }, + { + "epoch": 0.9239901325932778, + "grad_norm": 1.12798011302948, + "learning_rate": 1.507487518677442e-07, + "loss": 0.727, + "step": 17979 + }, + { + "epoch": 0.9240415253366224, + "grad_norm": 1.1388719081878662, + "learning_rate": 1.5054599735716824e-07, + "loss": 0.6982, + "step": 17980 + }, + { + "epoch": 0.9240929180799671, + "grad_norm": 1.1117812395095825, + "learning_rate": 1.5034337720494897e-07, + "loss": 0.7107, + "step": 17981 + }, + { + "epoch": 0.9241443108233117, + "grad_norm": 1.1133978366851807, + "learning_rate": 1.5014089141670296e-07, + "loss": 0.7032, + "step": 17982 + }, + { + "epoch": 0.9241957035666564, + "grad_norm": 1.1378601789474487, + "learning_rate": 1.4993853999803743e-07, + "loss": 0.6859, + "step": 17983 + }, + { + "epoch": 0.924247096310001, + "grad_norm": 1.073419451713562, + "learning_rate": 1.497363229545601e-07, + "loss": 0.6765, + "step": 17984 + }, + { + "epoch": 0.9242984890533457, + "grad_norm": 1.0413177013397217, + "learning_rate": 1.4953424029187315e-07, + "loss": 0.733, + "step": 17985 + }, + { + "epoch": 0.9243498817966903, + "grad_norm": 1.1038250923156738, + "learning_rate": 1.4933229201557707e-07, + "loss": 0.6946, + "step": 17986 + }, + { + "epoch": 0.924401274540035, + "grad_norm": 1.0266940593719482, + "learning_rate": 1.4913047813126414e-07, + "loss": 0.6643, + "step": 17987 + }, + { + "epoch": 0.9244526672833796, + "grad_norm": 1.0726960897445679, + "learning_rate": 1.4892879864452813e-07, + "loss": 0.6568, + "step": 17988 + }, + { + "epoch": 0.9245040600267243, + "grad_norm": 1.1040953397750854, + "learning_rate": 1.4872725356095575e-07, + "loss": 0.7084, + "step": 17989 + }, + { + "epoch": 0.9245554527700689, + "grad_norm": 1.145744800567627, + "learning_rate": 1.4852584288613082e-07, + "loss": 0.7068, + "step": 17990 + }, + { + "epoch": 0.9246068455134135, + "grad_norm": 1.1365375518798828, + "learning_rate": 1.4832456662563443e-07, + "loss": 0.7196, + "step": 17991 + }, + { + "epoch": 0.9246582382567582, + "grad_norm": 1.2515432834625244, + "learning_rate": 1.4812342478504216e-07, + "loss": 0.7381, + "step": 17992 + }, + { + "epoch": 0.9247096310001028, + "grad_norm": 1.1745240688323975, + "learning_rate": 1.4792241736992785e-07, + "loss": 0.7026, + "step": 17993 + }, + { + "epoch": 0.9247610237434474, + "grad_norm": 1.0568336248397827, + "learning_rate": 1.4772154438585929e-07, + "loss": 0.6942, + "step": 17994 + }, + { + "epoch": 0.924812416486792, + "grad_norm": 1.0647705793380737, + "learning_rate": 1.4752080583840366e-07, + "loss": 0.6512, + "step": 17995 + }, + { + "epoch": 0.9248638092301367, + "grad_norm": 0.7068923115730286, + "learning_rate": 1.4732020173312045e-07, + "loss": 0.6776, + "step": 17996 + }, + { + "epoch": 0.9249152019734813, + "grad_norm": 1.0651558637619019, + "learning_rate": 1.4711973207556907e-07, + "loss": 0.7404, + "step": 17997 + }, + { + "epoch": 0.924966594716826, + "grad_norm": 1.0641396045684814, + "learning_rate": 1.469193968713034e-07, + "loss": 0.6985, + "step": 17998 + }, + { + "epoch": 0.9250179874601706, + "grad_norm": 1.077093243598938, + "learning_rate": 1.467191961258735e-07, + "loss": 0.7171, + "step": 17999 + }, + { + "epoch": 0.9250693802035153, + "grad_norm": 1.0949444770812988, + "learning_rate": 1.4651912984482596e-07, + "loss": 0.6862, + "step": 18000 + }, + { + "epoch": 0.9251207729468599, + "grad_norm": 1.1452710628509521, + "learning_rate": 1.4631919803370477e-07, + "loss": 0.6974, + "step": 18001 + }, + { + "epoch": 0.9251721656902046, + "grad_norm": 1.048422932624817, + "learning_rate": 1.4611940069804875e-07, + "loss": 0.6839, + "step": 18002 + }, + { + "epoch": 0.9252235584335492, + "grad_norm": 0.7867533564567566, + "learning_rate": 1.4591973784339242e-07, + "loss": 0.6723, + "step": 18003 + }, + { + "epoch": 0.9252749511768938, + "grad_norm": 1.0264827013015747, + "learning_rate": 1.4572020947526966e-07, + "loss": 0.6688, + "step": 18004 + }, + { + "epoch": 0.9253263439202385, + "grad_norm": 0.737784743309021, + "learning_rate": 1.4552081559920716e-07, + "loss": 0.6514, + "step": 18005 + }, + { + "epoch": 0.9253777366635831, + "grad_norm": 1.1032506227493286, + "learning_rate": 1.4532155622072941e-07, + "loss": 0.7214, + "step": 18006 + }, + { + "epoch": 0.9254291294069278, + "grad_norm": 1.0456485748291016, + "learning_rate": 1.4512243134535698e-07, + "loss": 0.6525, + "step": 18007 + }, + { + "epoch": 0.9254805221502724, + "grad_norm": 1.049696683883667, + "learning_rate": 1.4492344097860823e-07, + "loss": 0.6479, + "step": 18008 + }, + { + "epoch": 0.925531914893617, + "grad_norm": 1.0641655921936035, + "learning_rate": 1.4472458512599374e-07, + "loss": 0.7152, + "step": 18009 + }, + { + "epoch": 0.9255833076369616, + "grad_norm": 0.6849913001060486, + "learning_rate": 1.4452586379302524e-07, + "loss": 0.635, + "step": 18010 + }, + { + "epoch": 0.9256347003803063, + "grad_norm": 1.038744330406189, + "learning_rate": 1.4432727698520775e-07, + "loss": 0.702, + "step": 18011 + }, + { + "epoch": 0.9256860931236509, + "grad_norm": 1.1021252870559692, + "learning_rate": 1.4412882470804302e-07, + "loss": 0.6578, + "step": 18012 + }, + { + "epoch": 0.9257374858669956, + "grad_norm": 1.0691472291946411, + "learning_rate": 1.4393050696702938e-07, + "loss": 0.6845, + "step": 18013 + }, + { + "epoch": 0.9257888786103402, + "grad_norm": 0.7026569247245789, + "learning_rate": 1.437323237676619e-07, + "loss": 0.6146, + "step": 18014 + }, + { + "epoch": 0.9258402713536849, + "grad_norm": 1.1386979818344116, + "learning_rate": 1.4353427511543117e-07, + "loss": 0.7405, + "step": 18015 + }, + { + "epoch": 0.9258916640970295, + "grad_norm": 0.780444324016571, + "learning_rate": 1.4333636101582395e-07, + "loss": 0.6601, + "step": 18016 + }, + { + "epoch": 0.9259430568403741, + "grad_norm": 0.7260696291923523, + "learning_rate": 1.4313858147432414e-07, + "loss": 0.6353, + "step": 18017 + }, + { + "epoch": 0.9259944495837188, + "grad_norm": 1.1066051721572876, + "learning_rate": 1.4294093649641071e-07, + "loss": 0.6975, + "step": 18018 + }, + { + "epoch": 0.9260458423270634, + "grad_norm": 0.8170315623283386, + "learning_rate": 1.4274342608756042e-07, + "loss": 0.649, + "step": 18019 + }, + { + "epoch": 0.9260972350704081, + "grad_norm": 0.6989414691925049, + "learning_rate": 1.4254605025324497e-07, + "loss": 0.6103, + "step": 18020 + }, + { + "epoch": 0.9261486278137527, + "grad_norm": 0.7070466876029968, + "learning_rate": 1.4234880899893279e-07, + "loss": 0.6416, + "step": 18021 + }, + { + "epoch": 0.9262000205570974, + "grad_norm": 1.0816798210144043, + "learning_rate": 1.4215170233008945e-07, + "loss": 0.6863, + "step": 18022 + }, + { + "epoch": 0.926251413300442, + "grad_norm": 1.0600920915603638, + "learning_rate": 1.41954730252174e-07, + "loss": 0.7263, + "step": 18023 + }, + { + "epoch": 0.9263028060437866, + "grad_norm": 1.089324951171875, + "learning_rate": 1.4175789277064645e-07, + "loss": 0.6384, + "step": 18024 + }, + { + "epoch": 0.9263541987871312, + "grad_norm": 1.1319739818572998, + "learning_rate": 1.4156118989095801e-07, + "loss": 0.705, + "step": 18025 + }, + { + "epoch": 0.9264055915304759, + "grad_norm": 0.6767879724502563, + "learning_rate": 1.4136462161855992e-07, + "loss": 0.6315, + "step": 18026 + }, + { + "epoch": 0.9264569842738205, + "grad_norm": 1.1289499998092651, + "learning_rate": 1.4116818795889776e-07, + "loss": 0.7108, + "step": 18027 + }, + { + "epoch": 0.9265083770171652, + "grad_norm": 1.0119119882583618, + "learning_rate": 1.409718889174133e-07, + "loss": 0.6005, + "step": 18028 + }, + { + "epoch": 0.9265597697605098, + "grad_norm": 1.117863655090332, + "learning_rate": 1.4077572449954612e-07, + "loss": 0.7288, + "step": 18029 + }, + { + "epoch": 0.9266111625038544, + "grad_norm": 1.124996542930603, + "learning_rate": 1.4057969471073074e-07, + "loss": 0.6745, + "step": 18030 + }, + { + "epoch": 0.9266625552471991, + "grad_norm": 1.1155446767807007, + "learning_rate": 1.4038379955639835e-07, + "loss": 0.6959, + "step": 18031 + }, + { + "epoch": 0.9267139479905437, + "grad_norm": 1.0934009552001953, + "learning_rate": 1.401880390419763e-07, + "loss": 0.7568, + "step": 18032 + }, + { + "epoch": 0.9267653407338884, + "grad_norm": 1.1934900283813477, + "learning_rate": 1.3999241317288857e-07, + "loss": 0.6443, + "step": 18033 + }, + { + "epoch": 0.926816733477233, + "grad_norm": 1.1255019903182983, + "learning_rate": 1.397969219545553e-07, + "loss": 0.6712, + "step": 18034 + }, + { + "epoch": 0.9268681262205777, + "grad_norm": 1.0650707483291626, + "learning_rate": 1.396015653923921e-07, + "loss": 0.6907, + "step": 18035 + }, + { + "epoch": 0.9269195189639223, + "grad_norm": 1.1233806610107422, + "learning_rate": 1.3940634349181136e-07, + "loss": 0.6878, + "step": 18036 + }, + { + "epoch": 0.926970911707267, + "grad_norm": 1.1066895723342896, + "learning_rate": 1.3921125625822373e-07, + "loss": 0.7318, + "step": 18037 + }, + { + "epoch": 0.9270223044506116, + "grad_norm": 1.0932739973068237, + "learning_rate": 1.3901630369703156e-07, + "loss": 0.6856, + "step": 18038 + }, + { + "epoch": 0.9270736971939562, + "grad_norm": 1.108330488204956, + "learning_rate": 1.388214858136383e-07, + "loss": 0.6651, + "step": 18039 + }, + { + "epoch": 0.9271250899373008, + "grad_norm": 1.0956828594207764, + "learning_rate": 1.3862680261344075e-07, + "loss": 0.6894, + "step": 18040 + }, + { + "epoch": 0.9271764826806455, + "grad_norm": 1.1883400678634644, + "learning_rate": 1.384322541018318e-07, + "loss": 0.709, + "step": 18041 + }, + { + "epoch": 0.9272278754239901, + "grad_norm": 1.0750535726547241, + "learning_rate": 1.3823784028420385e-07, + "loss": 0.6749, + "step": 18042 + }, + { + "epoch": 0.9272792681673347, + "grad_norm": 1.0885261297225952, + "learning_rate": 1.3804356116594141e-07, + "loss": 0.6947, + "step": 18043 + }, + { + "epoch": 0.9273306609106794, + "grad_norm": 0.6639559864997864, + "learning_rate": 1.3784941675242803e-07, + "loss": 0.6407, + "step": 18044 + }, + { + "epoch": 0.927382053654024, + "grad_norm": 1.0531586408615112, + "learning_rate": 1.3765540704904211e-07, + "loss": 0.6762, + "step": 18045 + }, + { + "epoch": 0.9274334463973687, + "grad_norm": 1.0709749460220337, + "learning_rate": 1.3746153206115997e-07, + "loss": 0.691, + "step": 18046 + }, + { + "epoch": 0.9274848391407133, + "grad_norm": 1.1919680833816528, + "learning_rate": 1.3726779179415116e-07, + "loss": 0.7133, + "step": 18047 + }, + { + "epoch": 0.927536231884058, + "grad_norm": 1.0678884983062744, + "learning_rate": 1.3707418625338476e-07, + "loss": 0.7406, + "step": 18048 + }, + { + "epoch": 0.9275876246274026, + "grad_norm": 1.1151939630508423, + "learning_rate": 1.3688071544422475e-07, + "loss": 0.7516, + "step": 18049 + }, + { + "epoch": 0.9276390173707473, + "grad_norm": 0.9404031038284302, + "learning_rate": 1.3668737937203136e-07, + "loss": 0.6303, + "step": 18050 + }, + { + "epoch": 0.9276904101140919, + "grad_norm": 1.0909656286239624, + "learning_rate": 1.3649417804216082e-07, + "loss": 0.7374, + "step": 18051 + }, + { + "epoch": 0.9277418028574366, + "grad_norm": 1.043218970298767, + "learning_rate": 1.3630111145996605e-07, + "loss": 0.7064, + "step": 18052 + }, + { + "epoch": 0.9277931956007812, + "grad_norm": 0.7091201543807983, + "learning_rate": 1.361081796307967e-07, + "loss": 0.6206, + "step": 18053 + }, + { + "epoch": 0.9278445883441259, + "grad_norm": 1.042454719543457, + "learning_rate": 1.3591538255999626e-07, + "loss": 0.6757, + "step": 18054 + }, + { + "epoch": 0.9278959810874704, + "grad_norm": 1.013356328010559, + "learning_rate": 1.3572272025290879e-07, + "loss": 0.6694, + "step": 18055 + }, + { + "epoch": 0.927947373830815, + "grad_norm": 1.0977790355682373, + "learning_rate": 1.3553019271487112e-07, + "loss": 0.6886, + "step": 18056 + }, + { + "epoch": 0.9279987665741597, + "grad_norm": 1.0828428268432617, + "learning_rate": 1.3533779995121731e-07, + "loss": 0.6951, + "step": 18057 + }, + { + "epoch": 0.9280501593175043, + "grad_norm": 0.7001410722732544, + "learning_rate": 1.3514554196727702e-07, + "loss": 0.6289, + "step": 18058 + }, + { + "epoch": 0.928101552060849, + "grad_norm": 1.0742887258529663, + "learning_rate": 1.3495341876837875e-07, + "loss": 0.6576, + "step": 18059 + }, + { + "epoch": 0.9281529448041936, + "grad_norm": 0.7802107930183411, + "learning_rate": 1.3476143035984323e-07, + "loss": 0.6604, + "step": 18060 + }, + { + "epoch": 0.9282043375475383, + "grad_norm": 1.2168136835098267, + "learning_rate": 1.3456957674699178e-07, + "loss": 0.6847, + "step": 18061 + }, + { + "epoch": 0.9282557302908829, + "grad_norm": 1.1582164764404297, + "learning_rate": 1.3437785793513792e-07, + "loss": 0.6891, + "step": 18062 + }, + { + "epoch": 0.9283071230342276, + "grad_norm": 1.107991337776184, + "learning_rate": 1.3418627392959515e-07, + "loss": 0.7038, + "step": 18063 + }, + { + "epoch": 0.9283585157775722, + "grad_norm": 0.6763691902160645, + "learning_rate": 1.3399482473567093e-07, + "loss": 0.6363, + "step": 18064 + }, + { + "epoch": 0.9284099085209169, + "grad_norm": 1.1387147903442383, + "learning_rate": 1.3380351035866823e-07, + "loss": 0.6673, + "step": 18065 + }, + { + "epoch": 0.9284613012642615, + "grad_norm": 1.0933600664138794, + "learning_rate": 1.3361233080389e-07, + "loss": 0.741, + "step": 18066 + }, + { + "epoch": 0.9285126940076062, + "grad_norm": 1.0726501941680908, + "learning_rate": 1.3342128607663096e-07, + "loss": 0.6755, + "step": 18067 + }, + { + "epoch": 0.9285640867509508, + "grad_norm": 1.0702552795410156, + "learning_rate": 1.3323037618218516e-07, + "loss": 0.6277, + "step": 18068 + }, + { + "epoch": 0.9286154794942955, + "grad_norm": 0.6824581623077393, + "learning_rate": 1.3303960112584113e-07, + "loss": 0.6073, + "step": 18069 + }, + { + "epoch": 0.92866687223764, + "grad_norm": 1.0876410007476807, + "learning_rate": 1.328489609128858e-07, + "loss": 0.7121, + "step": 18070 + }, + { + "epoch": 0.9287182649809846, + "grad_norm": 1.094212293624878, + "learning_rate": 1.3265845554859934e-07, + "loss": 0.7332, + "step": 18071 + }, + { + "epoch": 0.9287696577243293, + "grad_norm": 1.1048212051391602, + "learning_rate": 1.324680850382609e-07, + "loss": 0.6964, + "step": 18072 + }, + { + "epoch": 0.9288210504676739, + "grad_norm": 1.1248199939727783, + "learning_rate": 1.3227784938714517e-07, + "loss": 0.7202, + "step": 18073 + }, + { + "epoch": 0.9288724432110186, + "grad_norm": 1.1405552625656128, + "learning_rate": 1.3208774860052177e-07, + "loss": 0.6945, + "step": 18074 + }, + { + "epoch": 0.9289238359543632, + "grad_norm": 1.0174750089645386, + "learning_rate": 1.3189778268365928e-07, + "loss": 0.7266, + "step": 18075 + }, + { + "epoch": 0.9289752286977079, + "grad_norm": 0.7862551212310791, + "learning_rate": 1.3170795164181847e-07, + "loss": 0.6761, + "step": 18076 + }, + { + "epoch": 0.9290266214410525, + "grad_norm": 1.0244574546813965, + "learning_rate": 1.3151825548026077e-07, + "loss": 0.6437, + "step": 18077 + }, + { + "epoch": 0.9290780141843972, + "grad_norm": 1.0804916620254517, + "learning_rate": 1.3132869420424077e-07, + "loss": 0.7387, + "step": 18078 + }, + { + "epoch": 0.9291294069277418, + "grad_norm": 1.245983600616455, + "learning_rate": 1.3113926781901155e-07, + "loss": 0.7472, + "step": 18079 + }, + { + "epoch": 0.9291807996710865, + "grad_norm": 1.058756709098816, + "learning_rate": 1.3094997632981998e-07, + "loss": 0.7022, + "step": 18080 + }, + { + "epoch": 0.9292321924144311, + "grad_norm": 1.1186561584472656, + "learning_rate": 1.3076081974191136e-07, + "loss": 0.6915, + "step": 18081 + }, + { + "epoch": 0.9292835851577758, + "grad_norm": 1.1571509838104248, + "learning_rate": 1.3057179806052645e-07, + "loss": 0.7133, + "step": 18082 + }, + { + "epoch": 0.9293349779011204, + "grad_norm": 1.2750399112701416, + "learning_rate": 1.3038291129090108e-07, + "loss": 0.6953, + "step": 18083 + }, + { + "epoch": 0.929386370644465, + "grad_norm": 0.7940494418144226, + "learning_rate": 1.3019415943827052e-07, + "loss": 0.6546, + "step": 18084 + }, + { + "epoch": 0.9294377633878096, + "grad_norm": 1.0582627058029175, + "learning_rate": 1.3000554250786334e-07, + "loss": 0.6783, + "step": 18085 + }, + { + "epoch": 0.9294891561311542, + "grad_norm": 0.7538990378379822, + "learning_rate": 1.2981706050490484e-07, + "loss": 0.6563, + "step": 18086 + }, + { + "epoch": 0.9295405488744989, + "grad_norm": 1.0719081163406372, + "learning_rate": 1.2962871343461747e-07, + "loss": 0.6901, + "step": 18087 + }, + { + "epoch": 0.9295919416178435, + "grad_norm": 1.0997651815414429, + "learning_rate": 1.2944050130222042e-07, + "loss": 0.6567, + "step": 18088 + }, + { + "epoch": 0.9296433343611882, + "grad_norm": 0.785801351070404, + "learning_rate": 1.292524241129267e-07, + "loss": 0.6251, + "step": 18089 + }, + { + "epoch": 0.9296947271045328, + "grad_norm": 1.0831655263900757, + "learning_rate": 1.290644818719483e-07, + "loss": 0.725, + "step": 18090 + }, + { + "epoch": 0.9297461198478775, + "grad_norm": 1.1604515314102173, + "learning_rate": 1.288766745844916e-07, + "loss": 0.7021, + "step": 18091 + }, + { + "epoch": 0.9297975125912221, + "grad_norm": 1.066574215888977, + "learning_rate": 1.286890022557602e-07, + "loss": 0.6769, + "step": 18092 + }, + { + "epoch": 0.9298489053345668, + "grad_norm": 1.069810152053833, + "learning_rate": 1.285014648909544e-07, + "loss": 0.699, + "step": 18093 + }, + { + "epoch": 0.9299002980779114, + "grad_norm": 1.0855931043624878, + "learning_rate": 1.2831406249526946e-07, + "loss": 0.6541, + "step": 18094 + }, + { + "epoch": 0.9299516908212561, + "grad_norm": 1.1196109056472778, + "learning_rate": 1.2812679507389737e-07, + "loss": 0.7654, + "step": 18095 + }, + { + "epoch": 0.9300030835646007, + "grad_norm": 1.1178741455078125, + "learning_rate": 1.2793966263202616e-07, + "loss": 0.6859, + "step": 18096 + }, + { + "epoch": 0.9300544763079454, + "grad_norm": 1.1831738948822021, + "learning_rate": 1.2775266517484176e-07, + "loss": 0.6567, + "step": 18097 + }, + { + "epoch": 0.93010586905129, + "grad_norm": 0.6771738529205322, + "learning_rate": 1.2756580270752439e-07, + "loss": 0.6295, + "step": 18098 + }, + { + "epoch": 0.9301572617946346, + "grad_norm": 1.0817644596099854, + "learning_rate": 1.2737907523525162e-07, + "loss": 0.6954, + "step": 18099 + }, + { + "epoch": 0.9302086545379792, + "grad_norm": 1.0571473836898804, + "learning_rate": 1.2719248276319595e-07, + "loss": 0.716, + "step": 18100 + }, + { + "epoch": 0.9302600472813238, + "grad_norm": 1.1686515808105469, + "learning_rate": 1.270060252965283e-07, + "loss": 0.6883, + "step": 18101 + }, + { + "epoch": 0.9303114400246685, + "grad_norm": 1.051928997039795, + "learning_rate": 1.2681970284041333e-07, + "loss": 0.6587, + "step": 18102 + }, + { + "epoch": 0.9303628327680131, + "grad_norm": 1.0963581800460815, + "learning_rate": 1.2663351540001423e-07, + "loss": 0.6692, + "step": 18103 + }, + { + "epoch": 0.9304142255113578, + "grad_norm": 1.1356115341186523, + "learning_rate": 1.264474629804896e-07, + "loss": 0.6768, + "step": 18104 + }, + { + "epoch": 0.9304656182547024, + "grad_norm": 1.045095682144165, + "learning_rate": 1.262615455869931e-07, + "loss": 0.6881, + "step": 18105 + }, + { + "epoch": 0.9305170109980471, + "grad_norm": 1.0176856517791748, + "learning_rate": 1.2607576322467674e-07, + "loss": 0.66, + "step": 18106 + }, + { + "epoch": 0.9305684037413917, + "grad_norm": 1.0867902040481567, + "learning_rate": 1.258901158986875e-07, + "loss": 0.6903, + "step": 18107 + }, + { + "epoch": 0.9306197964847364, + "grad_norm": 1.1248114109039307, + "learning_rate": 1.2570460361416958e-07, + "loss": 0.7506, + "step": 18108 + }, + { + "epoch": 0.930671189228081, + "grad_norm": 1.0654813051223755, + "learning_rate": 1.2551922637626058e-07, + "loss": 0.7032, + "step": 18109 + }, + { + "epoch": 0.9307225819714257, + "grad_norm": 1.0783593654632568, + "learning_rate": 1.2533398419009913e-07, + "loss": 0.6621, + "step": 18110 + }, + { + "epoch": 0.9307739747147703, + "grad_norm": 1.0867053270339966, + "learning_rate": 1.2514887706081613e-07, + "loss": 0.6601, + "step": 18111 + }, + { + "epoch": 0.930825367458115, + "grad_norm": 1.0424574613571167, + "learning_rate": 1.2496390499354082e-07, + "loss": 0.6828, + "step": 18112 + }, + { + "epoch": 0.9308767602014596, + "grad_norm": 1.0914846658706665, + "learning_rate": 1.247790679933969e-07, + "loss": 0.6806, + "step": 18113 + }, + { + "epoch": 0.9309281529448042, + "grad_norm": 1.053343653678894, + "learning_rate": 1.2459436606550634e-07, + "loss": 0.73, + "step": 18114 + }, + { + "epoch": 0.9309795456881488, + "grad_norm": 0.7563942074775696, + "learning_rate": 1.2440979921498675e-07, + "loss": 0.6721, + "step": 18115 + }, + { + "epoch": 0.9310309384314934, + "grad_norm": 1.080246090888977, + "learning_rate": 1.242253674469507e-07, + "loss": 0.6865, + "step": 18116 + }, + { + "epoch": 0.9310823311748381, + "grad_norm": 1.0843212604522705, + "learning_rate": 1.240410707665096e-07, + "loss": 0.6468, + "step": 18117 + }, + { + "epoch": 0.9311337239181827, + "grad_norm": 1.0800042152404785, + "learning_rate": 1.2385690917876724e-07, + "loss": 0.6428, + "step": 18118 + }, + { + "epoch": 0.9311851166615274, + "grad_norm": 1.0935783386230469, + "learning_rate": 1.2367288268882783e-07, + "loss": 0.7159, + "step": 18119 + }, + { + "epoch": 0.931236509404872, + "grad_norm": 1.0715067386627197, + "learning_rate": 1.234889913017895e-07, + "loss": 0.6638, + "step": 18120 + }, + { + "epoch": 0.9312879021482167, + "grad_norm": 0.7493892908096313, + "learning_rate": 1.2330523502274705e-07, + "loss": 0.6586, + "step": 18121 + }, + { + "epoch": 0.9313392948915613, + "grad_norm": 0.7267151474952698, + "learning_rate": 1.2312161385679144e-07, + "loss": 0.6652, + "step": 18122 + }, + { + "epoch": 0.931390687634906, + "grad_norm": 1.0882261991500854, + "learning_rate": 1.2293812780901027e-07, + "loss": 0.705, + "step": 18123 + }, + { + "epoch": 0.9314420803782506, + "grad_norm": 0.7736518979072571, + "learning_rate": 1.2275477688448723e-07, + "loss": 0.6191, + "step": 18124 + }, + { + "epoch": 0.9314934731215953, + "grad_norm": 1.0837070941925049, + "learning_rate": 1.2257156108830159e-07, + "loss": 0.7129, + "step": 18125 + }, + { + "epoch": 0.9315448658649399, + "grad_norm": 1.089508056640625, + "learning_rate": 1.2238848042553042e-07, + "loss": 0.6928, + "step": 18126 + }, + { + "epoch": 0.9315962586082845, + "grad_norm": 0.7178660035133362, + "learning_rate": 1.2220553490124632e-07, + "loss": 0.6273, + "step": 18127 + }, + { + "epoch": 0.9316476513516292, + "grad_norm": 1.1137096881866455, + "learning_rate": 1.220227245205169e-07, + "loss": 0.6703, + "step": 18128 + }, + { + "epoch": 0.9316990440949738, + "grad_norm": 1.1520177125930786, + "learning_rate": 1.21840049288407e-07, + "loss": 0.6951, + "step": 18129 + }, + { + "epoch": 0.9317504368383184, + "grad_norm": 1.0321643352508545, + "learning_rate": 1.2165750920997977e-07, + "loss": 0.6955, + "step": 18130 + }, + { + "epoch": 0.931801829581663, + "grad_norm": 0.8172734379768372, + "learning_rate": 1.2147510429028952e-07, + "loss": 0.693, + "step": 18131 + }, + { + "epoch": 0.9318532223250077, + "grad_norm": 1.1170563697814941, + "learning_rate": 1.2129283453439278e-07, + "loss": 0.6769, + "step": 18132 + }, + { + "epoch": 0.9319046150683523, + "grad_norm": 1.016937494277954, + "learning_rate": 1.2111069994733827e-07, + "loss": 0.6628, + "step": 18133 + }, + { + "epoch": 0.931956007811697, + "grad_norm": 1.0824862718582153, + "learning_rate": 1.2092870053417138e-07, + "loss": 0.6717, + "step": 18134 + }, + { + "epoch": 0.9320074005550416, + "grad_norm": 1.1746697425842285, + "learning_rate": 1.2074683629993643e-07, + "loss": 0.6985, + "step": 18135 + }, + { + "epoch": 0.9320587932983863, + "grad_norm": 1.1815232038497925, + "learning_rate": 1.2056510724967107e-07, + "loss": 0.7213, + "step": 18136 + }, + { + "epoch": 0.9321101860417309, + "grad_norm": 5.467514514923096, + "learning_rate": 1.2038351338841014e-07, + "loss": 0.7101, + "step": 18137 + }, + { + "epoch": 0.9321615787850756, + "grad_norm": 0.7454895973205566, + "learning_rate": 1.2020205472118462e-07, + "loss": 0.6788, + "step": 18138 + }, + { + "epoch": 0.9322129715284202, + "grad_norm": 1.0649522542953491, + "learning_rate": 1.2002073125302382e-07, + "loss": 0.7258, + "step": 18139 + }, + { + "epoch": 0.9322643642717648, + "grad_norm": 0.7957323789596558, + "learning_rate": 1.1983954298894874e-07, + "loss": 0.6508, + "step": 18140 + }, + { + "epoch": 0.9323157570151095, + "grad_norm": 1.0418696403503418, + "learning_rate": 1.1965848993398088e-07, + "loss": 0.703, + "step": 18141 + }, + { + "epoch": 0.9323671497584541, + "grad_norm": 0.7692862749099731, + "learning_rate": 1.1947757209313626e-07, + "loss": 0.6486, + "step": 18142 + }, + { + "epoch": 0.9324185425017988, + "grad_norm": 1.0710469484329224, + "learning_rate": 1.1929678947142753e-07, + "loss": 0.6533, + "step": 18143 + }, + { + "epoch": 0.9324699352451434, + "grad_norm": 1.2012804746627808, + "learning_rate": 1.1911614207386346e-07, + "loss": 0.6444, + "step": 18144 + }, + { + "epoch": 0.9325213279884881, + "grad_norm": 0.9740645885467529, + "learning_rate": 1.1893562990544894e-07, + "loss": 0.6646, + "step": 18145 + }, + { + "epoch": 0.9325727207318326, + "grad_norm": 0.7126825451850891, + "learning_rate": 1.1875525297118496e-07, + "loss": 0.6395, + "step": 18146 + }, + { + "epoch": 0.9326241134751773, + "grad_norm": 1.0794944763183594, + "learning_rate": 1.1857501127606919e-07, + "loss": 0.709, + "step": 18147 + }, + { + "epoch": 0.9326755062185219, + "grad_norm": 0.7529399394989014, + "learning_rate": 1.1839490482509541e-07, + "loss": 0.6599, + "step": 18148 + }, + { + "epoch": 0.9327268989618666, + "grad_norm": 1.0956978797912598, + "learning_rate": 1.182149336232541e-07, + "loss": 0.7296, + "step": 18149 + }, + { + "epoch": 0.9327782917052112, + "grad_norm": 0.8156173825263977, + "learning_rate": 1.1803509767553067e-07, + "loss": 0.6159, + "step": 18150 + }, + { + "epoch": 0.9328296844485559, + "grad_norm": 0.7395505905151367, + "learning_rate": 1.1785539698690728e-07, + "loss": 0.6283, + "step": 18151 + }, + { + "epoch": 0.9328810771919005, + "grad_norm": 1.1007943153381348, + "learning_rate": 1.176758315623644e-07, + "loss": 0.6486, + "step": 18152 + }, + { + "epoch": 0.9329324699352451, + "grad_norm": 1.126417875289917, + "learning_rate": 1.1749640140687579e-07, + "loss": 0.7318, + "step": 18153 + }, + { + "epoch": 0.9329838626785898, + "grad_norm": 1.0709338188171387, + "learning_rate": 1.1731710652541306e-07, + "loss": 0.6678, + "step": 18154 + }, + { + "epoch": 0.9330352554219344, + "grad_norm": 0.7269437313079834, + "learning_rate": 1.171379469229439e-07, + "loss": 0.6835, + "step": 18155 + }, + { + "epoch": 0.9330866481652791, + "grad_norm": 1.0362740755081177, + "learning_rate": 1.1695892260443098e-07, + "loss": 0.6701, + "step": 18156 + }, + { + "epoch": 0.9331380409086237, + "grad_norm": 1.2100704908370972, + "learning_rate": 1.1678003357483591e-07, + "loss": 0.6363, + "step": 18157 + }, + { + "epoch": 0.9331894336519684, + "grad_norm": 1.074168086051941, + "learning_rate": 1.1660127983911362e-07, + "loss": 0.6848, + "step": 18158 + }, + { + "epoch": 0.933240826395313, + "grad_norm": 0.7845690846443176, + "learning_rate": 1.1642266140221848e-07, + "loss": 0.6489, + "step": 18159 + }, + { + "epoch": 0.9332922191386577, + "grad_norm": 1.0633260011672974, + "learning_rate": 1.162441782690965e-07, + "loss": 0.6798, + "step": 18160 + }, + { + "epoch": 0.9333436118820022, + "grad_norm": 1.2773399353027344, + "learning_rate": 1.1606583044469544e-07, + "loss": 0.7016, + "step": 18161 + }, + { + "epoch": 0.9333950046253469, + "grad_norm": 0.8328320980072021, + "learning_rate": 1.158876179339552e-07, + "loss": 0.6482, + "step": 18162 + }, + { + "epoch": 0.9334463973686915, + "grad_norm": 1.1610026359558105, + "learning_rate": 1.157095407418135e-07, + "loss": 0.6985, + "step": 18163 + }, + { + "epoch": 0.9334977901120362, + "grad_norm": 1.1135119199752808, + "learning_rate": 1.1553159887320365e-07, + "loss": 0.736, + "step": 18164 + }, + { + "epoch": 0.9335491828553808, + "grad_norm": 1.1179707050323486, + "learning_rate": 1.1535379233305665e-07, + "loss": 0.7314, + "step": 18165 + }, + { + "epoch": 0.9336005755987254, + "grad_norm": 1.0149478912353516, + "learning_rate": 1.1517612112629805e-07, + "loss": 0.7079, + "step": 18166 + }, + { + "epoch": 0.9336519683420701, + "grad_norm": 1.0771840810775757, + "learning_rate": 1.1499858525785057e-07, + "loss": 0.6469, + "step": 18167 + }, + { + "epoch": 0.9337033610854147, + "grad_norm": 1.1967853307724, + "learning_rate": 1.1482118473263415e-07, + "loss": 0.7518, + "step": 18168 + }, + { + "epoch": 0.9337547538287594, + "grad_norm": 1.1043626070022583, + "learning_rate": 1.1464391955556154e-07, + "loss": 0.691, + "step": 18169 + }, + { + "epoch": 0.933806146572104, + "grad_norm": 1.1084086894989014, + "learning_rate": 1.1446678973154546e-07, + "loss": 0.6766, + "step": 18170 + }, + { + "epoch": 0.9338575393154487, + "grad_norm": 1.1623708009719849, + "learning_rate": 1.1428979526549366e-07, + "loss": 0.6974, + "step": 18171 + }, + { + "epoch": 0.9339089320587933, + "grad_norm": 1.1333773136138916, + "learning_rate": 1.1411293616230945e-07, + "loss": 0.7476, + "step": 18172 + }, + { + "epoch": 0.933960324802138, + "grad_norm": 1.109848141670227, + "learning_rate": 1.1393621242689224e-07, + "loss": 0.6969, + "step": 18173 + }, + { + "epoch": 0.9340117175454826, + "grad_norm": 1.0783395767211914, + "learning_rate": 1.1375962406413976e-07, + "loss": 0.6664, + "step": 18174 + }, + { + "epoch": 0.9340631102888273, + "grad_norm": 1.1152595281600952, + "learning_rate": 1.1358317107894312e-07, + "loss": 0.7164, + "step": 18175 + }, + { + "epoch": 0.9341145030321718, + "grad_norm": 0.6855306625366211, + "learning_rate": 1.1340685347619229e-07, + "loss": 0.6204, + "step": 18176 + }, + { + "epoch": 0.9341658957755165, + "grad_norm": 1.045112133026123, + "learning_rate": 1.1323067126077169e-07, + "loss": 0.634, + "step": 18177 + }, + { + "epoch": 0.9342172885188611, + "grad_norm": 1.041113257408142, + "learning_rate": 1.1305462443756244e-07, + "loss": 0.681, + "step": 18178 + }, + { + "epoch": 0.9342686812622057, + "grad_norm": 0.7230408191680908, + "learning_rate": 1.1287871301144283e-07, + "loss": 0.6317, + "step": 18179 + }, + { + "epoch": 0.9343200740055504, + "grad_norm": 0.7001280188560486, + "learning_rate": 1.1270293698728507e-07, + "loss": 0.6498, + "step": 18180 + }, + { + "epoch": 0.934371466748895, + "grad_norm": 1.0436370372772217, + "learning_rate": 1.125272963699614e-07, + "loss": 0.6558, + "step": 18181 + }, + { + "epoch": 0.9344228594922397, + "grad_norm": 1.116808295249939, + "learning_rate": 1.1235179116433626e-07, + "loss": 0.7204, + "step": 18182 + }, + { + "epoch": 0.9344742522355843, + "grad_norm": 1.1701769828796387, + "learning_rate": 1.1217642137527296e-07, + "loss": 0.6766, + "step": 18183 + }, + { + "epoch": 0.934525644978929, + "grad_norm": 1.1348007917404175, + "learning_rate": 1.120011870076304e-07, + "loss": 0.7672, + "step": 18184 + }, + { + "epoch": 0.9345770377222736, + "grad_norm": 0.7406056523323059, + "learning_rate": 1.1182608806626306e-07, + "loss": 0.6386, + "step": 18185 + }, + { + "epoch": 0.9346284304656183, + "grad_norm": 1.0408074855804443, + "learning_rate": 1.1165112455602312e-07, + "loss": 0.6991, + "step": 18186 + }, + { + "epoch": 0.9346798232089629, + "grad_norm": 1.0372551679611206, + "learning_rate": 1.1147629648175617e-07, + "loss": 0.7773, + "step": 18187 + }, + { + "epoch": 0.9347312159523076, + "grad_norm": 1.0081762075424194, + "learning_rate": 1.113016038483089e-07, + "loss": 0.6095, + "step": 18188 + }, + { + "epoch": 0.9347826086956522, + "grad_norm": 1.0785664319992065, + "learning_rate": 1.1112704666051855e-07, + "loss": 0.6638, + "step": 18189 + }, + { + "epoch": 0.9348340014389969, + "grad_norm": 1.2045140266418457, + "learning_rate": 1.1095262492322346e-07, + "loss": 0.7384, + "step": 18190 + }, + { + "epoch": 0.9348853941823414, + "grad_norm": 1.100014090538025, + "learning_rate": 1.1077833864125476e-07, + "loss": 0.6898, + "step": 18191 + }, + { + "epoch": 0.934936786925686, + "grad_norm": 1.0657894611358643, + "learning_rate": 1.1060418781944193e-07, + "loss": 0.7225, + "step": 18192 + }, + { + "epoch": 0.9349881796690307, + "grad_norm": 1.1204603910446167, + "learning_rate": 1.104301724626089e-07, + "loss": 0.6584, + "step": 18193 + }, + { + "epoch": 0.9350395724123753, + "grad_norm": 1.1003681421279907, + "learning_rate": 1.1025629257557847e-07, + "loss": 0.7331, + "step": 18194 + }, + { + "epoch": 0.93509096515572, + "grad_norm": 1.094254732131958, + "learning_rate": 1.1008254816316733e-07, + "loss": 0.6812, + "step": 18195 + }, + { + "epoch": 0.9351423578990646, + "grad_norm": 1.1393364667892456, + "learning_rate": 1.0990893923018942e-07, + "loss": 0.7013, + "step": 18196 + }, + { + "epoch": 0.9351937506424093, + "grad_norm": 1.0725802183151245, + "learning_rate": 1.0973546578145422e-07, + "loss": 0.6973, + "step": 18197 + }, + { + "epoch": 0.9352451433857539, + "grad_norm": 1.0916386842727661, + "learning_rate": 1.0956212782176845e-07, + "loss": 0.693, + "step": 18198 + }, + { + "epoch": 0.9352965361290986, + "grad_norm": 0.7486464977264404, + "learning_rate": 1.0938892535593437e-07, + "loss": 0.6095, + "step": 18199 + }, + { + "epoch": 0.9353479288724432, + "grad_norm": 1.12406325340271, + "learning_rate": 1.0921585838875092e-07, + "loss": 0.7451, + "step": 18200 + }, + { + "epoch": 0.9353993216157879, + "grad_norm": 1.081967830657959, + "learning_rate": 1.0904292692501317e-07, + "loss": 0.7024, + "step": 18201 + }, + { + "epoch": 0.9354507143591325, + "grad_norm": 1.0904582738876343, + "learning_rate": 1.0887013096951172e-07, + "loss": 0.7023, + "step": 18202 + }, + { + "epoch": 0.9355021071024772, + "grad_norm": 0.721435546875, + "learning_rate": 1.0869747052703439e-07, + "loss": 0.6842, + "step": 18203 + }, + { + "epoch": 0.9355534998458218, + "grad_norm": 0.758167564868927, + "learning_rate": 1.0852494560236571e-07, + "loss": 0.635, + "step": 18204 + }, + { + "epoch": 0.9356048925891665, + "grad_norm": 1.0998713970184326, + "learning_rate": 1.0835255620028406e-07, + "loss": 0.6779, + "step": 18205 + }, + { + "epoch": 0.935656285332511, + "grad_norm": 1.051513910293579, + "learning_rate": 1.081803023255662e-07, + "loss": 0.6925, + "step": 18206 + }, + { + "epoch": 0.9357076780758556, + "grad_norm": 1.098307728767395, + "learning_rate": 1.0800818398298552e-07, + "loss": 0.7537, + "step": 18207 + }, + { + "epoch": 0.9357590708192003, + "grad_norm": 0.7755401134490967, + "learning_rate": 1.0783620117730986e-07, + "loss": 0.6301, + "step": 18208 + }, + { + "epoch": 0.9358104635625449, + "grad_norm": 1.1143397092819214, + "learning_rate": 1.0766435391330432e-07, + "loss": 0.7014, + "step": 18209 + }, + { + "epoch": 0.9358618563058896, + "grad_norm": 1.092690110206604, + "learning_rate": 1.0749264219573008e-07, + "loss": 0.6818, + "step": 18210 + }, + { + "epoch": 0.9359132490492342, + "grad_norm": 0.7035539150238037, + "learning_rate": 1.0732106602934445e-07, + "loss": 0.6334, + "step": 18211 + }, + { + "epoch": 0.9359646417925789, + "grad_norm": 1.1421818733215332, + "learning_rate": 1.071496254189014e-07, + "loss": 0.6927, + "step": 18212 + }, + { + "epoch": 0.9360160345359235, + "grad_norm": 1.1793495416641235, + "learning_rate": 1.0697832036915046e-07, + "loss": 0.7286, + "step": 18213 + }, + { + "epoch": 0.9360674272792682, + "grad_norm": 0.7602949738502502, + "learning_rate": 1.0680715088483784e-07, + "loss": 0.6722, + "step": 18214 + }, + { + "epoch": 0.9361188200226128, + "grad_norm": 1.0900987386703491, + "learning_rate": 1.0663611697070586e-07, + "loss": 0.6839, + "step": 18215 + }, + { + "epoch": 0.9361702127659575, + "grad_norm": 1.140613079071045, + "learning_rate": 1.064652186314935e-07, + "loss": 0.7412, + "step": 18216 + }, + { + "epoch": 0.9362216055093021, + "grad_norm": 1.2014966011047363, + "learning_rate": 1.0629445587193587e-07, + "loss": 0.6991, + "step": 18217 + }, + { + "epoch": 0.9362729982526468, + "grad_norm": 1.1197752952575684, + "learning_rate": 1.0612382869676307e-07, + "loss": 0.7534, + "step": 18218 + }, + { + "epoch": 0.9363243909959914, + "grad_norm": 1.0953505039215088, + "learning_rate": 1.0595333711070354e-07, + "loss": 0.7493, + "step": 18219 + }, + { + "epoch": 0.936375783739336, + "grad_norm": 1.0983437299728394, + "learning_rate": 1.0578298111848073e-07, + "loss": 0.68, + "step": 18220 + }, + { + "epoch": 0.9364271764826807, + "grad_norm": 1.0882781744003296, + "learning_rate": 1.0561276072481419e-07, + "loss": 0.7087, + "step": 18221 + }, + { + "epoch": 0.9364785692260252, + "grad_norm": 0.8025135397911072, + "learning_rate": 1.0544267593441959e-07, + "loss": 0.6379, + "step": 18222 + }, + { + "epoch": 0.9365299619693699, + "grad_norm": 0.6718245148658752, + "learning_rate": 1.0527272675201094e-07, + "loss": 0.6663, + "step": 18223 + }, + { + "epoch": 0.9365813547127145, + "grad_norm": 1.1004219055175781, + "learning_rate": 1.0510291318229449e-07, + "loss": 0.6912, + "step": 18224 + }, + { + "epoch": 0.9366327474560592, + "grad_norm": 0.7890014052391052, + "learning_rate": 1.0493323522997644e-07, + "loss": 0.6632, + "step": 18225 + }, + { + "epoch": 0.9366841401994038, + "grad_norm": 1.1225265264511108, + "learning_rate": 1.0476369289975808e-07, + "loss": 0.6737, + "step": 18226 + }, + { + "epoch": 0.9367355329427485, + "grad_norm": 1.1219775676727295, + "learning_rate": 1.0459428619633616e-07, + "loss": 0.6931, + "step": 18227 + }, + { + "epoch": 0.9367869256860931, + "grad_norm": 1.0455548763275146, + "learning_rate": 1.0442501512440473e-07, + "loss": 0.71, + "step": 18228 + }, + { + "epoch": 0.9368383184294378, + "grad_norm": 1.0516871213912964, + "learning_rate": 1.0425587968865281e-07, + "loss": 0.6394, + "step": 18229 + }, + { + "epoch": 0.9368897111727824, + "grad_norm": 1.1444944143295288, + "learning_rate": 1.040868798937672e-07, + "loss": 0.6991, + "step": 18230 + }, + { + "epoch": 0.9369411039161271, + "grad_norm": 1.0673075914382935, + "learning_rate": 1.0391801574442972e-07, + "loss": 0.6934, + "step": 18231 + }, + { + "epoch": 0.9369924966594717, + "grad_norm": 1.1196258068084717, + "learning_rate": 1.0374928724531997e-07, + "loss": 0.7063, + "step": 18232 + }, + { + "epoch": 0.9370438894028164, + "grad_norm": 0.8464105129241943, + "learning_rate": 1.0358069440111029e-07, + "loss": 0.6209, + "step": 18233 + }, + { + "epoch": 0.937095282146161, + "grad_norm": 1.1044212579727173, + "learning_rate": 1.034122372164742e-07, + "loss": 0.6579, + "step": 18234 + }, + { + "epoch": 0.9371466748895056, + "grad_norm": 1.1360880136489868, + "learning_rate": 1.0324391569607739e-07, + "loss": 0.7236, + "step": 18235 + }, + { + "epoch": 0.9371980676328503, + "grad_norm": 1.061560034751892, + "learning_rate": 1.0307572984458502e-07, + "loss": 0.6446, + "step": 18236 + }, + { + "epoch": 0.9372494603761948, + "grad_norm": 1.0182867050170898, + "learning_rate": 1.0290767966665504e-07, + "loss": 0.6643, + "step": 18237 + }, + { + "epoch": 0.9373008531195395, + "grad_norm": 0.9916114807128906, + "learning_rate": 1.0273976516694372e-07, + "loss": 0.6987, + "step": 18238 + }, + { + "epoch": 0.9373522458628841, + "grad_norm": 1.0453910827636719, + "learning_rate": 1.0257198635010512e-07, + "loss": 0.6849, + "step": 18239 + }, + { + "epoch": 0.9374036386062288, + "grad_norm": 1.1969722509384155, + "learning_rate": 1.0240434322078497e-07, + "loss": 0.6617, + "step": 18240 + }, + { + "epoch": 0.9374550313495734, + "grad_norm": 0.7796474695205688, + "learning_rate": 1.0223683578362953e-07, + "loss": 0.6448, + "step": 18241 + }, + { + "epoch": 0.9375064240929181, + "grad_norm": 1.079524040222168, + "learning_rate": 1.020694640432801e-07, + "loss": 0.7608, + "step": 18242 + }, + { + "epoch": 0.9375578168362627, + "grad_norm": 0.709758460521698, + "learning_rate": 1.01902228004373e-07, + "loss": 0.6162, + "step": 18243 + }, + { + "epoch": 0.9376092095796074, + "grad_norm": 1.0926910638809204, + "learning_rate": 1.0173512767154114e-07, + "loss": 0.6622, + "step": 18244 + }, + { + "epoch": 0.937660602322952, + "grad_norm": 0.8188534379005432, + "learning_rate": 1.0156816304941586e-07, + "loss": 0.6202, + "step": 18245 + }, + { + "epoch": 0.9377119950662967, + "grad_norm": 1.1509997844696045, + "learning_rate": 1.0140133414262177e-07, + "loss": 0.7219, + "step": 18246 + }, + { + "epoch": 0.9377633878096413, + "grad_norm": 0.6941210627555847, + "learning_rate": 1.0123464095578128e-07, + "loss": 0.5924, + "step": 18247 + }, + { + "epoch": 0.937814780552986, + "grad_norm": 1.1383109092712402, + "learning_rate": 1.0106808349351349e-07, + "loss": 0.7223, + "step": 18248 + }, + { + "epoch": 0.9378661732963306, + "grad_norm": 1.0416836738586426, + "learning_rate": 1.0090166176043137e-07, + "loss": 0.667, + "step": 18249 + }, + { + "epoch": 0.9379175660396752, + "grad_norm": 1.0473331212997437, + "learning_rate": 1.0073537576114789e-07, + "loss": 0.6733, + "step": 18250 + }, + { + "epoch": 0.9379689587830199, + "grad_norm": 0.7374266386032104, + "learning_rate": 1.0056922550026826e-07, + "loss": 0.652, + "step": 18251 + }, + { + "epoch": 0.9380203515263644, + "grad_norm": 1.0661418437957764, + "learning_rate": 1.0040321098239713e-07, + "loss": 0.6975, + "step": 18252 + }, + { + "epoch": 0.9380717442697091, + "grad_norm": 1.1141732931137085, + "learning_rate": 1.0023733221213305e-07, + "loss": 0.6581, + "step": 18253 + }, + { + "epoch": 0.9381231370130537, + "grad_norm": 1.0481083393096924, + "learning_rate": 1.0007158919407289e-07, + "loss": 0.7174, + "step": 18254 + }, + { + "epoch": 0.9381745297563984, + "grad_norm": 1.0148179531097412, + "learning_rate": 9.990598193280854e-08, + "loss": 0.6616, + "step": 18255 + }, + { + "epoch": 0.938225922499743, + "grad_norm": 1.132919192314148, + "learning_rate": 9.974051043292742e-08, + "loss": 0.7723, + "step": 18256 + }, + { + "epoch": 0.9382773152430877, + "grad_norm": 1.1283656358718872, + "learning_rate": 9.957517469901423e-08, + "loss": 0.7251, + "step": 18257 + }, + { + "epoch": 0.9383287079864323, + "grad_norm": 1.1088967323303223, + "learning_rate": 9.940997473565028e-08, + "loss": 0.7, + "step": 18258 + }, + { + "epoch": 0.938380100729777, + "grad_norm": 1.087043046951294, + "learning_rate": 9.924491054741303e-08, + "loss": 0.7361, + "step": 18259 + }, + { + "epoch": 0.9384314934731216, + "grad_norm": 1.0823676586151123, + "learning_rate": 9.907998213887437e-08, + "loss": 0.7075, + "step": 18260 + }, + { + "epoch": 0.9384828862164662, + "grad_norm": 0.7894309163093567, + "learning_rate": 9.89151895146051e-08, + "loss": 0.6497, + "step": 18261 + }, + { + "epoch": 0.9385342789598109, + "grad_norm": 0.7110560536384583, + "learning_rate": 9.875053267916934e-08, + "loss": 0.6489, + "step": 18262 + }, + { + "epoch": 0.9385856717031555, + "grad_norm": 0.7982341051101685, + "learning_rate": 9.858601163713122e-08, + "loss": 0.6452, + "step": 18263 + }, + { + "epoch": 0.9386370644465002, + "grad_norm": 1.098705530166626, + "learning_rate": 9.842162639304708e-08, + "loss": 0.7364, + "step": 18264 + }, + { + "epoch": 0.9386884571898448, + "grad_norm": 0.7360174655914307, + "learning_rate": 9.82573769514722e-08, + "loss": 0.6685, + "step": 18265 + }, + { + "epoch": 0.9387398499331895, + "grad_norm": 1.0338443517684937, + "learning_rate": 9.809326331695734e-08, + "loss": 0.7089, + "step": 18266 + }, + { + "epoch": 0.938791242676534, + "grad_norm": 1.0561853647232056, + "learning_rate": 9.79292854940489e-08, + "loss": 0.6529, + "step": 18267 + }, + { + "epoch": 0.9388426354198787, + "grad_norm": 1.1618887186050415, + "learning_rate": 9.776544348729044e-08, + "loss": 0.6524, + "step": 18268 + }, + { + "epoch": 0.9388940281632233, + "grad_norm": 1.0948460102081299, + "learning_rate": 9.760173730122057e-08, + "loss": 0.6707, + "step": 18269 + }, + { + "epoch": 0.938945420906568, + "grad_norm": 0.8047192096710205, + "learning_rate": 9.743816694037622e-08, + "loss": 0.6497, + "step": 18270 + }, + { + "epoch": 0.9389968136499126, + "grad_norm": 1.1081783771514893, + "learning_rate": 9.727473240928875e-08, + "loss": 0.7123, + "step": 18271 + }, + { + "epoch": 0.9390482063932573, + "grad_norm": 1.1524940729141235, + "learning_rate": 9.711143371248621e-08, + "loss": 0.6751, + "step": 18272 + }, + { + "epoch": 0.9390995991366019, + "grad_norm": 1.0993659496307373, + "learning_rate": 9.694827085449221e-08, + "loss": 0.7311, + "step": 18273 + }, + { + "epoch": 0.9391509918799466, + "grad_norm": 1.0823835134506226, + "learning_rate": 9.678524383982868e-08, + "loss": 0.6678, + "step": 18274 + }, + { + "epoch": 0.9392023846232912, + "grad_norm": 1.086285948753357, + "learning_rate": 9.66223526730109e-08, + "loss": 0.7078, + "step": 18275 + }, + { + "epoch": 0.9392537773666358, + "grad_norm": 1.1212517023086548, + "learning_rate": 9.645959735855304e-08, + "loss": 0.742, + "step": 18276 + }, + { + "epoch": 0.9393051701099805, + "grad_norm": 1.112223505973816, + "learning_rate": 9.629697790096371e-08, + "loss": 0.6737, + "step": 18277 + }, + { + "epoch": 0.9393565628533251, + "grad_norm": 1.0868277549743652, + "learning_rate": 9.613449430474875e-08, + "loss": 0.6769, + "step": 18278 + }, + { + "epoch": 0.9394079555966698, + "grad_norm": 0.7513636350631714, + "learning_rate": 9.597214657441012e-08, + "loss": 0.6482, + "step": 18279 + }, + { + "epoch": 0.9394593483400144, + "grad_norm": 1.1184829473495483, + "learning_rate": 9.580993471444588e-08, + "loss": 0.7089, + "step": 18280 + }, + { + "epoch": 0.9395107410833591, + "grad_norm": 1.064218282699585, + "learning_rate": 9.564785872934967e-08, + "loss": 0.7012, + "step": 18281 + }, + { + "epoch": 0.9395621338267036, + "grad_norm": 1.0187371969223022, + "learning_rate": 9.548591862361179e-08, + "loss": 0.6293, + "step": 18282 + }, + { + "epoch": 0.9396135265700483, + "grad_norm": 1.1224628686904907, + "learning_rate": 9.532411440171974e-08, + "loss": 0.6794, + "step": 18283 + }, + { + "epoch": 0.9396649193133929, + "grad_norm": 1.158615231513977, + "learning_rate": 9.516244606815662e-08, + "loss": 0.7135, + "step": 18284 + }, + { + "epoch": 0.9397163120567376, + "grad_norm": 1.115944743156433, + "learning_rate": 9.500091362740049e-08, + "loss": 0.7009, + "step": 18285 + }, + { + "epoch": 0.9397677048000822, + "grad_norm": 1.0644429922103882, + "learning_rate": 9.483951708392669e-08, + "loss": 0.7317, + "step": 18286 + }, + { + "epoch": 0.9398190975434269, + "grad_norm": 1.1075092554092407, + "learning_rate": 9.467825644220829e-08, + "loss": 0.7078, + "step": 18287 + }, + { + "epoch": 0.9398704902867715, + "grad_norm": 1.0846257209777832, + "learning_rate": 9.451713170671229e-08, + "loss": 0.6725, + "step": 18288 + }, + { + "epoch": 0.9399218830301161, + "grad_norm": 1.2795671224594116, + "learning_rate": 9.435614288190232e-08, + "loss": 0.7004, + "step": 18289 + }, + { + "epoch": 0.9399732757734608, + "grad_norm": 1.1195169687271118, + "learning_rate": 9.419528997224037e-08, + "loss": 0.7202, + "step": 18290 + }, + { + "epoch": 0.9400246685168054, + "grad_norm": 1.0363930463790894, + "learning_rate": 9.403457298218066e-08, + "loss": 0.7148, + "step": 18291 + }, + { + "epoch": 0.9400760612601501, + "grad_norm": 1.0344195365905762, + "learning_rate": 9.387399191617796e-08, + "loss": 0.679, + "step": 18292 + }, + { + "epoch": 0.9401274540034947, + "grad_norm": 1.1695897579193115, + "learning_rate": 9.371354677868039e-08, + "loss": 0.6831, + "step": 18293 + }, + { + "epoch": 0.9401788467468394, + "grad_norm": 1.0498465299606323, + "learning_rate": 9.355323757413326e-08, + "loss": 0.6771, + "step": 18294 + }, + { + "epoch": 0.940230239490184, + "grad_norm": 1.1350390911102295, + "learning_rate": 9.339306430697803e-08, + "loss": 0.6453, + "step": 18295 + }, + { + "epoch": 0.9402816322335287, + "grad_norm": 1.1396948099136353, + "learning_rate": 9.323302698165282e-08, + "loss": 0.699, + "step": 18296 + }, + { + "epoch": 0.9403330249768732, + "grad_norm": 0.6648402214050293, + "learning_rate": 9.307312560259186e-08, + "loss": 0.6738, + "step": 18297 + }, + { + "epoch": 0.9403844177202179, + "grad_norm": 1.0341657400131226, + "learning_rate": 9.291336017422437e-08, + "loss": 0.7235, + "step": 18298 + }, + { + "epoch": 0.9404358104635625, + "grad_norm": 0.6653953194618225, + "learning_rate": 9.275373070097682e-08, + "loss": 0.6229, + "step": 18299 + }, + { + "epoch": 0.9404872032069072, + "grad_norm": 1.1513853073120117, + "learning_rate": 9.259423718727345e-08, + "loss": 0.7019, + "step": 18300 + }, + { + "epoch": 0.9405385959502518, + "grad_norm": 1.1183154582977295, + "learning_rate": 9.243487963753128e-08, + "loss": 0.6989, + "step": 18301 + }, + { + "epoch": 0.9405899886935964, + "grad_norm": 1.1340559720993042, + "learning_rate": 9.227565805616678e-08, + "loss": 0.6513, + "step": 18302 + }, + { + "epoch": 0.9406413814369411, + "grad_norm": 0.7577893137931824, + "learning_rate": 9.211657244759087e-08, + "loss": 0.6582, + "step": 18303 + }, + { + "epoch": 0.9406927741802857, + "grad_norm": 1.1421209573745728, + "learning_rate": 9.195762281621057e-08, + "loss": 0.7183, + "step": 18304 + }, + { + "epoch": 0.9407441669236304, + "grad_norm": 1.1564736366271973, + "learning_rate": 9.17988091664307e-08, + "loss": 0.7751, + "step": 18305 + }, + { + "epoch": 0.940795559666975, + "grad_norm": 0.7559486627578735, + "learning_rate": 9.16401315026505e-08, + "loss": 0.6824, + "step": 18306 + }, + { + "epoch": 0.9408469524103197, + "grad_norm": 1.1291884183883667, + "learning_rate": 9.148158982926703e-08, + "loss": 0.6904, + "step": 18307 + }, + { + "epoch": 0.9408983451536643, + "grad_norm": 1.1255857944488525, + "learning_rate": 9.132318415067232e-08, + "loss": 0.6642, + "step": 18308 + }, + { + "epoch": 0.940949737897009, + "grad_norm": 1.05878484249115, + "learning_rate": 9.116491447125508e-08, + "loss": 0.671, + "step": 18309 + }, + { + "epoch": 0.9410011306403536, + "grad_norm": 0.713230550289154, + "learning_rate": 9.10067807954007e-08, + "loss": 0.6761, + "step": 18310 + }, + { + "epoch": 0.9410525233836983, + "grad_norm": 1.1593071222305298, + "learning_rate": 9.084878312749012e-08, + "loss": 0.7015, + "step": 18311 + }, + { + "epoch": 0.9411039161270429, + "grad_norm": 1.050907015800476, + "learning_rate": 9.069092147190095e-08, + "loss": 0.6591, + "step": 18312 + }, + { + "epoch": 0.9411553088703875, + "grad_norm": 1.0818685293197632, + "learning_rate": 9.05331958330069e-08, + "loss": 0.685, + "step": 18313 + }, + { + "epoch": 0.9412067016137321, + "grad_norm": 1.1976109743118286, + "learning_rate": 9.037560621517783e-08, + "loss": 0.7308, + "step": 18314 + }, + { + "epoch": 0.9412580943570767, + "grad_norm": 1.112247109413147, + "learning_rate": 9.021815262277911e-08, + "loss": 0.7052, + "step": 18315 + }, + { + "epoch": 0.9413094871004214, + "grad_norm": 1.0798821449279785, + "learning_rate": 9.006083506017505e-08, + "loss": 0.6859, + "step": 18316 + }, + { + "epoch": 0.941360879843766, + "grad_norm": 0.871130645275116, + "learning_rate": 8.990365353172215e-08, + "loss": 0.6242, + "step": 18317 + }, + { + "epoch": 0.9414122725871107, + "grad_norm": 1.0228276252746582, + "learning_rate": 8.974660804177693e-08, + "loss": 0.6635, + "step": 18318 + }, + { + "epoch": 0.9414636653304553, + "grad_norm": 0.7340797781944275, + "learning_rate": 8.95896985946898e-08, + "loss": 0.6458, + "step": 18319 + }, + { + "epoch": 0.9415150580738, + "grad_norm": 1.172174334526062, + "learning_rate": 8.943292519480729e-08, + "loss": 0.6582, + "step": 18320 + }, + { + "epoch": 0.9415664508171446, + "grad_norm": 1.1598737239837646, + "learning_rate": 8.92762878464748e-08, + "loss": 0.6876, + "step": 18321 + }, + { + "epoch": 0.9416178435604893, + "grad_norm": 1.3235054016113281, + "learning_rate": 8.911978655403052e-08, + "loss": 0.6558, + "step": 18322 + }, + { + "epoch": 0.9416692363038339, + "grad_norm": 1.0557688474655151, + "learning_rate": 8.896342132181101e-08, + "loss": 0.7143, + "step": 18323 + }, + { + "epoch": 0.9417206290471786, + "grad_norm": 0.9993652701377869, + "learning_rate": 8.88071921541478e-08, + "loss": 0.645, + "step": 18324 + }, + { + "epoch": 0.9417720217905232, + "grad_norm": 1.1330052614212036, + "learning_rate": 8.865109905537128e-08, + "loss": 0.664, + "step": 18325 + }, + { + "epoch": 0.9418234145338679, + "grad_norm": 1.106482744216919, + "learning_rate": 8.849514202980358e-08, + "loss": 0.7293, + "step": 18326 + }, + { + "epoch": 0.9418748072772125, + "grad_norm": 1.0812040567398071, + "learning_rate": 8.833932108176735e-08, + "loss": 0.6779, + "step": 18327 + }, + { + "epoch": 0.941926200020557, + "grad_norm": 1.1602320671081543, + "learning_rate": 8.818363621557913e-08, + "loss": 0.6617, + "step": 18328 + }, + { + "epoch": 0.9419775927639017, + "grad_norm": 1.165284514427185, + "learning_rate": 8.802808743555269e-08, + "loss": 0.7208, + "step": 18329 + }, + { + "epoch": 0.9420289855072463, + "grad_norm": 1.0488582849502563, + "learning_rate": 8.787267474599792e-08, + "loss": 0.7283, + "step": 18330 + }, + { + "epoch": 0.942080378250591, + "grad_norm": 1.07056725025177, + "learning_rate": 8.771739815121916e-08, + "loss": 0.6867, + "step": 18331 + }, + { + "epoch": 0.9421317709939356, + "grad_norm": 1.0258830785751343, + "learning_rate": 8.756225765552074e-08, + "loss": 0.6411, + "step": 18332 + }, + { + "epoch": 0.9421831637372803, + "grad_norm": 1.058482050895691, + "learning_rate": 8.740725326319865e-08, + "loss": 0.7051, + "step": 18333 + }, + { + "epoch": 0.9422345564806249, + "grad_norm": 1.1020543575286865, + "learning_rate": 8.725238497854893e-08, + "loss": 0.6497, + "step": 18334 + }, + { + "epoch": 0.9422859492239696, + "grad_norm": 1.072866439819336, + "learning_rate": 8.709765280586202e-08, + "loss": 0.683, + "step": 18335 + }, + { + "epoch": 0.9423373419673142, + "grad_norm": 1.0985640287399292, + "learning_rate": 8.694305674942504e-08, + "loss": 0.6738, + "step": 18336 + }, + { + "epoch": 0.9423887347106589, + "grad_norm": 0.7717066407203674, + "learning_rate": 8.678859681352014e-08, + "loss": 0.637, + "step": 18337 + }, + { + "epoch": 0.9424401274540035, + "grad_norm": 1.1254777908325195, + "learning_rate": 8.663427300242833e-08, + "loss": 0.6683, + "step": 18338 + }, + { + "epoch": 0.9424915201973482, + "grad_norm": 1.1327439546585083, + "learning_rate": 8.648008532042451e-08, + "loss": 0.692, + "step": 18339 + }, + { + "epoch": 0.9425429129406928, + "grad_norm": 1.2893112897872925, + "learning_rate": 8.632603377177972e-08, + "loss": 0.7192, + "step": 18340 + }, + { + "epoch": 0.9425943056840375, + "grad_norm": 0.8734267354011536, + "learning_rate": 8.617211836076445e-08, + "loss": 0.6341, + "step": 18341 + }, + { + "epoch": 0.9426456984273821, + "grad_norm": 1.0354949235916138, + "learning_rate": 8.601833909164081e-08, + "loss": 0.5949, + "step": 18342 + }, + { + "epoch": 0.9426970911707266, + "grad_norm": 0.7370154857635498, + "learning_rate": 8.586469596866987e-08, + "loss": 0.6318, + "step": 18343 + }, + { + "epoch": 0.9427484839140713, + "grad_norm": 1.1292835474014282, + "learning_rate": 8.571118899610875e-08, + "loss": 0.7157, + "step": 18344 + }, + { + "epoch": 0.9427998766574159, + "grad_norm": 1.090070366859436, + "learning_rate": 8.555781817821185e-08, + "loss": 0.693, + "step": 18345 + }, + { + "epoch": 0.9428512694007606, + "grad_norm": 1.0881165266036987, + "learning_rate": 8.540458351922576e-08, + "loss": 0.6593, + "step": 18346 + }, + { + "epoch": 0.9429026621441052, + "grad_norm": 1.0677086114883423, + "learning_rate": 8.525148502339764e-08, + "loss": 0.6528, + "step": 18347 + }, + { + "epoch": 0.9429540548874499, + "grad_norm": 1.086267113685608, + "learning_rate": 8.509852269496909e-08, + "loss": 0.6647, + "step": 18348 + }, + { + "epoch": 0.9430054476307945, + "grad_norm": 0.7654056549072266, + "learning_rate": 8.494569653817786e-08, + "loss": 0.6634, + "step": 18349 + }, + { + "epoch": 0.9430568403741392, + "grad_norm": 1.1124404668807983, + "learning_rate": 8.479300655725775e-08, + "loss": 0.6679, + "step": 18350 + }, + { + "epoch": 0.9431082331174838, + "grad_norm": 0.6850045919418335, + "learning_rate": 8.464045275643984e-08, + "loss": 0.6282, + "step": 18351 + }, + { + "epoch": 0.9431596258608285, + "grad_norm": 0.9976409673690796, + "learning_rate": 8.448803513995074e-08, + "loss": 0.6193, + "step": 18352 + }, + { + "epoch": 0.9432110186041731, + "grad_norm": 1.107151746749878, + "learning_rate": 8.433575371201264e-08, + "loss": 0.7061, + "step": 18353 + }, + { + "epoch": 0.9432624113475178, + "grad_norm": 1.0202232599258423, + "learning_rate": 8.418360847684548e-08, + "loss": 0.6469, + "step": 18354 + }, + { + "epoch": 0.9433138040908624, + "grad_norm": 1.1062217950820923, + "learning_rate": 8.403159943866368e-08, + "loss": 0.7196, + "step": 18355 + }, + { + "epoch": 0.943365196834207, + "grad_norm": 1.110945463180542, + "learning_rate": 8.387972660167942e-08, + "loss": 0.7138, + "step": 18356 + }, + { + "epoch": 0.9434165895775517, + "grad_norm": 1.1200275421142578, + "learning_rate": 8.372798997010045e-08, + "loss": 0.6775, + "step": 18357 + }, + { + "epoch": 0.9434679823208962, + "grad_norm": 1.0690885782241821, + "learning_rate": 8.357638954813063e-08, + "loss": 0.6893, + "step": 18358 + }, + { + "epoch": 0.9435193750642409, + "grad_norm": 1.0283961296081543, + "learning_rate": 8.342492533996993e-08, + "loss": 0.6597, + "step": 18359 + }, + { + "epoch": 0.9435707678075855, + "grad_norm": 1.0629364252090454, + "learning_rate": 8.327359734981554e-08, + "loss": 0.6675, + "step": 18360 + }, + { + "epoch": 0.9436221605509302, + "grad_norm": 1.0458266735076904, + "learning_rate": 8.312240558185914e-08, + "loss": 0.6334, + "step": 18361 + }, + { + "epoch": 0.9436735532942748, + "grad_norm": 0.7651318311691284, + "learning_rate": 8.297135004029011e-08, + "loss": 0.6268, + "step": 18362 + }, + { + "epoch": 0.9437249460376195, + "grad_norm": 0.7195444107055664, + "learning_rate": 8.282043072929347e-08, + "loss": 0.6298, + "step": 18363 + }, + { + "epoch": 0.9437763387809641, + "grad_norm": 1.1340622901916504, + "learning_rate": 8.26696476530514e-08, + "loss": 0.7046, + "step": 18364 + }, + { + "epoch": 0.9438277315243088, + "grad_norm": 0.7147236466407776, + "learning_rate": 8.25190008157406e-08, + "loss": 0.5916, + "step": 18365 + }, + { + "epoch": 0.9438791242676534, + "grad_norm": 1.1565423011779785, + "learning_rate": 8.236849022153436e-08, + "loss": 0.7452, + "step": 18366 + }, + { + "epoch": 0.9439305170109981, + "grad_norm": 0.6798750758171082, + "learning_rate": 8.221811587460438e-08, + "loss": 0.6579, + "step": 18367 + }, + { + "epoch": 0.9439819097543427, + "grad_norm": 1.0616264343261719, + "learning_rate": 8.206787777911507e-08, + "loss": 0.632, + "step": 18368 + }, + { + "epoch": 0.9440333024976874, + "grad_norm": 1.1190334558486938, + "learning_rate": 8.191777593922978e-08, + "loss": 0.671, + "step": 18369 + }, + { + "epoch": 0.944084695241032, + "grad_norm": 1.081934928894043, + "learning_rate": 8.176781035910796e-08, + "loss": 0.712, + "step": 18370 + }, + { + "epoch": 0.9441360879843766, + "grad_norm": 1.1126043796539307, + "learning_rate": 8.161798104290297e-08, + "loss": 0.614, + "step": 18371 + }, + { + "epoch": 0.9441874807277213, + "grad_norm": 1.0566575527191162, + "learning_rate": 8.146828799476703e-08, + "loss": 0.6895, + "step": 18372 + }, + { + "epoch": 0.9442388734710658, + "grad_norm": 1.3962303400039673, + "learning_rate": 8.131873121884737e-08, + "loss": 0.6978, + "step": 18373 + }, + { + "epoch": 0.9442902662144105, + "grad_norm": 1.080667495727539, + "learning_rate": 8.116931071928736e-08, + "loss": 0.7137, + "step": 18374 + }, + { + "epoch": 0.9443416589577551, + "grad_norm": 1.0879675149917603, + "learning_rate": 8.102002650022645e-08, + "loss": 0.6849, + "step": 18375 + }, + { + "epoch": 0.9443930517010998, + "grad_norm": 1.0781880617141724, + "learning_rate": 8.087087856580189e-08, + "loss": 0.7159, + "step": 18376 + }, + { + "epoch": 0.9444444444444444, + "grad_norm": 1.1003646850585938, + "learning_rate": 8.072186692014538e-08, + "loss": 0.7314, + "step": 18377 + }, + { + "epoch": 0.9444958371877891, + "grad_norm": 0.8745474219322205, + "learning_rate": 8.057299156738474e-08, + "loss": 0.6111, + "step": 18378 + }, + { + "epoch": 0.9445472299311337, + "grad_norm": 1.1004819869995117, + "learning_rate": 8.042425251164553e-08, + "loss": 0.6808, + "step": 18379 + }, + { + "epoch": 0.9445986226744784, + "grad_norm": 1.131296992301941, + "learning_rate": 8.027564975704838e-08, + "loss": 0.6901, + "step": 18380 + }, + { + "epoch": 0.944650015417823, + "grad_norm": 0.7593706846237183, + "learning_rate": 8.012718330770997e-08, + "loss": 0.6197, + "step": 18381 + }, + { + "epoch": 0.9447014081611677, + "grad_norm": 1.0173735618591309, + "learning_rate": 7.997885316774423e-08, + "loss": 0.6394, + "step": 18382 + }, + { + "epoch": 0.9447528009045123, + "grad_norm": 1.039408802986145, + "learning_rate": 7.983065934126177e-08, + "loss": 0.6665, + "step": 18383 + }, + { + "epoch": 0.944804193647857, + "grad_norm": 1.1528152227401733, + "learning_rate": 7.968260183236654e-08, + "loss": 0.7075, + "step": 18384 + }, + { + "epoch": 0.9448555863912016, + "grad_norm": 1.156407117843628, + "learning_rate": 7.95346806451619e-08, + "loss": 0.6699, + "step": 18385 + }, + { + "epoch": 0.9449069791345462, + "grad_norm": 1.0548685789108276, + "learning_rate": 7.938689578374514e-08, + "loss": 0.6925, + "step": 18386 + }, + { + "epoch": 0.9449583718778909, + "grad_norm": 1.0613758563995361, + "learning_rate": 7.923924725221188e-08, + "loss": 0.663, + "step": 18387 + }, + { + "epoch": 0.9450097646212354, + "grad_norm": 1.060703992843628, + "learning_rate": 7.909173505465162e-08, + "loss": 0.6922, + "step": 18388 + }, + { + "epoch": 0.9450611573645801, + "grad_norm": 1.0817965269088745, + "learning_rate": 7.894435919515275e-08, + "loss": 0.6459, + "step": 18389 + }, + { + "epoch": 0.9451125501079247, + "grad_norm": 1.023704171180725, + "learning_rate": 7.879711967779757e-08, + "loss": 0.7078, + "step": 18390 + }, + { + "epoch": 0.9451639428512694, + "grad_norm": 1.124657392501831, + "learning_rate": 7.865001650666504e-08, + "loss": 0.6715, + "step": 18391 + }, + { + "epoch": 0.945215335594614, + "grad_norm": 0.6721488237380981, + "learning_rate": 7.850304968583189e-08, + "loss": 0.6756, + "step": 18392 + }, + { + "epoch": 0.9452667283379587, + "grad_norm": 0.7647435069084167, + "learning_rate": 7.835621921936931e-08, + "loss": 0.6516, + "step": 18393 + }, + { + "epoch": 0.9453181210813033, + "grad_norm": 1.546966314315796, + "learning_rate": 7.820952511134516e-08, + "loss": 0.6842, + "step": 18394 + }, + { + "epoch": 0.945369513824648, + "grad_norm": 1.109662413597107, + "learning_rate": 7.806296736582398e-08, + "loss": 0.7224, + "step": 18395 + }, + { + "epoch": 0.9454209065679926, + "grad_norm": 1.084359049797058, + "learning_rate": 7.791654598686693e-08, + "loss": 0.6387, + "step": 18396 + }, + { + "epoch": 0.9454722993113372, + "grad_norm": 1.2082669734954834, + "learning_rate": 7.777026097852968e-08, + "loss": 0.6831, + "step": 18397 + }, + { + "epoch": 0.9455236920546819, + "grad_norm": 0.7884851098060608, + "learning_rate": 7.762411234486623e-08, + "loss": 0.6896, + "step": 18398 + }, + { + "epoch": 0.9455750847980265, + "grad_norm": 1.0399426221847534, + "learning_rate": 7.74781000899244e-08, + "loss": 0.6466, + "step": 18399 + }, + { + "epoch": 0.9456264775413712, + "grad_norm": 1.0866239070892334, + "learning_rate": 7.7332224217751e-08, + "loss": 0.6741, + "step": 18400 + }, + { + "epoch": 0.9456778702847158, + "grad_norm": 0.8036141395568848, + "learning_rate": 7.718648473238666e-08, + "loss": 0.6583, + "step": 18401 + }, + { + "epoch": 0.9457292630280605, + "grad_norm": 1.0273306369781494, + "learning_rate": 7.704088163786982e-08, + "loss": 0.6924, + "step": 18402 + }, + { + "epoch": 0.9457806557714051, + "grad_norm": 1.1397476196289062, + "learning_rate": 7.689541493823393e-08, + "loss": 0.7054, + "step": 18403 + }, + { + "epoch": 0.9458320485147497, + "grad_norm": 0.7133983969688416, + "learning_rate": 7.675008463750966e-08, + "loss": 0.6529, + "step": 18404 + }, + { + "epoch": 0.9458834412580943, + "grad_norm": 1.0985074043273926, + "learning_rate": 7.660489073972433e-08, + "loss": 0.6644, + "step": 18405 + }, + { + "epoch": 0.945934834001439, + "grad_norm": 0.7019321322441101, + "learning_rate": 7.645983324889861e-08, + "loss": 0.6304, + "step": 18406 + }, + { + "epoch": 0.9459862267447836, + "grad_norm": 1.1341408491134644, + "learning_rate": 7.631491216905318e-08, + "loss": 0.67, + "step": 18407 + }, + { + "epoch": 0.9460376194881283, + "grad_norm": 1.1129119396209717, + "learning_rate": 7.61701275042026e-08, + "loss": 0.6939, + "step": 18408 + }, + { + "epoch": 0.9460890122314729, + "grad_norm": 1.1298763751983643, + "learning_rate": 7.602547925835923e-08, + "loss": 0.6977, + "step": 18409 + }, + { + "epoch": 0.9461404049748176, + "grad_norm": 1.1325587034225464, + "learning_rate": 7.588096743552875e-08, + "loss": 0.7151, + "step": 18410 + }, + { + "epoch": 0.9461917977181622, + "grad_norm": 1.1187431812286377, + "learning_rate": 7.573659203971628e-08, + "loss": 0.6598, + "step": 18411 + }, + { + "epoch": 0.9462431904615068, + "grad_norm": 1.1162960529327393, + "learning_rate": 7.55923530749214e-08, + "loss": 0.7255, + "step": 18412 + }, + { + "epoch": 0.9462945832048515, + "grad_norm": 1.0391203165054321, + "learning_rate": 7.544825054514094e-08, + "loss": 0.7168, + "step": 18413 + }, + { + "epoch": 0.9463459759481961, + "grad_norm": 0.7869401574134827, + "learning_rate": 7.530428445436667e-08, + "loss": 0.6264, + "step": 18414 + }, + { + "epoch": 0.9463973686915408, + "grad_norm": 1.0711759328842163, + "learning_rate": 7.516045480658818e-08, + "loss": 0.6997, + "step": 18415 + }, + { + "epoch": 0.9464487614348854, + "grad_norm": 1.1328225135803223, + "learning_rate": 7.501676160578953e-08, + "loss": 0.6689, + "step": 18416 + }, + { + "epoch": 0.9465001541782301, + "grad_norm": 1.0806596279144287, + "learning_rate": 7.487320485595195e-08, + "loss": 0.7014, + "step": 18417 + }, + { + "epoch": 0.9465515469215747, + "grad_norm": 1.107001781463623, + "learning_rate": 7.472978456105396e-08, + "loss": 0.7079, + "step": 18418 + }, + { + "epoch": 0.9466029396649193, + "grad_norm": 1.094678521156311, + "learning_rate": 7.458650072506734e-08, + "loss": 0.6879, + "step": 18419 + }, + { + "epoch": 0.9466543324082639, + "grad_norm": 1.179389476776123, + "learning_rate": 7.44433533519634e-08, + "loss": 0.7319, + "step": 18420 + }, + { + "epoch": 0.9467057251516086, + "grad_norm": 1.1737526655197144, + "learning_rate": 7.430034244570672e-08, + "loss": 0.673, + "step": 18421 + }, + { + "epoch": 0.9467571178949532, + "grad_norm": 0.7394821047782898, + "learning_rate": 7.415746801026135e-08, + "loss": 0.6793, + "step": 18422 + }, + { + "epoch": 0.9468085106382979, + "grad_norm": 1.0772780179977417, + "learning_rate": 7.401473004958415e-08, + "loss": 0.6769, + "step": 18423 + }, + { + "epoch": 0.9468599033816425, + "grad_norm": 1.0923675298690796, + "learning_rate": 7.387212856763082e-08, + "loss": 0.7749, + "step": 18424 + }, + { + "epoch": 0.9469112961249871, + "grad_norm": 1.0914546251296997, + "learning_rate": 7.37296635683521e-08, + "loss": 0.6714, + "step": 18425 + }, + { + "epoch": 0.9469626888683318, + "grad_norm": 1.1394906044006348, + "learning_rate": 7.358733505569426e-08, + "loss": 0.711, + "step": 18426 + }, + { + "epoch": 0.9470140816116764, + "grad_norm": 1.0678943395614624, + "learning_rate": 7.34451430336014e-08, + "loss": 0.6717, + "step": 18427 + }, + { + "epoch": 0.9470654743550211, + "grad_norm": 1.1875513792037964, + "learning_rate": 7.330308750601312e-08, + "loss": 0.6756, + "step": 18428 + }, + { + "epoch": 0.9471168670983657, + "grad_norm": 1.0514755249023438, + "learning_rate": 7.316116847686517e-08, + "loss": 0.7395, + "step": 18429 + }, + { + "epoch": 0.9471682598417104, + "grad_norm": 1.1398285627365112, + "learning_rate": 7.301938595008883e-08, + "loss": 0.7533, + "step": 18430 + }, + { + "epoch": 0.947219652585055, + "grad_norm": 1.077080249786377, + "learning_rate": 7.287773992961323e-08, + "loss": 0.6521, + "step": 18431 + }, + { + "epoch": 0.9472710453283997, + "grad_norm": 1.0949287414550781, + "learning_rate": 7.27362304193624e-08, + "loss": 0.7117, + "step": 18432 + }, + { + "epoch": 0.9473224380717443, + "grad_norm": 0.8304964303970337, + "learning_rate": 7.259485742325712e-08, + "loss": 0.6638, + "step": 18433 + }, + { + "epoch": 0.9473738308150889, + "grad_norm": 1.0441664457321167, + "learning_rate": 7.245362094521424e-08, + "loss": 0.7334, + "step": 18434 + }, + { + "epoch": 0.9474252235584335, + "grad_norm": 1.149086594581604, + "learning_rate": 7.23125209891462e-08, + "loss": 0.7262, + "step": 18435 + }, + { + "epoch": 0.9474766163017782, + "grad_norm": 1.147416591644287, + "learning_rate": 7.217155755896321e-08, + "loss": 0.7572, + "step": 18436 + }, + { + "epoch": 0.9475280090451228, + "grad_norm": 1.0969284772872925, + "learning_rate": 7.203073065857046e-08, + "loss": 0.7028, + "step": 18437 + }, + { + "epoch": 0.9475794017884674, + "grad_norm": 1.163584589958191, + "learning_rate": 7.189004029186986e-08, + "loss": 0.6995, + "step": 18438 + }, + { + "epoch": 0.9476307945318121, + "grad_norm": 1.0175880193710327, + "learning_rate": 7.174948646275826e-08, + "loss": 0.6561, + "step": 18439 + }, + { + "epoch": 0.9476821872751567, + "grad_norm": 1.1271517276763916, + "learning_rate": 7.160906917513144e-08, + "loss": 0.6774, + "step": 18440 + }, + { + "epoch": 0.9477335800185014, + "grad_norm": 1.194360375404358, + "learning_rate": 7.146878843287907e-08, + "loss": 0.6766, + "step": 18441 + }, + { + "epoch": 0.947784972761846, + "grad_norm": 1.1572412252426147, + "learning_rate": 7.132864423988695e-08, + "loss": 0.699, + "step": 18442 + }, + { + "epoch": 0.9478363655051907, + "grad_norm": 1.0989176034927368, + "learning_rate": 7.118863660003972e-08, + "loss": 0.6701, + "step": 18443 + }, + { + "epoch": 0.9478877582485353, + "grad_norm": 1.117856502532959, + "learning_rate": 7.104876551721485e-08, + "loss": 0.7303, + "step": 18444 + }, + { + "epoch": 0.94793915099188, + "grad_norm": 1.0269311666488647, + "learning_rate": 7.090903099528812e-08, + "loss": 0.7116, + "step": 18445 + }, + { + "epoch": 0.9479905437352246, + "grad_norm": 1.1562519073486328, + "learning_rate": 7.076943303813088e-08, + "loss": 0.6809, + "step": 18446 + }, + { + "epoch": 0.9480419364785693, + "grad_norm": 1.0489423274993896, + "learning_rate": 7.062997164961116e-08, + "loss": 0.7346, + "step": 18447 + }, + { + "epoch": 0.9480933292219139, + "grad_norm": 1.0649052858352661, + "learning_rate": 7.049064683359252e-08, + "loss": 0.6867, + "step": 18448 + }, + { + "epoch": 0.9481447219652585, + "grad_norm": 1.0856635570526123, + "learning_rate": 7.035145859393521e-08, + "loss": 0.7256, + "step": 18449 + }, + { + "epoch": 0.9481961147086031, + "grad_norm": 1.096313714981079, + "learning_rate": 7.021240693449561e-08, + "loss": 0.6991, + "step": 18450 + }, + { + "epoch": 0.9482475074519477, + "grad_norm": 1.1210533380508423, + "learning_rate": 7.007349185912616e-08, + "loss": 0.6776, + "step": 18451 + }, + { + "epoch": 0.9482989001952924, + "grad_norm": 1.1090292930603027, + "learning_rate": 6.99347133716749e-08, + "loss": 0.7521, + "step": 18452 + }, + { + "epoch": 0.948350292938637, + "grad_norm": 1.0571128129959106, + "learning_rate": 6.979607147598821e-08, + "loss": 0.7019, + "step": 18453 + }, + { + "epoch": 0.9484016856819817, + "grad_norm": 1.097296953201294, + "learning_rate": 6.965756617590691e-08, + "loss": 0.7167, + "step": 18454 + }, + { + "epoch": 0.9484530784253263, + "grad_norm": 1.053031086921692, + "learning_rate": 6.951919747526737e-08, + "loss": 0.7015, + "step": 18455 + }, + { + "epoch": 0.948504471168671, + "grad_norm": 1.0557738542556763, + "learning_rate": 6.938096537790484e-08, + "loss": 0.7166, + "step": 18456 + }, + { + "epoch": 0.9485558639120156, + "grad_norm": 1.0871689319610596, + "learning_rate": 6.924286988764795e-08, + "loss": 0.6804, + "step": 18457 + }, + { + "epoch": 0.9486072566553603, + "grad_norm": 0.8251727819442749, + "learning_rate": 6.910491100832306e-08, + "loss": 0.5833, + "step": 18458 + }, + { + "epoch": 0.9486586493987049, + "grad_norm": 1.1040583848953247, + "learning_rate": 6.896708874375213e-08, + "loss": 0.7224, + "step": 18459 + }, + { + "epoch": 0.9487100421420496, + "grad_norm": 1.0890392065048218, + "learning_rate": 6.882940309775487e-08, + "loss": 0.7274, + "step": 18460 + }, + { + "epoch": 0.9487614348853942, + "grad_norm": 1.268336296081543, + "learning_rate": 6.86918540741438e-08, + "loss": 0.6523, + "step": 18461 + }, + { + "epoch": 0.9488128276287389, + "grad_norm": 1.0202683210372925, + "learning_rate": 6.855444167673197e-08, + "loss": 0.6574, + "step": 18462 + }, + { + "epoch": 0.9488642203720835, + "grad_norm": 0.7436390519142151, + "learning_rate": 6.841716590932579e-08, + "loss": 0.622, + "step": 18463 + }, + { + "epoch": 0.948915613115428, + "grad_norm": 1.1500734090805054, + "learning_rate": 6.828002677572831e-08, + "loss": 0.6943, + "step": 18464 + }, + { + "epoch": 0.9489670058587727, + "grad_norm": 0.7075164318084717, + "learning_rate": 6.814302427973929e-08, + "loss": 0.6018, + "step": 18465 + }, + { + "epoch": 0.9490183986021173, + "grad_norm": 1.1226712465286255, + "learning_rate": 6.800615842515457e-08, + "loss": 0.7104, + "step": 18466 + }, + { + "epoch": 0.949069791345462, + "grad_norm": 1.0663801431655884, + "learning_rate": 6.786942921576611e-08, + "loss": 0.6888, + "step": 18467 + }, + { + "epoch": 0.9491211840888066, + "grad_norm": 1.1259154081344604, + "learning_rate": 6.773283665536145e-08, + "loss": 0.7188, + "step": 18468 + }, + { + "epoch": 0.9491725768321513, + "grad_norm": 1.0178067684173584, + "learning_rate": 6.759638074772645e-08, + "loss": 0.6545, + "step": 18469 + }, + { + "epoch": 0.9492239695754959, + "grad_norm": 1.0998835563659668, + "learning_rate": 6.746006149664086e-08, + "loss": 0.6667, + "step": 18470 + }, + { + "epoch": 0.9492753623188406, + "grad_norm": 0.8189653754234314, + "learning_rate": 6.73238789058811e-08, + "loss": 0.626, + "step": 18471 + }, + { + "epoch": 0.9493267550621852, + "grad_norm": 1.165387511253357, + "learning_rate": 6.718783297922082e-08, + "loss": 0.707, + "step": 18472 + }, + { + "epoch": 0.9493781478055299, + "grad_norm": 1.1666316986083984, + "learning_rate": 6.705192372042923e-08, + "loss": 0.7033, + "step": 18473 + }, + { + "epoch": 0.9494295405488745, + "grad_norm": 1.0873501300811768, + "learning_rate": 6.691615113327165e-08, + "loss": 0.6716, + "step": 18474 + }, + { + "epoch": 0.9494809332922192, + "grad_norm": 0.7831141948699951, + "learning_rate": 6.678051522151008e-08, + "loss": 0.6287, + "step": 18475 + }, + { + "epoch": 0.9495323260355638, + "grad_norm": 1.0405778884887695, + "learning_rate": 6.66450159889026e-08, + "loss": 0.6559, + "step": 18476 + }, + { + "epoch": 0.9495837187789085, + "grad_norm": 0.7674185037612915, + "learning_rate": 6.650965343920235e-08, + "loss": 0.6167, + "step": 18477 + }, + { + "epoch": 0.9496351115222531, + "grad_norm": 0.745793879032135, + "learning_rate": 6.63744275761602e-08, + "loss": 0.6089, + "step": 18478 + }, + { + "epoch": 0.9496865042655978, + "grad_norm": 1.1060012578964233, + "learning_rate": 6.62393384035237e-08, + "loss": 0.651, + "step": 18479 + }, + { + "epoch": 0.9497378970089423, + "grad_norm": 1.1515978574752808, + "learning_rate": 6.610438592503376e-08, + "loss": 0.7066, + "step": 18480 + }, + { + "epoch": 0.9497892897522869, + "grad_norm": 0.7230024337768555, + "learning_rate": 6.596957014443073e-08, + "loss": 0.6767, + "step": 18481 + }, + { + "epoch": 0.9498406824956316, + "grad_norm": 1.0488051176071167, + "learning_rate": 6.583489106544938e-08, + "loss": 0.6732, + "step": 18482 + }, + { + "epoch": 0.9498920752389762, + "grad_norm": 1.0918362140655518, + "learning_rate": 6.570034869182062e-08, + "loss": 0.671, + "step": 18483 + }, + { + "epoch": 0.9499434679823209, + "grad_norm": 1.0410696268081665, + "learning_rate": 6.556594302727259e-08, + "loss": 0.7569, + "step": 18484 + }, + { + "epoch": 0.9499948607256655, + "grad_norm": 1.0427526235580444, + "learning_rate": 6.543167407552953e-08, + "loss": 0.6488, + "step": 18485 + }, + { + "epoch": 0.9500462534690102, + "grad_norm": 1.0695544481277466, + "learning_rate": 6.529754184031123e-08, + "loss": 0.6999, + "step": 18486 + }, + { + "epoch": 0.9500976462123548, + "grad_norm": 1.0486271381378174, + "learning_rate": 6.516354632533361e-08, + "loss": 0.7023, + "step": 18487 + }, + { + "epoch": 0.9501490389556995, + "grad_norm": 1.1256617307662964, + "learning_rate": 6.502968753430871e-08, + "loss": 0.6925, + "step": 18488 + }, + { + "epoch": 0.9502004316990441, + "grad_norm": 1.1408568620681763, + "learning_rate": 6.489596547094634e-08, + "loss": 0.6998, + "step": 18489 + }, + { + "epoch": 0.9502518244423888, + "grad_norm": 1.0978319644927979, + "learning_rate": 6.476238013895019e-08, + "loss": 0.7172, + "step": 18490 + }, + { + "epoch": 0.9503032171857334, + "grad_norm": 1.1008049249649048, + "learning_rate": 6.462893154202232e-08, + "loss": 0.7479, + "step": 18491 + }, + { + "epoch": 0.950354609929078, + "grad_norm": 1.0873156785964966, + "learning_rate": 6.449561968385976e-08, + "loss": 0.724, + "step": 18492 + }, + { + "epoch": 0.9504060026724227, + "grad_norm": 1.0022153854370117, + "learning_rate": 6.436244456815566e-08, + "loss": 0.6398, + "step": 18493 + }, + { + "epoch": 0.9504573954157673, + "grad_norm": 1.0211819410324097, + "learning_rate": 6.42294061986004e-08, + "loss": 0.6903, + "step": 18494 + }, + { + "epoch": 0.9505087881591119, + "grad_norm": 1.1302131414413452, + "learning_rate": 6.40965045788794e-08, + "loss": 0.6584, + "step": 18495 + }, + { + "epoch": 0.9505601809024565, + "grad_norm": 1.1093788146972656, + "learning_rate": 6.396373971267523e-08, + "loss": 0.6889, + "step": 18496 + }, + { + "epoch": 0.9506115736458012, + "grad_norm": 1.0910133123397827, + "learning_rate": 6.383111160366551e-08, + "loss": 0.6918, + "step": 18497 + }, + { + "epoch": 0.9506629663891458, + "grad_norm": 1.0377365350723267, + "learning_rate": 6.36986202555262e-08, + "loss": 0.7185, + "step": 18498 + }, + { + "epoch": 0.9507143591324905, + "grad_norm": 1.1549595594406128, + "learning_rate": 6.356626567192603e-08, + "loss": 0.6857, + "step": 18499 + }, + { + "epoch": 0.9507657518758351, + "grad_norm": 1.1532100439071655, + "learning_rate": 6.343404785653373e-08, + "loss": 0.691, + "step": 18500 + }, + { + "epoch": 0.9508171446191798, + "grad_norm": 1.0925688743591309, + "learning_rate": 6.330196681301192e-08, + "loss": 0.7071, + "step": 18501 + }, + { + "epoch": 0.9508685373625244, + "grad_norm": 0.7527129054069519, + "learning_rate": 6.317002254502047e-08, + "loss": 0.6358, + "step": 18502 + }, + { + "epoch": 0.9509199301058691, + "grad_norm": 1.0495054721832275, + "learning_rate": 6.303821505621421e-08, + "loss": 0.6567, + "step": 18503 + }, + { + "epoch": 0.9509713228492137, + "grad_norm": 1.1129714250564575, + "learning_rate": 6.290654435024523e-08, + "loss": 0.6739, + "step": 18504 + }, + { + "epoch": 0.9510227155925584, + "grad_norm": 1.1473026275634766, + "learning_rate": 6.277501043076229e-08, + "loss": 0.7202, + "step": 18505 + }, + { + "epoch": 0.951074108335903, + "grad_norm": 1.154187560081482, + "learning_rate": 6.264361330140856e-08, + "loss": 0.7749, + "step": 18506 + }, + { + "epoch": 0.9511255010792476, + "grad_norm": 1.1078130006790161, + "learning_rate": 6.251235296582503e-08, + "loss": 0.6846, + "step": 18507 + }, + { + "epoch": 0.9511768938225923, + "grad_norm": 1.0758453607559204, + "learning_rate": 6.23812294276488e-08, + "loss": 0.6819, + "step": 18508 + }, + { + "epoch": 0.9512282865659369, + "grad_norm": 1.125129222869873, + "learning_rate": 6.225024269051194e-08, + "loss": 0.711, + "step": 18509 + }, + { + "epoch": 0.9512796793092815, + "grad_norm": 0.9998478293418884, + "learning_rate": 6.211939275804379e-08, + "loss": 0.5959, + "step": 18510 + }, + { + "epoch": 0.9513310720526261, + "grad_norm": 1.058196783065796, + "learning_rate": 6.19886796338709e-08, + "loss": 0.6995, + "step": 18511 + }, + { + "epoch": 0.9513824647959708, + "grad_norm": 1.0488908290863037, + "learning_rate": 6.185810332161257e-08, + "loss": 0.7032, + "step": 18512 + }, + { + "epoch": 0.9514338575393154, + "grad_norm": 1.2267917394638062, + "learning_rate": 6.172766382488815e-08, + "loss": 0.7711, + "step": 18513 + }, + { + "epoch": 0.9514852502826601, + "grad_norm": 1.1512950658798218, + "learning_rate": 6.159736114731141e-08, + "loss": 0.7681, + "step": 18514 + }, + { + "epoch": 0.9515366430260047, + "grad_norm": 1.1782149076461792, + "learning_rate": 6.146719529249168e-08, + "loss": 0.7714, + "step": 18515 + }, + { + "epoch": 0.9515880357693494, + "grad_norm": 1.1275291442871094, + "learning_rate": 6.133716626403607e-08, + "loss": 0.7664, + "step": 18516 + }, + { + "epoch": 0.951639428512694, + "grad_norm": 1.0166822671890259, + "learning_rate": 6.120727406554672e-08, + "loss": 0.6924, + "step": 18517 + }, + { + "epoch": 0.9516908212560387, + "grad_norm": 1.0569884777069092, + "learning_rate": 6.107751870062296e-08, + "loss": 0.7543, + "step": 18518 + }, + { + "epoch": 0.9517422139993833, + "grad_norm": 1.1686623096466064, + "learning_rate": 6.094790017285911e-08, + "loss": 0.7537, + "step": 18519 + }, + { + "epoch": 0.951793606742728, + "grad_norm": 1.0049107074737549, + "learning_rate": 6.08184184858468e-08, + "loss": 0.649, + "step": 18520 + }, + { + "epoch": 0.9518449994860726, + "grad_norm": 1.090505838394165, + "learning_rate": 6.068907364317367e-08, + "loss": 0.7165, + "step": 18521 + }, + { + "epoch": 0.9518963922294172, + "grad_norm": 1.0637328624725342, + "learning_rate": 6.055986564842242e-08, + "loss": 0.734, + "step": 18522 + }, + { + "epoch": 0.9519477849727619, + "grad_norm": 1.1752525568008423, + "learning_rate": 6.043079450517353e-08, + "loss": 0.7089, + "step": 18523 + }, + { + "epoch": 0.9519991777161065, + "grad_norm": 1.0466455221176147, + "learning_rate": 6.030186021700301e-08, + "loss": 0.6929, + "step": 18524 + }, + { + "epoch": 0.9520505704594511, + "grad_norm": 1.2045466899871826, + "learning_rate": 6.017306278748303e-08, + "loss": 0.7534, + "step": 18525 + }, + { + "epoch": 0.9521019632027957, + "grad_norm": 1.0915586948394775, + "learning_rate": 6.004440222018181e-08, + "loss": 0.6362, + "step": 18526 + }, + { + "epoch": 0.9521533559461404, + "grad_norm": 1.0318011045455933, + "learning_rate": 5.991587851866488e-08, + "loss": 0.6808, + "step": 18527 + }, + { + "epoch": 0.952204748689485, + "grad_norm": 1.1518017053604126, + "learning_rate": 5.978749168649212e-08, + "loss": 0.7415, + "step": 18528 + }, + { + "epoch": 0.9522561414328297, + "grad_norm": 1.1035850048065186, + "learning_rate": 5.965924172722071e-08, + "loss": 0.6648, + "step": 18529 + }, + { + "epoch": 0.9523075341761743, + "grad_norm": 1.0127605199813843, + "learning_rate": 5.953112864440391e-08, + "loss": 0.6659, + "step": 18530 + }, + { + "epoch": 0.952358926919519, + "grad_norm": 1.1557966470718384, + "learning_rate": 5.9403152441592224e-08, + "loss": 0.7113, + "step": 18531 + }, + { + "epoch": 0.9524103196628636, + "grad_norm": 0.8569692969322205, + "learning_rate": 5.927531312232948e-08, + "loss": 0.6355, + "step": 18532 + }, + { + "epoch": 0.9524617124062082, + "grad_norm": 1.1663459539413452, + "learning_rate": 5.914761069015951e-08, + "loss": 0.6925, + "step": 18533 + }, + { + "epoch": 0.9525131051495529, + "grad_norm": 1.171074628829956, + "learning_rate": 5.9020045148619474e-08, + "loss": 0.716, + "step": 18534 + }, + { + "epoch": 0.9525644978928975, + "grad_norm": 1.059232473373413, + "learning_rate": 5.889261650124323e-08, + "loss": 0.7402, + "step": 18535 + }, + { + "epoch": 0.9526158906362422, + "grad_norm": 1.0407756567001343, + "learning_rate": 5.876532475156238e-08, + "loss": 0.6882, + "step": 18536 + }, + { + "epoch": 0.9526672833795868, + "grad_norm": 1.0759800672531128, + "learning_rate": 5.863816990310245e-08, + "loss": 0.6955, + "step": 18537 + }, + { + "epoch": 0.9527186761229315, + "grad_norm": 1.0647433996200562, + "learning_rate": 5.8511151959387836e-08, + "loss": 0.7005, + "step": 18538 + }, + { + "epoch": 0.9527700688662761, + "grad_norm": 1.098070502281189, + "learning_rate": 5.838427092393628e-08, + "loss": 0.6648, + "step": 18539 + }, + { + "epoch": 0.9528214616096207, + "grad_norm": 1.0953816175460815, + "learning_rate": 5.825752680026442e-08, + "loss": 0.7157, + "step": 18540 + }, + { + "epoch": 0.9528728543529653, + "grad_norm": 1.0858277082443237, + "learning_rate": 5.8130919591882215e-08, + "loss": 0.7058, + "step": 18541 + }, + { + "epoch": 0.95292424709631, + "grad_norm": 1.1203078031539917, + "learning_rate": 5.800444930229909e-08, + "loss": 0.6798, + "step": 18542 + }, + { + "epoch": 0.9529756398396546, + "grad_norm": 1.0937460660934448, + "learning_rate": 5.78781159350178e-08, + "loss": 0.6466, + "step": 18543 + }, + { + "epoch": 0.9530270325829993, + "grad_norm": 1.0030782222747803, + "learning_rate": 5.7751919493538865e-08, + "loss": 0.6236, + "step": 18544 + }, + { + "epoch": 0.9530784253263439, + "grad_norm": 1.0455782413482666, + "learning_rate": 5.76258599813595e-08, + "loss": 0.7133, + "step": 18545 + }, + { + "epoch": 0.9531298180696886, + "grad_norm": 1.125820279121399, + "learning_rate": 5.7499937401970796e-08, + "loss": 0.7165, + "step": 18546 + }, + { + "epoch": 0.9531812108130332, + "grad_norm": 1.0471553802490234, + "learning_rate": 5.7374151758863296e-08, + "loss": 0.6555, + "step": 18547 + }, + { + "epoch": 0.9532326035563778, + "grad_norm": 1.0910669565200806, + "learning_rate": 5.724850305551977e-08, + "loss": 0.67, + "step": 18548 + }, + { + "epoch": 0.9532839962997225, + "grad_norm": 0.7481269240379333, + "learning_rate": 5.7122991295423534e-08, + "loss": 0.6264, + "step": 18549 + }, + { + "epoch": 0.9533353890430671, + "grad_norm": 1.1256262063980103, + "learning_rate": 5.699761648205126e-08, + "loss": 0.6816, + "step": 18550 + }, + { + "epoch": 0.9533867817864118, + "grad_norm": 0.8002947568893433, + "learning_rate": 5.687237861887629e-08, + "loss": 0.6334, + "step": 18551 + }, + { + "epoch": 0.9534381745297564, + "grad_norm": 1.1651090383529663, + "learning_rate": 5.6747277709368055e-08, + "loss": 0.6828, + "step": 18552 + }, + { + "epoch": 0.9534895672731011, + "grad_norm": 1.054434895515442, + "learning_rate": 5.662231375699379e-08, + "loss": 0.6985, + "step": 18553 + }, + { + "epoch": 0.9535409600164457, + "grad_norm": 1.073267936706543, + "learning_rate": 5.649748676521516e-08, + "loss": 0.6687, + "step": 18554 + }, + { + "epoch": 0.9535923527597903, + "grad_norm": 1.1348817348480225, + "learning_rate": 5.637279673748997e-08, + "loss": 0.6842, + "step": 18555 + }, + { + "epoch": 0.9536437455031349, + "grad_norm": 1.079379916191101, + "learning_rate": 5.6248243677274326e-08, + "loss": 0.6614, + "step": 18556 + }, + { + "epoch": 0.9536951382464796, + "grad_norm": 1.1933705806732178, + "learning_rate": 5.6123827588017155e-08, + "loss": 0.6782, + "step": 18557 + }, + { + "epoch": 0.9537465309898242, + "grad_norm": 1.1850683689117432, + "learning_rate": 5.5999548473167355e-08, + "loss": 0.7161, + "step": 18558 + }, + { + "epoch": 0.9537979237331689, + "grad_norm": 0.9700796604156494, + "learning_rate": 5.587540633616662e-08, + "loss": 0.6576, + "step": 18559 + }, + { + "epoch": 0.9538493164765135, + "grad_norm": 1.1091818809509277, + "learning_rate": 5.5751401180456074e-08, + "loss": 0.6841, + "step": 18560 + }, + { + "epoch": 0.9539007092198581, + "grad_norm": 1.0881222486495972, + "learning_rate": 5.5627533009469657e-08, + "loss": 0.6912, + "step": 18561 + }, + { + "epoch": 0.9539521019632028, + "grad_norm": 1.0959827899932861, + "learning_rate": 5.5503801826640726e-08, + "loss": 0.7079, + "step": 18562 + }, + { + "epoch": 0.9540034947065474, + "grad_norm": 1.0534908771514893, + "learning_rate": 5.538020763539653e-08, + "loss": 0.7006, + "step": 18563 + }, + { + "epoch": 0.9540548874498921, + "grad_norm": 1.0893689393997192, + "learning_rate": 5.5256750439161565e-08, + "loss": 0.723, + "step": 18564 + }, + { + "epoch": 0.9541062801932367, + "grad_norm": 1.057265281677246, + "learning_rate": 5.513343024135587e-08, + "loss": 0.7079, + "step": 18565 + }, + { + "epoch": 0.9541576729365814, + "grad_norm": 1.0557259321212769, + "learning_rate": 5.50102470453967e-08, + "loss": 0.7296, + "step": 18566 + }, + { + "epoch": 0.954209065679926, + "grad_norm": 1.082780361175537, + "learning_rate": 5.4887200854697455e-08, + "loss": 0.6796, + "step": 18567 + }, + { + "epoch": 0.9542604584232707, + "grad_norm": 1.0965237617492676, + "learning_rate": 5.4764291672665395e-08, + "loss": 0.6664, + "step": 18568 + }, + { + "epoch": 0.9543118511666153, + "grad_norm": 1.02618408203125, + "learning_rate": 5.464151950270835e-08, + "loss": 0.6697, + "step": 18569 + }, + { + "epoch": 0.95436324390996, + "grad_norm": 1.0253819227218628, + "learning_rate": 5.451888434822583e-08, + "loss": 0.6777, + "step": 18570 + }, + { + "epoch": 0.9544146366533045, + "grad_norm": 1.0898034572601318, + "learning_rate": 5.4396386212616225e-08, + "loss": 0.7201, + "step": 18571 + }, + { + "epoch": 0.9544660293966492, + "grad_norm": 0.7667589783668518, + "learning_rate": 5.427402509927404e-08, + "loss": 0.648, + "step": 18572 + }, + { + "epoch": 0.9545174221399938, + "grad_norm": 1.1205601692199707, + "learning_rate": 5.415180101158823e-08, + "loss": 0.6971, + "step": 18573 + }, + { + "epoch": 0.9545688148833384, + "grad_norm": 1.1298917531967163, + "learning_rate": 5.402971395294554e-08, + "loss": 0.7141, + "step": 18574 + }, + { + "epoch": 0.9546202076266831, + "grad_norm": 1.2310457229614258, + "learning_rate": 5.3907763926728804e-08, + "loss": 0.6687, + "step": 18575 + }, + { + "epoch": 0.9546716003700277, + "grad_norm": 1.1133778095245361, + "learning_rate": 5.3785950936317e-08, + "loss": 0.7137, + "step": 18576 + }, + { + "epoch": 0.9547229931133724, + "grad_norm": 1.1581921577453613, + "learning_rate": 5.366427498508409e-08, + "loss": 0.7358, + "step": 18577 + }, + { + "epoch": 0.954774385856717, + "grad_norm": 1.101252555847168, + "learning_rate": 5.3542736076402366e-08, + "loss": 0.7525, + "step": 18578 + }, + { + "epoch": 0.9548257786000617, + "grad_norm": 1.1212066411972046, + "learning_rate": 5.3421334213638597e-08, + "loss": 0.6701, + "step": 18579 + }, + { + "epoch": 0.9548771713434063, + "grad_norm": 1.0468226671218872, + "learning_rate": 5.330006940015564e-08, + "loss": 0.7034, + "step": 18580 + }, + { + "epoch": 0.954928564086751, + "grad_norm": 1.1018781661987305, + "learning_rate": 5.317894163931414e-08, + "loss": 0.6954, + "step": 18581 + }, + { + "epoch": 0.9549799568300956, + "grad_norm": 0.666149914264679, + "learning_rate": 5.3057950934470856e-08, + "loss": 0.6264, + "step": 18582 + }, + { + "epoch": 0.9550313495734403, + "grad_norm": 1.0723998546600342, + "learning_rate": 5.293709728897589e-08, + "loss": 0.7156, + "step": 18583 + }, + { + "epoch": 0.9550827423167849, + "grad_norm": 1.2199606895446777, + "learning_rate": 5.281638070617878e-08, + "loss": 0.6588, + "step": 18584 + }, + { + "epoch": 0.9551341350601296, + "grad_norm": 1.0921270847320557, + "learning_rate": 5.269580118942408e-08, + "loss": 0.7056, + "step": 18585 + }, + { + "epoch": 0.9551855278034741, + "grad_norm": 1.1010504961013794, + "learning_rate": 5.257535874205244e-08, + "loss": 0.7068, + "step": 18586 + }, + { + "epoch": 0.9552369205468187, + "grad_norm": 1.134931206703186, + "learning_rate": 5.245505336740064e-08, + "loss": 0.7145, + "step": 18587 + }, + { + "epoch": 0.9552883132901634, + "grad_norm": 0.7802860140800476, + "learning_rate": 5.2334885068802135e-08, + "loss": 0.6304, + "step": 18588 + }, + { + "epoch": 0.955339706033508, + "grad_norm": 1.088397741317749, + "learning_rate": 5.2214853849586465e-08, + "loss": 0.7122, + "step": 18589 + }, + { + "epoch": 0.9553910987768527, + "grad_norm": 1.1143532991409302, + "learning_rate": 5.209495971307821e-08, + "loss": 0.7046, + "step": 18590 + }, + { + "epoch": 0.9554424915201973, + "grad_norm": 1.1202495098114014, + "learning_rate": 5.197520266260081e-08, + "loss": 0.712, + "step": 18591 + }, + { + "epoch": 0.955493884263542, + "grad_norm": 1.0739589929580688, + "learning_rate": 5.1855582701469955e-08, + "loss": 0.6694, + "step": 18592 + }, + { + "epoch": 0.9555452770068866, + "grad_norm": 1.1087214946746826, + "learning_rate": 5.1736099833001876e-08, + "loss": 0.7205, + "step": 18593 + }, + { + "epoch": 0.9555966697502313, + "grad_norm": 1.1017102003097534, + "learning_rate": 5.161675406050615e-08, + "loss": 0.7547, + "step": 18594 + }, + { + "epoch": 0.9556480624935759, + "grad_norm": 0.7690007090568542, + "learning_rate": 5.149754538728902e-08, + "loss": 0.612, + "step": 18595 + }, + { + "epoch": 0.9556994552369206, + "grad_norm": 0.8100370168685913, + "learning_rate": 5.137847381665451e-08, + "loss": 0.6654, + "step": 18596 + }, + { + "epoch": 0.9557508479802652, + "grad_norm": 1.0310572385787964, + "learning_rate": 5.1259539351899976e-08, + "loss": 0.7062, + "step": 18597 + }, + { + "epoch": 0.9558022407236099, + "grad_norm": 1.1250792741775513, + "learning_rate": 5.1140741996321685e-08, + "loss": 0.6585, + "step": 18598 + }, + { + "epoch": 0.9558536334669545, + "grad_norm": 1.0422991514205933, + "learning_rate": 5.102208175321033e-08, + "loss": 0.7023, + "step": 18599 + }, + { + "epoch": 0.9559050262102992, + "grad_norm": 1.1391725540161133, + "learning_rate": 5.090355862585439e-08, + "loss": 0.7041, + "step": 18600 + }, + { + "epoch": 0.9559564189536437, + "grad_norm": 0.7220892310142517, + "learning_rate": 5.078517261753679e-08, + "loss": 0.5987, + "step": 18601 + }, + { + "epoch": 0.9560078116969883, + "grad_norm": 0.6920177936553955, + "learning_rate": 5.06669237315377e-08, + "loss": 0.65, + "step": 18602 + }, + { + "epoch": 0.956059204440333, + "grad_norm": 1.032349944114685, + "learning_rate": 5.054881197113337e-08, + "loss": 0.6448, + "step": 18603 + }, + { + "epoch": 0.9561105971836776, + "grad_norm": 1.101815104484558, + "learning_rate": 5.043083733959675e-08, + "loss": 0.7323, + "step": 18604 + }, + { + "epoch": 0.9561619899270223, + "grad_norm": 1.0962743759155273, + "learning_rate": 5.031299984019633e-08, + "loss": 0.6853, + "step": 18605 + }, + { + "epoch": 0.9562133826703669, + "grad_norm": 1.0746657848358154, + "learning_rate": 5.0195299476195616e-08, + "loss": 0.7137, + "step": 18606 + }, + { + "epoch": 0.9562647754137116, + "grad_norm": 1.0905104875564575, + "learning_rate": 5.007773625085699e-08, + "loss": 0.7003, + "step": 18607 + }, + { + "epoch": 0.9563161681570562, + "grad_norm": 1.0539453029632568, + "learning_rate": 4.996031016743674e-08, + "loss": 0.6621, + "step": 18608 + }, + { + "epoch": 0.9563675609004009, + "grad_norm": 1.0639744997024536, + "learning_rate": 4.9843021229189495e-08, + "loss": 0.6989, + "step": 18609 + }, + { + "epoch": 0.9564189536437455, + "grad_norm": 1.128825068473816, + "learning_rate": 4.9725869439363195e-08, + "loss": 0.7088, + "step": 18610 + }, + { + "epoch": 0.9564703463870902, + "grad_norm": 1.1343121528625488, + "learning_rate": 4.960885480120525e-08, + "loss": 0.662, + "step": 18611 + }, + { + "epoch": 0.9565217391304348, + "grad_norm": 1.107842206954956, + "learning_rate": 4.949197731795641e-08, + "loss": 0.6836, + "step": 18612 + }, + { + "epoch": 0.9565731318737795, + "grad_norm": 1.1908289194107056, + "learning_rate": 4.937523699285518e-08, + "loss": 0.7169, + "step": 18613 + }, + { + "epoch": 0.9566245246171241, + "grad_norm": 1.1074457168579102, + "learning_rate": 4.925863382913676e-08, + "loss": 0.7086, + "step": 18614 + }, + { + "epoch": 0.9566759173604688, + "grad_norm": 1.1247050762176514, + "learning_rate": 4.914216783003079e-08, + "loss": 0.6628, + "step": 18615 + }, + { + "epoch": 0.9567273101038133, + "grad_norm": 0.801388680934906, + "learning_rate": 4.902583899876357e-08, + "loss": 0.6696, + "step": 18616 + }, + { + "epoch": 0.9567787028471579, + "grad_norm": 1.1273503303527832, + "learning_rate": 4.890964733855974e-08, + "loss": 0.7419, + "step": 18617 + }, + { + "epoch": 0.9568300955905026, + "grad_norm": 1.1985262632369995, + "learning_rate": 4.879359285263785e-08, + "loss": 0.7112, + "step": 18618 + }, + { + "epoch": 0.9568814883338472, + "grad_norm": 1.101425290107727, + "learning_rate": 4.867767554421199e-08, + "loss": 0.6751, + "step": 18619 + }, + { + "epoch": 0.9569328810771919, + "grad_norm": 1.1299301385879517, + "learning_rate": 4.856189541649625e-08, + "loss": 0.7197, + "step": 18620 + }, + { + "epoch": 0.9569842738205365, + "grad_norm": 1.1485008001327515, + "learning_rate": 4.844625247269585e-08, + "loss": 0.7466, + "step": 18621 + }, + { + "epoch": 0.9570356665638812, + "grad_norm": 1.1370301246643066, + "learning_rate": 4.833074671601656e-08, + "loss": 0.7132, + "step": 18622 + }, + { + "epoch": 0.9570870593072258, + "grad_norm": 1.148215413093567, + "learning_rate": 4.821537814965804e-08, + "loss": 0.707, + "step": 18623 + }, + { + "epoch": 0.9571384520505705, + "grad_norm": 1.044503092765808, + "learning_rate": 4.810014677681607e-08, + "loss": 0.6896, + "step": 18624 + }, + { + "epoch": 0.9571898447939151, + "grad_norm": 1.0706899166107178, + "learning_rate": 4.7985052600683644e-08, + "loss": 0.6793, + "step": 18625 + }, + { + "epoch": 0.9572412375372598, + "grad_norm": 1.0646222829818726, + "learning_rate": 4.787009562445044e-08, + "loss": 0.6842, + "step": 18626 + }, + { + "epoch": 0.9572926302806044, + "grad_norm": 0.7521428465843201, + "learning_rate": 4.775527585130002e-08, + "loss": 0.6474, + "step": 18627 + }, + { + "epoch": 0.957344023023949, + "grad_norm": 1.0449061393737793, + "learning_rate": 4.7640593284414283e-08, + "loss": 0.6459, + "step": 18628 + }, + { + "epoch": 0.9573954157672937, + "grad_norm": 1.061620831489563, + "learning_rate": 4.7526047926970686e-08, + "loss": 0.7335, + "step": 18629 + }, + { + "epoch": 0.9574468085106383, + "grad_norm": 1.1447478532791138, + "learning_rate": 4.741163978214281e-08, + "loss": 0.7452, + "step": 18630 + }, + { + "epoch": 0.9574982012539829, + "grad_norm": 1.1448407173156738, + "learning_rate": 4.7297368853100325e-08, + "loss": 0.6594, + "step": 18631 + }, + { + "epoch": 0.9575495939973275, + "grad_norm": 1.2221343517303467, + "learning_rate": 4.718323514300849e-08, + "loss": 0.7158, + "step": 18632 + }, + { + "epoch": 0.9576009867406722, + "grad_norm": 1.0822041034698486, + "learning_rate": 4.706923865503088e-08, + "loss": 0.6869, + "step": 18633 + }, + { + "epoch": 0.9576523794840168, + "grad_norm": 1.082029938697815, + "learning_rate": 4.695537939232442e-08, + "loss": 0.6735, + "step": 18634 + }, + { + "epoch": 0.9577037722273615, + "grad_norm": 1.0523247718811035, + "learning_rate": 4.684165735804491e-08, + "loss": 0.6601, + "step": 18635 + }, + { + "epoch": 0.9577551649707061, + "grad_norm": 1.1622936725616455, + "learning_rate": 4.6728072555342616e-08, + "loss": 0.7172, + "step": 18636 + }, + { + "epoch": 0.9578065577140508, + "grad_norm": 0.7009083032608032, + "learning_rate": 4.661462498736391e-08, + "loss": 0.6466, + "step": 18637 + }, + { + "epoch": 0.9578579504573954, + "grad_norm": 1.0581218004226685, + "learning_rate": 4.650131465725294e-08, + "loss": 0.7291, + "step": 18638 + }, + { + "epoch": 0.9579093432007401, + "grad_norm": 0.7652928829193115, + "learning_rate": 4.638814156814886e-08, + "loss": 0.6236, + "step": 18639 + }, + { + "epoch": 0.9579607359440847, + "grad_norm": 1.0943200588226318, + "learning_rate": 4.627510572318694e-08, + "loss": 0.7053, + "step": 18640 + }, + { + "epoch": 0.9580121286874294, + "grad_norm": 1.1425758600234985, + "learning_rate": 4.616220712549857e-08, + "loss": 0.6497, + "step": 18641 + }, + { + "epoch": 0.958063521430774, + "grad_norm": 1.0686031579971313, + "learning_rate": 4.604944577821235e-08, + "loss": 0.6867, + "step": 18642 + }, + { + "epoch": 0.9581149141741186, + "grad_norm": 1.1265840530395508, + "learning_rate": 4.593682168445246e-08, + "loss": 0.7082, + "step": 18643 + }, + { + "epoch": 0.9581663069174633, + "grad_norm": 1.138424277305603, + "learning_rate": 4.582433484733917e-08, + "loss": 0.7235, + "step": 18644 + }, + { + "epoch": 0.9582176996608079, + "grad_norm": 1.1426639556884766, + "learning_rate": 4.571198526998832e-08, + "loss": 0.6913, + "step": 18645 + }, + { + "epoch": 0.9582690924041525, + "grad_norm": 0.7291485071182251, + "learning_rate": 4.5599772955514096e-08, + "loss": 0.6014, + "step": 18646 + }, + { + "epoch": 0.9583204851474971, + "grad_norm": 0.8170145750045776, + "learning_rate": 4.5487697907024006e-08, + "loss": 0.6222, + "step": 18647 + }, + { + "epoch": 0.9583718778908418, + "grad_norm": 1.0010935068130493, + "learning_rate": 4.53757601276239e-08, + "loss": 0.6867, + "step": 18648 + }, + { + "epoch": 0.9584232706341864, + "grad_norm": 1.1479108333587646, + "learning_rate": 4.5263959620414633e-08, + "loss": 0.6372, + "step": 18649 + }, + { + "epoch": 0.9584746633775311, + "grad_norm": 1.09955894947052, + "learning_rate": 4.5152296388494276e-08, + "loss": 0.6808, + "step": 18650 + }, + { + "epoch": 0.9585260561208757, + "grad_norm": 1.039372205734253, + "learning_rate": 4.504077043495592e-08, + "loss": 0.6703, + "step": 18651 + }, + { + "epoch": 0.9585774488642204, + "grad_norm": 0.8475385904312134, + "learning_rate": 4.492938176289041e-08, + "loss": 0.6748, + "step": 18652 + }, + { + "epoch": 0.958628841607565, + "grad_norm": 1.1384634971618652, + "learning_rate": 4.481813037538363e-08, + "loss": 0.709, + "step": 18653 + }, + { + "epoch": 0.9586802343509097, + "grad_norm": 0.7860634326934814, + "learning_rate": 4.470701627551699e-08, + "loss": 0.6332, + "step": 18654 + }, + { + "epoch": 0.9587316270942543, + "grad_norm": 1.1513521671295166, + "learning_rate": 4.4596039466369704e-08, + "loss": 0.6856, + "step": 18655 + }, + { + "epoch": 0.958783019837599, + "grad_norm": 1.098516821861267, + "learning_rate": 4.448519995101652e-08, + "loss": 0.6498, + "step": 18656 + }, + { + "epoch": 0.9588344125809436, + "grad_norm": 0.7560486197471619, + "learning_rate": 4.437449773252889e-08, + "loss": 0.6133, + "step": 18657 + }, + { + "epoch": 0.9588858053242882, + "grad_norm": 1.203675627708435, + "learning_rate": 4.426393281397212e-08, + "loss": 0.6939, + "step": 18658 + }, + { + "epoch": 0.9589371980676329, + "grad_norm": 2.11722469329834, + "learning_rate": 4.415350519841155e-08, + "loss": 0.6967, + "step": 18659 + }, + { + "epoch": 0.9589885908109775, + "grad_norm": 1.1580649614334106, + "learning_rate": 4.404321488890528e-08, + "loss": 0.6797, + "step": 18660 + }, + { + "epoch": 0.9590399835543222, + "grad_norm": 0.8317680358886719, + "learning_rate": 4.3933061888509765e-08, + "loss": 0.6352, + "step": 18661 + }, + { + "epoch": 0.9590913762976667, + "grad_norm": 1.0601463317871094, + "learning_rate": 4.3823046200277e-08, + "loss": 0.6917, + "step": 18662 + }, + { + "epoch": 0.9591427690410114, + "grad_norm": 1.0961825847625732, + "learning_rate": 4.3713167827253986e-08, + "loss": 0.7036, + "step": 18663 + }, + { + "epoch": 0.959194161784356, + "grad_norm": 1.1072916984558105, + "learning_rate": 4.360342677248608e-08, + "loss": 0.7072, + "step": 18664 + }, + { + "epoch": 0.9592455545277007, + "grad_norm": 1.132230281829834, + "learning_rate": 4.349382303901306e-08, + "loss": 0.7106, + "step": 18665 + }, + { + "epoch": 0.9592969472710453, + "grad_norm": 1.0210397243499756, + "learning_rate": 4.338435662987195e-08, + "loss": 0.6692, + "step": 18666 + }, + { + "epoch": 0.95934834001439, + "grad_norm": 0.7035905718803406, + "learning_rate": 4.327502754809587e-08, + "loss": 0.6078, + "step": 18667 + }, + { + "epoch": 0.9593997327577346, + "grad_norm": 1.1251174211502075, + "learning_rate": 4.316583579671352e-08, + "loss": 0.7309, + "step": 18668 + }, + { + "epoch": 0.9594511255010792, + "grad_norm": 1.049394130706787, + "learning_rate": 4.3056781378750246e-08, + "loss": 0.6852, + "step": 18669 + }, + { + "epoch": 0.9595025182444239, + "grad_norm": 1.1074708700180054, + "learning_rate": 4.294786429722697e-08, + "loss": 0.7042, + "step": 18670 + }, + { + "epoch": 0.9595539109877685, + "grad_norm": 1.0724937915802002, + "learning_rate": 4.283908455516239e-08, + "loss": 0.6443, + "step": 18671 + }, + { + "epoch": 0.9596053037311132, + "grad_norm": 1.0756522417068481, + "learning_rate": 4.273044215556965e-08, + "loss": 0.7478, + "step": 18672 + }, + { + "epoch": 0.9596566964744578, + "grad_norm": 1.1108465194702148, + "learning_rate": 4.262193710145912e-08, + "loss": 0.7439, + "step": 18673 + }, + { + "epoch": 0.9597080892178025, + "grad_norm": 1.127938151359558, + "learning_rate": 4.2513569395836176e-08, + "loss": 0.7338, + "step": 18674 + }, + { + "epoch": 0.9597594819611471, + "grad_norm": 1.1073179244995117, + "learning_rate": 4.240533904170452e-08, + "loss": 0.6913, + "step": 18675 + }, + { + "epoch": 0.9598108747044918, + "grad_norm": 0.7691919803619385, + "learning_rate": 4.2297246042061754e-08, + "loss": 0.6461, + "step": 18676 + }, + { + "epoch": 0.9598622674478363, + "grad_norm": 1.1023718118667603, + "learning_rate": 4.2189290399903274e-08, + "loss": 0.6794, + "step": 18677 + }, + { + "epoch": 0.959913660191181, + "grad_norm": 0.8810712695121765, + "learning_rate": 4.208147211822e-08, + "loss": 0.6708, + "step": 18678 + }, + { + "epoch": 0.9599650529345256, + "grad_norm": 1.1336606740951538, + "learning_rate": 4.197379119999845e-08, + "loss": 0.6516, + "step": 18679 + }, + { + "epoch": 0.9600164456778703, + "grad_norm": 1.0686670541763306, + "learning_rate": 4.186624764822289e-08, + "loss": 0.6642, + "step": 18680 + }, + { + "epoch": 0.9600678384212149, + "grad_norm": 1.115654706954956, + "learning_rate": 4.175884146587261e-08, + "loss": 0.6875, + "step": 18681 + }, + { + "epoch": 0.9601192311645596, + "grad_norm": 0.7161726355552673, + "learning_rate": 4.1651572655923564e-08, + "loss": 0.6589, + "step": 18682 + }, + { + "epoch": 0.9601706239079042, + "grad_norm": 1.1350669860839844, + "learning_rate": 4.154444122134726e-08, + "loss": 0.7162, + "step": 18683 + }, + { + "epoch": 0.9602220166512488, + "grad_norm": 1.060953140258789, + "learning_rate": 4.1437447165112446e-08, + "loss": 0.6982, + "step": 18684 + }, + { + "epoch": 0.9602734093945935, + "grad_norm": 1.0805357694625854, + "learning_rate": 4.133059049018284e-08, + "loss": 0.6512, + "step": 18685 + }, + { + "epoch": 0.9603248021379381, + "grad_norm": 1.132425308227539, + "learning_rate": 4.122387119951998e-08, + "loss": 0.7607, + "step": 18686 + }, + { + "epoch": 0.9603761948812828, + "grad_norm": 1.140392780303955, + "learning_rate": 4.1117289296079274e-08, + "loss": 0.6856, + "step": 18687 + }, + { + "epoch": 0.9604275876246274, + "grad_norm": 0.7822204828262329, + "learning_rate": 4.1010844782814475e-08, + "loss": 0.606, + "step": 18688 + }, + { + "epoch": 0.9604789803679721, + "grad_norm": 1.117435097694397, + "learning_rate": 4.090453766267488e-08, + "loss": 0.7681, + "step": 18689 + }, + { + "epoch": 0.9605303731113167, + "grad_norm": 1.07142174243927, + "learning_rate": 4.079836793860537e-08, + "loss": 0.6953, + "step": 18690 + }, + { + "epoch": 0.9605817658546614, + "grad_norm": 1.1133992671966553, + "learning_rate": 4.0692335613548576e-08, + "loss": 0.7242, + "step": 18691 + }, + { + "epoch": 0.9606331585980059, + "grad_norm": 0.6592369079589844, + "learning_rate": 4.058644069043993e-08, + "loss": 0.6383, + "step": 18692 + }, + { + "epoch": 0.9606845513413506, + "grad_norm": 1.2187731266021729, + "learning_rate": 4.048068317221543e-08, + "loss": 0.7294, + "step": 18693 + }, + { + "epoch": 0.9607359440846952, + "grad_norm": 1.0944730043411255, + "learning_rate": 4.037506306180439e-08, + "loss": 0.6569, + "step": 18694 + }, + { + "epoch": 0.9607873368280399, + "grad_norm": 1.1488984823226929, + "learning_rate": 4.0269580362133354e-08, + "loss": 0.7034, + "step": 18695 + }, + { + "epoch": 0.9608387295713845, + "grad_norm": 1.1664592027664185, + "learning_rate": 4.016423507612443e-08, + "loss": 0.6774, + "step": 18696 + }, + { + "epoch": 0.9608901223147291, + "grad_norm": 1.3573521375656128, + "learning_rate": 4.005902720669641e-08, + "loss": 0.7438, + "step": 18697 + }, + { + "epoch": 0.9609415150580738, + "grad_norm": 1.0690838098526, + "learning_rate": 3.995395675676472e-08, + "loss": 0.6623, + "step": 18698 + }, + { + "epoch": 0.9609929078014184, + "grad_norm": 0.8270371556282043, + "learning_rate": 3.984902372923982e-08, + "loss": 0.6056, + "step": 18699 + }, + { + "epoch": 0.9610443005447631, + "grad_norm": 1.1175559759140015, + "learning_rate": 3.9744228127028825e-08, + "loss": 0.6835, + "step": 18700 + }, + { + "epoch": 0.9610956932881077, + "grad_norm": 1.1107882261276245, + "learning_rate": 3.963956995303553e-08, + "loss": 0.6283, + "step": 18701 + }, + { + "epoch": 0.9611470860314524, + "grad_norm": 1.188991665840149, + "learning_rate": 3.953504921015983e-08, + "loss": 0.6881, + "step": 18702 + }, + { + "epoch": 0.961198478774797, + "grad_norm": 0.8681862950325012, + "learning_rate": 3.943066590129663e-08, + "loss": 0.5955, + "step": 18703 + }, + { + "epoch": 0.9612498715181417, + "grad_norm": 1.1701823472976685, + "learning_rate": 3.932642002933973e-08, + "loss": 0.6943, + "step": 18704 + }, + { + "epoch": 0.9613012642614863, + "grad_norm": 1.1131714582443237, + "learning_rate": 3.922231159717516e-08, + "loss": 0.735, + "step": 18705 + }, + { + "epoch": 0.961352657004831, + "grad_norm": 1.1054378747940063, + "learning_rate": 3.9118340607689484e-08, + "loss": 0.6973, + "step": 18706 + }, + { + "epoch": 0.9614040497481755, + "grad_norm": 1.0071227550506592, + "learning_rate": 3.901450706376153e-08, + "loss": 0.6695, + "step": 18707 + }, + { + "epoch": 0.9614554424915202, + "grad_norm": 0.6596884727478027, + "learning_rate": 3.891081096826899e-08, + "loss": 0.6239, + "step": 18708 + }, + { + "epoch": 0.9615068352348648, + "grad_norm": 1.1368803977966309, + "learning_rate": 3.880725232408455e-08, + "loss": 0.7025, + "step": 18709 + }, + { + "epoch": 0.9615582279782094, + "grad_norm": 1.1497546434402466, + "learning_rate": 3.870383113407761e-08, + "loss": 0.7066, + "step": 18710 + }, + { + "epoch": 0.9616096207215541, + "grad_norm": 1.006225347518921, + "learning_rate": 3.860054740111363e-08, + "loss": 0.6488, + "step": 18711 + }, + { + "epoch": 0.9616610134648987, + "grad_norm": 1.1463208198547363, + "learning_rate": 3.8497401128053114e-08, + "loss": 0.6369, + "step": 18712 + }, + { + "epoch": 0.9617124062082434, + "grad_norm": 0.6772070527076721, + "learning_rate": 3.839439231775599e-08, + "loss": 0.6253, + "step": 18713 + }, + { + "epoch": 0.961763798951588, + "grad_norm": 1.136577844619751, + "learning_rate": 3.829152097307387e-08, + "loss": 0.6974, + "step": 18714 + }, + { + "epoch": 0.9618151916949327, + "grad_norm": 1.1231223344802856, + "learning_rate": 3.818878709685836e-08, + "loss": 0.6748, + "step": 18715 + }, + { + "epoch": 0.9618665844382773, + "grad_norm": 1.065050482749939, + "learning_rate": 3.808619069195552e-08, + "loss": 0.6619, + "step": 18716 + }, + { + "epoch": 0.961917977181622, + "grad_norm": 1.0246723890304565, + "learning_rate": 3.798373176120806e-08, + "loss": 0.7145, + "step": 18717 + }, + { + "epoch": 0.9619693699249666, + "grad_norm": 1.034804105758667, + "learning_rate": 3.7881410307453735e-08, + "loss": 0.6466, + "step": 18718 + }, + { + "epoch": 0.9620207626683113, + "grad_norm": 1.031685709953308, + "learning_rate": 3.777922633352804e-08, + "loss": 0.7178, + "step": 18719 + }, + { + "epoch": 0.9620721554116559, + "grad_norm": 1.070880651473999, + "learning_rate": 3.76771798422626e-08, + "loss": 0.688, + "step": 18720 + }, + { + "epoch": 0.9621235481550006, + "grad_norm": 1.0734368562698364, + "learning_rate": 3.7575270836483493e-08, + "loss": 0.6145, + "step": 18721 + }, + { + "epoch": 0.9621749408983451, + "grad_norm": 1.1854833364486694, + "learning_rate": 3.747349931901512e-08, + "loss": 0.681, + "step": 18722 + }, + { + "epoch": 0.9622263336416897, + "grad_norm": 0.7495351433753967, + "learning_rate": 3.7371865292677444e-08, + "loss": 0.6212, + "step": 18723 + }, + { + "epoch": 0.9622777263850344, + "grad_norm": 1.100218653678894, + "learning_rate": 3.7270368760285445e-08, + "loss": 0.6763, + "step": 18724 + }, + { + "epoch": 0.962329119128379, + "grad_norm": 1.0150842666625977, + "learning_rate": 3.716900972465187e-08, + "loss": 0.6888, + "step": 18725 + }, + { + "epoch": 0.9623805118717237, + "grad_norm": 1.0960780382156372, + "learning_rate": 3.706778818858448e-08, + "loss": 0.7086, + "step": 18726 + }, + { + "epoch": 0.9624319046150683, + "grad_norm": 1.0719295740127563, + "learning_rate": 3.696670415488768e-08, + "loss": 0.6987, + "step": 18727 + }, + { + "epoch": 0.962483297358413, + "grad_norm": 1.0648680925369263, + "learning_rate": 3.686575762636257e-08, + "loss": 0.686, + "step": 18728 + }, + { + "epoch": 0.9625346901017576, + "grad_norm": 1.0990171432495117, + "learning_rate": 3.6764948605805265e-08, + "loss": 0.6769, + "step": 18729 + }, + { + "epoch": 0.9625860828451023, + "grad_norm": 1.0862153768539429, + "learning_rate": 3.666427709600961e-08, + "loss": 0.7492, + "step": 18730 + }, + { + "epoch": 0.9626374755884469, + "grad_norm": 0.7320635318756104, + "learning_rate": 3.656374309976396e-08, + "loss": 0.6885, + "step": 18731 + }, + { + "epoch": 0.9626888683317916, + "grad_norm": 1.1207351684570312, + "learning_rate": 3.64633466198544e-08, + "loss": 0.707, + "step": 18732 + }, + { + "epoch": 0.9627402610751362, + "grad_norm": 1.0549672842025757, + "learning_rate": 3.6363087659062047e-08, + "loss": 0.6952, + "step": 18733 + }, + { + "epoch": 0.9627916538184809, + "grad_norm": 1.0477707386016846, + "learning_rate": 3.6262966220164676e-08, + "loss": 0.6906, + "step": 18734 + }, + { + "epoch": 0.9628430465618255, + "grad_norm": 1.064225435256958, + "learning_rate": 3.616298230593673e-08, + "loss": 0.694, + "step": 18735 + }, + { + "epoch": 0.9628944393051702, + "grad_norm": 1.1177211999893188, + "learning_rate": 3.6063135919147675e-08, + "loss": 0.7156, + "step": 18736 + }, + { + "epoch": 0.9629458320485148, + "grad_norm": 1.108678936958313, + "learning_rate": 3.596342706256473e-08, + "loss": 0.7006, + "step": 18737 + }, + { + "epoch": 0.9629972247918593, + "grad_norm": 1.0977599620819092, + "learning_rate": 3.5863855738949015e-08, + "loss": 0.6981, + "step": 18738 + }, + { + "epoch": 0.963048617535204, + "grad_norm": 1.0243377685546875, + "learning_rate": 3.576442195106056e-08, + "loss": 0.6644, + "step": 18739 + }, + { + "epoch": 0.9631000102785486, + "grad_norm": 1.1440057754516602, + "learning_rate": 3.5665125701653816e-08, + "loss": 0.7731, + "step": 18740 + }, + { + "epoch": 0.9631514030218933, + "grad_norm": 1.1067330837249756, + "learning_rate": 3.5565966993479363e-08, + "loss": 0.6632, + "step": 18741 + }, + { + "epoch": 0.9632027957652379, + "grad_norm": 1.2536667585372925, + "learning_rate": 3.546694582928611e-08, + "loss": 0.6299, + "step": 18742 + }, + { + "epoch": 0.9632541885085826, + "grad_norm": 1.1487494707107544, + "learning_rate": 3.5368062211815216e-08, + "loss": 0.6975, + "step": 18743 + }, + { + "epoch": 0.9633055812519272, + "grad_norm": 1.0988941192626953, + "learning_rate": 3.5269316143807796e-08, + "loss": 0.6806, + "step": 18744 + }, + { + "epoch": 0.9633569739952719, + "grad_norm": 1.055826187133789, + "learning_rate": 3.517070762799946e-08, + "loss": 0.6882, + "step": 18745 + }, + { + "epoch": 0.9634083667386165, + "grad_norm": 1.4291555881500244, + "learning_rate": 3.507223666712245e-08, + "loss": 0.7055, + "step": 18746 + }, + { + "epoch": 0.9634597594819612, + "grad_norm": 0.75838702917099, + "learning_rate": 3.497390326390404e-08, + "loss": 0.6424, + "step": 18747 + }, + { + "epoch": 0.9635111522253058, + "grad_norm": 1.1375784873962402, + "learning_rate": 3.487570742106983e-08, + "loss": 0.6865, + "step": 18748 + }, + { + "epoch": 0.9635625449686505, + "grad_norm": 1.0791219472885132, + "learning_rate": 3.4777649141339854e-08, + "loss": 0.7142, + "step": 18749 + }, + { + "epoch": 0.9636139377119951, + "grad_norm": 1.0075712203979492, + "learning_rate": 3.467972842743139e-08, + "loss": 0.6366, + "step": 18750 + }, + { + "epoch": 0.9636653304553398, + "grad_norm": 1.1120567321777344, + "learning_rate": 3.458194528205616e-08, + "loss": 0.715, + "step": 18751 + }, + { + "epoch": 0.9637167231986844, + "grad_norm": 1.1362496614456177, + "learning_rate": 3.4484299707924216e-08, + "loss": 0.7038, + "step": 18752 + }, + { + "epoch": 0.9637681159420289, + "grad_norm": 1.0460567474365234, + "learning_rate": 3.4386791707741177e-08, + "loss": 0.6677, + "step": 18753 + }, + { + "epoch": 0.9638195086853736, + "grad_norm": 1.0803955793380737, + "learning_rate": 3.42894212842082e-08, + "loss": 0.6836, + "step": 18754 + }, + { + "epoch": 0.9638709014287182, + "grad_norm": 0.6990349888801575, + "learning_rate": 3.4192188440023696e-08, + "loss": 0.6523, + "step": 18755 + }, + { + "epoch": 0.9639222941720629, + "grad_norm": 1.0684380531311035, + "learning_rate": 3.409509317787996e-08, + "loss": 0.7144, + "step": 18756 + }, + { + "epoch": 0.9639736869154075, + "grad_norm": 1.1243247985839844, + "learning_rate": 3.3998135500468707e-08, + "loss": 0.659, + "step": 18757 + }, + { + "epoch": 0.9640250796587522, + "grad_norm": 1.0722790956497192, + "learning_rate": 3.3901315410475586e-08, + "loss": 0.6733, + "step": 18758 + }, + { + "epoch": 0.9640764724020968, + "grad_norm": 0.674856424331665, + "learning_rate": 3.380463291058289e-08, + "loss": 0.6423, + "step": 18759 + }, + { + "epoch": 0.9641278651454415, + "grad_norm": 1.1426196098327637, + "learning_rate": 3.370808800346959e-08, + "loss": 0.6972, + "step": 18760 + }, + { + "epoch": 0.9641792578887861, + "grad_norm": 1.0138877630233765, + "learning_rate": 3.361168069181075e-08, + "loss": 0.6707, + "step": 18761 + }, + { + "epoch": 0.9642306506321308, + "grad_norm": 1.0983363389968872, + "learning_rate": 3.351541097827704e-08, + "loss": 0.6222, + "step": 18762 + }, + { + "epoch": 0.9642820433754754, + "grad_norm": 1.1426610946655273, + "learning_rate": 3.3419278865535196e-08, + "loss": 0.7086, + "step": 18763 + }, + { + "epoch": 0.96433343611882, + "grad_norm": 1.1298636198043823, + "learning_rate": 3.3323284356250316e-08, + "loss": 0.69, + "step": 18764 + }, + { + "epoch": 0.9643848288621647, + "grad_norm": 1.0991857051849365, + "learning_rate": 3.322742745308028e-08, + "loss": 0.6982, + "step": 18765 + }, + { + "epoch": 0.9644362216055093, + "grad_norm": 1.1431920528411865, + "learning_rate": 3.313170815868183e-08, + "loss": 0.7004, + "step": 18766 + }, + { + "epoch": 0.964487614348854, + "grad_norm": 1.0627944469451904, + "learning_rate": 3.3036126475706196e-08, + "loss": 0.6928, + "step": 18767 + }, + { + "epoch": 0.9645390070921985, + "grad_norm": 1.060699224472046, + "learning_rate": 3.294068240680293e-08, + "loss": 0.6553, + "step": 18768 + }, + { + "epoch": 0.9645903998355432, + "grad_norm": 1.1477888822555542, + "learning_rate": 3.28453759546149e-08, + "loss": 0.7421, + "step": 18769 + }, + { + "epoch": 0.9646417925788878, + "grad_norm": 1.1723428964614868, + "learning_rate": 3.275020712178334e-08, + "loss": 0.6844, + "step": 18770 + }, + { + "epoch": 0.9646931853222325, + "grad_norm": 1.0870871543884277, + "learning_rate": 3.265517591094503e-08, + "loss": 0.681, + "step": 18771 + }, + { + "epoch": 0.9647445780655771, + "grad_norm": 1.1194822788238525, + "learning_rate": 3.256028232473285e-08, + "loss": 0.6448, + "step": 18772 + }, + { + "epoch": 0.9647959708089218, + "grad_norm": 1.0316683053970337, + "learning_rate": 3.246552636577582e-08, + "loss": 0.6653, + "step": 18773 + }, + { + "epoch": 0.9648473635522664, + "grad_norm": 1.206403374671936, + "learning_rate": 3.237090803669907e-08, + "loss": 0.6406, + "step": 18774 + }, + { + "epoch": 0.9648987562956111, + "grad_norm": 1.1178646087646484, + "learning_rate": 3.227642734012437e-08, + "loss": 0.7032, + "step": 18775 + }, + { + "epoch": 0.9649501490389557, + "grad_norm": 1.0351780652999878, + "learning_rate": 3.218208427866909e-08, + "loss": 0.6666, + "step": 18776 + }, + { + "epoch": 0.9650015417823004, + "grad_norm": 1.0891104936599731, + "learning_rate": 3.20878788549478e-08, + "loss": 0.6841, + "step": 18777 + }, + { + "epoch": 0.965052934525645, + "grad_norm": 1.0699228048324585, + "learning_rate": 3.199381107156951e-08, + "loss": 0.713, + "step": 18778 + }, + { + "epoch": 0.9651043272689896, + "grad_norm": 1.0828994512557983, + "learning_rate": 3.1899880931141045e-08, + "loss": 0.7336, + "step": 18779 + }, + { + "epoch": 0.9651557200123343, + "grad_norm": 1.0952237844467163, + "learning_rate": 3.180608843626476e-08, + "loss": 0.6548, + "step": 18780 + }, + { + "epoch": 0.9652071127556789, + "grad_norm": 1.10565984249115, + "learning_rate": 3.171243358953913e-08, + "loss": 0.71, + "step": 18781 + }, + { + "epoch": 0.9652585054990236, + "grad_norm": 1.1103250980377197, + "learning_rate": 3.1618916393559294e-08, + "loss": 0.71, + "step": 18782 + }, + { + "epoch": 0.9653098982423681, + "grad_norm": 1.110566258430481, + "learning_rate": 3.152553685091597e-08, + "loss": 0.6751, + "step": 18783 + }, + { + "epoch": 0.9653612909857128, + "grad_norm": 1.1556313037872314, + "learning_rate": 3.143229496419653e-08, + "loss": 0.7287, + "step": 18784 + }, + { + "epoch": 0.9654126837290574, + "grad_norm": 0.7787653803825378, + "learning_rate": 3.1339190735983905e-08, + "loss": 0.626, + "step": 18785 + }, + { + "epoch": 0.9654640764724021, + "grad_norm": 1.0406484603881836, + "learning_rate": 3.12462241688577e-08, + "loss": 0.6647, + "step": 18786 + }, + { + "epoch": 0.9655154692157467, + "grad_norm": 0.6849263906478882, + "learning_rate": 3.115339526539418e-08, + "loss": 0.6409, + "step": 18787 + }, + { + "epoch": 0.9655668619590914, + "grad_norm": 1.0911227464675903, + "learning_rate": 3.1060704028164634e-08, + "loss": 0.6564, + "step": 18788 + }, + { + "epoch": 0.965618254702436, + "grad_norm": 1.0870689153671265, + "learning_rate": 3.0968150459737e-08, + "loss": 0.6546, + "step": 18789 + }, + { + "epoch": 0.9656696474457807, + "grad_norm": 1.6593421697616577, + "learning_rate": 3.087573456267645e-08, + "loss": 0.674, + "step": 18790 + }, + { + "epoch": 0.9657210401891253, + "grad_norm": 0.9238305687904358, + "learning_rate": 3.0783456339542605e-08, + "loss": 0.6586, + "step": 18791 + }, + { + "epoch": 0.96577243293247, + "grad_norm": 1.0731452703475952, + "learning_rate": 3.0691315792892306e-08, + "loss": 0.7265, + "step": 18792 + }, + { + "epoch": 0.9658238256758146, + "grad_norm": 1.1256870031356812, + "learning_rate": 3.059931292527907e-08, + "loss": 0.7384, + "step": 18793 + }, + { + "epoch": 0.9658752184191592, + "grad_norm": 1.1552778482437134, + "learning_rate": 3.0507447739250854e-08, + "loss": 0.6979, + "step": 18794 + }, + { + "epoch": 0.9659266111625039, + "grad_norm": 1.077541470527649, + "learning_rate": 3.0415720237353954e-08, + "loss": 0.7119, + "step": 18795 + }, + { + "epoch": 0.9659780039058485, + "grad_norm": 1.0648802518844604, + "learning_rate": 3.032413042212856e-08, + "loss": 0.6847, + "step": 18796 + }, + { + "epoch": 0.9660293966491932, + "grad_norm": 1.0678517818450928, + "learning_rate": 3.023267829611376e-08, + "loss": 0.7152, + "step": 18797 + }, + { + "epoch": 0.9660807893925377, + "grad_norm": 1.1466152667999268, + "learning_rate": 3.014136386184141e-08, + "loss": 0.7051, + "step": 18798 + }, + { + "epoch": 0.9661321821358824, + "grad_norm": 0.8240313529968262, + "learning_rate": 3.005018712184282e-08, + "loss": 0.6166, + "step": 18799 + }, + { + "epoch": 0.966183574879227, + "grad_norm": 1.032137393951416, + "learning_rate": 2.995914807864375e-08, + "loss": 0.6492, + "step": 18800 + }, + { + "epoch": 0.9662349676225717, + "grad_norm": 1.1151278018951416, + "learning_rate": 2.9868246734766624e-08, + "loss": 0.6852, + "step": 18801 + }, + { + "epoch": 0.9662863603659163, + "grad_norm": 1.1414090394973755, + "learning_rate": 2.9777483092729985e-08, + "loss": 0.7051, + "step": 18802 + }, + { + "epoch": 0.966337753109261, + "grad_norm": 1.1444951295852661, + "learning_rate": 2.9686857155048488e-08, + "loss": 0.7062, + "step": 18803 + }, + { + "epoch": 0.9663891458526056, + "grad_norm": 1.1039438247680664, + "learning_rate": 2.9596368924232345e-08, + "loss": 0.6648, + "step": 18804 + }, + { + "epoch": 0.9664405385959502, + "grad_norm": 1.0023292303085327, + "learning_rate": 2.950601840278955e-08, + "loss": 0.6446, + "step": 18805 + }, + { + "epoch": 0.9664919313392949, + "grad_norm": 1.122821569442749, + "learning_rate": 2.9415805593223102e-08, + "loss": 0.6938, + "step": 18806 + }, + { + "epoch": 0.9665433240826395, + "grad_norm": 1.0668556690216064, + "learning_rate": 2.9325730498032113e-08, + "loss": 0.651, + "step": 18807 + }, + { + "epoch": 0.9665947168259842, + "grad_norm": 1.1478619575500488, + "learning_rate": 2.9235793119712362e-08, + "loss": 0.6952, + "step": 18808 + }, + { + "epoch": 0.9666461095693288, + "grad_norm": 1.1086454391479492, + "learning_rate": 2.914599346075575e-08, + "loss": 0.6652, + "step": 18809 + }, + { + "epoch": 0.9666975023126735, + "grad_norm": 1.151504635810852, + "learning_rate": 2.9056331523650282e-08, + "loss": 0.7271, + "step": 18810 + }, + { + "epoch": 0.9667488950560181, + "grad_norm": 1.0807229280471802, + "learning_rate": 2.896680731087953e-08, + "loss": 0.6634, + "step": 18811 + }, + { + "epoch": 0.9668002877993628, + "grad_norm": 1.118722915649414, + "learning_rate": 2.8877420824924285e-08, + "loss": 0.6864, + "step": 18812 + }, + { + "epoch": 0.9668516805427073, + "grad_norm": 1.062164545059204, + "learning_rate": 2.8788172068261457e-08, + "loss": 0.7006, + "step": 18813 + }, + { + "epoch": 0.966903073286052, + "grad_norm": 1.0743906497955322, + "learning_rate": 2.8699061043363508e-08, + "loss": 0.7387, + "step": 18814 + }, + { + "epoch": 0.9669544660293966, + "grad_norm": 1.076715350151062, + "learning_rate": 2.8610087752699024e-08, + "loss": 0.6877, + "step": 18815 + }, + { + "epoch": 0.9670058587727413, + "grad_norm": 1.1419105529785156, + "learning_rate": 2.852125219873325e-08, + "loss": 0.6819, + "step": 18816 + }, + { + "epoch": 0.9670572515160859, + "grad_norm": 1.1111364364624023, + "learning_rate": 2.8432554383927557e-08, + "loss": 0.6808, + "step": 18817 + }, + { + "epoch": 0.9671086442594305, + "grad_norm": 1.0722932815551758, + "learning_rate": 2.834399431073942e-08, + "loss": 0.7049, + "step": 18818 + }, + { + "epoch": 0.9671600370027752, + "grad_norm": 1.0485846996307373, + "learning_rate": 2.8255571981622433e-08, + "loss": 0.684, + "step": 18819 + }, + { + "epoch": 0.9672114297461198, + "grad_norm": 1.0673350095748901, + "learning_rate": 2.8167287399026298e-08, + "loss": 0.7262, + "step": 18820 + }, + { + "epoch": 0.9672628224894645, + "grad_norm": 1.1458088159561157, + "learning_rate": 2.8079140565396846e-08, + "loss": 0.6988, + "step": 18821 + }, + { + "epoch": 0.9673142152328091, + "grad_norm": 1.1272093057632446, + "learning_rate": 2.799113148317656e-08, + "loss": 0.7168, + "step": 18822 + }, + { + "epoch": 0.9673656079761538, + "grad_norm": 1.0721516609191895, + "learning_rate": 2.7903260154804044e-08, + "loss": 0.672, + "step": 18823 + }, + { + "epoch": 0.9674170007194984, + "grad_norm": 1.0681589841842651, + "learning_rate": 2.781552658271347e-08, + "loss": 0.7147, + "step": 18824 + }, + { + "epoch": 0.9674683934628431, + "grad_norm": 1.1155004501342773, + "learning_rate": 2.7727930769335663e-08, + "loss": 0.6529, + "step": 18825 + }, + { + "epoch": 0.9675197862061877, + "grad_norm": 1.0461015701293945, + "learning_rate": 2.764047271709702e-08, + "loss": 0.6724, + "step": 18826 + }, + { + "epoch": 0.9675711789495324, + "grad_norm": 1.0760018825531006, + "learning_rate": 2.7553152428421715e-08, + "loss": 0.683, + "step": 18827 + }, + { + "epoch": 0.967622571692877, + "grad_norm": 1.0824872255325317, + "learning_rate": 2.7465969905728363e-08, + "loss": 0.6644, + "step": 18828 + }, + { + "epoch": 0.9676739644362216, + "grad_norm": 0.7416027188301086, + "learning_rate": 2.737892515143281e-08, + "loss": 0.6674, + "step": 18829 + }, + { + "epoch": 0.9677253571795662, + "grad_norm": 1.061812400817871, + "learning_rate": 2.729201816794591e-08, + "loss": 0.6864, + "step": 18830 + }, + { + "epoch": 0.9677767499229109, + "grad_norm": 1.1292790174484253, + "learning_rate": 2.720524895767629e-08, + "loss": 0.6905, + "step": 18831 + }, + { + "epoch": 0.9678281426662555, + "grad_norm": 1.0828323364257812, + "learning_rate": 2.7118617523027578e-08, + "loss": 0.7002, + "step": 18832 + }, + { + "epoch": 0.9678795354096001, + "grad_norm": 1.0326259136199951, + "learning_rate": 2.703212386640064e-08, + "loss": 0.7304, + "step": 18833 + }, + { + "epoch": 0.9679309281529448, + "grad_norm": 1.0880370140075684, + "learning_rate": 2.6945767990190774e-08, + "loss": 0.6747, + "step": 18834 + }, + { + "epoch": 0.9679823208962894, + "grad_norm": 1.0722930431365967, + "learning_rate": 2.685954989679107e-08, + "loss": 0.6474, + "step": 18835 + }, + { + "epoch": 0.9680337136396341, + "grad_norm": 0.7151961922645569, + "learning_rate": 2.677346958859073e-08, + "loss": 0.6064, + "step": 18836 + }, + { + "epoch": 0.9680851063829787, + "grad_norm": 1.0241864919662476, + "learning_rate": 2.668752706797395e-08, + "loss": 0.7276, + "step": 18837 + }, + { + "epoch": 0.9681364991263234, + "grad_norm": 1.0279613733291626, + "learning_rate": 2.6601722337322166e-08, + "loss": 0.7318, + "step": 18838 + }, + { + "epoch": 0.968187891869668, + "grad_norm": 1.106692910194397, + "learning_rate": 2.6516055399012917e-08, + "loss": 0.7229, + "step": 18839 + }, + { + "epoch": 0.9682392846130127, + "grad_norm": 1.0555864572525024, + "learning_rate": 2.6430526255418753e-08, + "loss": 0.7012, + "step": 18840 + }, + { + "epoch": 0.9682906773563573, + "grad_norm": 1.212843894958496, + "learning_rate": 2.6345134908910552e-08, + "loss": 0.6389, + "step": 18841 + }, + { + "epoch": 0.968342070099702, + "grad_norm": 1.074379324913025, + "learning_rate": 2.6259881361853646e-08, + "loss": 0.6806, + "step": 18842 + }, + { + "epoch": 0.9683934628430466, + "grad_norm": 0.7655879259109497, + "learning_rate": 2.6174765616609478e-08, + "loss": 0.6488, + "step": 18843 + }, + { + "epoch": 0.9684448555863912, + "grad_norm": 1.108168363571167, + "learning_rate": 2.608978767553727e-08, + "loss": 0.7261, + "step": 18844 + }, + { + "epoch": 0.9684962483297358, + "grad_norm": 1.0376719236373901, + "learning_rate": 2.60049475409907e-08, + "loss": 0.6662, + "step": 18845 + }, + { + "epoch": 0.9685476410730804, + "grad_norm": 1.1275608539581299, + "learning_rate": 2.592024521532066e-08, + "loss": 0.6734, + "step": 18846 + }, + { + "epoch": 0.9685990338164251, + "grad_norm": 1.0924030542373657, + "learning_rate": 2.583568070087361e-08, + "loss": 0.6998, + "step": 18847 + }, + { + "epoch": 0.9686504265597697, + "grad_norm": 1.1389002799987793, + "learning_rate": 2.5751253999993232e-08, + "loss": 0.7333, + "step": 18848 + }, + { + "epoch": 0.9687018193031144, + "grad_norm": 1.0855473279953003, + "learning_rate": 2.566696511501765e-08, + "loss": 0.7072, + "step": 18849 + }, + { + "epoch": 0.968753212046459, + "grad_norm": 1.1324273347854614, + "learning_rate": 2.558281404828278e-08, + "loss": 0.7419, + "step": 18850 + }, + { + "epoch": 0.9688046047898037, + "grad_norm": 1.0241976976394653, + "learning_rate": 2.5498800802120082e-08, + "loss": 0.6583, + "step": 18851 + }, + { + "epoch": 0.9688559975331483, + "grad_norm": 1.0598891973495483, + "learning_rate": 2.541492537885659e-08, + "loss": 0.6827, + "step": 18852 + }, + { + "epoch": 0.968907390276493, + "grad_norm": 1.0443761348724365, + "learning_rate": 2.5331187780817113e-08, + "loss": 0.6373, + "step": 18853 + }, + { + "epoch": 0.9689587830198376, + "grad_norm": 1.0728639364242554, + "learning_rate": 2.52475880103209e-08, + "loss": 0.6728, + "step": 18854 + }, + { + "epoch": 0.9690101757631823, + "grad_norm": 1.129329800605774, + "learning_rate": 2.5164126069684434e-08, + "loss": 0.7043, + "step": 18855 + }, + { + "epoch": 0.9690615685065269, + "grad_norm": 1.1329033374786377, + "learning_rate": 2.508080196121976e-08, + "loss": 0.6538, + "step": 18856 + }, + { + "epoch": 0.9691129612498716, + "grad_norm": 0.8134285807609558, + "learning_rate": 2.4997615687236687e-08, + "loss": 0.6852, + "step": 18857 + }, + { + "epoch": 0.9691643539932162, + "grad_norm": 1.0913729667663574, + "learning_rate": 2.4914567250038378e-08, + "loss": 0.6454, + "step": 18858 + }, + { + "epoch": 0.9692157467365607, + "grad_norm": 1.1296486854553223, + "learning_rate": 2.483165665192633e-08, + "loss": 0.697, + "step": 18859 + }, + { + "epoch": 0.9692671394799054, + "grad_norm": 0.6473573446273804, + "learning_rate": 2.474888389519814e-08, + "loss": 0.6588, + "step": 18860 + }, + { + "epoch": 0.96931853222325, + "grad_norm": 1.0771087408065796, + "learning_rate": 2.4666248982146978e-08, + "loss": 0.66, + "step": 18861 + }, + { + "epoch": 0.9693699249665947, + "grad_norm": 1.0867892503738403, + "learning_rate": 2.458375191506157e-08, + "loss": 0.6749, + "step": 18862 + }, + { + "epoch": 0.9694213177099393, + "grad_norm": 0.7781486511230469, + "learning_rate": 2.4501392696227864e-08, + "loss": 0.6233, + "step": 18863 + }, + { + "epoch": 0.969472710453284, + "grad_norm": 0.7067942023277283, + "learning_rate": 2.441917132792848e-08, + "loss": 0.675, + "step": 18864 + }, + { + "epoch": 0.9695241031966286, + "grad_norm": 1.1310036182403564, + "learning_rate": 2.4337087812440485e-08, + "loss": 0.6696, + "step": 18865 + }, + { + "epoch": 0.9695754959399733, + "grad_norm": 0.7079627513885498, + "learning_rate": 2.4255142152038725e-08, + "loss": 0.6427, + "step": 18866 + }, + { + "epoch": 0.9696268886833179, + "grad_norm": 1.0893439054489136, + "learning_rate": 2.417333434899305e-08, + "loss": 0.7214, + "step": 18867 + }, + { + "epoch": 0.9696782814266626, + "grad_norm": 0.8117467164993286, + "learning_rate": 2.4091664405570536e-08, + "loss": 0.6717, + "step": 18868 + }, + { + "epoch": 0.9697296741700072, + "grad_norm": 1.0447922945022583, + "learning_rate": 2.4010132324033265e-08, + "loss": 0.6386, + "step": 18869 + }, + { + "epoch": 0.9697810669133519, + "grad_norm": 1.278834581375122, + "learning_rate": 2.3928738106640536e-08, + "loss": 0.6499, + "step": 18870 + }, + { + "epoch": 0.9698324596566965, + "grad_norm": 1.2126810550689697, + "learning_rate": 2.3847481755647773e-08, + "loss": 0.6415, + "step": 18871 + }, + { + "epoch": 0.9698838524000412, + "grad_norm": 1.0899922847747803, + "learning_rate": 2.376636327330595e-08, + "loss": 0.7028, + "step": 18872 + }, + { + "epoch": 0.9699352451433858, + "grad_norm": 0.6740182638168335, + "learning_rate": 2.3685382661862154e-08, + "loss": 0.6199, + "step": 18873 + }, + { + "epoch": 0.9699866378867303, + "grad_norm": 1.1607987880706787, + "learning_rate": 2.3604539923560154e-08, + "loss": 0.6388, + "step": 18874 + }, + { + "epoch": 0.970038030630075, + "grad_norm": 1.4651659727096558, + "learning_rate": 2.3523835060640376e-08, + "loss": 0.698, + "step": 18875 + }, + { + "epoch": 0.9700894233734196, + "grad_norm": 1.1489415168762207, + "learning_rate": 2.3443268075338253e-08, + "loss": 0.7354, + "step": 18876 + }, + { + "epoch": 0.9701408161167643, + "grad_norm": 1.141338586807251, + "learning_rate": 2.3362838969886447e-08, + "loss": 0.6988, + "step": 18877 + }, + { + "epoch": 0.9701922088601089, + "grad_norm": 1.1430846452713013, + "learning_rate": 2.3282547746512618e-08, + "loss": 0.6567, + "step": 18878 + }, + { + "epoch": 0.9702436016034536, + "grad_norm": 1.0344953536987305, + "learning_rate": 2.320239440744221e-08, + "loss": 0.629, + "step": 18879 + }, + { + "epoch": 0.9702949943467982, + "grad_norm": 1.1419172286987305, + "learning_rate": 2.3122378954895107e-08, + "loss": 0.7167, + "step": 18880 + }, + { + "epoch": 0.9703463870901429, + "grad_norm": 1.0531339645385742, + "learning_rate": 2.3042501391088436e-08, + "loss": 0.7069, + "step": 18881 + }, + { + "epoch": 0.9703977798334875, + "grad_norm": 1.0569449663162231, + "learning_rate": 2.2962761718235415e-08, + "loss": 0.6758, + "step": 18882 + }, + { + "epoch": 0.9704491725768322, + "grad_norm": 1.0781910419464111, + "learning_rate": 2.2883159938545396e-08, + "loss": 0.7064, + "step": 18883 + }, + { + "epoch": 0.9705005653201768, + "grad_norm": 1.0687355995178223, + "learning_rate": 2.2803696054223832e-08, + "loss": 0.6388, + "step": 18884 + }, + { + "epoch": 0.9705519580635215, + "grad_norm": 1.134851098060608, + "learning_rate": 2.272437006747119e-08, + "loss": 0.6626, + "step": 18885 + }, + { + "epoch": 0.9706033508068661, + "grad_norm": 1.061223030090332, + "learning_rate": 2.2645181980487376e-08, + "loss": 0.6844, + "step": 18886 + }, + { + "epoch": 0.9706547435502108, + "grad_norm": 1.1131073236465454, + "learning_rate": 2.256613179546452e-08, + "loss": 0.6816, + "step": 18887 + }, + { + "epoch": 0.9707061362935554, + "grad_norm": 0.6761428713798523, + "learning_rate": 2.2487219514594206e-08, + "loss": 0.6354, + "step": 18888 + }, + { + "epoch": 0.9707575290368999, + "grad_norm": 1.1077100038528442, + "learning_rate": 2.2408445140061352e-08, + "loss": 0.6963, + "step": 18889 + }, + { + "epoch": 0.9708089217802446, + "grad_norm": 1.0538028478622437, + "learning_rate": 2.2329808674049768e-08, + "loss": 0.7109, + "step": 18890 + }, + { + "epoch": 0.9708603145235892, + "grad_norm": 1.0268654823303223, + "learning_rate": 2.2251310118737156e-08, + "loss": 0.6202, + "step": 18891 + }, + { + "epoch": 0.9709117072669339, + "grad_norm": 1.0834881067276, + "learning_rate": 2.2172949476299e-08, + "loss": 0.6789, + "step": 18892 + }, + { + "epoch": 0.9709631000102785, + "grad_norm": 0.7152554988861084, + "learning_rate": 2.209472674890578e-08, + "loss": 0.6402, + "step": 18893 + }, + { + "epoch": 0.9710144927536232, + "grad_norm": 1.0728943347930908, + "learning_rate": 2.2016641938725213e-08, + "loss": 0.7107, + "step": 18894 + }, + { + "epoch": 0.9710658854969678, + "grad_norm": 1.1177711486816406, + "learning_rate": 2.1938695047920566e-08, + "loss": 0.7024, + "step": 18895 + }, + { + "epoch": 0.9711172782403125, + "grad_norm": 0.7367702722549438, + "learning_rate": 2.1860886078651777e-08, + "loss": 0.6362, + "step": 18896 + }, + { + "epoch": 0.9711686709836571, + "grad_norm": 1.178797960281372, + "learning_rate": 2.178321503307379e-08, + "loss": 0.7302, + "step": 18897 + }, + { + "epoch": 0.9712200637270018, + "grad_norm": 1.1022377014160156, + "learning_rate": 2.170568191333933e-08, + "loss": 0.7075, + "step": 18898 + }, + { + "epoch": 0.9712714564703464, + "grad_norm": 1.0785107612609863, + "learning_rate": 2.1628286721596115e-08, + "loss": 0.6978, + "step": 18899 + }, + { + "epoch": 0.971322849213691, + "grad_norm": 1.1143381595611572, + "learning_rate": 2.155102945998855e-08, + "loss": 0.6221, + "step": 18900 + }, + { + "epoch": 0.9713742419570357, + "grad_norm": 1.0877841711044312, + "learning_rate": 2.1473910130657137e-08, + "loss": 0.6679, + "step": 18901 + }, + { + "epoch": 0.9714256347003803, + "grad_norm": 1.1170271635055542, + "learning_rate": 2.139692873573851e-08, + "loss": 0.6631, + "step": 18902 + }, + { + "epoch": 0.971477027443725, + "grad_norm": 1.1502450704574585, + "learning_rate": 2.1320085277365398e-08, + "loss": 0.7399, + "step": 18903 + }, + { + "epoch": 0.9715284201870696, + "grad_norm": 1.104975938796997, + "learning_rate": 2.1243379757667215e-08, + "loss": 0.6933, + "step": 18904 + }, + { + "epoch": 0.9715798129304142, + "grad_norm": 1.1125210523605347, + "learning_rate": 2.1166812178768924e-08, + "loss": 0.7014, + "step": 18905 + }, + { + "epoch": 0.9716312056737588, + "grad_norm": 1.2464022636413574, + "learning_rate": 2.1090382542791608e-08, + "loss": 0.7458, + "step": 18906 + }, + { + "epoch": 0.9716825984171035, + "grad_norm": 1.0029170513153076, + "learning_rate": 2.101409085185302e-08, + "loss": 0.6611, + "step": 18907 + }, + { + "epoch": 0.9717339911604481, + "grad_norm": 1.1367905139923096, + "learning_rate": 2.093793710806702e-08, + "loss": 0.7378, + "step": 18908 + }, + { + "epoch": 0.9717853839037928, + "grad_norm": 0.7762461304664612, + "learning_rate": 2.0861921313543033e-08, + "loss": 0.6297, + "step": 18909 + }, + { + "epoch": 0.9718367766471374, + "grad_norm": 1.0437346696853638, + "learning_rate": 2.0786043470387706e-08, + "loss": 0.7059, + "step": 18910 + }, + { + "epoch": 0.9718881693904821, + "grad_norm": 1.066454529762268, + "learning_rate": 2.0710303580703252e-08, + "loss": 0.6618, + "step": 18911 + }, + { + "epoch": 0.9719395621338267, + "grad_norm": 1.1015859842300415, + "learning_rate": 2.0634701646587983e-08, + "loss": 0.7192, + "step": 18912 + }, + { + "epoch": 0.9719909548771714, + "grad_norm": 1.1418269872665405, + "learning_rate": 2.0559237670136345e-08, + "loss": 0.7073, + "step": 18913 + }, + { + "epoch": 0.972042347620516, + "grad_norm": 0.8296297192573547, + "learning_rate": 2.0483911653439436e-08, + "loss": 0.6096, + "step": 18914 + }, + { + "epoch": 0.9720937403638606, + "grad_norm": 1.1228785514831543, + "learning_rate": 2.0408723598584478e-08, + "loss": 0.6911, + "step": 18915 + }, + { + "epoch": 0.9721451331072053, + "grad_norm": 1.0520683526992798, + "learning_rate": 2.033367350765314e-08, + "loss": 0.6733, + "step": 18916 + }, + { + "epoch": 0.9721965258505499, + "grad_norm": 1.0208766460418701, + "learning_rate": 2.0258761382726533e-08, + "loss": 0.6707, + "step": 18917 + }, + { + "epoch": 0.9722479185938946, + "grad_norm": 1.069037914276123, + "learning_rate": 2.0183987225879665e-08, + "loss": 0.7541, + "step": 18918 + }, + { + "epoch": 0.9722993113372392, + "grad_norm": 1.0833656787872314, + "learning_rate": 2.0109351039183656e-08, + "loss": 0.7455, + "step": 18919 + }, + { + "epoch": 0.9723507040805838, + "grad_norm": 1.0597163438796997, + "learning_rate": 2.0034852824706852e-08, + "loss": 0.6202, + "step": 18920 + }, + { + "epoch": 0.9724020968239284, + "grad_norm": 1.0821118354797363, + "learning_rate": 1.9960492584513157e-08, + "loss": 0.663, + "step": 18921 + }, + { + "epoch": 0.9724534895672731, + "grad_norm": 1.1545156240463257, + "learning_rate": 1.988627032066315e-08, + "loss": 0.7197, + "step": 18922 + }, + { + "epoch": 0.9725048823106177, + "grad_norm": 1.0051571130752563, + "learning_rate": 1.98121860352124e-08, + "loss": 0.6172, + "step": 18923 + }, + { + "epoch": 0.9725562750539624, + "grad_norm": 1.1615413427352905, + "learning_rate": 1.973823973021427e-08, + "loss": 0.6971, + "step": 18924 + }, + { + "epoch": 0.972607667797307, + "grad_norm": 1.130119800567627, + "learning_rate": 1.9664431407716568e-08, + "loss": 0.6997, + "step": 18925 + }, + { + "epoch": 0.9726590605406517, + "grad_norm": 1.0433279275894165, + "learning_rate": 1.959076106976543e-08, + "loss": 0.6686, + "step": 18926 + }, + { + "epoch": 0.9727104532839963, + "grad_norm": 1.029630184173584, + "learning_rate": 1.9517228718401447e-08, + "loss": 0.7213, + "step": 18927 + }, + { + "epoch": 0.972761846027341, + "grad_norm": 1.0754896402359009, + "learning_rate": 1.9443834355661884e-08, + "loss": 0.6846, + "step": 18928 + }, + { + "epoch": 0.9728132387706856, + "grad_norm": 1.111117959022522, + "learning_rate": 1.937057798357955e-08, + "loss": 0.6897, + "step": 18929 + }, + { + "epoch": 0.9728646315140302, + "grad_norm": 1.1520622968673706, + "learning_rate": 1.9297459604185052e-08, + "loss": 0.708, + "step": 18930 + }, + { + "epoch": 0.9729160242573749, + "grad_norm": 0.7266581058502197, + "learning_rate": 1.9224479219503434e-08, + "loss": 0.6598, + "step": 18931 + }, + { + "epoch": 0.9729674170007195, + "grad_norm": 1.060086727142334, + "learning_rate": 1.9151636831557518e-08, + "loss": 0.6833, + "step": 18932 + }, + { + "epoch": 0.9730188097440642, + "grad_norm": 1.08528733253479, + "learning_rate": 1.9078932442364584e-08, + "loss": 0.7053, + "step": 18933 + }, + { + "epoch": 0.9730702024874088, + "grad_norm": 1.0269505977630615, + "learning_rate": 1.9006366053939686e-08, + "loss": 0.7215, + "step": 18934 + }, + { + "epoch": 0.9731215952307534, + "grad_norm": 1.0795613527297974, + "learning_rate": 1.8933937668292878e-08, + "loss": 0.6935, + "step": 18935 + }, + { + "epoch": 0.973172987974098, + "grad_norm": 1.0081359148025513, + "learning_rate": 1.8861647287430897e-08, + "loss": 0.6532, + "step": 18936 + }, + { + "epoch": 0.9732243807174427, + "grad_norm": 0.7232218980789185, + "learning_rate": 1.878949491335713e-08, + "loss": 0.644, + "step": 18937 + }, + { + "epoch": 0.9732757734607873, + "grad_norm": 0.7001712918281555, + "learning_rate": 1.8717480548069987e-08, + "loss": 0.6383, + "step": 18938 + }, + { + "epoch": 0.973327166204132, + "grad_norm": 1.030350685119629, + "learning_rate": 1.864560419356509e-08, + "loss": 0.6483, + "step": 18939 + }, + { + "epoch": 0.9733785589474766, + "grad_norm": 1.1158052682876587, + "learning_rate": 1.8573865851833627e-08, + "loss": 0.6686, + "step": 18940 + }, + { + "epoch": 0.9734299516908212, + "grad_norm": 1.2173320055007935, + "learning_rate": 1.8502265524862895e-08, + "loss": 0.7525, + "step": 18941 + }, + { + "epoch": 0.9734813444341659, + "grad_norm": 0.7782997488975525, + "learning_rate": 1.8430803214637416e-08, + "loss": 0.6527, + "step": 18942 + }, + { + "epoch": 0.9735327371775105, + "grad_norm": 1.0380682945251465, + "learning_rate": 1.8359478923136164e-08, + "loss": 0.6861, + "step": 18943 + }, + { + "epoch": 0.9735841299208552, + "grad_norm": 1.0890341997146606, + "learning_rate": 1.828829265233589e-08, + "loss": 0.7506, + "step": 18944 + }, + { + "epoch": 0.9736355226641998, + "grad_norm": 1.091010570526123, + "learning_rate": 1.8217244404208912e-08, + "loss": 0.7029, + "step": 18945 + }, + { + "epoch": 0.9736869154075445, + "grad_norm": 1.1977953910827637, + "learning_rate": 1.8146334180723092e-08, + "loss": 0.7042, + "step": 18946 + }, + { + "epoch": 0.9737383081508891, + "grad_norm": 1.111505150794983, + "learning_rate": 1.8075561983844083e-08, + "loss": 0.6777, + "step": 18947 + }, + { + "epoch": 0.9737897008942338, + "grad_norm": 1.0528273582458496, + "learning_rate": 1.8004927815531982e-08, + "loss": 0.7168, + "step": 18948 + }, + { + "epoch": 0.9738410936375784, + "grad_norm": 1.0274717807769775, + "learning_rate": 1.7934431677743002e-08, + "loss": 0.6444, + "step": 18949 + }, + { + "epoch": 0.973892486380923, + "grad_norm": 1.1316269636154175, + "learning_rate": 1.786407357243225e-08, + "loss": 0.6739, + "step": 18950 + }, + { + "epoch": 0.9739438791242676, + "grad_norm": 0.7171473503112793, + "learning_rate": 1.7793853501547054e-08, + "loss": 0.6203, + "step": 18951 + }, + { + "epoch": 0.9739952718676123, + "grad_norm": 0.6089109778404236, + "learning_rate": 1.772377146703419e-08, + "loss": 0.6375, + "step": 18952 + }, + { + "epoch": 0.9740466646109569, + "grad_norm": 1.164149284362793, + "learning_rate": 1.7653827470834882e-08, + "loss": 0.7535, + "step": 18953 + }, + { + "epoch": 0.9740980573543015, + "grad_norm": 1.049311637878418, + "learning_rate": 1.7584021514887583e-08, + "loss": 0.6888, + "step": 18954 + }, + { + "epoch": 0.9741494500976462, + "grad_norm": 1.0824832916259766, + "learning_rate": 1.7514353601125188e-08, + "loss": 0.72, + "step": 18955 + }, + { + "epoch": 0.9742008428409908, + "grad_norm": 1.101812481880188, + "learning_rate": 1.7444823731478934e-08, + "loss": 0.6634, + "step": 18956 + }, + { + "epoch": 0.9742522355843355, + "grad_norm": 0.8164855241775513, + "learning_rate": 1.7375431907875053e-08, + "loss": 0.629, + "step": 18957 + }, + { + "epoch": 0.9743036283276801, + "grad_norm": 1.0493357181549072, + "learning_rate": 1.730617813223534e-08, + "loss": 0.6938, + "step": 18958 + }, + { + "epoch": 0.9743550210710248, + "grad_norm": 1.0917471647262573, + "learning_rate": 1.723706240647882e-08, + "loss": 0.7068, + "step": 18959 + }, + { + "epoch": 0.9744064138143694, + "grad_norm": 1.1101254224777222, + "learning_rate": 1.7168084732521186e-08, + "loss": 0.6629, + "step": 18960 + }, + { + "epoch": 0.9744578065577141, + "grad_norm": 1.1838537454605103, + "learning_rate": 1.709924511227312e-08, + "loss": 0.6758, + "step": 18961 + }, + { + "epoch": 0.9745091993010587, + "grad_norm": 1.032524824142456, + "learning_rate": 1.703054354764144e-08, + "loss": 0.6681, + "step": 18962 + }, + { + "epoch": 0.9745605920444034, + "grad_norm": 1.1587255001068115, + "learning_rate": 1.696198004053018e-08, + "loss": 0.6838, + "step": 18963 + }, + { + "epoch": 0.974611984787748, + "grad_norm": 1.0701427459716797, + "learning_rate": 1.689355459283837e-08, + "loss": 0.6337, + "step": 18964 + }, + { + "epoch": 0.9746633775310926, + "grad_norm": 1.155348300933838, + "learning_rate": 1.6825267206462272e-08, + "loss": 0.7387, + "step": 18965 + }, + { + "epoch": 0.9747147702744372, + "grad_norm": 1.1337854862213135, + "learning_rate": 1.6757117883293706e-08, + "loss": 0.7209, + "step": 18966 + }, + { + "epoch": 0.9747661630177819, + "grad_norm": 1.1096447706222534, + "learning_rate": 1.6689106625220607e-08, + "loss": 0.6925, + "step": 18967 + }, + { + "epoch": 0.9748175557611265, + "grad_norm": 1.0637927055358887, + "learning_rate": 1.662123343412758e-08, + "loss": 0.6569, + "step": 18968 + }, + { + "epoch": 0.9748689485044711, + "grad_norm": 1.0688611268997192, + "learning_rate": 1.6553498311894233e-08, + "loss": 0.6654, + "step": 18969 + }, + { + "epoch": 0.9749203412478158, + "grad_norm": 1.0771616697311401, + "learning_rate": 1.6485901260399062e-08, + "loss": 0.6641, + "step": 18970 + }, + { + "epoch": 0.9749717339911604, + "grad_norm": 1.153753399848938, + "learning_rate": 1.6418442281513346e-08, + "loss": 0.6914, + "step": 18971 + }, + { + "epoch": 0.9750231267345051, + "grad_norm": 1.1474782228469849, + "learning_rate": 1.6351121377106705e-08, + "loss": 0.6978, + "step": 18972 + }, + { + "epoch": 0.9750745194778497, + "grad_norm": 0.7070923447608948, + "learning_rate": 1.628393854904431e-08, + "loss": 0.6452, + "step": 18973 + }, + { + "epoch": 0.9751259122211944, + "grad_norm": 1.225093126296997, + "learning_rate": 1.6216893799186896e-08, + "loss": 0.7352, + "step": 18974 + }, + { + "epoch": 0.975177304964539, + "grad_norm": 1.0549525022506714, + "learning_rate": 1.6149987129392973e-08, + "loss": 0.7255, + "step": 18975 + }, + { + "epoch": 0.9752286977078837, + "grad_norm": 1.0518620014190674, + "learning_rate": 1.608321854151551e-08, + "loss": 0.7042, + "step": 18976 + }, + { + "epoch": 0.9752800904512283, + "grad_norm": 1.199503779411316, + "learning_rate": 1.601658803740469e-08, + "loss": 0.7512, + "step": 18977 + }, + { + "epoch": 0.975331483194573, + "grad_norm": 1.1221708059310913, + "learning_rate": 1.5950095618906812e-08, + "loss": 0.6922, + "step": 18978 + }, + { + "epoch": 0.9753828759379176, + "grad_norm": 1.103562831878662, + "learning_rate": 1.5883741287863742e-08, + "loss": 0.7131, + "step": 18979 + }, + { + "epoch": 0.9754342686812622, + "grad_norm": 1.138983130455017, + "learning_rate": 1.5817525046113447e-08, + "loss": 0.6674, + "step": 18980 + }, + { + "epoch": 0.9754856614246068, + "grad_norm": 0.6696897745132446, + "learning_rate": 1.5751446895491685e-08, + "loss": 0.6362, + "step": 18981 + }, + { + "epoch": 0.9755370541679514, + "grad_norm": 1.128674864768982, + "learning_rate": 1.56855068378281e-08, + "loss": 0.6228, + "step": 18982 + }, + { + "epoch": 0.9755884469112961, + "grad_norm": 1.146132230758667, + "learning_rate": 1.5619704874950126e-08, + "loss": 0.7186, + "step": 18983 + }, + { + "epoch": 0.9756398396546407, + "grad_norm": 1.0576441287994385, + "learning_rate": 1.5554041008680742e-08, + "loss": 0.6689, + "step": 18984 + }, + { + "epoch": 0.9756912323979854, + "grad_norm": 0.684005081653595, + "learning_rate": 1.548851524083961e-08, + "loss": 0.6373, + "step": 18985 + }, + { + "epoch": 0.97574262514133, + "grad_norm": 1.107236385345459, + "learning_rate": 1.5423127573241938e-08, + "loss": 0.7091, + "step": 18986 + }, + { + "epoch": 0.9757940178846747, + "grad_norm": 1.1063700914382935, + "learning_rate": 1.5357878007699055e-08, + "loss": 0.6594, + "step": 18987 + }, + { + "epoch": 0.9758454106280193, + "grad_norm": 1.116257667541504, + "learning_rate": 1.5292766546018966e-08, + "loss": 0.6564, + "step": 18988 + }, + { + "epoch": 0.975896803371364, + "grad_norm": 1.096600890159607, + "learning_rate": 1.5227793190005226e-08, + "loss": 0.6635, + "step": 18989 + }, + { + "epoch": 0.9759481961147086, + "grad_norm": 1.089584231376648, + "learning_rate": 1.5162957941459165e-08, + "loss": 0.6859, + "step": 18990 + }, + { + "epoch": 0.9759995888580533, + "grad_norm": 1.076155424118042, + "learning_rate": 1.5098260802176022e-08, + "loss": 0.6196, + "step": 18991 + }, + { + "epoch": 0.9760509816013979, + "grad_norm": 1.0884466171264648, + "learning_rate": 1.503370177394825e-08, + "loss": 0.6579, + "step": 18992 + }, + { + "epoch": 0.9761023743447426, + "grad_norm": 1.042235255241394, + "learning_rate": 1.4969280858565526e-08, + "loss": 0.7141, + "step": 18993 + }, + { + "epoch": 0.9761537670880872, + "grad_norm": 1.0301247835159302, + "learning_rate": 1.4904998057811428e-08, + "loss": 0.6748, + "step": 18994 + }, + { + "epoch": 0.9762051598314319, + "grad_norm": 1.1477546691894531, + "learning_rate": 1.4840853373467856e-08, + "loss": 0.7178, + "step": 18995 + }, + { + "epoch": 0.9762565525747764, + "grad_norm": 1.0929409265518188, + "learning_rate": 1.4776846807311175e-08, + "loss": 0.6667, + "step": 18996 + }, + { + "epoch": 0.976307945318121, + "grad_norm": 1.0887374877929688, + "learning_rate": 1.471297836111607e-08, + "loss": 0.6609, + "step": 18997 + }, + { + "epoch": 0.9763593380614657, + "grad_norm": 1.0710246562957764, + "learning_rate": 1.4649248036651131e-08, + "loss": 0.6651, + "step": 18998 + }, + { + "epoch": 0.9764107308048103, + "grad_norm": 1.0719025135040283, + "learning_rate": 1.4585655835681612e-08, + "loss": 0.6781, + "step": 18999 + }, + { + "epoch": 0.976462123548155, + "grad_norm": 1.145033836364746, + "learning_rate": 1.4522201759970544e-08, + "loss": 0.689, + "step": 19000 + }, + { + "epoch": 0.9765135162914996, + "grad_norm": 1.1196221113204956, + "learning_rate": 1.445888581127486e-08, + "loss": 0.694, + "step": 19001 + }, + { + "epoch": 0.9765649090348443, + "grad_norm": 1.1073291301727295, + "learning_rate": 1.4395707991349817e-08, + "loss": 0.6768, + "step": 19002 + }, + { + "epoch": 0.9766163017781889, + "grad_norm": 1.0918573141098022, + "learning_rate": 1.4332668301945684e-08, + "loss": 0.6901, + "step": 19003 + }, + { + "epoch": 0.9766676945215336, + "grad_norm": 1.0817763805389404, + "learning_rate": 1.4269766744807733e-08, + "loss": 0.7065, + "step": 19004 + }, + { + "epoch": 0.9767190872648782, + "grad_norm": 1.1678199768066406, + "learning_rate": 1.4207003321680125e-08, + "loss": 0.6653, + "step": 19005 + }, + { + "epoch": 0.9767704800082229, + "grad_norm": 1.160949945449829, + "learning_rate": 1.4144378034301465e-08, + "loss": 0.675, + "step": 19006 + }, + { + "epoch": 0.9768218727515675, + "grad_norm": 1.1462547779083252, + "learning_rate": 1.4081890884406479e-08, + "loss": 0.7078, + "step": 19007 + }, + { + "epoch": 0.9768732654949122, + "grad_norm": 1.067353367805481, + "learning_rate": 1.401954187372656e-08, + "loss": 0.7229, + "step": 19008 + }, + { + "epoch": 0.9769246582382568, + "grad_norm": 1.1847907304763794, + "learning_rate": 1.3957331003989216e-08, + "loss": 0.7025, + "step": 19009 + }, + { + "epoch": 0.9769760509816015, + "grad_norm": 1.0363231897354126, + "learning_rate": 1.3895258276918621e-08, + "loss": 0.7048, + "step": 19010 + }, + { + "epoch": 0.977027443724946, + "grad_norm": 1.0179197788238525, + "learning_rate": 1.3833323694233402e-08, + "loss": 0.6591, + "step": 19011 + }, + { + "epoch": 0.9770788364682906, + "grad_norm": 1.1255227327346802, + "learning_rate": 1.3771527257649963e-08, + "loss": 0.6692, + "step": 19012 + }, + { + "epoch": 0.9771302292116353, + "grad_norm": 1.121323823928833, + "learning_rate": 1.3709868968880825e-08, + "loss": 0.6459, + "step": 19013 + }, + { + "epoch": 0.9771816219549799, + "grad_norm": 0.6892160773277283, + "learning_rate": 1.3648348829634061e-08, + "loss": 0.6392, + "step": 19014 + }, + { + "epoch": 0.9772330146983246, + "grad_norm": 1.1293246746063232, + "learning_rate": 1.3586966841613869e-08, + "loss": 0.6854, + "step": 19015 + }, + { + "epoch": 0.9772844074416692, + "grad_norm": 1.0964460372924805, + "learning_rate": 1.3525723006521663e-08, + "loss": 0.6706, + "step": 19016 + }, + { + "epoch": 0.9773358001850139, + "grad_norm": 1.0584444999694824, + "learning_rate": 1.3464617326052754e-08, + "loss": 0.717, + "step": 19017 + }, + { + "epoch": 0.9773871929283585, + "grad_norm": 1.067427158355713, + "learning_rate": 1.3403649801901896e-08, + "loss": 0.7216, + "step": 19018 + }, + { + "epoch": 0.9774385856717032, + "grad_norm": 0.7050039768218994, + "learning_rate": 1.3342820435757186e-08, + "loss": 0.6969, + "step": 19019 + }, + { + "epoch": 0.9774899784150478, + "grad_norm": 0.8224236965179443, + "learning_rate": 1.3282129229303941e-08, + "loss": 0.6617, + "step": 19020 + }, + { + "epoch": 0.9775413711583925, + "grad_norm": 1.0814045667648315, + "learning_rate": 1.322157618422415e-08, + "loss": 0.6938, + "step": 19021 + }, + { + "epoch": 0.9775927639017371, + "grad_norm": 1.1549818515777588, + "learning_rate": 1.3161161302195359e-08, + "loss": 0.6565, + "step": 19022 + }, + { + "epoch": 0.9776441566450818, + "grad_norm": 1.1385267972946167, + "learning_rate": 1.3100884584891227e-08, + "loss": 0.7123, + "step": 19023 + }, + { + "epoch": 0.9776955493884264, + "grad_norm": 1.1226462125778198, + "learning_rate": 1.3040746033982088e-08, + "loss": 0.7123, + "step": 19024 + }, + { + "epoch": 0.977746942131771, + "grad_norm": 1.0890467166900635, + "learning_rate": 1.2980745651133831e-08, + "loss": 0.6765, + "step": 19025 + }, + { + "epoch": 0.9777983348751156, + "grad_norm": 1.048478126525879, + "learning_rate": 1.2920883438008457e-08, + "loss": 0.678, + "step": 19026 + }, + { + "epoch": 0.9778497276184602, + "grad_norm": 0.7622886300086975, + "learning_rate": 1.28611593962652e-08, + "loss": 0.6034, + "step": 19027 + }, + { + "epoch": 0.9779011203618049, + "grad_norm": 1.1089476346969604, + "learning_rate": 1.2801573527558841e-08, + "loss": 0.6964, + "step": 19028 + }, + { + "epoch": 0.9779525131051495, + "grad_norm": 1.1935968399047852, + "learning_rate": 1.2742125833539176e-08, + "loss": 0.7238, + "step": 19029 + }, + { + "epoch": 0.9780039058484942, + "grad_norm": 0.7410804033279419, + "learning_rate": 1.2682816315854329e-08, + "loss": 0.6483, + "step": 19030 + }, + { + "epoch": 0.9780552985918388, + "grad_norm": 0.6861765384674072, + "learning_rate": 1.2623644976147432e-08, + "loss": 0.6595, + "step": 19031 + }, + { + "epoch": 0.9781066913351835, + "grad_norm": 1.1236472129821777, + "learning_rate": 1.2564611816057725e-08, + "loss": 0.6189, + "step": 19032 + }, + { + "epoch": 0.9781580840785281, + "grad_norm": 1.0733482837677002, + "learning_rate": 1.2505716837220572e-08, + "loss": 0.6657, + "step": 19033 + }, + { + "epoch": 0.9782094768218728, + "grad_norm": 1.0664821863174438, + "learning_rate": 1.2446960041267997e-08, + "loss": 0.6509, + "step": 19034 + }, + { + "epoch": 0.9782608695652174, + "grad_norm": 1.1141270399093628, + "learning_rate": 1.2388341429827034e-08, + "loss": 0.6939, + "step": 19035 + }, + { + "epoch": 0.978312262308562, + "grad_norm": 1.1200549602508545, + "learning_rate": 1.2329861004523047e-08, + "loss": 0.6354, + "step": 19036 + }, + { + "epoch": 0.9783636550519067, + "grad_norm": 1.109206199645996, + "learning_rate": 1.2271518766975854e-08, + "loss": 0.6896, + "step": 19037 + }, + { + "epoch": 0.9784150477952513, + "grad_norm": 1.0326014757156372, + "learning_rate": 1.2213314718801383e-08, + "loss": 0.6771, + "step": 19038 + }, + { + "epoch": 0.978466440538596, + "grad_norm": 1.0878466367721558, + "learning_rate": 1.215524886161279e-08, + "loss": 0.6717, + "step": 19039 + }, + { + "epoch": 0.9785178332819406, + "grad_norm": 1.0529557466506958, + "learning_rate": 1.2097321197018785e-08, + "loss": 0.6714, + "step": 19040 + }, + { + "epoch": 0.9785692260252852, + "grad_norm": 1.041327714920044, + "learning_rate": 1.2039531726623644e-08, + "loss": 0.6721, + "step": 19041 + }, + { + "epoch": 0.9786206187686298, + "grad_norm": 1.0779091119766235, + "learning_rate": 1.1981880452029415e-08, + "loss": 0.692, + "step": 19042 + }, + { + "epoch": 0.9786720115119745, + "grad_norm": 1.1115553379058838, + "learning_rate": 1.1924367374832602e-08, + "loss": 0.6781, + "step": 19043 + }, + { + "epoch": 0.9787234042553191, + "grad_norm": 1.041497826576233, + "learning_rate": 1.1866992496626928e-08, + "loss": 0.7056, + "step": 19044 + }, + { + "epoch": 0.9787747969986638, + "grad_norm": 1.071725845336914, + "learning_rate": 1.1809755819002788e-08, + "loss": 0.6658, + "step": 19045 + }, + { + "epoch": 0.9788261897420084, + "grad_norm": 1.0545936822891235, + "learning_rate": 1.1752657343544471e-08, + "loss": 0.6122, + "step": 19046 + }, + { + "epoch": 0.9788775824853531, + "grad_norm": 0.6993469595909119, + "learning_rate": 1.1695697071835154e-08, + "loss": 0.6136, + "step": 19047 + }, + { + "epoch": 0.9789289752286977, + "grad_norm": 1.0951720476150513, + "learning_rate": 1.1638875005452466e-08, + "loss": 0.6981, + "step": 19048 + }, + { + "epoch": 0.9789803679720424, + "grad_norm": 0.7597584128379822, + "learning_rate": 1.15821911459707e-08, + "loss": 0.6327, + "step": 19049 + }, + { + "epoch": 0.979031760715387, + "grad_norm": 0.7588790059089661, + "learning_rate": 1.1525645494960824e-08, + "loss": 0.6513, + "step": 19050 + }, + { + "epoch": 0.9790831534587316, + "grad_norm": 1.1419697999954224, + "learning_rate": 1.1469238053988808e-08, + "loss": 0.6718, + "step": 19051 + }, + { + "epoch": 0.9791345462020763, + "grad_norm": 1.011348009109497, + "learning_rate": 1.1412968824617842e-08, + "loss": 0.6044, + "step": 19052 + }, + { + "epoch": 0.9791859389454209, + "grad_norm": 1.0667184591293335, + "learning_rate": 1.1356837808406685e-08, + "loss": 0.6536, + "step": 19053 + }, + { + "epoch": 0.9792373316887656, + "grad_norm": 1.1459214687347412, + "learning_rate": 1.1300845006910755e-08, + "loss": 0.6748, + "step": 19054 + }, + { + "epoch": 0.9792887244321102, + "grad_norm": 1.1163365840911865, + "learning_rate": 1.1244990421681035e-08, + "loss": 0.7342, + "step": 19055 + }, + { + "epoch": 0.9793401171754548, + "grad_norm": 1.2121094465255737, + "learning_rate": 1.1189274054265175e-08, + "loss": 0.6452, + "step": 19056 + }, + { + "epoch": 0.9793915099187994, + "grad_norm": 0.7309182286262512, + "learning_rate": 1.1133695906207497e-08, + "loss": 0.6596, + "step": 19057 + }, + { + "epoch": 0.9794429026621441, + "grad_norm": 1.0707401037216187, + "learning_rate": 1.107825597904677e-08, + "loss": 0.6239, + "step": 19058 + }, + { + "epoch": 0.9794942954054887, + "grad_norm": 0.7262753844261169, + "learning_rate": 1.1022954274320096e-08, + "loss": 0.6664, + "step": 19059 + }, + { + "epoch": 0.9795456881488334, + "grad_norm": 1.1145539283752441, + "learning_rate": 1.0967790793558475e-08, + "loss": 0.7254, + "step": 19060 + }, + { + "epoch": 0.979597080892178, + "grad_norm": 1.0860663652420044, + "learning_rate": 1.0912765538290681e-08, + "loss": 0.6381, + "step": 19061 + }, + { + "epoch": 0.9796484736355227, + "grad_norm": 1.0046390295028687, + "learning_rate": 1.0857878510041608e-08, + "loss": 0.6395, + "step": 19062 + }, + { + "epoch": 0.9796998663788673, + "grad_norm": 1.0336318016052246, + "learning_rate": 1.0803129710332261e-08, + "loss": 0.701, + "step": 19063 + }, + { + "epoch": 0.979751259122212, + "grad_norm": 1.2074172496795654, + "learning_rate": 1.0748519140678648e-08, + "loss": 0.7768, + "step": 19064 + }, + { + "epoch": 0.9798026518655566, + "grad_norm": 0.695376455783844, + "learning_rate": 1.0694046802594005e-08, + "loss": 0.6197, + "step": 19065 + }, + { + "epoch": 0.9798540446089012, + "grad_norm": 1.2134979963302612, + "learning_rate": 1.0639712697587679e-08, + "loss": 0.6872, + "step": 19066 + }, + { + "epoch": 0.9799054373522459, + "grad_norm": 1.1151384115219116, + "learning_rate": 1.0585516827165132e-08, + "loss": 0.7196, + "step": 19067 + }, + { + "epoch": 0.9799568300955905, + "grad_norm": 1.2165758609771729, + "learning_rate": 1.0531459192827941e-08, + "loss": 0.6879, + "step": 19068 + }, + { + "epoch": 0.9800082228389352, + "grad_norm": 1.089278221130371, + "learning_rate": 1.0477539796073799e-08, + "loss": 0.7039, + "step": 19069 + }, + { + "epoch": 0.9800596155822798, + "grad_norm": 1.030637264251709, + "learning_rate": 1.0423758638395953e-08, + "loss": 0.7178, + "step": 19070 + }, + { + "epoch": 0.9801110083256244, + "grad_norm": 1.1957106590270996, + "learning_rate": 1.0370115721285433e-08, + "loss": 0.6833, + "step": 19071 + }, + { + "epoch": 0.980162401068969, + "grad_norm": 1.101899266242981, + "learning_rate": 1.031661104622772e-08, + "loss": 0.6531, + "step": 19072 + }, + { + "epoch": 0.9802137938123137, + "grad_norm": 1.059823989868164, + "learning_rate": 1.026324461470607e-08, + "loss": 0.6482, + "step": 19073 + }, + { + "epoch": 0.9802651865556583, + "grad_norm": 1.1145623922348022, + "learning_rate": 1.0210016428197633e-08, + "loss": 0.7277, + "step": 19074 + }, + { + "epoch": 0.980316579299003, + "grad_norm": 1.0254786014556885, + "learning_rate": 1.015692648817901e-08, + "loss": 0.6898, + "step": 19075 + }, + { + "epoch": 0.9803679720423476, + "grad_norm": 1.081668734550476, + "learning_rate": 1.010397479611902e-08, + "loss": 0.6799, + "step": 19076 + }, + { + "epoch": 0.9804193647856922, + "grad_norm": 1.0847656726837158, + "learning_rate": 1.0051161353486495e-08, + "loss": 0.6227, + "step": 19077 + }, + { + "epoch": 0.9804707575290369, + "grad_norm": 0.767213761806488, + "learning_rate": 9.998486161743592e-09, + "loss": 0.6262, + "step": 19078 + }, + { + "epoch": 0.9805221502723815, + "grad_norm": 1.0824520587921143, + "learning_rate": 9.945949222350814e-09, + "loss": 0.6713, + "step": 19079 + }, + { + "epoch": 0.9805735430157262, + "grad_norm": 0.7505239248275757, + "learning_rate": 9.893550536761997e-09, + "loss": 0.6221, + "step": 19080 + }, + { + "epoch": 0.9806249357590708, + "grad_norm": 1.0721256732940674, + "learning_rate": 9.841290106430979e-09, + "loss": 0.6697, + "step": 19081 + }, + { + "epoch": 0.9806763285024155, + "grad_norm": 1.0725972652435303, + "learning_rate": 9.789167932803822e-09, + "loss": 0.7019, + "step": 19082 + }, + { + "epoch": 0.9807277212457601, + "grad_norm": 1.0916476249694824, + "learning_rate": 9.73718401732604e-09, + "loss": 0.7018, + "step": 19083 + }, + { + "epoch": 0.9807791139891048, + "grad_norm": 1.1089128255844116, + "learning_rate": 9.685338361437036e-09, + "loss": 0.741, + "step": 19084 + }, + { + "epoch": 0.9808305067324494, + "grad_norm": 1.081346035003662, + "learning_rate": 9.633630966573437e-09, + "loss": 0.6489, + "step": 19085 + }, + { + "epoch": 0.9808818994757941, + "grad_norm": 0.7334508299827576, + "learning_rate": 9.582061834167988e-09, + "loss": 0.6498, + "step": 19086 + }, + { + "epoch": 0.9809332922191386, + "grad_norm": 1.1141550540924072, + "learning_rate": 9.530630965649546e-09, + "loss": 0.5947, + "step": 19087 + }, + { + "epoch": 0.9809846849624833, + "grad_norm": 1.1384509801864624, + "learning_rate": 9.479338362442525e-09, + "loss": 0.7322, + "step": 19088 + }, + { + "epoch": 0.9810360777058279, + "grad_norm": 1.1362167596817017, + "learning_rate": 9.428184025968567e-09, + "loss": 0.651, + "step": 19089 + }, + { + "epoch": 0.9810874704491725, + "grad_norm": 1.0448548793792725, + "learning_rate": 9.377167957644317e-09, + "loss": 0.6895, + "step": 19090 + }, + { + "epoch": 0.9811388631925172, + "grad_norm": 1.0639455318450928, + "learning_rate": 9.326290158884199e-09, + "loss": 0.6757, + "step": 19091 + }, + { + "epoch": 0.9811902559358618, + "grad_norm": 1.1399660110473633, + "learning_rate": 9.275550631097086e-09, + "loss": 0.7301, + "step": 19092 + }, + { + "epoch": 0.9812416486792065, + "grad_norm": 1.0856454372406006, + "learning_rate": 9.224949375689074e-09, + "loss": 0.6618, + "step": 19093 + }, + { + "epoch": 0.9812930414225511, + "grad_norm": 1.1854712963104248, + "learning_rate": 9.17448639406182e-09, + "loss": 0.699, + "step": 19094 + }, + { + "epoch": 0.9813444341658958, + "grad_norm": 1.0969607830047607, + "learning_rate": 9.124161687613653e-09, + "loss": 0.665, + "step": 19095 + }, + { + "epoch": 0.9813958269092404, + "grad_norm": 1.1026151180267334, + "learning_rate": 9.07397525773901e-09, + "loss": 0.667, + "step": 19096 + }, + { + "epoch": 0.9814472196525851, + "grad_norm": 1.168717622756958, + "learning_rate": 9.02392710582789e-09, + "loss": 0.7122, + "step": 19097 + }, + { + "epoch": 0.9814986123959297, + "grad_norm": 1.1737709045410156, + "learning_rate": 8.974017233267517e-09, + "loss": 0.6891, + "step": 19098 + }, + { + "epoch": 0.9815500051392744, + "grad_norm": 1.1070634126663208, + "learning_rate": 8.92424564144012e-09, + "loss": 0.6932, + "step": 19099 + }, + { + "epoch": 0.981601397882619, + "grad_norm": 1.1919206380844116, + "learning_rate": 8.874612331725151e-09, + "loss": 0.6701, + "step": 19100 + }, + { + "epoch": 0.9816527906259637, + "grad_norm": 0.7962684631347656, + "learning_rate": 8.825117305497622e-09, + "loss": 0.6349, + "step": 19101 + }, + { + "epoch": 0.9817041833693082, + "grad_norm": 1.1634429693222046, + "learning_rate": 8.775760564128654e-09, + "loss": 0.6741, + "step": 19102 + }, + { + "epoch": 0.9817555761126529, + "grad_norm": 1.1207243204116821, + "learning_rate": 8.726542108985492e-09, + "loss": 0.6821, + "step": 19103 + }, + { + "epoch": 0.9818069688559975, + "grad_norm": 0.7481328845024109, + "learning_rate": 8.677461941432041e-09, + "loss": 0.6464, + "step": 19104 + }, + { + "epoch": 0.9818583615993421, + "grad_norm": 1.1418112516403198, + "learning_rate": 8.628520062828882e-09, + "loss": 0.7229, + "step": 19105 + }, + { + "epoch": 0.9819097543426868, + "grad_norm": 1.108088493347168, + "learning_rate": 8.579716474530486e-09, + "loss": 0.6829, + "step": 19106 + }, + { + "epoch": 0.9819611470860314, + "grad_norm": 1.153940200805664, + "learning_rate": 8.531051177890215e-09, + "loss": 0.7257, + "step": 19107 + }, + { + "epoch": 0.9820125398293761, + "grad_norm": 1.1209362745285034, + "learning_rate": 8.482524174255325e-09, + "loss": 0.7353, + "step": 19108 + }, + { + "epoch": 0.9820639325727207, + "grad_norm": 1.1414313316345215, + "learning_rate": 8.434135464971404e-09, + "loss": 0.7462, + "step": 19109 + }, + { + "epoch": 0.9821153253160654, + "grad_norm": 1.0669515132904053, + "learning_rate": 8.385885051378495e-09, + "loss": 0.7085, + "step": 19110 + }, + { + "epoch": 0.98216671805941, + "grad_norm": 1.107173204421997, + "learning_rate": 8.337772934813859e-09, + "loss": 0.7037, + "step": 19111 + }, + { + "epoch": 0.9822181108027547, + "grad_norm": 0.7353394031524658, + "learning_rate": 8.289799116609764e-09, + "loss": 0.6598, + "step": 19112 + }, + { + "epoch": 0.9822695035460993, + "grad_norm": 1.118727445602417, + "learning_rate": 8.241963598095703e-09, + "loss": 0.6675, + "step": 19113 + }, + { + "epoch": 0.982320896289444, + "grad_norm": 1.026202917098999, + "learning_rate": 8.194266380597838e-09, + "loss": 0.6836, + "step": 19114 + }, + { + "epoch": 0.9823722890327886, + "grad_norm": 1.1317459344863892, + "learning_rate": 8.146707465436221e-09, + "loss": 0.7484, + "step": 19115 + }, + { + "epoch": 0.9824236817761333, + "grad_norm": 1.1192821264266968, + "learning_rate": 8.099286853929245e-09, + "loss": 0.6845, + "step": 19116 + }, + { + "epoch": 0.9824750745194778, + "grad_norm": 1.1494070291519165, + "learning_rate": 8.052004547390302e-09, + "loss": 0.6491, + "step": 19117 + }, + { + "epoch": 0.9825264672628224, + "grad_norm": 1.0828487873077393, + "learning_rate": 8.004860547130566e-09, + "loss": 0.7016, + "step": 19118 + }, + { + "epoch": 0.9825778600061671, + "grad_norm": 1.1415433883666992, + "learning_rate": 7.95785485445455e-09, + "loss": 0.736, + "step": 19119 + }, + { + "epoch": 0.9826292527495117, + "grad_norm": 0.7266352772712708, + "learning_rate": 7.91098747066621e-09, + "loss": 0.6134, + "step": 19120 + }, + { + "epoch": 0.9826806454928564, + "grad_norm": 0.755668580532074, + "learning_rate": 7.86425839706284e-09, + "loss": 0.6347, + "step": 19121 + }, + { + "epoch": 0.982732038236201, + "grad_norm": 1.0913411378860474, + "learning_rate": 7.81766763493952e-09, + "loss": 0.6555, + "step": 19122 + }, + { + "epoch": 0.9827834309795457, + "grad_norm": 1.141624927520752, + "learning_rate": 7.771215185586877e-09, + "loss": 0.6939, + "step": 19123 + }, + { + "epoch": 0.9828348237228903, + "grad_norm": 0.7152939438819885, + "learning_rate": 7.724901050292776e-09, + "loss": 0.688, + "step": 19124 + }, + { + "epoch": 0.982886216466235, + "grad_norm": 1.0655516386032104, + "learning_rate": 7.678725230339523e-09, + "loss": 0.7159, + "step": 19125 + }, + { + "epoch": 0.9829376092095796, + "grad_norm": 1.1815950870513916, + "learning_rate": 7.632687727006649e-09, + "loss": 0.6745, + "step": 19126 + }, + { + "epoch": 0.9829890019529243, + "grad_norm": 1.0789704322814941, + "learning_rate": 7.5867885415698e-09, + "loss": 0.6666, + "step": 19127 + }, + { + "epoch": 0.9830403946962689, + "grad_norm": 1.1177507638931274, + "learning_rate": 7.541027675300183e-09, + "loss": 0.7692, + "step": 19128 + }, + { + "epoch": 0.9830917874396136, + "grad_norm": 1.1244651079177856, + "learning_rate": 7.495405129466227e-09, + "loss": 0.6679, + "step": 19129 + }, + { + "epoch": 0.9831431801829582, + "grad_norm": 1.0718439817428589, + "learning_rate": 7.4499209053319195e-09, + "loss": 0.6809, + "step": 19130 + }, + { + "epoch": 0.9831945729263029, + "grad_norm": 0.9903975129127502, + "learning_rate": 7.404575004157366e-09, + "loss": 0.6413, + "step": 19131 + }, + { + "epoch": 0.9832459656696474, + "grad_norm": 1.2095125913619995, + "learning_rate": 7.3593674271987826e-09, + "loss": 0.7274, + "step": 19132 + }, + { + "epoch": 0.983297358412992, + "grad_norm": 1.072476863861084, + "learning_rate": 7.3142981757085006e-09, + "loss": 0.6719, + "step": 19133 + }, + { + "epoch": 0.9833487511563367, + "grad_norm": 1.0492796897888184, + "learning_rate": 7.2693672509355215e-09, + "loss": 0.6504, + "step": 19134 + }, + { + "epoch": 0.9834001438996813, + "grad_norm": 1.0729732513427734, + "learning_rate": 7.2245746541244055e-09, + "loss": 0.6778, + "step": 19135 + }, + { + "epoch": 0.983451536643026, + "grad_norm": 1.055814266204834, + "learning_rate": 7.179920386516381e-09, + "loss": 0.6753, + "step": 19136 + }, + { + "epoch": 0.9835029293863706, + "grad_norm": 0.7535542845726013, + "learning_rate": 7.135404449348793e-09, + "loss": 0.6658, + "step": 19137 + }, + { + "epoch": 0.9835543221297153, + "grad_norm": 1.0257213115692139, + "learning_rate": 7.091026843855098e-09, + "loss": 0.6883, + "step": 19138 + }, + { + "epoch": 0.9836057148730599, + "grad_norm": 0.7484007477760315, + "learning_rate": 7.046787571263758e-09, + "loss": 0.5925, + "step": 19139 + }, + { + "epoch": 0.9836571076164046, + "grad_norm": 1.067734956741333, + "learning_rate": 7.002686632802124e-09, + "loss": 0.6775, + "step": 19140 + }, + { + "epoch": 0.9837085003597492, + "grad_norm": 0.7192723155021667, + "learning_rate": 6.958724029690334e-09, + "loss": 0.6252, + "step": 19141 + }, + { + "epoch": 0.9837598931030939, + "grad_norm": 1.1488723754882812, + "learning_rate": 6.914899763147409e-09, + "loss": 0.6476, + "step": 19142 + }, + { + "epoch": 0.9838112858464385, + "grad_norm": 1.1378847360610962, + "learning_rate": 6.871213834387936e-09, + "loss": 0.6822, + "step": 19143 + }, + { + "epoch": 0.9838626785897832, + "grad_norm": 1.102319598197937, + "learning_rate": 6.8276662446215e-09, + "loss": 0.6462, + "step": 19144 + }, + { + "epoch": 0.9839140713331278, + "grad_norm": 1.0780017375946045, + "learning_rate": 6.784256995054361e-09, + "loss": 0.6819, + "step": 19145 + }, + { + "epoch": 0.9839654640764725, + "grad_norm": 1.1294132471084595, + "learning_rate": 6.74098608689e-09, + "loss": 0.7185, + "step": 19146 + }, + { + "epoch": 0.984016856819817, + "grad_norm": 0.9775769710540771, + "learning_rate": 6.697853521326903e-09, + "loss": 0.6296, + "step": 19147 + }, + { + "epoch": 0.9840682495631616, + "grad_norm": 0.6856719255447388, + "learning_rate": 6.654859299560223e-09, + "loss": 0.6196, + "step": 19148 + }, + { + "epoch": 0.9841196423065063, + "grad_norm": 1.0954852104187012, + "learning_rate": 6.6120034227812325e-09, + "loss": 0.7642, + "step": 19149 + }, + { + "epoch": 0.9841710350498509, + "grad_norm": 1.256247639656067, + "learning_rate": 6.5692858921767575e-09, + "loss": 0.7164, + "step": 19150 + }, + { + "epoch": 0.9842224277931956, + "grad_norm": 1.199968934059143, + "learning_rate": 6.526706708930853e-09, + "loss": 0.6516, + "step": 19151 + }, + { + "epoch": 0.9842738205365402, + "grad_norm": 1.0853835344314575, + "learning_rate": 6.484265874223128e-09, + "loss": 0.6676, + "step": 19152 + }, + { + "epoch": 0.9843252132798849, + "grad_norm": 0.7812142372131348, + "learning_rate": 6.441963389229311e-09, + "loss": 0.6347, + "step": 19153 + }, + { + "epoch": 0.9843766060232295, + "grad_norm": 0.741860032081604, + "learning_rate": 6.399799255121242e-09, + "loss": 0.6906, + "step": 19154 + }, + { + "epoch": 0.9844279987665742, + "grad_norm": 1.0499294996261597, + "learning_rate": 6.35777347306743e-09, + "loss": 0.7231, + "step": 19155 + }, + { + "epoch": 0.9844793915099188, + "grad_norm": 1.0360826253890991, + "learning_rate": 6.3158860442325e-09, + "loss": 0.6269, + "step": 19156 + }, + { + "epoch": 0.9845307842532635, + "grad_norm": 1.0612317323684692, + "learning_rate": 6.274136969776634e-09, + "loss": 0.6786, + "step": 19157 + }, + { + "epoch": 0.9845821769966081, + "grad_norm": 0.6755431294441223, + "learning_rate": 6.23252625085613e-09, + "loss": 0.6322, + "step": 19158 + }, + { + "epoch": 0.9846335697399528, + "grad_norm": 0.7971909046173096, + "learning_rate": 6.19105388862451e-09, + "loss": 0.6373, + "step": 19159 + }, + { + "epoch": 0.9846849624832974, + "grad_norm": 1.1279340982437134, + "learning_rate": 6.1497198842308535e-09, + "loss": 0.7082, + "step": 19160 + }, + { + "epoch": 0.984736355226642, + "grad_norm": 1.1069625616073608, + "learning_rate": 6.108524238819802e-09, + "loss": 0.7229, + "step": 19161 + }, + { + "epoch": 0.9847877479699867, + "grad_norm": 1.1133421659469604, + "learning_rate": 6.067466953532663e-09, + "loss": 0.7153, + "step": 19162 + }, + { + "epoch": 0.9848391407133312, + "grad_norm": 1.0618606805801392, + "learning_rate": 6.0265480295079724e-09, + "loss": 0.67, + "step": 19163 + }, + { + "epoch": 0.9848905334566759, + "grad_norm": 1.1177117824554443, + "learning_rate": 5.985767467878157e-09, + "loss": 0.7185, + "step": 19164 + }, + { + "epoch": 0.9849419262000205, + "grad_norm": 1.081763744354248, + "learning_rate": 5.9451252697739766e-09, + "loss": 0.6822, + "step": 19165 + }, + { + "epoch": 0.9849933189433652, + "grad_norm": 1.1279723644256592, + "learning_rate": 5.904621436320645e-09, + "loss": 0.6519, + "step": 19166 + }, + { + "epoch": 0.9850447116867098, + "grad_norm": 1.1024889945983887, + "learning_rate": 5.864255968641153e-09, + "loss": 0.694, + "step": 19167 + }, + { + "epoch": 0.9850961044300545, + "grad_norm": 1.078163981437683, + "learning_rate": 5.824028867853493e-09, + "loss": 0.7349, + "step": 19168 + }, + { + "epoch": 0.9851474971733991, + "grad_norm": 1.0550600290298462, + "learning_rate": 5.78394013507233e-09, + "loss": 0.6831, + "step": 19169 + }, + { + "epoch": 0.9851988899167438, + "grad_norm": 1.13813316822052, + "learning_rate": 5.743989771408443e-09, + "loss": 0.7109, + "step": 19170 + }, + { + "epoch": 0.9852502826600884, + "grad_norm": 1.0683363676071167, + "learning_rate": 5.704177777968167e-09, + "loss": 0.6855, + "step": 19171 + }, + { + "epoch": 0.985301675403433, + "grad_norm": 1.1147421598434448, + "learning_rate": 5.664504155855066e-09, + "loss": 0.6867, + "step": 19172 + }, + { + "epoch": 0.9853530681467777, + "grad_norm": 1.1705666780471802, + "learning_rate": 5.62496890616826e-09, + "loss": 0.7796, + "step": 19173 + }, + { + "epoch": 0.9854044608901223, + "grad_norm": 1.0893540382385254, + "learning_rate": 5.5855720300024285e-09, + "loss": 0.7028, + "step": 19174 + }, + { + "epoch": 0.985455853633467, + "grad_norm": 1.084223985671997, + "learning_rate": 5.546313528450031e-09, + "loss": 0.6677, + "step": 19175 + }, + { + "epoch": 0.9855072463768116, + "grad_norm": 1.045938491821289, + "learning_rate": 5.507193402598532e-09, + "loss": 0.6436, + "step": 19176 + }, + { + "epoch": 0.9855586391201563, + "grad_norm": 1.0392664670944214, + "learning_rate": 5.468211653531508e-09, + "loss": 0.6835, + "step": 19177 + }, + { + "epoch": 0.9856100318635008, + "grad_norm": 0.6864719986915588, + "learning_rate": 5.429368282329206e-09, + "loss": 0.6343, + "step": 19178 + }, + { + "epoch": 0.9856614246068455, + "grad_norm": 1.2241264581680298, + "learning_rate": 5.390663290067988e-09, + "loss": 0.7432, + "step": 19179 + }, + { + "epoch": 0.9857128173501901, + "grad_norm": 1.0778214931488037, + "learning_rate": 5.352096677819774e-09, + "loss": 0.6722, + "step": 19180 + }, + { + "epoch": 0.9857642100935348, + "grad_norm": 0.6745914220809937, + "learning_rate": 5.313668446653153e-09, + "loss": 0.6947, + "step": 19181 + }, + { + "epoch": 0.9858156028368794, + "grad_norm": 1.120334506034851, + "learning_rate": 5.27537859763283e-09, + "loss": 0.6699, + "step": 19182 + }, + { + "epoch": 0.9858669955802241, + "grad_norm": 1.0697579383850098, + "learning_rate": 5.237227131819622e-09, + "loss": 0.7022, + "step": 19183 + }, + { + "epoch": 0.9859183883235687, + "grad_norm": 1.0539544820785522, + "learning_rate": 5.199214050271018e-09, + "loss": 0.7388, + "step": 19184 + }, + { + "epoch": 0.9859697810669134, + "grad_norm": 1.1074775457382202, + "learning_rate": 5.161339354040062e-09, + "loss": 0.6944, + "step": 19185 + }, + { + "epoch": 0.986021173810258, + "grad_norm": 1.140161395072937, + "learning_rate": 5.123603044175362e-09, + "loss": 0.6932, + "step": 19186 + }, + { + "epoch": 0.9860725665536026, + "grad_norm": 0.7605612277984619, + "learning_rate": 5.086005121723303e-09, + "loss": 0.6379, + "step": 19187 + }, + { + "epoch": 0.9861239592969473, + "grad_norm": 1.0699491500854492, + "learning_rate": 5.0485455877252735e-09, + "loss": 0.6653, + "step": 19188 + }, + { + "epoch": 0.9861753520402919, + "grad_norm": 0.8272678256034851, + "learning_rate": 5.011224443218776e-09, + "loss": 0.622, + "step": 19189 + }, + { + "epoch": 0.9862267447836366, + "grad_norm": 0.7901523113250732, + "learning_rate": 4.97404168923854e-09, + "loss": 0.6459, + "step": 19190 + }, + { + "epoch": 0.9862781375269812, + "grad_norm": 0.7620747089385986, + "learning_rate": 4.936997326813742e-09, + "loss": 0.6946, + "step": 19191 + }, + { + "epoch": 0.9863295302703259, + "grad_norm": 1.148488998413086, + "learning_rate": 4.900091356971892e-09, + "loss": 0.7208, + "step": 19192 + }, + { + "epoch": 0.9863809230136704, + "grad_norm": 0.7960978746414185, + "learning_rate": 4.863323780734952e-09, + "loss": 0.6766, + "step": 19193 + }, + { + "epoch": 0.9864323157570151, + "grad_norm": 1.1352416276931763, + "learning_rate": 4.826694599121551e-09, + "loss": 0.6709, + "step": 19194 + }, + { + "epoch": 0.9864837085003597, + "grad_norm": 1.0732593536376953, + "learning_rate": 4.790203813146433e-09, + "loss": 0.6695, + "step": 19195 + }, + { + "epoch": 0.9865351012437044, + "grad_norm": 1.127615213394165, + "learning_rate": 4.75385142382101e-09, + "loss": 0.6609, + "step": 19196 + }, + { + "epoch": 0.986586493987049, + "grad_norm": 0.8002522587776184, + "learning_rate": 4.7176374321517004e-09, + "loss": 0.6381, + "step": 19197 + }, + { + "epoch": 0.9866378867303937, + "grad_norm": 1.1601793766021729, + "learning_rate": 4.6815618391427005e-09, + "loss": 0.6586, + "step": 19198 + }, + { + "epoch": 0.9866892794737383, + "grad_norm": 1.1371480226516724, + "learning_rate": 4.645624645793212e-09, + "loss": 0.7339, + "step": 19199 + }, + { + "epoch": 0.986740672217083, + "grad_norm": 1.1167134046554565, + "learning_rate": 4.609825853099104e-09, + "loss": 0.7273, + "step": 19200 + }, + { + "epoch": 0.9867920649604276, + "grad_norm": 1.046738624572754, + "learning_rate": 4.57416546205125e-09, + "loss": 0.6663, + "step": 19201 + }, + { + "epoch": 0.9868434577037722, + "grad_norm": 1.2537602186203003, + "learning_rate": 4.53864347363886e-09, + "loss": 0.6941, + "step": 19202 + }, + { + "epoch": 0.9868948504471169, + "grad_norm": 1.073128581047058, + "learning_rate": 4.50325988884559e-09, + "loss": 0.7056, + "step": 19203 + }, + { + "epoch": 0.9869462431904615, + "grad_norm": 1.1635302305221558, + "learning_rate": 4.468014708651769e-09, + "loss": 0.6861, + "step": 19204 + }, + { + "epoch": 0.9869976359338062, + "grad_norm": 1.0978872776031494, + "learning_rate": 4.432907934033836e-09, + "loss": 0.6761, + "step": 19205 + }, + { + "epoch": 0.9870490286771508, + "grad_norm": 1.0596505403518677, + "learning_rate": 4.397939565964349e-09, + "loss": 0.6528, + "step": 19206 + }, + { + "epoch": 0.9871004214204955, + "grad_norm": 1.0515906810760498, + "learning_rate": 4.36310960541253e-09, + "loss": 0.6755, + "step": 19207 + }, + { + "epoch": 0.98715181416384, + "grad_norm": 0.7850010991096497, + "learning_rate": 4.328418053343164e-09, + "loss": 0.5906, + "step": 19208 + }, + { + "epoch": 0.9872032069071847, + "grad_norm": 1.1051132678985596, + "learning_rate": 4.293864910717704e-09, + "loss": 0.6903, + "step": 19209 + }, + { + "epoch": 0.9872545996505293, + "grad_norm": 1.102198600769043, + "learning_rate": 4.259450178492608e-09, + "loss": 0.7134, + "step": 19210 + }, + { + "epoch": 0.987305992393874, + "grad_norm": 1.0436303615570068, + "learning_rate": 4.225173857622111e-09, + "loss": 0.6876, + "step": 19211 + }, + { + "epoch": 0.9873573851372186, + "grad_norm": 1.0251169204711914, + "learning_rate": 4.191035949056011e-09, + "loss": 0.7034, + "step": 19212 + }, + { + "epoch": 0.9874087778805632, + "grad_norm": 1.1378138065338135, + "learning_rate": 4.15703645373966e-09, + "loss": 0.7146, + "step": 19213 + }, + { + "epoch": 0.9874601706239079, + "grad_norm": 1.1331069469451904, + "learning_rate": 4.1231753726150844e-09, + "loss": 0.6707, + "step": 19214 + }, + { + "epoch": 0.9875115633672525, + "grad_norm": 1.0966565608978271, + "learning_rate": 4.0894527066204226e-09, + "loss": 0.6803, + "step": 19215 + }, + { + "epoch": 0.9875629561105972, + "grad_norm": 1.1502457857131958, + "learning_rate": 4.055868456690482e-09, + "loss": 0.7299, + "step": 19216 + }, + { + "epoch": 0.9876143488539418, + "grad_norm": 1.1860049962997437, + "learning_rate": 4.022422623755073e-09, + "loss": 0.6803, + "step": 19217 + }, + { + "epoch": 0.9876657415972865, + "grad_norm": 1.07864511013031, + "learning_rate": 3.989115208741789e-09, + "loss": 0.7067, + "step": 19218 + }, + { + "epoch": 0.9877171343406311, + "grad_norm": 1.0841528177261353, + "learning_rate": 3.955946212572115e-09, + "loss": 0.7246, + "step": 19219 + }, + { + "epoch": 0.9877685270839758, + "grad_norm": 1.13443124294281, + "learning_rate": 3.922915636165869e-09, + "loss": 0.6953, + "step": 19220 + }, + { + "epoch": 0.9878199198273204, + "grad_norm": 1.132979154586792, + "learning_rate": 3.890023480437876e-09, + "loss": 0.7386, + "step": 19221 + }, + { + "epoch": 0.9878713125706651, + "grad_norm": 1.1379449367523193, + "learning_rate": 3.857269746300185e-09, + "loss": 0.663, + "step": 19222 + }, + { + "epoch": 0.9879227053140096, + "grad_norm": 1.183051347732544, + "learning_rate": 3.824654434659291e-09, + "loss": 0.6806, + "step": 19223 + }, + { + "epoch": 0.9879740980573543, + "grad_norm": 0.6957756876945496, + "learning_rate": 3.792177546418918e-09, + "loss": 0.6417, + "step": 19224 + }, + { + "epoch": 0.9880254908006989, + "grad_norm": 1.1658682823181152, + "learning_rate": 3.7598390824794554e-09, + "loss": 0.6569, + "step": 19225 + }, + { + "epoch": 0.9880768835440435, + "grad_norm": 1.2409968376159668, + "learning_rate": 3.7276390437368526e-09, + "loss": 0.75, + "step": 19226 + }, + { + "epoch": 0.9881282762873882, + "grad_norm": 0.7006943225860596, + "learning_rate": 3.69557743108262e-09, + "loss": 0.6273, + "step": 19227 + }, + { + "epoch": 0.9881796690307328, + "grad_norm": 1.140687346458435, + "learning_rate": 3.66365424540549e-09, + "loss": 0.7123, + "step": 19228 + }, + { + "epoch": 0.9882310617740775, + "grad_norm": 1.1553950309753418, + "learning_rate": 3.6318694875903114e-09, + "loss": 0.709, + "step": 19229 + }, + { + "epoch": 0.9882824545174221, + "grad_norm": 1.0695443153381348, + "learning_rate": 3.6002231585163807e-09, + "loss": 0.6669, + "step": 19230 + }, + { + "epoch": 0.9883338472607668, + "grad_norm": 1.0534558296203613, + "learning_rate": 3.5687152590618833e-09, + "loss": 0.642, + "step": 19231 + }, + { + "epoch": 0.9883852400041114, + "grad_norm": 1.0932321548461914, + "learning_rate": 3.5373457900994557e-09, + "loss": 0.6906, + "step": 19232 + }, + { + "epoch": 0.9884366327474561, + "grad_norm": 1.0996112823486328, + "learning_rate": 3.5061147524972916e-09, + "loss": 0.7088, + "step": 19233 + }, + { + "epoch": 0.9884880254908007, + "grad_norm": 1.1056674718856812, + "learning_rate": 3.4750221471219203e-09, + "loss": 0.7206, + "step": 19234 + }, + { + "epoch": 0.9885394182341454, + "grad_norm": 1.1441634893417358, + "learning_rate": 3.4440679748337647e-09, + "loss": 0.7442, + "step": 19235 + }, + { + "epoch": 0.98859081097749, + "grad_norm": 1.2682429552078247, + "learning_rate": 3.413252236491582e-09, + "loss": 0.702, + "step": 19236 + }, + { + "epoch": 0.9886422037208347, + "grad_norm": 1.0489912033081055, + "learning_rate": 3.382574932948024e-09, + "loss": 0.6646, + "step": 19237 + }, + { + "epoch": 0.9886935964641792, + "grad_norm": 1.0879982709884644, + "learning_rate": 3.352036065053521e-09, + "loss": 0.6808, + "step": 19238 + }, + { + "epoch": 0.9887449892075239, + "grad_norm": 1.1172878742218018, + "learning_rate": 3.3216356336540632e-09, + "loss": 0.7368, + "step": 19239 + }, + { + "epoch": 0.9887963819508685, + "grad_norm": 1.107009768486023, + "learning_rate": 3.29137363959231e-09, + "loss": 0.6807, + "step": 19240 + }, + { + "epoch": 0.9888477746942131, + "grad_norm": 1.0634379386901855, + "learning_rate": 3.2612500837064797e-09, + "loss": 0.6292, + "step": 19241 + }, + { + "epoch": 0.9888991674375578, + "grad_norm": 0.7787332534790039, + "learning_rate": 3.2312649668309047e-09, + "loss": 0.624, + "step": 19242 + }, + { + "epoch": 0.9889505601809024, + "grad_norm": 1.0656942129135132, + "learning_rate": 3.201418289796032e-09, + "loss": 0.6707, + "step": 19243 + }, + { + "epoch": 0.9890019529242471, + "grad_norm": 1.1543809175491333, + "learning_rate": 3.171710053430088e-09, + "loss": 0.6567, + "step": 19244 + }, + { + "epoch": 0.9890533456675917, + "grad_norm": 1.1073237657546997, + "learning_rate": 3.142140258555193e-09, + "loss": 0.7106, + "step": 19245 + }, + { + "epoch": 0.9891047384109364, + "grad_norm": 1.0966774225234985, + "learning_rate": 3.112708905990136e-09, + "loss": 0.6869, + "step": 19246 + }, + { + "epoch": 0.989156131154281, + "grad_norm": 1.0618302822113037, + "learning_rate": 3.0834159965514866e-09, + "loss": 0.6506, + "step": 19247 + }, + { + "epoch": 0.9892075238976257, + "grad_norm": 1.0517992973327637, + "learning_rate": 3.0542615310502623e-09, + "loss": 0.644, + "step": 19248 + }, + { + "epoch": 0.9892589166409703, + "grad_norm": 1.1183230876922607, + "learning_rate": 3.0252455102947056e-09, + "loss": 0.7027, + "step": 19249 + }, + { + "epoch": 0.989310309384315, + "grad_norm": 1.1745147705078125, + "learning_rate": 2.9963679350875075e-09, + "loss": 0.744, + "step": 19250 + }, + { + "epoch": 0.9893617021276596, + "grad_norm": 0.7406668663024902, + "learning_rate": 2.967628806229694e-09, + "loss": 0.6428, + "step": 19251 + }, + { + "epoch": 0.9894130948710043, + "grad_norm": 1.0840258598327637, + "learning_rate": 2.939028124517851e-09, + "loss": 0.7137, + "step": 19252 + }, + { + "epoch": 0.9894644876143489, + "grad_norm": 1.0861191749572754, + "learning_rate": 2.910565890743566e-09, + "loss": 0.7174, + "step": 19253 + }, + { + "epoch": 0.9895158803576934, + "grad_norm": 1.0830777883529663, + "learning_rate": 2.882242105695654e-09, + "loss": 0.7272, + "step": 19254 + }, + { + "epoch": 0.9895672731010381, + "grad_norm": 1.05614173412323, + "learning_rate": 2.8540567701590414e-09, + "loss": 0.6591, + "step": 19255 + }, + { + "epoch": 0.9896186658443827, + "grad_norm": 1.005201816558838, + "learning_rate": 2.826009884914771e-09, + "loss": 0.6358, + "step": 19256 + }, + { + "epoch": 0.9896700585877274, + "grad_norm": 0.6927486062049866, + "learning_rate": 2.7981014507394434e-09, + "loss": 0.645, + "step": 19257 + }, + { + "epoch": 0.989721451331072, + "grad_norm": 1.0683655738830566, + "learning_rate": 2.7703314684063286e-09, + "loss": 0.664, + "step": 19258 + }, + { + "epoch": 0.9897728440744167, + "grad_norm": 1.0542893409729004, + "learning_rate": 2.742699938684812e-09, + "loss": 0.7179, + "step": 19259 + }, + { + "epoch": 0.9898242368177613, + "grad_norm": 1.1283996105194092, + "learning_rate": 2.7152068623409466e-09, + "loss": 0.7214, + "step": 19260 + }, + { + "epoch": 0.989875629561106, + "grad_norm": 1.095484733581543, + "learning_rate": 2.687852240136346e-09, + "loss": 0.6845, + "step": 19261 + }, + { + "epoch": 0.9899270223044506, + "grad_norm": 1.7568156719207764, + "learning_rate": 2.6606360728281823e-09, + "loss": 0.6367, + "step": 19262 + }, + { + "epoch": 0.9899784150477953, + "grad_norm": 1.0557917356491089, + "learning_rate": 2.6335583611714067e-09, + "loss": 0.6645, + "step": 19263 + }, + { + "epoch": 0.9900298077911399, + "grad_norm": 1.1051298379898071, + "learning_rate": 2.60661910591542e-09, + "loss": 0.6987, + "step": 19264 + }, + { + "epoch": 0.9900812005344846, + "grad_norm": 1.1440355777740479, + "learning_rate": 2.5798183078074023e-09, + "loss": 0.6958, + "step": 19265 + }, + { + "epoch": 0.9901325932778292, + "grad_norm": 1.139714002609253, + "learning_rate": 2.5531559675889827e-09, + "loss": 0.728, + "step": 19266 + }, + { + "epoch": 0.9901839860211739, + "grad_norm": 1.1090095043182373, + "learning_rate": 2.5266320859995697e-09, + "loss": 0.7134, + "step": 19267 + }, + { + "epoch": 0.9902353787645185, + "grad_norm": 0.6799173355102539, + "learning_rate": 2.5002466637741306e-09, + "loss": 0.6505, + "step": 19268 + }, + { + "epoch": 0.990286771507863, + "grad_norm": 1.1176661252975464, + "learning_rate": 2.473999701643193e-09, + "loss": 0.7034, + "step": 19269 + }, + { + "epoch": 0.9903381642512077, + "grad_norm": 1.1120586395263672, + "learning_rate": 2.4478912003339517e-09, + "loss": 0.6973, + "step": 19270 + }, + { + "epoch": 0.9903895569945523, + "grad_norm": 1.097770094871521, + "learning_rate": 2.4219211605702733e-09, + "loss": 0.6337, + "step": 19271 + }, + { + "epoch": 0.990440949737897, + "grad_norm": 0.6981989145278931, + "learning_rate": 2.3960895830710262e-09, + "loss": 0.6547, + "step": 19272 + }, + { + "epoch": 0.9904923424812416, + "grad_norm": 1.1528936624526978, + "learning_rate": 2.370396468552305e-09, + "loss": 0.6989, + "step": 19273 + }, + { + "epoch": 0.9905437352245863, + "grad_norm": 1.1162132024765015, + "learning_rate": 2.3448418177263177e-09, + "loss": 0.7451, + "step": 19274 + }, + { + "epoch": 0.9905951279679309, + "grad_norm": 1.099804401397705, + "learning_rate": 2.319425631300276e-09, + "loss": 0.6712, + "step": 19275 + }, + { + "epoch": 0.9906465207112756, + "grad_norm": 1.039446473121643, + "learning_rate": 2.294147909978617e-09, + "loss": 0.6803, + "step": 19276 + }, + { + "epoch": 0.9906979134546202, + "grad_norm": 1.0218850374221802, + "learning_rate": 2.2690086544624456e-09, + "loss": 0.7064, + "step": 19277 + }, + { + "epoch": 0.9907493061979649, + "grad_norm": 1.1670374870300293, + "learning_rate": 2.244007865446762e-09, + "loss": 0.7159, + "step": 19278 + }, + { + "epoch": 0.9908006989413095, + "grad_norm": 1.0085628032684326, + "learning_rate": 2.2191455436254562e-09, + "loss": 0.6435, + "step": 19279 + }, + { + "epoch": 0.9908520916846542, + "grad_norm": 1.1783610582351685, + "learning_rate": 2.1944216896874203e-09, + "loss": 0.6745, + "step": 19280 + }, + { + "epoch": 0.9909034844279988, + "grad_norm": 1.1046825647354126, + "learning_rate": 2.169836304317108e-09, + "loss": 0.6931, + "step": 19281 + }, + { + "epoch": 0.9909548771713435, + "grad_norm": 1.093119740486145, + "learning_rate": 2.145389388195085e-09, + "loss": 0.6337, + "step": 19282 + }, + { + "epoch": 0.9910062699146881, + "grad_norm": 1.1016041040420532, + "learning_rate": 2.1210809420002533e-09, + "loss": 0.7049, + "step": 19283 + }, + { + "epoch": 0.9910576626580326, + "grad_norm": 1.0662263631820679, + "learning_rate": 2.0969109664048525e-09, + "loss": 0.6937, + "step": 19284 + }, + { + "epoch": 0.9911090554013773, + "grad_norm": 1.060379981994629, + "learning_rate": 2.0728794620789026e-09, + "loss": 0.6962, + "step": 19285 + }, + { + "epoch": 0.9911604481447219, + "grad_norm": 1.0481971502304077, + "learning_rate": 2.0489864296879825e-09, + "loss": 0.7024, + "step": 19286 + }, + { + "epoch": 0.9912118408880666, + "grad_norm": 1.0933082103729248, + "learning_rate": 2.0252318698948946e-09, + "loss": 0.6486, + "step": 19287 + }, + { + "epoch": 0.9912632336314112, + "grad_norm": 0.6570514440536499, + "learning_rate": 2.001615783356892e-09, + "loss": 0.6203, + "step": 19288 + }, + { + "epoch": 0.9913146263747559, + "grad_norm": 1.102957010269165, + "learning_rate": 1.978138170729005e-09, + "loss": 0.7241, + "step": 19289 + }, + { + "epoch": 0.9913660191181005, + "grad_norm": 0.7595608234405518, + "learning_rate": 1.954799032660715e-09, + "loss": 0.6532, + "step": 19290 + }, + { + "epoch": 0.9914174118614452, + "grad_norm": 1.0640813112258911, + "learning_rate": 1.931598369799281e-09, + "loss": 0.7241, + "step": 19291 + }, + { + "epoch": 0.9914688046047898, + "grad_norm": 1.180168628692627, + "learning_rate": 1.908536182787524e-09, + "loss": 0.7511, + "step": 19292 + }, + { + "epoch": 0.9915201973481345, + "grad_norm": 1.077062964439392, + "learning_rate": 1.885612472264375e-09, + "loss": 0.674, + "step": 19293 + }, + { + "epoch": 0.9915715900914791, + "grad_norm": 1.130330204963684, + "learning_rate": 1.862827238865439e-09, + "loss": 0.7047, + "step": 19294 + }, + { + "epoch": 0.9916229828348238, + "grad_norm": 1.1044999361038208, + "learning_rate": 1.8401804832207659e-09, + "loss": 0.7291, + "step": 19295 + }, + { + "epoch": 0.9916743755781684, + "grad_norm": 0.7969647645950317, + "learning_rate": 1.8176722059587426e-09, + "loss": 0.6745, + "step": 19296 + }, + { + "epoch": 0.991725768321513, + "grad_norm": 1.1171493530273438, + "learning_rate": 1.7953024077027591e-09, + "loss": 0.6517, + "step": 19297 + }, + { + "epoch": 0.9917771610648577, + "grad_norm": 1.0566861629486084, + "learning_rate": 1.77307108907232e-09, + "loss": 0.7163, + "step": 19298 + }, + { + "epoch": 0.9918285538082022, + "grad_norm": 0.8097507357597351, + "learning_rate": 1.7509782506841544e-09, + "loss": 0.6759, + "step": 19299 + }, + { + "epoch": 0.9918799465515469, + "grad_norm": 1.1557252407073975, + "learning_rate": 1.7290238931494396e-09, + "loss": 0.6738, + "step": 19300 + }, + { + "epoch": 0.9919313392948915, + "grad_norm": 1.0709545612335205, + "learning_rate": 1.707208017076578e-09, + "loss": 0.6949, + "step": 19301 + }, + { + "epoch": 0.9919827320382362, + "grad_norm": 1.0855231285095215, + "learning_rate": 1.6855306230706414e-09, + "loss": 0.6368, + "step": 19302 + }, + { + "epoch": 0.9920341247815808, + "grad_norm": 1.8344392776489258, + "learning_rate": 1.663991711731705e-09, + "loss": 0.6544, + "step": 19303 + }, + { + "epoch": 0.9920855175249255, + "grad_norm": 1.1377217769622803, + "learning_rate": 1.642591283656514e-09, + "loss": 0.7408, + "step": 19304 + }, + { + "epoch": 0.9921369102682701, + "grad_norm": 0.8285262584686279, + "learning_rate": 1.621329339438482e-09, + "loss": 0.6368, + "step": 19305 + }, + { + "epoch": 0.9921883030116148, + "grad_norm": 1.1041715145111084, + "learning_rate": 1.600205879666028e-09, + "loss": 0.7292, + "step": 19306 + }, + { + "epoch": 0.9922396957549594, + "grad_norm": 1.0027024745941162, + "learning_rate": 1.5792209049247942e-09, + "loss": 0.6552, + "step": 19307 + }, + { + "epoch": 0.992291088498304, + "grad_norm": 1.0270957946777344, + "learning_rate": 1.5583744157959824e-09, + "loss": 0.6531, + "step": 19308 + }, + { + "epoch": 0.9923424812416487, + "grad_norm": 0.9946281909942627, + "learning_rate": 1.5376664128574636e-09, + "loss": 0.6692, + "step": 19309 + }, + { + "epoch": 0.9923938739849933, + "grad_norm": 1.036358118057251, + "learning_rate": 1.5170968966826682e-09, + "loss": 0.6735, + "step": 19310 + }, + { + "epoch": 0.992445266728338, + "grad_norm": 1.0617839097976685, + "learning_rate": 1.4966658678416957e-09, + "loss": 0.6708, + "step": 19311 + }, + { + "epoch": 0.9924966594716826, + "grad_norm": 1.2486793994903564, + "learning_rate": 1.4763733269002044e-09, + "loss": 0.7248, + "step": 19312 + }, + { + "epoch": 0.9925480522150273, + "grad_norm": 1.0622004270553589, + "learning_rate": 1.456219274421078e-09, + "loss": 0.6449, + "step": 19313 + }, + { + "epoch": 0.9925994449583718, + "grad_norm": 1.1487758159637451, + "learning_rate": 1.4362037109622028e-09, + "loss": 0.6286, + "step": 19314 + }, + { + "epoch": 0.9926508377017165, + "grad_norm": 1.1095880270004272, + "learning_rate": 1.4163266370781358e-09, + "loss": 0.7358, + "step": 19315 + }, + { + "epoch": 0.9927022304450611, + "grad_norm": 1.0970951318740845, + "learning_rate": 1.3965880533195476e-09, + "loss": 0.6845, + "step": 19316 + }, + { + "epoch": 0.9927536231884058, + "grad_norm": 1.1831855773925781, + "learning_rate": 1.3769879602337776e-09, + "loss": 0.649, + "step": 19317 + }, + { + "epoch": 0.9928050159317504, + "grad_norm": 1.0302222967147827, + "learning_rate": 1.3575263583637256e-09, + "loss": 0.6804, + "step": 19318 + }, + { + "epoch": 0.9928564086750951, + "grad_norm": 0.7512422800064087, + "learning_rate": 1.338203248248404e-09, + "loss": 0.629, + "step": 19319 + }, + { + "epoch": 0.9929078014184397, + "grad_norm": 0.7811951041221619, + "learning_rate": 1.3190186304229413e-09, + "loss": 0.61, + "step": 19320 + }, + { + "epoch": 0.9929591941617844, + "grad_norm": 1.157462239265442, + "learning_rate": 1.2999725054191338e-09, + "loss": 0.694, + "step": 19321 + }, + { + "epoch": 0.993010586905129, + "grad_norm": 0.8165507912635803, + "learning_rate": 1.2810648737648924e-09, + "loss": 0.6401, + "step": 19322 + }, + { + "epoch": 0.9930619796484736, + "grad_norm": 1.0097500085830688, + "learning_rate": 1.2622957359836873e-09, + "loss": 0.6468, + "step": 19323 + }, + { + "epoch": 0.9931133723918183, + "grad_norm": 0.6846978068351746, + "learning_rate": 1.2436650925956584e-09, + "loss": 0.632, + "step": 19324 + }, + { + "epoch": 0.9931647651351629, + "grad_norm": 0.7729439735412598, + "learning_rate": 1.2251729441176142e-09, + "loss": 0.5841, + "step": 19325 + }, + { + "epoch": 0.9932161578785076, + "grad_norm": 1.0360852479934692, + "learning_rate": 1.2068192910602573e-09, + "loss": 0.6509, + "step": 19326 + }, + { + "epoch": 0.9932675506218522, + "grad_norm": 0.6819604635238647, + "learning_rate": 1.1886041339337352e-09, + "loss": 0.6006, + "step": 19327 + }, + { + "epoch": 0.9933189433651969, + "grad_norm": 1.020704746246338, + "learning_rate": 1.1705274732420891e-09, + "loss": 0.6806, + "step": 19328 + }, + { + "epoch": 0.9933703361085414, + "grad_norm": 1.1895776987075806, + "learning_rate": 1.1525893094865848e-09, + "loss": 0.7034, + "step": 19329 + }, + { + "epoch": 0.9934217288518861, + "grad_norm": 1.1127362251281738, + "learning_rate": 1.1347896431629368e-09, + "loss": 0.6764, + "step": 19330 + }, + { + "epoch": 0.9934731215952307, + "grad_norm": 1.1071585416793823, + "learning_rate": 1.1171284747657497e-09, + "loss": 0.6842, + "step": 19331 + }, + { + "epoch": 0.9935245143385754, + "grad_norm": 1.3185594081878662, + "learning_rate": 1.0996058047835212e-09, + "loss": 0.7139, + "step": 19332 + }, + { + "epoch": 0.99357590708192, + "grad_norm": 0.7500571608543396, + "learning_rate": 1.0822216337025293e-09, + "loss": 0.6276, + "step": 19333 + }, + { + "epoch": 0.9936272998252647, + "grad_norm": 1.1356713771820068, + "learning_rate": 1.0649759620029454e-09, + "loss": 0.7597, + "step": 19334 + }, + { + "epoch": 0.9936786925686093, + "grad_norm": 1.1430342197418213, + "learning_rate": 1.0478687901643858e-09, + "loss": 0.6902, + "step": 19335 + }, + { + "epoch": 0.993730085311954, + "grad_norm": 0.8056836128234863, + "learning_rate": 1.0309001186592504e-09, + "loss": 0.6206, + "step": 19336 + }, + { + "epoch": 0.9937814780552986, + "grad_norm": 1.074803352355957, + "learning_rate": 1.014069947958829e-09, + "loss": 0.6539, + "step": 19337 + }, + { + "epoch": 0.9938328707986432, + "grad_norm": 0.9977484941482544, + "learning_rate": 9.973782785283048e-10, + "loss": 0.6842, + "step": 19338 + }, + { + "epoch": 0.9938842635419879, + "grad_norm": 0.7673470973968506, + "learning_rate": 9.808251108311961e-10, + "loss": 0.6204, + "step": 19339 + }, + { + "epoch": 0.9939356562853325, + "grad_norm": 0.760940670967102, + "learning_rate": 9.6441044532547e-10, + "loss": 0.6269, + "step": 19340 + }, + { + "epoch": 0.9939870490286772, + "grad_norm": 1.1634966135025024, + "learning_rate": 9.481342824663175e-10, + "loss": 0.7114, + "step": 19341 + }, + { + "epoch": 0.9940384417720218, + "grad_norm": 0.7205185294151306, + "learning_rate": 9.319966227044897e-10, + "loss": 0.6165, + "step": 19342 + }, + { + "epoch": 0.9940898345153665, + "grad_norm": 1.0723607540130615, + "learning_rate": 9.159974664874061e-10, + "loss": 0.6538, + "step": 19343 + }, + { + "epoch": 0.9941412272587111, + "grad_norm": 1.0852857828140259, + "learning_rate": 9.001368142574907e-10, + "loss": 0.6644, + "step": 19344 + }, + { + "epoch": 0.9941926200020557, + "grad_norm": 1.0827651023864746, + "learning_rate": 8.844146664549468e-10, + "loss": 0.7022, + "step": 19345 + }, + { + "epoch": 0.9942440127454003, + "grad_norm": 1.10652494430542, + "learning_rate": 8.688310235149822e-10, + "loss": 0.673, + "step": 19346 + }, + { + "epoch": 0.994295405488745, + "grad_norm": 1.0643011331558228, + "learning_rate": 8.533858858700283e-10, + "loss": 0.7093, + "step": 19347 + }, + { + "epoch": 0.9943467982320896, + "grad_norm": 1.102324366569519, + "learning_rate": 8.380792539469663e-10, + "loss": 0.6672, + "step": 19348 + }, + { + "epoch": 0.9943981909754342, + "grad_norm": 1.1710870265960693, + "learning_rate": 8.229111281704561e-10, + "loss": 0.7245, + "step": 19349 + }, + { + "epoch": 0.9944495837187789, + "grad_norm": 1.0414108037948608, + "learning_rate": 8.078815089607173e-10, + "loss": 0.6828, + "step": 19350 + }, + { + "epoch": 0.9945009764621235, + "grad_norm": 1.0861867666244507, + "learning_rate": 7.929903967346387e-10, + "loss": 0.6848, + "step": 19351 + }, + { + "epoch": 0.9945523692054682, + "grad_norm": 0.8204453587532043, + "learning_rate": 7.782377919035577e-10, + "loss": 0.665, + "step": 19352 + }, + { + "epoch": 0.9946037619488128, + "grad_norm": 0.7217210531234741, + "learning_rate": 7.636236948771469e-10, + "loss": 0.6393, + "step": 19353 + }, + { + "epoch": 0.9946551546921575, + "grad_norm": 1.1099402904510498, + "learning_rate": 7.491481060606376e-10, + "loss": 0.741, + "step": 19354 + }, + { + "epoch": 0.9947065474355021, + "grad_norm": 1.1761648654937744, + "learning_rate": 7.3481102585371e-10, + "loss": 0.7044, + "step": 19355 + }, + { + "epoch": 0.9947579401788468, + "grad_norm": 1.095995306968689, + "learning_rate": 7.206124546549342e-10, + "loss": 0.7392, + "step": 19356 + }, + { + "epoch": 0.9948093329221914, + "grad_norm": 1.0842957496643066, + "learning_rate": 7.065523928567741e-10, + "loss": 0.7186, + "step": 19357 + }, + { + "epoch": 0.9948607256655361, + "grad_norm": 1.0737346410751343, + "learning_rate": 6.926308408494731e-10, + "loss": 0.6959, + "step": 19358 + }, + { + "epoch": 0.9949121184088807, + "grad_norm": 1.1387611627578735, + "learning_rate": 6.788477990188336e-10, + "loss": 0.7084, + "step": 19359 + }, + { + "epoch": 0.9949635111522253, + "grad_norm": 1.0786956548690796, + "learning_rate": 6.652032677456622e-10, + "loss": 0.7256, + "step": 19360 + }, + { + "epoch": 0.9950149038955699, + "grad_norm": 1.0833635330200195, + "learning_rate": 6.516972474090999e-10, + "loss": 0.6744, + "step": 19361 + }, + { + "epoch": 0.9950662966389145, + "grad_norm": 0.7598153948783875, + "learning_rate": 6.383297383827369e-10, + "loss": 0.6838, + "step": 19362 + }, + { + "epoch": 0.9951176893822592, + "grad_norm": 1.0951838493347168, + "learning_rate": 6.251007410373877e-10, + "loss": 0.7148, + "step": 19363 + }, + { + "epoch": 0.9951690821256038, + "grad_norm": 1.1390982866287231, + "learning_rate": 6.120102557388707e-10, + "loss": 0.6945, + "step": 19364 + }, + { + "epoch": 0.9952204748689485, + "grad_norm": 1.0435545444488525, + "learning_rate": 5.990582828502289e-10, + "loss": 0.6466, + "step": 19365 + }, + { + "epoch": 0.9952718676122931, + "grad_norm": 1.119020938873291, + "learning_rate": 5.862448227306195e-10, + "loss": 0.6679, + "step": 19366 + }, + { + "epoch": 0.9953232603556378, + "grad_norm": 1.1077371835708618, + "learning_rate": 5.735698757347586e-10, + "loss": 0.6964, + "step": 19367 + }, + { + "epoch": 0.9953746530989824, + "grad_norm": 1.0444180965423584, + "learning_rate": 5.610334422140317e-10, + "loss": 0.6235, + "step": 19368 + }, + { + "epoch": 0.9954260458423271, + "grad_norm": 1.052249550819397, + "learning_rate": 5.486355225153838e-10, + "loss": 0.6228, + "step": 19369 + }, + { + "epoch": 0.9954774385856717, + "grad_norm": 1.1058058738708496, + "learning_rate": 5.36376116982984e-10, + "loss": 0.6653, + "step": 19370 + }, + { + "epoch": 0.9955288313290164, + "grad_norm": 1.1063816547393799, + "learning_rate": 5.242552259554501e-10, + "loss": 0.6784, + "step": 19371 + }, + { + "epoch": 0.995580224072361, + "grad_norm": 0.6766038537025452, + "learning_rate": 5.122728497691798e-10, + "loss": 0.6236, + "step": 19372 + }, + { + "epoch": 0.9956316168157057, + "grad_norm": 1.0495966672897339, + "learning_rate": 5.004289887566849e-10, + "loss": 0.6843, + "step": 19373 + }, + { + "epoch": 0.9956830095590503, + "grad_norm": 1.1163429021835327, + "learning_rate": 4.887236432449261e-10, + "loss": 0.6562, + "step": 19374 + }, + { + "epoch": 0.9957344023023948, + "grad_norm": 1.1211323738098145, + "learning_rate": 4.771568135591986e-10, + "loss": 0.7072, + "step": 19375 + }, + { + "epoch": 0.9957857950457395, + "grad_norm": 1.0442214012145996, + "learning_rate": 4.657285000198019e-10, + "loss": 0.713, + "step": 19376 + }, + { + "epoch": 0.9958371877890841, + "grad_norm": 1.17401921749115, + "learning_rate": 4.544387029431496e-10, + "loss": 0.7511, + "step": 19377 + }, + { + "epoch": 0.9958885805324288, + "grad_norm": 0.7029767632484436, + "learning_rate": 4.4328742264176937e-10, + "loss": 0.625, + "step": 19378 + }, + { + "epoch": 0.9959399732757734, + "grad_norm": 1.0480568408966064, + "learning_rate": 4.322746594254135e-10, + "loss": 0.6551, + "step": 19379 + }, + { + "epoch": 0.9959913660191181, + "grad_norm": 1.1074014902114868, + "learning_rate": 4.21400413598283e-10, + "loss": 0.6887, + "step": 19380 + }, + { + "epoch": 0.9960427587624627, + "grad_norm": 1.145377278327942, + "learning_rate": 4.106646854623586e-10, + "loss": 0.734, + "step": 19381 + }, + { + "epoch": 0.9960941515058074, + "grad_norm": 1.07652747631073, + "learning_rate": 4.000674753151801e-10, + "loss": 0.746, + "step": 19382 + }, + { + "epoch": 0.996145544249152, + "grad_norm": 1.073360800743103, + "learning_rate": 3.896087834492912e-10, + "loss": 0.6807, + "step": 19383 + }, + { + "epoch": 0.9961969369924967, + "grad_norm": 1.1766273975372314, + "learning_rate": 3.792886101555704e-10, + "loss": 0.6981, + "step": 19384 + }, + { + "epoch": 0.9962483297358413, + "grad_norm": 1.1354820728302002, + "learning_rate": 3.6910695571934497e-10, + "loss": 0.6654, + "step": 19385 + }, + { + "epoch": 0.996299722479186, + "grad_norm": 1.211204171180725, + "learning_rate": 3.590638204231667e-10, + "loss": 0.678, + "step": 19386 + }, + { + "epoch": 0.9963511152225306, + "grad_norm": 0.7538310289382935, + "learning_rate": 3.491592045451464e-10, + "loss": 0.6567, + "step": 19387 + }, + { + "epoch": 0.9964025079658753, + "grad_norm": 0.8086171746253967, + "learning_rate": 3.393931083589541e-10, + "loss": 0.6838, + "step": 19388 + }, + { + "epoch": 0.9964539007092199, + "grad_norm": 1.116568684577942, + "learning_rate": 3.297655321365945e-10, + "loss": 0.7269, + "step": 19389 + }, + { + "epoch": 0.9965052934525644, + "grad_norm": 1.0952311754226685, + "learning_rate": 3.2027647614341074e-10, + "loss": 0.7112, + "step": 19390 + }, + { + "epoch": 0.9965566861959091, + "grad_norm": 1.048426628112793, + "learning_rate": 3.109259406430809e-10, + "loss": 0.7166, + "step": 19391 + }, + { + "epoch": 0.9966080789392537, + "grad_norm": 0.8646671772003174, + "learning_rate": 3.0171392589428693e-10, + "loss": 0.6076, + "step": 19392 + }, + { + "epoch": 0.9966594716825984, + "grad_norm": 0.8289965391159058, + "learning_rate": 2.9264043215293523e-10, + "loss": 0.6482, + "step": 19393 + }, + { + "epoch": 0.996710864425943, + "grad_norm": 1.1529563665390015, + "learning_rate": 2.837054596693811e-10, + "loss": 0.6602, + "step": 19394 + }, + { + "epoch": 0.9967622571692877, + "grad_norm": 1.1179028749465942, + "learning_rate": 2.7490900869231453e-10, + "loss": 0.7117, + "step": 19395 + }, + { + "epoch": 0.9968136499126323, + "grad_norm": 1.038568139076233, + "learning_rate": 2.6625107946431917e-10, + "loss": 0.619, + "step": 19396 + }, + { + "epoch": 0.996865042655977, + "grad_norm": 1.0913738012313843, + "learning_rate": 2.5773167222631344e-10, + "loss": 0.6518, + "step": 19397 + }, + { + "epoch": 0.9969164353993216, + "grad_norm": 1.0856178998947144, + "learning_rate": 2.4935078721366467e-10, + "loss": 0.763, + "step": 19398 + }, + { + "epoch": 0.9969678281426663, + "grad_norm": 1.0060220956802368, + "learning_rate": 2.4110842465840943e-10, + "loss": 0.6939, + "step": 19399 + }, + { + "epoch": 0.9970192208860109, + "grad_norm": 1.146921992301941, + "learning_rate": 2.3300458478980883e-10, + "loss": 0.7225, + "step": 19400 + }, + { + "epoch": 0.9970706136293556, + "grad_norm": 1.088821530342102, + "learning_rate": 2.250392678315727e-10, + "loss": 0.642, + "step": 19401 + }, + { + "epoch": 0.9971220063727002, + "grad_norm": 1.101847767829895, + "learning_rate": 2.1721247400463553e-10, + "loss": 0.7211, + "step": 19402 + }, + { + "epoch": 0.9971733991160449, + "grad_norm": 1.1838881969451904, + "learning_rate": 2.0952420352604587e-10, + "loss": 0.6958, + "step": 19403 + }, + { + "epoch": 0.9972247918593895, + "grad_norm": 1.1068248748779297, + "learning_rate": 2.0197445660841141e-10, + "loss": 0.6346, + "step": 19404 + }, + { + "epoch": 0.997276184602734, + "grad_norm": 1.0783617496490479, + "learning_rate": 1.9456323346100926e-10, + "loss": 0.6496, + "step": 19405 + }, + { + "epoch": 0.9973275773460787, + "grad_norm": 0.8151327967643738, + "learning_rate": 1.872905342897857e-10, + "loss": 0.6704, + "step": 19406 + }, + { + "epoch": 0.9973789700894233, + "grad_norm": 1.2066408395767212, + "learning_rate": 1.8015635929513608e-10, + "loss": 0.6906, + "step": 19407 + }, + { + "epoch": 0.997430362832768, + "grad_norm": 1.1067622900009155, + "learning_rate": 1.7316070867579027e-10, + "loss": 0.683, + "step": 19408 + }, + { + "epoch": 0.9974817555761126, + "grad_norm": 1.1232110261917114, + "learning_rate": 1.6630358262437195e-10, + "loss": 0.6773, + "step": 19409 + }, + { + "epoch": 0.9975331483194573, + "grad_norm": 1.0760105848312378, + "learning_rate": 1.5958498133239465e-10, + "loss": 0.6787, + "step": 19410 + }, + { + "epoch": 0.9975845410628019, + "grad_norm": 1.0958530902862549, + "learning_rate": 1.5300490498471044e-10, + "loss": 0.7423, + "step": 19411 + }, + { + "epoch": 0.9976359338061466, + "grad_norm": 1.0523499250411987, + "learning_rate": 1.4656335376450614e-10, + "loss": 0.7048, + "step": 19412 + }, + { + "epoch": 0.9976873265494912, + "grad_norm": 1.113553524017334, + "learning_rate": 1.4026032784941745e-10, + "loss": 0.7031, + "step": 19413 + }, + { + "epoch": 0.9977387192928359, + "grad_norm": 1.0779064893722534, + "learning_rate": 1.340958274148596e-10, + "loss": 0.6924, + "step": 19414 + }, + { + "epoch": 0.9977901120361805, + "grad_norm": 1.0766246318817139, + "learning_rate": 1.2806985263125182e-10, + "loss": 0.6902, + "step": 19415 + }, + { + "epoch": 0.9978415047795252, + "grad_norm": 1.086360216140747, + "learning_rate": 1.2218240366512756e-10, + "loss": 0.6549, + "step": 19416 + }, + { + "epoch": 0.9978928975228698, + "grad_norm": 1.1849026679992676, + "learning_rate": 1.1643348068024474e-10, + "loss": 0.7057, + "step": 19417 + }, + { + "epoch": 0.9979442902662145, + "grad_norm": 1.1411043405532837, + "learning_rate": 1.1082308383592033e-10, + "loss": 0.6592, + "step": 19418 + }, + { + "epoch": 0.9979956830095591, + "grad_norm": 1.136001467704773, + "learning_rate": 1.0535121328758557e-10, + "loss": 0.6929, + "step": 19419 + }, + { + "epoch": 0.9980470757529037, + "grad_norm": 1.0285239219665527, + "learning_rate": 1.000178691862308e-10, + "loss": 0.685, + "step": 19420 + }, + { + "epoch": 0.9980984684962483, + "grad_norm": 1.1170852184295654, + "learning_rate": 9.482305168007078e-11, + "loss": 0.7042, + "step": 19421 + }, + { + "epoch": 0.9981498612395929, + "grad_norm": 1.0366523265838623, + "learning_rate": 8.976676091287939e-11, + "loss": 0.7244, + "step": 19422 + }, + { + "epoch": 0.9982012539829376, + "grad_norm": 1.0879096984863281, + "learning_rate": 8.484899702509986e-11, + "loss": 0.6623, + "step": 19423 + }, + { + "epoch": 0.9982526467262822, + "grad_norm": 1.2013845443725586, + "learning_rate": 8.006976015273448e-11, + "loss": 0.6941, + "step": 19424 + }, + { + "epoch": 0.9983040394696269, + "grad_norm": 1.1568068265914917, + "learning_rate": 7.542905042789983e-11, + "loss": 0.7231, + "step": 19425 + }, + { + "epoch": 0.9983554322129715, + "grad_norm": 1.1374436616897583, + "learning_rate": 7.092686797938176e-11, + "loss": 0.7134, + "step": 19426 + }, + { + "epoch": 0.9984068249563162, + "grad_norm": 0.8318619728088379, + "learning_rate": 6.656321293208034e-11, + "loss": 0.6832, + "step": 19427 + }, + { + "epoch": 0.9984582176996608, + "grad_norm": 1.015324592590332, + "learning_rate": 6.233808540700992e-11, + "loss": 0.6737, + "step": 19428 + }, + { + "epoch": 0.9985096104430055, + "grad_norm": 1.079624056816101, + "learning_rate": 5.82514855207439e-11, + "loss": 0.7477, + "step": 19429 + }, + { + "epoch": 0.9985610031863501, + "grad_norm": 1.205488681793213, + "learning_rate": 5.430341338708012e-11, + "loss": 0.7202, + "step": 19430 + }, + { + "epoch": 0.9986123959296948, + "grad_norm": 1.1007046699523926, + "learning_rate": 5.0493869115375574e-11, + "loss": 0.7049, + "step": 19431 + }, + { + "epoch": 0.9986637886730394, + "grad_norm": 0.7101621031761169, + "learning_rate": 4.6822852810546325e-11, + "loss": 0.6442, + "step": 19432 + }, + { + "epoch": 0.998715181416384, + "grad_norm": 1.0886563062667847, + "learning_rate": 4.329036457417779e-11, + "loss": 0.6984, + "step": 19433 + }, + { + "epoch": 0.9987665741597287, + "grad_norm": 1.0681804418563843, + "learning_rate": 3.9896404505634924e-11, + "loss": 0.6496, + "step": 19434 + }, + { + "epoch": 0.9988179669030733, + "grad_norm": 1.0775365829467773, + "learning_rate": 3.6640972697066234e-11, + "loss": 0.6964, + "step": 19435 + }, + { + "epoch": 0.9988693596464179, + "grad_norm": 0.7069593071937561, + "learning_rate": 3.352406924006513e-11, + "loss": 0.6577, + "step": 19436 + }, + { + "epoch": 0.9989207523897625, + "grad_norm": 1.030396580696106, + "learning_rate": 3.0545694220118774e-11, + "loss": 0.6419, + "step": 19437 + }, + { + "epoch": 0.9989721451331072, + "grad_norm": 1.0528852939605713, + "learning_rate": 2.7705847720493895e-11, + "loss": 0.6406, + "step": 19438 + }, + { + "epoch": 0.9990235378764518, + "grad_norm": 1.1139971017837524, + "learning_rate": 2.5004529819461222e-11, + "loss": 0.644, + "step": 19439 + }, + { + "epoch": 0.9990749306197965, + "grad_norm": 1.064696192741394, + "learning_rate": 2.2441740591405692e-11, + "loss": 0.7158, + "step": 19440 + }, + { + "epoch": 0.9991263233631411, + "grad_norm": 1.0799723863601685, + "learning_rate": 2.0017480107936693e-11, + "loss": 0.6679, + "step": 19441 + }, + { + "epoch": 0.9991777161064858, + "grad_norm": 1.082567811012268, + "learning_rate": 1.7731748436222718e-11, + "loss": 0.6849, + "step": 19442 + }, + { + "epoch": 0.9992291088498304, + "grad_norm": 1.1231456995010376, + "learning_rate": 1.5584545639546477e-11, + "loss": 0.7249, + "step": 19443 + }, + { + "epoch": 0.999280501593175, + "grad_norm": 1.1078248023986816, + "learning_rate": 1.3575871777304905e-11, + "loss": 0.7129, + "step": 19444 + }, + { + "epoch": 0.9993318943365197, + "grad_norm": 1.1178227663040161, + "learning_rate": 1.1705726905009152e-11, + "loss": 0.664, + "step": 19445 + }, + { + "epoch": 0.9993832870798643, + "grad_norm": 1.0704572200775146, + "learning_rate": 9.974111074839698e-12, + "loss": 0.6856, + "step": 19446 + }, + { + "epoch": 0.999434679823209, + "grad_norm": 1.1912915706634521, + "learning_rate": 8.381024333981025e-12, + "loss": 0.7084, + "step": 19447 + }, + { + "epoch": 0.9994860725665536, + "grad_norm": 1.1131463050842285, + "learning_rate": 6.9264667279522744e-12, + "loss": 0.7317, + "step": 19448 + }, + { + "epoch": 0.9995374653098983, + "grad_norm": 1.0881973505020142, + "learning_rate": 5.6104382956112534e-12, + "loss": 0.7371, + "step": 19449 + }, + { + "epoch": 0.9995888580532429, + "grad_norm": 1.1469289064407349, + "learning_rate": 4.432939074150433e-12, + "loss": 0.7116, + "step": 19450 + }, + { + "epoch": 0.9996402507965875, + "grad_norm": 1.0609543323516846, + "learning_rate": 3.3939690963213923e-12, + "loss": 0.7003, + "step": 19451 + }, + { + "epoch": 0.9996916435399321, + "grad_norm": 1.1198300123214722, + "learning_rate": 2.4935283904348183e-12, + "loss": 0.6302, + "step": 19452 + }, + { + "epoch": 0.9997430362832768, + "grad_norm": 0.68081134557724, + "learning_rate": 1.731616982025841e-12, + "loss": 0.6537, + "step": 19453 + }, + { + "epoch": 0.9997944290266214, + "grad_norm": 1.081189751625061, + "learning_rate": 1.1082348916335861e-12, + "loss": 0.6603, + "step": 19454 + }, + { + "epoch": 0.9998458217699661, + "grad_norm": 0.9850096106529236, + "learning_rate": 6.233821364665105e-13, + "loss": 0.6412, + "step": 19455 + }, + { + "epoch": 0.9998972145133107, + "grad_norm": 1.0728039741516113, + "learning_rate": 2.7705873040240196e-13, + "loss": 0.6702, + "step": 19456 + }, + { + "epoch": 0.9999486072566554, + "grad_norm": 0.9957613348960876, + "learning_rate": 6.926468287815624e-14, + "loss": 0.7068, + "step": 19457 + }, + { + "epoch": 1.0, + "grad_norm": 0.7922065854072571, + "learning_rate": 0.0, + "loss": 0.6775, + "step": 19458 + }, + { + "epoch": 1.0, + "step": 19458, + "total_flos": 3.0958737148945734e+20, + "train_loss": 0.7380289890248087, + "train_runtime": 68709.733, + "train_samples_per_second": 144.993, + "train_steps_per_second": 0.283 + } + ], + "logging_steps": 1.0, + "max_steps": 19458, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 50000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 3.0958737148945734e+20, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +}