{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 9375, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 71.90381315562517, "learning_rate": 2.1321961620469085e-08, "loss": 2.6807, "step": 1 }, { "epoch": 0.0, "grad_norm": 65.0979958510511, "learning_rate": 4.264392324093817e-08, "loss": 2.4015, "step": 2 }, { "epoch": 0.0, "grad_norm": 82.72682046248394, "learning_rate": 6.396588486140725e-08, "loss": 2.8423, "step": 3 }, { "epoch": 0.0, "grad_norm": 65.81808478472077, "learning_rate": 8.528784648187634e-08, "loss": 2.5009, "step": 4 }, { "epoch": 0.0, "grad_norm": 64.77955550846546, "learning_rate": 1.0660980810234542e-07, "loss": 2.5248, "step": 5 }, { "epoch": 0.0, "grad_norm": 80.9744188871008, "learning_rate": 1.279317697228145e-07, "loss": 2.7217, "step": 6 }, { "epoch": 0.0, "grad_norm": 54.80374862377474, "learning_rate": 1.4925373134328358e-07, "loss": 2.3682, "step": 7 }, { "epoch": 0.0, "grad_norm": 79.14110225033387, "learning_rate": 1.7057569296375268e-07, "loss": 2.6812, "step": 8 }, { "epoch": 0.0, "grad_norm": 73.05975253950746, "learning_rate": 1.918976545842218e-07, "loss": 2.6741, "step": 9 }, { "epoch": 0.0, "grad_norm": 6.608453784390931, "learning_rate": 2.1321961620469084e-07, "loss": 0.7419, "step": 10 }, { "epoch": 0.0, "grad_norm": 69.71244231686454, "learning_rate": 2.3454157782515995e-07, "loss": 2.4508, "step": 11 }, { "epoch": 0.0, "grad_norm": 54.84947821110166, "learning_rate": 2.55863539445629e-07, "loss": 2.2918, "step": 12 }, { "epoch": 0.0, "grad_norm": 6.635108412911208, "learning_rate": 2.771855010660981e-07, "loss": 0.7419, "step": 13 }, { "epoch": 0.0, "grad_norm": 67.85107721506957, "learning_rate": 2.9850746268656716e-07, "loss": 2.5028, "step": 14 }, { "epoch": 0.0, "grad_norm": 88.54320816601799, "learning_rate": 3.1982942430703626e-07, "loss": 2.6378, "step": 15 }, { "epoch": 0.0, "grad_norm": 63.533435553398455, "learning_rate": 3.4115138592750537e-07, "loss": 2.2011, "step": 16 }, { "epoch": 0.0, "grad_norm": 74.15234973323447, "learning_rate": 3.624733475479744e-07, "loss": 2.4233, "step": 17 }, { "epoch": 0.0, "grad_norm": 78.43402210343505, "learning_rate": 3.837953091684436e-07, "loss": 2.5343, "step": 18 }, { "epoch": 0.0, "grad_norm": 68.49930237387298, "learning_rate": 4.0511727078891263e-07, "loss": 2.2404, "step": 19 }, { "epoch": 0.0, "grad_norm": 74.73715203475054, "learning_rate": 4.264392324093817e-07, "loss": 2.4183, "step": 20 }, { "epoch": 0.0, "grad_norm": 60.7643139724294, "learning_rate": 4.4776119402985074e-07, "loss": 2.041, "step": 21 }, { "epoch": 0.0, "grad_norm": 65.57591326212172, "learning_rate": 4.690831556503199e-07, "loss": 1.9347, "step": 22 }, { "epoch": 0.0, "grad_norm": 60.657649948555054, "learning_rate": 4.904051172707889e-07, "loss": 1.8105, "step": 23 }, { "epoch": 0.0, "grad_norm": 47.24929083120985, "learning_rate": 5.11727078891258e-07, "loss": 1.7286, "step": 24 }, { "epoch": 0.0, "grad_norm": 44.23281641514938, "learning_rate": 5.33049040511727e-07, "loss": 1.6498, "step": 25 }, { "epoch": 0.0, "grad_norm": 40.96129482370775, "learning_rate": 5.543710021321962e-07, "loss": 1.5009, "step": 26 }, { "epoch": 0.0, "grad_norm": 39.60195812748587, "learning_rate": 5.756929637526653e-07, "loss": 1.5405, "step": 27 }, { "epoch": 0.0, "grad_norm": 31.386543974364535, "learning_rate": 5.970149253731343e-07, "loss": 1.4177, "step": 28 }, { "epoch": 0.0, "grad_norm": 23.22614602688747, "learning_rate": 6.183368869936035e-07, "loss": 1.2236, "step": 29 }, { "epoch": 0.0, "grad_norm": 25.38997398301144, "learning_rate": 6.396588486140725e-07, "loss": 1.0585, "step": 30 }, { "epoch": 0.0, "grad_norm": 2.5420365762313586, "learning_rate": 6.609808102345417e-07, "loss": 0.6574, "step": 31 }, { "epoch": 0.0, "grad_norm": 19.785190576986974, "learning_rate": 6.823027718550107e-07, "loss": 1.1303, "step": 32 }, { "epoch": 0.0, "grad_norm": 16.768918859418747, "learning_rate": 7.036247334754798e-07, "loss": 1.0963, "step": 33 }, { "epoch": 0.0, "grad_norm": 14.424196157613897, "learning_rate": 7.249466950959488e-07, "loss": 1.0874, "step": 34 }, { "epoch": 0.0, "grad_norm": 8.604807341516791, "learning_rate": 7.462686567164179e-07, "loss": 1.1122, "step": 35 }, { "epoch": 0.0, "grad_norm": 7.343986029879382, "learning_rate": 7.675906183368872e-07, "loss": 1.0333, "step": 36 }, { "epoch": 0.0, "grad_norm": 7.6368702461571765, "learning_rate": 7.889125799573562e-07, "loss": 1.0588, "step": 37 }, { "epoch": 0.0, "grad_norm": 5.724567566052884, "learning_rate": 8.102345415778253e-07, "loss": 1.0685, "step": 38 }, { "epoch": 0.0, "grad_norm": 2.4773234943623565, "learning_rate": 8.315565031982943e-07, "loss": 0.6743, "step": 39 }, { "epoch": 0.0, "grad_norm": 4.723371326069207, "learning_rate": 8.528784648187634e-07, "loss": 1.0171, "step": 40 }, { "epoch": 0.0, "grad_norm": 4.6756219824048335, "learning_rate": 8.742004264392324e-07, "loss": 0.964, "step": 41 }, { "epoch": 0.0, "grad_norm": 3.677487042779229, "learning_rate": 8.955223880597015e-07, "loss": 0.891, "step": 42 }, { "epoch": 0.0, "grad_norm": 4.4962947628266585, "learning_rate": 9.168443496801707e-07, "loss": 1.0379, "step": 43 }, { "epoch": 0.0, "grad_norm": 3.4409279055308537, "learning_rate": 9.381663113006398e-07, "loss": 0.896, "step": 44 }, { "epoch": 0.0, "grad_norm": 3.3754435792601964, "learning_rate": 9.594882729211088e-07, "loss": 0.9042, "step": 45 }, { "epoch": 0.0, "grad_norm": 3.0954598427026117, "learning_rate": 9.808102345415779e-07, "loss": 0.9375, "step": 46 }, { "epoch": 0.01, "grad_norm": 3.1204817041161235, "learning_rate": 1.002132196162047e-06, "loss": 0.9328, "step": 47 }, { "epoch": 0.01, "grad_norm": 2.743992972303733, "learning_rate": 1.023454157782516e-06, "loss": 0.8853, "step": 48 }, { "epoch": 0.01, "grad_norm": 2.283238431755266, "learning_rate": 1.044776119402985e-06, "loss": 0.668, "step": 49 }, { "epoch": 0.01, "grad_norm": 2.6857366358885324, "learning_rate": 1.066098081023454e-06, "loss": 0.8865, "step": 50 }, { "epoch": 0.01, "grad_norm": 2.6632542634190473, "learning_rate": 1.0874200426439234e-06, "loss": 0.9128, "step": 51 }, { "epoch": 0.01, "grad_norm": 2.3937217370745447, "learning_rate": 1.1087420042643924e-06, "loss": 0.9658, "step": 52 }, { "epoch": 0.01, "grad_norm": 2.411514450266353, "learning_rate": 1.1300639658848615e-06, "loss": 0.8404, "step": 53 }, { "epoch": 0.01, "grad_norm": 2.738609460183846, "learning_rate": 1.1513859275053305e-06, "loss": 0.9152, "step": 54 }, { "epoch": 0.01, "grad_norm": 3.0317981553432327, "learning_rate": 1.1727078891257996e-06, "loss": 0.8803, "step": 55 }, { "epoch": 0.01, "grad_norm": 2.9496331117266874, "learning_rate": 1.1940298507462686e-06, "loss": 0.83, "step": 56 }, { "epoch": 0.01, "grad_norm": 2.5468132978269793, "learning_rate": 1.2153518123667379e-06, "loss": 0.886, "step": 57 }, { "epoch": 0.01, "grad_norm": 2.7766015521436387, "learning_rate": 1.236673773987207e-06, "loss": 0.8805, "step": 58 }, { "epoch": 0.01, "grad_norm": 2.578962862312576, "learning_rate": 1.257995735607676e-06, "loss": 0.9707, "step": 59 }, { "epoch": 0.01, "grad_norm": 2.4724596031569175, "learning_rate": 1.279317697228145e-06, "loss": 0.8172, "step": 60 }, { "epoch": 0.01, "grad_norm": 2.560367156266906, "learning_rate": 1.300639658848614e-06, "loss": 0.945, "step": 61 }, { "epoch": 0.01, "grad_norm": 2.05470302521068, "learning_rate": 1.3219616204690834e-06, "loss": 0.6595, "step": 62 }, { "epoch": 0.01, "grad_norm": 2.4831516125890984, "learning_rate": 1.3432835820895524e-06, "loss": 0.8792, "step": 63 }, { "epoch": 0.01, "grad_norm": 2.4215766117674584, "learning_rate": 1.3646055437100215e-06, "loss": 0.9182, "step": 64 }, { "epoch": 0.01, "grad_norm": 2.851851089769218, "learning_rate": 1.3859275053304905e-06, "loss": 0.7883, "step": 65 }, { "epoch": 0.01, "grad_norm": 2.444030959382413, "learning_rate": 1.4072494669509596e-06, "loss": 0.8757, "step": 66 }, { "epoch": 0.01, "grad_norm": 2.25898837271714, "learning_rate": 1.4285714285714286e-06, "loss": 0.8097, "step": 67 }, { "epoch": 0.01, "grad_norm": 2.6905286747558126, "learning_rate": 1.4498933901918977e-06, "loss": 0.7835, "step": 68 }, { "epoch": 0.01, "grad_norm": 2.410852890162238, "learning_rate": 1.4712153518123667e-06, "loss": 0.8912, "step": 69 }, { "epoch": 0.01, "grad_norm": 2.984950057353551, "learning_rate": 1.4925373134328358e-06, "loss": 0.8912, "step": 70 }, { "epoch": 0.01, "grad_norm": 1.7477467392439536, "learning_rate": 1.5138592750533053e-06, "loss": 0.6268, "step": 71 }, { "epoch": 0.01, "grad_norm": 2.6540561200616106, "learning_rate": 1.5351812366737743e-06, "loss": 0.7814, "step": 72 }, { "epoch": 0.01, "grad_norm": 2.4750188186869484, "learning_rate": 1.5565031982942434e-06, "loss": 0.9103, "step": 73 }, { "epoch": 0.01, "grad_norm": 2.339933320726532, "learning_rate": 1.5778251599147124e-06, "loss": 0.8339, "step": 74 }, { "epoch": 0.01, "grad_norm": 2.316018715339278, "learning_rate": 1.5991471215351815e-06, "loss": 0.8485, "step": 75 }, { "epoch": 0.01, "grad_norm": 1.486102505364975, "learning_rate": 1.6204690831556505e-06, "loss": 0.6558, "step": 76 }, { "epoch": 0.01, "grad_norm": 2.5259486012698757, "learning_rate": 1.6417910447761196e-06, "loss": 0.8009, "step": 77 }, { "epoch": 0.01, "grad_norm": 2.471738645776523, "learning_rate": 1.6631130063965886e-06, "loss": 0.8488, "step": 78 }, { "epoch": 0.01, "grad_norm": 2.4632633325038884, "learning_rate": 1.6844349680170577e-06, "loss": 0.8376, "step": 79 }, { "epoch": 0.01, "grad_norm": 2.1503807564903266, "learning_rate": 1.7057569296375267e-06, "loss": 0.7287, "step": 80 }, { "epoch": 0.01, "grad_norm": 2.288774509911447, "learning_rate": 1.7270788912579958e-06, "loss": 0.7674, "step": 81 }, { "epoch": 0.01, "grad_norm": 2.3011409747830864, "learning_rate": 1.7484008528784648e-06, "loss": 0.8124, "step": 82 }, { "epoch": 0.01, "grad_norm": 2.5143509242752207, "learning_rate": 1.7697228144989339e-06, "loss": 0.7865, "step": 83 }, { "epoch": 0.01, "grad_norm": 2.4275368075022543, "learning_rate": 1.791044776119403e-06, "loss": 0.9169, "step": 84 }, { "epoch": 0.01, "grad_norm": 2.340533948046053, "learning_rate": 1.812366737739872e-06, "loss": 0.8448, "step": 85 }, { "epoch": 0.01, "grad_norm": 2.623719014551309, "learning_rate": 1.8336886993603415e-06, "loss": 0.9845, "step": 86 }, { "epoch": 0.01, "grad_norm": 2.3667592478503, "learning_rate": 1.8550106609808105e-06, "loss": 0.8937, "step": 87 }, { "epoch": 0.01, "grad_norm": 1.4885012252423957, "learning_rate": 1.8763326226012796e-06, "loss": 0.6266, "step": 88 }, { "epoch": 0.01, "grad_norm": 2.0545312123681274, "learning_rate": 1.8976545842217486e-06, "loss": 0.7946, "step": 89 }, { "epoch": 0.01, "grad_norm": 2.2430084239609087, "learning_rate": 1.9189765458422177e-06, "loss": 0.7812, "step": 90 }, { "epoch": 0.01, "grad_norm": 2.3652543945543565, "learning_rate": 1.9402985074626867e-06, "loss": 0.8154, "step": 91 }, { "epoch": 0.01, "grad_norm": 1.3100244841714277, "learning_rate": 1.9616204690831558e-06, "loss": 0.6329, "step": 92 }, { "epoch": 0.01, "grad_norm": 1.265530018862225, "learning_rate": 1.982942430703625e-06, "loss": 0.6316, "step": 93 }, { "epoch": 0.01, "grad_norm": 1.2124161575326533, "learning_rate": 2.004264392324094e-06, "loss": 0.6422, "step": 94 }, { "epoch": 0.01, "grad_norm": 2.782768132861841, "learning_rate": 2.025586353944563e-06, "loss": 0.8865, "step": 95 }, { "epoch": 0.01, "grad_norm": 2.277507566327829, "learning_rate": 2.046908315565032e-06, "loss": 0.7979, "step": 96 }, { "epoch": 0.01, "grad_norm": 2.3425005979277764, "learning_rate": 2.068230277185501e-06, "loss": 0.7438, "step": 97 }, { "epoch": 0.01, "grad_norm": 1.1944874449883904, "learning_rate": 2.08955223880597e-06, "loss": 0.6404, "step": 98 }, { "epoch": 0.01, "grad_norm": 2.3868948462630053, "learning_rate": 2.110874200426439e-06, "loss": 0.8254, "step": 99 }, { "epoch": 0.01, "grad_norm": 2.5348748815253863, "learning_rate": 2.132196162046908e-06, "loss": 0.7509, "step": 100 }, { "epoch": 0.01, "grad_norm": 1.2519791667386433, "learning_rate": 2.1535181236673773e-06, "loss": 0.6451, "step": 101 }, { "epoch": 0.01, "grad_norm": 2.370903392023387, "learning_rate": 2.1748400852878467e-06, "loss": 0.8004, "step": 102 }, { "epoch": 0.01, "grad_norm": 2.3589050985286044, "learning_rate": 2.1961620469083158e-06, "loss": 0.7718, "step": 103 }, { "epoch": 0.01, "grad_norm": 2.3389906234569517, "learning_rate": 2.217484008528785e-06, "loss": 0.7582, "step": 104 }, { "epoch": 0.01, "grad_norm": 2.5345381014205564, "learning_rate": 2.238805970149254e-06, "loss": 0.809, "step": 105 }, { "epoch": 0.01, "grad_norm": 1.1752242523481677, "learning_rate": 2.260127931769723e-06, "loss": 0.5796, "step": 106 }, { "epoch": 0.01, "grad_norm": 3.1164540095846283, "learning_rate": 2.281449893390192e-06, "loss": 0.8115, "step": 107 }, { "epoch": 0.01, "grad_norm": 2.8752359053581573, "learning_rate": 2.302771855010661e-06, "loss": 0.8834, "step": 108 }, { "epoch": 0.01, "grad_norm": 1.1017388941097148, "learning_rate": 2.32409381663113e-06, "loss": 0.6024, "step": 109 }, { "epoch": 0.01, "grad_norm": 2.3715754026783813, "learning_rate": 2.345415778251599e-06, "loss": 0.824, "step": 110 }, { "epoch": 0.01, "grad_norm": 2.0875211755569523, "learning_rate": 2.366737739872068e-06, "loss": 0.7256, "step": 111 }, { "epoch": 0.01, "grad_norm": 2.494047885886461, "learning_rate": 2.3880597014925373e-06, "loss": 0.745, "step": 112 }, { "epoch": 0.01, "grad_norm": 2.316525885746664, "learning_rate": 2.4093816631130067e-06, "loss": 0.8165, "step": 113 }, { "epoch": 0.01, "grad_norm": 2.5468520490386775, "learning_rate": 2.4307036247334758e-06, "loss": 0.8393, "step": 114 }, { "epoch": 0.01, "grad_norm": 2.897440686587939, "learning_rate": 2.452025586353945e-06, "loss": 0.8101, "step": 115 }, { "epoch": 0.01, "grad_norm": 2.1144185567037486, "learning_rate": 2.473347547974414e-06, "loss": 0.7841, "step": 116 }, { "epoch": 0.01, "grad_norm": 2.5555951032971227, "learning_rate": 2.494669509594883e-06, "loss": 0.8461, "step": 117 }, { "epoch": 0.01, "grad_norm": 2.231074774311186, "learning_rate": 2.515991471215352e-06, "loss": 0.6876, "step": 118 }, { "epoch": 0.01, "grad_norm": 2.8868488824611105, "learning_rate": 2.537313432835821e-06, "loss": 0.8927, "step": 119 }, { "epoch": 0.01, "grad_norm": 2.1757507397710496, "learning_rate": 2.55863539445629e-06, "loss": 0.6984, "step": 120 }, { "epoch": 0.01, "grad_norm": 1.3049849481403495, "learning_rate": 2.579957356076759e-06, "loss": 0.6253, "step": 121 }, { "epoch": 0.01, "grad_norm": 2.2738918760035998, "learning_rate": 2.601279317697228e-06, "loss": 0.8088, "step": 122 }, { "epoch": 0.01, "grad_norm": 2.5071219827968023, "learning_rate": 2.6226012793176977e-06, "loss": 0.7408, "step": 123 }, { "epoch": 0.01, "grad_norm": 2.73396057573115, "learning_rate": 2.6439232409381667e-06, "loss": 0.7976, "step": 124 }, { "epoch": 0.01, "grad_norm": 2.4535463498350563, "learning_rate": 2.6652452025586358e-06, "loss": 0.7595, "step": 125 }, { "epoch": 0.01, "grad_norm": 2.4904198024413438, "learning_rate": 2.686567164179105e-06, "loss": 0.7744, "step": 126 }, { "epoch": 0.01, "grad_norm": 2.181392280889873, "learning_rate": 2.707889125799574e-06, "loss": 0.7592, "step": 127 }, { "epoch": 0.01, "grad_norm": 2.717063784003909, "learning_rate": 2.729211087420043e-06, "loss": 0.8235, "step": 128 }, { "epoch": 0.01, "grad_norm": 2.259536309382374, "learning_rate": 2.750533049040512e-06, "loss": 0.7965, "step": 129 }, { "epoch": 0.01, "grad_norm": 2.385448499718112, "learning_rate": 2.771855010660981e-06, "loss": 0.7876, "step": 130 }, { "epoch": 0.01, "grad_norm": 1.196255247106115, "learning_rate": 2.79317697228145e-06, "loss": 0.6093, "step": 131 }, { "epoch": 0.01, "grad_norm": 2.221987531129518, "learning_rate": 2.814498933901919e-06, "loss": 0.7934, "step": 132 }, { "epoch": 0.01, "grad_norm": 2.653013013058444, "learning_rate": 2.835820895522388e-06, "loss": 0.7297, "step": 133 }, { "epoch": 0.01, "grad_norm": 2.1183155769317272, "learning_rate": 2.8571428571428573e-06, "loss": 0.7325, "step": 134 }, { "epoch": 0.01, "grad_norm": 2.3048653028405175, "learning_rate": 2.8784648187633263e-06, "loss": 0.837, "step": 135 }, { "epoch": 0.01, "grad_norm": 2.2870724097589923, "learning_rate": 2.8997867803837954e-06, "loss": 0.6751, "step": 136 }, { "epoch": 0.01, "grad_norm": 2.406149943347608, "learning_rate": 2.9211087420042644e-06, "loss": 0.7517, "step": 137 }, { "epoch": 0.01, "grad_norm": 2.2330138685105214, "learning_rate": 2.9424307036247335e-06, "loss": 0.886, "step": 138 }, { "epoch": 0.01, "grad_norm": 2.4498527536171437, "learning_rate": 2.9637526652452025e-06, "loss": 0.7649, "step": 139 }, { "epoch": 0.01, "grad_norm": 2.4607328931840637, "learning_rate": 2.9850746268656716e-06, "loss": 0.8234, "step": 140 }, { "epoch": 0.02, "grad_norm": 2.544303052787543, "learning_rate": 3.006396588486141e-06, "loss": 0.7499, "step": 141 }, { "epoch": 0.02, "grad_norm": 2.3590977793811136, "learning_rate": 3.0277185501066105e-06, "loss": 0.7255, "step": 142 }, { "epoch": 0.02, "grad_norm": 2.4604556739842196, "learning_rate": 3.0490405117270796e-06, "loss": 0.7456, "step": 143 }, { "epoch": 0.02, "grad_norm": 2.142625799511512, "learning_rate": 3.0703624733475486e-06, "loss": 0.849, "step": 144 }, { "epoch": 0.02, "grad_norm": 2.319265515532903, "learning_rate": 3.0916844349680177e-06, "loss": 0.7379, "step": 145 }, { "epoch": 0.02, "grad_norm": 2.2592325462240135, "learning_rate": 3.1130063965884867e-06, "loss": 0.8152, "step": 146 }, { "epoch": 0.02, "grad_norm": 2.278885847566507, "learning_rate": 3.1343283582089558e-06, "loss": 0.7892, "step": 147 }, { "epoch": 0.02, "grad_norm": 2.807584002813302, "learning_rate": 3.155650319829425e-06, "loss": 0.7273, "step": 148 }, { "epoch": 0.02, "grad_norm": 2.3789471694110023, "learning_rate": 3.176972281449894e-06, "loss": 0.8025, "step": 149 }, { "epoch": 0.02, "grad_norm": 2.366222740551134, "learning_rate": 3.198294243070363e-06, "loss": 0.7947, "step": 150 }, { "epoch": 0.02, "grad_norm": 2.4152382300837467, "learning_rate": 3.219616204690832e-06, "loss": 0.7988, "step": 151 }, { "epoch": 0.02, "grad_norm": 2.6762735499052033, "learning_rate": 3.240938166311301e-06, "loss": 0.7433, "step": 152 }, { "epoch": 0.02, "grad_norm": 1.4585262720906806, "learning_rate": 3.26226012793177e-06, "loss": 0.5893, "step": 153 }, { "epoch": 0.02, "grad_norm": 2.359496111341599, "learning_rate": 3.283582089552239e-06, "loss": 0.6922, "step": 154 }, { "epoch": 0.02, "grad_norm": 2.466922686445037, "learning_rate": 3.304904051172708e-06, "loss": 0.7506, "step": 155 }, { "epoch": 0.02, "grad_norm": 2.1292295070905447, "learning_rate": 3.3262260127931773e-06, "loss": 0.9269, "step": 156 }, { "epoch": 0.02, "grad_norm": 2.2991105264456877, "learning_rate": 3.3475479744136463e-06, "loss": 0.8578, "step": 157 }, { "epoch": 0.02, "grad_norm": 3.037550552293596, "learning_rate": 3.3688699360341154e-06, "loss": 0.8009, "step": 158 }, { "epoch": 0.02, "grad_norm": 2.4740207049848677, "learning_rate": 3.3901918976545844e-06, "loss": 0.6763, "step": 159 }, { "epoch": 0.02, "grad_norm": 2.645222758246351, "learning_rate": 3.4115138592750535e-06, "loss": 0.6469, "step": 160 }, { "epoch": 0.02, "grad_norm": 2.3899830395966593, "learning_rate": 3.4328358208955225e-06, "loss": 0.7484, "step": 161 }, { "epoch": 0.02, "grad_norm": 2.1265395346060445, "learning_rate": 3.4541577825159916e-06, "loss": 0.6465, "step": 162 }, { "epoch": 0.02, "grad_norm": 2.1224379747872004, "learning_rate": 3.4754797441364606e-06, "loss": 0.6561, "step": 163 }, { "epoch": 0.02, "grad_norm": 2.2637432768746173, "learning_rate": 3.4968017057569297e-06, "loss": 0.7561, "step": 164 }, { "epoch": 0.02, "grad_norm": 2.7479332648175863, "learning_rate": 3.5181236673773987e-06, "loss": 0.8949, "step": 165 }, { "epoch": 0.02, "grad_norm": 2.168772500804281, "learning_rate": 3.5394456289978678e-06, "loss": 0.8457, "step": 166 }, { "epoch": 0.02, "grad_norm": 2.7093822420504807, "learning_rate": 3.560767590618337e-06, "loss": 0.7681, "step": 167 }, { "epoch": 0.02, "grad_norm": 2.1891205360090087, "learning_rate": 3.582089552238806e-06, "loss": 0.7678, "step": 168 }, { "epoch": 0.02, "grad_norm": 2.508371464255401, "learning_rate": 3.603411513859275e-06, "loss": 0.7963, "step": 169 }, { "epoch": 0.02, "grad_norm": 2.2011194210560667, "learning_rate": 3.624733475479744e-06, "loss": 0.7172, "step": 170 }, { "epoch": 0.02, "grad_norm": 2.423413905503277, "learning_rate": 3.6460554371002135e-06, "loss": 0.7092, "step": 171 }, { "epoch": 0.02, "grad_norm": 2.371533477891736, "learning_rate": 3.667377398720683e-06, "loss": 0.7771, "step": 172 }, { "epoch": 0.02, "grad_norm": 2.379776155501427, "learning_rate": 3.688699360341152e-06, "loss": 0.7574, "step": 173 }, { "epoch": 0.02, "grad_norm": 1.6810483131300218, "learning_rate": 3.710021321961621e-06, "loss": 0.5992, "step": 174 }, { "epoch": 0.02, "grad_norm": 2.3213040292067317, "learning_rate": 3.73134328358209e-06, "loss": 0.8135, "step": 175 }, { "epoch": 0.02, "grad_norm": 1.2776647325229393, "learning_rate": 3.752665245202559e-06, "loss": 0.6124, "step": 176 }, { "epoch": 0.02, "grad_norm": 3.0431534824105397, "learning_rate": 3.773987206823028e-06, "loss": 0.7635, "step": 177 }, { "epoch": 0.02, "grad_norm": 2.226239407670719, "learning_rate": 3.7953091684434973e-06, "loss": 0.7506, "step": 178 }, { "epoch": 0.02, "grad_norm": 2.347844508507702, "learning_rate": 3.816631130063966e-06, "loss": 0.7159, "step": 179 }, { "epoch": 0.02, "grad_norm": 2.3889594454032, "learning_rate": 3.837953091684435e-06, "loss": 0.7626, "step": 180 }, { "epoch": 0.02, "grad_norm": 2.753291976675029, "learning_rate": 3.859275053304904e-06, "loss": 0.6823, "step": 181 }, { "epoch": 0.02, "grad_norm": 2.004738442264844, "learning_rate": 3.8805970149253735e-06, "loss": 0.6169, "step": 182 }, { "epoch": 0.02, "grad_norm": 2.6019881802489775, "learning_rate": 3.9019189765458425e-06, "loss": 0.7295, "step": 183 }, { "epoch": 0.02, "grad_norm": 2.3046773487186973, "learning_rate": 3.9232409381663116e-06, "loss": 0.7538, "step": 184 }, { "epoch": 0.02, "grad_norm": 2.205275598198936, "learning_rate": 3.944562899786781e-06, "loss": 0.7653, "step": 185 }, { "epoch": 0.02, "grad_norm": 2.287655002533816, "learning_rate": 3.96588486140725e-06, "loss": 0.7871, "step": 186 }, { "epoch": 0.02, "grad_norm": 2.2961867304174977, "learning_rate": 3.987206823027719e-06, "loss": 0.734, "step": 187 }, { "epoch": 0.02, "grad_norm": 2.3141853847804916, "learning_rate": 4.008528784648188e-06, "loss": 0.7085, "step": 188 }, { "epoch": 0.02, "grad_norm": 2.3717844589346426, "learning_rate": 4.029850746268657e-06, "loss": 0.7692, "step": 189 }, { "epoch": 0.02, "grad_norm": 2.709388275333783, "learning_rate": 4.051172707889126e-06, "loss": 0.8876, "step": 190 }, { "epoch": 0.02, "grad_norm": 2.18296931020356, "learning_rate": 4.072494669509595e-06, "loss": 0.7572, "step": 191 }, { "epoch": 0.02, "grad_norm": 2.3305740761444893, "learning_rate": 4.093816631130064e-06, "loss": 0.6274, "step": 192 }, { "epoch": 0.02, "grad_norm": 2.1658006762334683, "learning_rate": 4.115138592750533e-06, "loss": 0.7452, "step": 193 }, { "epoch": 0.02, "grad_norm": 2.148566638046189, "learning_rate": 4.136460554371002e-06, "loss": 0.6294, "step": 194 }, { "epoch": 0.02, "grad_norm": 2.331523844195692, "learning_rate": 4.157782515991471e-06, "loss": 0.758, "step": 195 }, { "epoch": 0.02, "grad_norm": 2.440867201500487, "learning_rate": 4.17910447761194e-06, "loss": 0.8048, "step": 196 }, { "epoch": 0.02, "grad_norm": 2.4733886228123616, "learning_rate": 4.200426439232409e-06, "loss": 0.7999, "step": 197 }, { "epoch": 0.02, "grad_norm": 2.4478611798680023, "learning_rate": 4.221748400852878e-06, "loss": 0.8145, "step": 198 }, { "epoch": 0.02, "grad_norm": 2.4299047116797095, "learning_rate": 4.243070362473347e-06, "loss": 0.7543, "step": 199 }, { "epoch": 0.02, "grad_norm": 2.5819398658237067, "learning_rate": 4.264392324093816e-06, "loss": 0.8093, "step": 200 }, { "epoch": 0.02, "grad_norm": 2.1584343101470025, "learning_rate": 4.2857142857142855e-06, "loss": 0.7315, "step": 201 }, { "epoch": 0.02, "grad_norm": 2.202685188830754, "learning_rate": 4.3070362473347545e-06, "loss": 0.838, "step": 202 }, { "epoch": 0.02, "grad_norm": 2.1297730036632143, "learning_rate": 4.3283582089552236e-06, "loss": 0.7096, "step": 203 }, { "epoch": 0.02, "grad_norm": 3.1699025779432666, "learning_rate": 4.3496801705756935e-06, "loss": 0.7726, "step": 204 }, { "epoch": 0.02, "grad_norm": 2.0338015847241864, "learning_rate": 4.3710021321961625e-06, "loss": 0.7041, "step": 205 }, { "epoch": 0.02, "grad_norm": 2.152708291353312, "learning_rate": 4.3923240938166316e-06, "loss": 0.69, "step": 206 }, { "epoch": 0.02, "grad_norm": 2.3935117410190947, "learning_rate": 4.413646055437101e-06, "loss": 0.6682, "step": 207 }, { "epoch": 0.02, "grad_norm": 2.1800641809415913, "learning_rate": 4.43496801705757e-06, "loss": 0.7173, "step": 208 }, { "epoch": 0.02, "grad_norm": 2.2951348085478216, "learning_rate": 4.456289978678039e-06, "loss": 0.7925, "step": 209 }, { "epoch": 0.02, "grad_norm": 2.35442440213586, "learning_rate": 4.477611940298508e-06, "loss": 0.7667, "step": 210 }, { "epoch": 0.02, "grad_norm": 2.208797477549214, "learning_rate": 4.498933901918977e-06, "loss": 0.7309, "step": 211 }, { "epoch": 0.02, "grad_norm": 2.306888521367427, "learning_rate": 4.520255863539446e-06, "loss": 0.6302, "step": 212 }, { "epoch": 0.02, "grad_norm": 2.30764751943009, "learning_rate": 4.541577825159915e-06, "loss": 0.848, "step": 213 }, { "epoch": 0.02, "grad_norm": 2.293797324100531, "learning_rate": 4.562899786780384e-06, "loss": 0.7011, "step": 214 }, { "epoch": 0.02, "grad_norm": 2.1606917302267123, "learning_rate": 4.584221748400853e-06, "loss": 0.7962, "step": 215 }, { "epoch": 0.02, "grad_norm": 2.8386379195522577, "learning_rate": 4.605543710021322e-06, "loss": 0.7767, "step": 216 }, { "epoch": 0.02, "grad_norm": 2.261300889685826, "learning_rate": 4.626865671641791e-06, "loss": 0.7776, "step": 217 }, { "epoch": 0.02, "grad_norm": 2.4480714250574067, "learning_rate": 4.64818763326226e-06, "loss": 0.6938, "step": 218 }, { "epoch": 0.02, "grad_norm": 2.7269426461720494, "learning_rate": 4.669509594882729e-06, "loss": 0.7165, "step": 219 }, { "epoch": 0.02, "grad_norm": 2.6380373959349632, "learning_rate": 4.690831556503198e-06, "loss": 0.6648, "step": 220 }, { "epoch": 0.02, "grad_norm": 1.4006524051489857, "learning_rate": 4.712153518123667e-06, "loss": 0.5894, "step": 221 }, { "epoch": 0.02, "grad_norm": 2.2139849176308273, "learning_rate": 4.733475479744136e-06, "loss": 0.7315, "step": 222 }, { "epoch": 0.02, "grad_norm": 2.274181185275746, "learning_rate": 4.7547974413646055e-06, "loss": 0.7214, "step": 223 }, { "epoch": 0.02, "grad_norm": 1.1556321235540465, "learning_rate": 4.7761194029850745e-06, "loss": 0.5765, "step": 224 }, { "epoch": 0.02, "grad_norm": 2.3355004181293166, "learning_rate": 4.797441364605544e-06, "loss": 0.7399, "step": 225 }, { "epoch": 0.02, "grad_norm": 2.093390981956813, "learning_rate": 4.8187633262260135e-06, "loss": 0.6846, "step": 226 }, { "epoch": 0.02, "grad_norm": 1.3011535582232405, "learning_rate": 4.8400852878464825e-06, "loss": 0.6076, "step": 227 }, { "epoch": 0.02, "grad_norm": 2.4278515079335072, "learning_rate": 4.8614072494669516e-06, "loss": 0.702, "step": 228 }, { "epoch": 0.02, "grad_norm": 2.1677551753618394, "learning_rate": 4.882729211087421e-06, "loss": 0.7361, "step": 229 }, { "epoch": 0.02, "grad_norm": 2.5114181226013406, "learning_rate": 4.90405117270789e-06, "loss": 0.727, "step": 230 }, { "epoch": 0.02, "grad_norm": 2.133495645221872, "learning_rate": 4.925373134328359e-06, "loss": 0.6881, "step": 231 }, { "epoch": 0.02, "grad_norm": 2.3314831786977397, "learning_rate": 4.946695095948828e-06, "loss": 0.7817, "step": 232 }, { "epoch": 0.02, "grad_norm": 1.600862898704814, "learning_rate": 4.968017057569297e-06, "loss": 0.6093, "step": 233 }, { "epoch": 0.02, "grad_norm": 2.3821305495276195, "learning_rate": 4.989339019189766e-06, "loss": 0.596, "step": 234 }, { "epoch": 0.03, "grad_norm": 2.1247743559368413, "learning_rate": 5.010660980810235e-06, "loss": 0.8075, "step": 235 }, { "epoch": 0.03, "grad_norm": 2.2031038913908025, "learning_rate": 5.031982942430704e-06, "loss": 0.7616, "step": 236 }, { "epoch": 0.03, "grad_norm": 1.2543547016799141, "learning_rate": 5.053304904051173e-06, "loss": 0.5959, "step": 237 }, { "epoch": 0.03, "grad_norm": 2.472643573326743, "learning_rate": 5.074626865671642e-06, "loss": 0.8322, "step": 238 }, { "epoch": 0.03, "grad_norm": 2.1073895415804436, "learning_rate": 5.095948827292111e-06, "loss": 0.7685, "step": 239 }, { "epoch": 0.03, "grad_norm": 2.7373996524493696, "learning_rate": 5.11727078891258e-06, "loss": 0.8244, "step": 240 }, { "epoch": 0.03, "grad_norm": 2.3487267221947103, "learning_rate": 5.138592750533049e-06, "loss": 0.7952, "step": 241 }, { "epoch": 0.03, "grad_norm": 2.3143057579805517, "learning_rate": 5.159914712153518e-06, "loss": 0.7052, "step": 242 }, { "epoch": 0.03, "grad_norm": 2.292635650075937, "learning_rate": 5.181236673773987e-06, "loss": 0.7071, "step": 243 }, { "epoch": 0.03, "grad_norm": 2.0484753361717427, "learning_rate": 5.202558635394456e-06, "loss": 0.6848, "step": 244 }, { "epoch": 0.03, "grad_norm": 2.2287600696976546, "learning_rate": 5.2238805970149255e-06, "loss": 0.7347, "step": 245 }, { "epoch": 0.03, "grad_norm": 2.047625613996402, "learning_rate": 5.245202558635395e-06, "loss": 0.7396, "step": 246 }, { "epoch": 0.03, "grad_norm": 2.366193172725648, "learning_rate": 5.2665245202558636e-06, "loss": 0.8575, "step": 247 }, { "epoch": 0.03, "grad_norm": 2.2619974632056996, "learning_rate": 5.2878464818763335e-06, "loss": 0.73, "step": 248 }, { "epoch": 0.03, "grad_norm": 2.136730834954746, "learning_rate": 5.309168443496802e-06, "loss": 0.6852, "step": 249 }, { "epoch": 0.03, "grad_norm": 2.3750857919361312, "learning_rate": 5.3304904051172716e-06, "loss": 0.755, "step": 250 }, { "epoch": 0.03, "grad_norm": 1.8673320207055493, "learning_rate": 5.351812366737741e-06, "loss": 0.7082, "step": 251 }, { "epoch": 0.03, "grad_norm": 2.2636012878644376, "learning_rate": 5.37313432835821e-06, "loss": 0.6166, "step": 252 }, { "epoch": 0.03, "grad_norm": 2.4677489467917613, "learning_rate": 5.394456289978679e-06, "loss": 0.7477, "step": 253 }, { "epoch": 0.03, "grad_norm": 2.4537307841820613, "learning_rate": 5.415778251599148e-06, "loss": 0.7992, "step": 254 }, { "epoch": 0.03, "grad_norm": 2.3330861818774102, "learning_rate": 5.437100213219617e-06, "loss": 0.7181, "step": 255 }, { "epoch": 0.03, "grad_norm": 2.3185106230573354, "learning_rate": 5.458422174840086e-06, "loss": 0.6902, "step": 256 }, { "epoch": 0.03, "grad_norm": 2.0841306029891666, "learning_rate": 5.479744136460555e-06, "loss": 0.7121, "step": 257 }, { "epoch": 0.03, "grad_norm": 2.2122098632280696, "learning_rate": 5.501066098081024e-06, "loss": 0.7397, "step": 258 }, { "epoch": 0.03, "grad_norm": 2.4863224887933186, "learning_rate": 5.522388059701493e-06, "loss": 0.6754, "step": 259 }, { "epoch": 0.03, "grad_norm": 2.388368260166849, "learning_rate": 5.543710021321962e-06, "loss": 0.6668, "step": 260 }, { "epoch": 0.03, "grad_norm": 2.1835584762135047, "learning_rate": 5.565031982942431e-06, "loss": 0.7214, "step": 261 }, { "epoch": 0.03, "grad_norm": 2.303639783785995, "learning_rate": 5.5863539445629e-06, "loss": 0.8225, "step": 262 }, { "epoch": 0.03, "grad_norm": 2.262780617417238, "learning_rate": 5.607675906183369e-06, "loss": 0.8766, "step": 263 }, { "epoch": 0.03, "grad_norm": 2.215058942044246, "learning_rate": 5.628997867803838e-06, "loss": 0.6725, "step": 264 }, { "epoch": 0.03, "grad_norm": 2.1746318317857045, "learning_rate": 5.650319829424308e-06, "loss": 0.7439, "step": 265 }, { "epoch": 0.03, "grad_norm": 2.017748771187934, "learning_rate": 5.671641791044776e-06, "loss": 0.7539, "step": 266 }, { "epoch": 0.03, "grad_norm": 2.242296552950907, "learning_rate": 5.692963752665246e-06, "loss": 0.6664, "step": 267 }, { "epoch": 0.03, "grad_norm": 2.144929396608, "learning_rate": 5.7142857142857145e-06, "loss": 0.7372, "step": 268 }, { "epoch": 0.03, "grad_norm": 1.5341973337406167, "learning_rate": 5.735607675906184e-06, "loss": 0.5773, "step": 269 }, { "epoch": 0.03, "grad_norm": 2.242536663571702, "learning_rate": 5.756929637526653e-06, "loss": 0.6615, "step": 270 }, { "epoch": 0.03, "grad_norm": 2.2240130325558427, "learning_rate": 5.7782515991471225e-06, "loss": 0.8846, "step": 271 }, { "epoch": 0.03, "grad_norm": 2.2945642806143938, "learning_rate": 5.799573560767591e-06, "loss": 0.8006, "step": 272 }, { "epoch": 0.03, "grad_norm": 1.2821782210499268, "learning_rate": 5.820895522388061e-06, "loss": 0.5996, "step": 273 }, { "epoch": 0.03, "grad_norm": 2.353621905679835, "learning_rate": 5.842217484008529e-06, "loss": 0.7471, "step": 274 }, { "epoch": 0.03, "grad_norm": 1.1810182069064965, "learning_rate": 5.863539445628999e-06, "loss": 0.5763, "step": 275 }, { "epoch": 0.03, "grad_norm": 2.17770518280821, "learning_rate": 5.884861407249467e-06, "loss": 0.8109, "step": 276 }, { "epoch": 0.03, "grad_norm": 2.504224074506798, "learning_rate": 5.906183368869937e-06, "loss": 0.7558, "step": 277 }, { "epoch": 0.03, "grad_norm": 2.394756928299786, "learning_rate": 5.927505330490405e-06, "loss": 0.7774, "step": 278 }, { "epoch": 0.03, "grad_norm": 2.244073775244681, "learning_rate": 5.948827292110875e-06, "loss": 0.6663, "step": 279 }, { "epoch": 0.03, "grad_norm": 2.137230142475369, "learning_rate": 5.970149253731343e-06, "loss": 0.6185, "step": 280 }, { "epoch": 0.03, "grad_norm": 2.183171265472615, "learning_rate": 5.991471215351813e-06, "loss": 0.8106, "step": 281 }, { "epoch": 0.03, "grad_norm": 2.279872983551697, "learning_rate": 6.012793176972282e-06, "loss": 0.6872, "step": 282 }, { "epoch": 0.03, "grad_norm": 2.584411045113133, "learning_rate": 6.034115138592751e-06, "loss": 0.8365, "step": 283 }, { "epoch": 0.03, "grad_norm": 2.07008234934182, "learning_rate": 6.055437100213221e-06, "loss": 0.7779, "step": 284 }, { "epoch": 0.03, "grad_norm": 2.3579059645003086, "learning_rate": 6.076759061833689e-06, "loss": 0.7031, "step": 285 }, { "epoch": 0.03, "grad_norm": 2.14207154898313, "learning_rate": 6.098081023454159e-06, "loss": 0.6179, "step": 286 }, { "epoch": 0.03, "grad_norm": 2.211216498312054, "learning_rate": 6.119402985074627e-06, "loss": 0.7265, "step": 287 }, { "epoch": 0.03, "grad_norm": 2.05052506844446, "learning_rate": 6.140724946695097e-06, "loss": 0.7071, "step": 288 }, { "epoch": 0.03, "grad_norm": 2.475563083377262, "learning_rate": 6.1620469083155655e-06, "loss": 0.6633, "step": 289 }, { "epoch": 0.03, "grad_norm": 2.0986878940447915, "learning_rate": 6.183368869936035e-06, "loss": 0.6604, "step": 290 }, { "epoch": 0.03, "grad_norm": 2.2664463842668976, "learning_rate": 6.2046908315565036e-06, "loss": 0.6315, "step": 291 }, { "epoch": 0.03, "grad_norm": 1.9819813442498007, "learning_rate": 6.2260127931769735e-06, "loss": 0.5941, "step": 292 }, { "epoch": 0.03, "grad_norm": 2.118060331597257, "learning_rate": 6.247334754797442e-06, "loss": 0.7145, "step": 293 }, { "epoch": 0.03, "grad_norm": 2.0685337221225346, "learning_rate": 6.2686567164179116e-06, "loss": 0.6601, "step": 294 }, { "epoch": 0.03, "grad_norm": 2.633229886829962, "learning_rate": 6.28997867803838e-06, "loss": 0.6578, "step": 295 }, { "epoch": 0.03, "grad_norm": 1.5645097150773666, "learning_rate": 6.31130063965885e-06, "loss": 0.5699, "step": 296 }, { "epoch": 0.03, "grad_norm": 2.3033846735234174, "learning_rate": 6.332622601279318e-06, "loss": 0.7354, "step": 297 }, { "epoch": 0.03, "grad_norm": 2.204301079437869, "learning_rate": 6.353944562899788e-06, "loss": 0.7665, "step": 298 }, { "epoch": 0.03, "grad_norm": 2.257210201742839, "learning_rate": 6.375266524520256e-06, "loss": 0.7788, "step": 299 }, { "epoch": 0.03, "grad_norm": 2.0145874139681403, "learning_rate": 6.396588486140726e-06, "loss": 0.7787, "step": 300 }, { "epoch": 0.03, "grad_norm": 1.264572719145909, "learning_rate": 6.417910447761194e-06, "loss": 0.5866, "step": 301 }, { "epoch": 0.03, "grad_norm": 2.1785564558694785, "learning_rate": 6.439232409381664e-06, "loss": 0.7324, "step": 302 }, { "epoch": 0.03, "grad_norm": 1.9874389019323766, "learning_rate": 6.460554371002132e-06, "loss": 0.7699, "step": 303 }, { "epoch": 0.03, "grad_norm": 2.197150989772039, "learning_rate": 6.481876332622602e-06, "loss": 0.6896, "step": 304 }, { "epoch": 0.03, "grad_norm": 1.9161310042402333, "learning_rate": 6.50319829424307e-06, "loss": 0.6892, "step": 305 }, { "epoch": 0.03, "grad_norm": 2.114971823833315, "learning_rate": 6.52452025586354e-06, "loss": 0.7704, "step": 306 }, { "epoch": 0.03, "grad_norm": 2.2913064405954016, "learning_rate": 6.545842217484008e-06, "loss": 0.707, "step": 307 }, { "epoch": 0.03, "grad_norm": 2.2043766958081283, "learning_rate": 6.567164179104478e-06, "loss": 0.7566, "step": 308 }, { "epoch": 0.03, "grad_norm": 2.3429665400467514, "learning_rate": 6.5884861407249465e-06, "loss": 0.8435, "step": 309 }, { "epoch": 0.03, "grad_norm": 1.4422837951012202, "learning_rate": 6.609808102345416e-06, "loss": 0.5648, "step": 310 }, { "epoch": 0.03, "grad_norm": 2.3970140906215036, "learning_rate": 6.631130063965885e-06, "loss": 0.8439, "step": 311 }, { "epoch": 0.03, "grad_norm": 1.3097830138437536, "learning_rate": 6.6524520255863545e-06, "loss": 0.6077, "step": 312 }, { "epoch": 0.03, "grad_norm": 2.134544863499373, "learning_rate": 6.673773987206824e-06, "loss": 0.6554, "step": 313 }, { "epoch": 0.03, "grad_norm": 2.21813798667481, "learning_rate": 6.695095948827293e-06, "loss": 0.7105, "step": 314 }, { "epoch": 0.03, "grad_norm": 2.1506137339783677, "learning_rate": 6.7164179104477625e-06, "loss": 0.7189, "step": 315 }, { "epoch": 0.03, "grad_norm": 2.2968563498222974, "learning_rate": 6.737739872068231e-06, "loss": 0.823, "step": 316 }, { "epoch": 0.03, "grad_norm": 2.0023484533503466, "learning_rate": 6.759061833688701e-06, "loss": 0.664, "step": 317 }, { "epoch": 0.03, "grad_norm": 2.2751838144248677, "learning_rate": 6.780383795309169e-06, "loss": 0.6964, "step": 318 }, { "epoch": 0.03, "grad_norm": 2.5885484741410973, "learning_rate": 6.801705756929639e-06, "loss": 0.7057, "step": 319 }, { "epoch": 0.03, "grad_norm": 2.268607247979474, "learning_rate": 6.823027718550107e-06, "loss": 0.691, "step": 320 }, { "epoch": 0.03, "grad_norm": 1.8711883856733342, "learning_rate": 6.844349680170577e-06, "loss": 0.7298, "step": 321 }, { "epoch": 0.03, "grad_norm": 2.1945501313754012, "learning_rate": 6.865671641791045e-06, "loss": 0.6768, "step": 322 }, { "epoch": 0.03, "grad_norm": 2.0205980984162504, "learning_rate": 6.886993603411515e-06, "loss": 0.728, "step": 323 }, { "epoch": 0.03, "grad_norm": 2.1273830862434813, "learning_rate": 6.908315565031983e-06, "loss": 0.6021, "step": 324 }, { "epoch": 0.03, "grad_norm": 2.1192899307185145, "learning_rate": 6.929637526652453e-06, "loss": 0.5962, "step": 325 }, { "epoch": 0.03, "grad_norm": 2.313419614878974, "learning_rate": 6.950959488272921e-06, "loss": 0.7148, "step": 326 }, { "epoch": 0.03, "grad_norm": 2.784625049771456, "learning_rate": 6.972281449893391e-06, "loss": 0.7836, "step": 327 }, { "epoch": 0.03, "grad_norm": 2.4389824909137126, "learning_rate": 6.993603411513859e-06, "loss": 0.9042, "step": 328 }, { "epoch": 0.04, "grad_norm": 2.213970631675844, "learning_rate": 7.014925373134329e-06, "loss": 0.6935, "step": 329 }, { "epoch": 0.04, "grad_norm": 1.9392897392133233, "learning_rate": 7.0362473347547975e-06, "loss": 0.6418, "step": 330 }, { "epoch": 0.04, "grad_norm": 2.0188992189861104, "learning_rate": 7.057569296375267e-06, "loss": 0.7674, "step": 331 }, { "epoch": 0.04, "grad_norm": 1.9151112269587893, "learning_rate": 7.0788912579957356e-06, "loss": 0.6263, "step": 332 }, { "epoch": 0.04, "grad_norm": 1.505920460806218, "learning_rate": 7.1002132196162055e-06, "loss": 0.5908, "step": 333 }, { "epoch": 0.04, "grad_norm": 2.4357486321697475, "learning_rate": 7.121535181236674e-06, "loss": 0.8295, "step": 334 }, { "epoch": 0.04, "grad_norm": 2.0645669235416726, "learning_rate": 7.1428571428571436e-06, "loss": 0.6117, "step": 335 }, { "epoch": 0.04, "grad_norm": 2.5114913340366107, "learning_rate": 7.164179104477612e-06, "loss": 0.6803, "step": 336 }, { "epoch": 0.04, "grad_norm": 2.013540168001089, "learning_rate": 7.185501066098082e-06, "loss": 0.6124, "step": 337 }, { "epoch": 0.04, "grad_norm": 1.9852698079989546, "learning_rate": 7.20682302771855e-06, "loss": 0.6551, "step": 338 }, { "epoch": 0.04, "grad_norm": 1.9327166334615125, "learning_rate": 7.22814498933902e-06, "loss": 0.7837, "step": 339 }, { "epoch": 0.04, "grad_norm": 1.4601519141325212, "learning_rate": 7.249466950959488e-06, "loss": 0.6064, "step": 340 }, { "epoch": 0.04, "grad_norm": 1.9635824773834913, "learning_rate": 7.270788912579958e-06, "loss": 0.7087, "step": 341 }, { "epoch": 0.04, "grad_norm": 2.063906964717241, "learning_rate": 7.292110874200427e-06, "loss": 0.7049, "step": 342 }, { "epoch": 0.04, "grad_norm": 2.687583000447066, "learning_rate": 7.313432835820896e-06, "loss": 0.6921, "step": 343 }, { "epoch": 0.04, "grad_norm": 2.6668184920423688, "learning_rate": 7.334754797441366e-06, "loss": 0.733, "step": 344 }, { "epoch": 0.04, "grad_norm": 2.3520670435603677, "learning_rate": 7.356076759061834e-06, "loss": 0.6385, "step": 345 }, { "epoch": 0.04, "grad_norm": 2.227876504113966, "learning_rate": 7.377398720682304e-06, "loss": 0.7655, "step": 346 }, { "epoch": 0.04, "grad_norm": 2.7075253198540343, "learning_rate": 7.398720682302772e-06, "loss": 0.7178, "step": 347 }, { "epoch": 0.04, "grad_norm": 2.108356033921158, "learning_rate": 7.420042643923242e-06, "loss": 0.6349, "step": 348 }, { "epoch": 0.04, "grad_norm": 2.01257662168482, "learning_rate": 7.44136460554371e-06, "loss": 0.6541, "step": 349 }, { "epoch": 0.04, "grad_norm": 1.9472102546576928, "learning_rate": 7.46268656716418e-06, "loss": 0.7068, "step": 350 }, { "epoch": 0.04, "grad_norm": 2.1254656423987974, "learning_rate": 7.484008528784648e-06, "loss": 0.7161, "step": 351 }, { "epoch": 0.04, "grad_norm": 1.7585904657270945, "learning_rate": 7.505330490405118e-06, "loss": 0.5822, "step": 352 }, { "epoch": 0.04, "grad_norm": 2.543815922328202, "learning_rate": 7.5266524520255865e-06, "loss": 0.7828, "step": 353 }, { "epoch": 0.04, "grad_norm": 1.2157702809006006, "learning_rate": 7.547974413646056e-06, "loss": 0.5947, "step": 354 }, { "epoch": 0.04, "grad_norm": 2.0655949635239272, "learning_rate": 7.569296375266525e-06, "loss": 0.7255, "step": 355 }, { "epoch": 0.04, "grad_norm": 1.4004320954273684, "learning_rate": 7.5906183368869945e-06, "loss": 0.611, "step": 356 }, { "epoch": 0.04, "grad_norm": 1.309687300482201, "learning_rate": 7.611940298507463e-06, "loss": 0.6047, "step": 357 }, { "epoch": 0.04, "grad_norm": 3.0133647414240397, "learning_rate": 7.633262260127933e-06, "loss": 0.6807, "step": 358 }, { "epoch": 0.04, "grad_norm": 1.2116967878822211, "learning_rate": 7.654584221748402e-06, "loss": 0.5675, "step": 359 }, { "epoch": 0.04, "grad_norm": 2.4094868800430946, "learning_rate": 7.67590618336887e-06, "loss": 0.7514, "step": 360 }, { "epoch": 0.04, "grad_norm": 2.5061514561945772, "learning_rate": 7.69722814498934e-06, "loss": 0.6947, "step": 361 }, { "epoch": 0.04, "grad_norm": 1.428850216317708, "learning_rate": 7.718550106609809e-06, "loss": 0.5815, "step": 362 }, { "epoch": 0.04, "grad_norm": 2.060642264773424, "learning_rate": 7.739872068230278e-06, "loss": 0.8323, "step": 363 }, { "epoch": 0.04, "grad_norm": 2.2597641108754427, "learning_rate": 7.761194029850747e-06, "loss": 0.7006, "step": 364 }, { "epoch": 0.04, "grad_norm": 2.0897579649485, "learning_rate": 7.782515991471216e-06, "loss": 0.6739, "step": 365 }, { "epoch": 0.04, "grad_norm": 2.0892588695051635, "learning_rate": 7.803837953091685e-06, "loss": 0.746, "step": 366 }, { "epoch": 0.04, "grad_norm": 1.9196424609162235, "learning_rate": 7.825159914712154e-06, "loss": 0.7038, "step": 367 }, { "epoch": 0.04, "grad_norm": 2.223626440071476, "learning_rate": 7.846481876332623e-06, "loss": 0.6695, "step": 368 }, { "epoch": 0.04, "grad_norm": 2.0254415433392943, "learning_rate": 7.867803837953092e-06, "loss": 0.6862, "step": 369 }, { "epoch": 0.04, "grad_norm": 2.1703771903238955, "learning_rate": 7.889125799573561e-06, "loss": 0.6367, "step": 370 }, { "epoch": 0.04, "grad_norm": 2.2703293157201427, "learning_rate": 7.91044776119403e-06, "loss": 0.7408, "step": 371 }, { "epoch": 0.04, "grad_norm": 2.295119834850993, "learning_rate": 7.9317697228145e-06, "loss": 0.7125, "step": 372 }, { "epoch": 0.04, "grad_norm": 2.1517403029594546, "learning_rate": 7.953091684434968e-06, "loss": 0.7193, "step": 373 }, { "epoch": 0.04, "grad_norm": 2.1058379784226524, "learning_rate": 7.974413646055437e-06, "loss": 0.7686, "step": 374 }, { "epoch": 0.04, "grad_norm": 2.1446004269183523, "learning_rate": 7.995735607675907e-06, "loss": 0.7936, "step": 375 }, { "epoch": 0.04, "grad_norm": 2.2009061394376124, "learning_rate": 8.017057569296376e-06, "loss": 0.7746, "step": 376 }, { "epoch": 0.04, "grad_norm": 2.0293347772674313, "learning_rate": 8.038379530916846e-06, "loss": 0.7159, "step": 377 }, { "epoch": 0.04, "grad_norm": 2.155556725891097, "learning_rate": 8.059701492537314e-06, "loss": 0.7462, "step": 378 }, { "epoch": 0.04, "grad_norm": 2.494731489118891, "learning_rate": 8.081023454157784e-06, "loss": 0.7926, "step": 379 }, { "epoch": 0.04, "grad_norm": 2.2337325345832797, "learning_rate": 8.102345415778252e-06, "loss": 0.6173, "step": 380 }, { "epoch": 0.04, "grad_norm": 2.13859244857659, "learning_rate": 8.123667377398723e-06, "loss": 0.7417, "step": 381 }, { "epoch": 0.04, "grad_norm": 2.2118939604481165, "learning_rate": 8.14498933901919e-06, "loss": 0.7042, "step": 382 }, { "epoch": 0.04, "grad_norm": 2.2090419439694875, "learning_rate": 8.16631130063966e-06, "loss": 0.7048, "step": 383 }, { "epoch": 0.04, "grad_norm": 2.4636751956841647, "learning_rate": 8.187633262260128e-06, "loss": 0.62, "step": 384 }, { "epoch": 0.04, "grad_norm": 2.182098480068467, "learning_rate": 8.208955223880599e-06, "loss": 0.8119, "step": 385 }, { "epoch": 0.04, "grad_norm": 1.8482221814938813, "learning_rate": 8.230277185501066e-06, "loss": 0.6471, "step": 386 }, { "epoch": 0.04, "grad_norm": 2.0700781995579014, "learning_rate": 8.251599147121537e-06, "loss": 0.6879, "step": 387 }, { "epoch": 0.04, "grad_norm": 2.053879354805902, "learning_rate": 8.272921108742004e-06, "loss": 0.6416, "step": 388 }, { "epoch": 0.04, "grad_norm": 2.030985046674814, "learning_rate": 8.294243070362475e-06, "loss": 0.7098, "step": 389 }, { "epoch": 0.04, "grad_norm": 2.213164076153124, "learning_rate": 8.315565031982942e-06, "loss": 0.7641, "step": 390 }, { "epoch": 0.04, "grad_norm": 2.039098200967983, "learning_rate": 8.336886993603413e-06, "loss": 0.7143, "step": 391 }, { "epoch": 0.04, "grad_norm": 2.19815072240277, "learning_rate": 8.35820895522388e-06, "loss": 0.7722, "step": 392 }, { "epoch": 0.04, "grad_norm": 2.281468219295063, "learning_rate": 8.379530916844351e-06, "loss": 0.6845, "step": 393 }, { "epoch": 0.04, "grad_norm": 2.526955711598895, "learning_rate": 8.400852878464819e-06, "loss": 0.7822, "step": 394 }, { "epoch": 0.04, "grad_norm": 2.260625664058287, "learning_rate": 8.42217484008529e-06, "loss": 0.7143, "step": 395 }, { "epoch": 0.04, "grad_norm": 2.084290923084773, "learning_rate": 8.443496801705757e-06, "loss": 0.8318, "step": 396 }, { "epoch": 0.04, "grad_norm": 2.4465228499292886, "learning_rate": 8.464818763326227e-06, "loss": 0.7709, "step": 397 }, { "epoch": 0.04, "grad_norm": 2.3909746449702265, "learning_rate": 8.486140724946695e-06, "loss": 0.6964, "step": 398 }, { "epoch": 0.04, "grad_norm": 3.3921531983870317, "learning_rate": 8.507462686567165e-06, "loss": 0.7512, "step": 399 }, { "epoch": 0.04, "grad_norm": 2.017549560637024, "learning_rate": 8.528784648187633e-06, "loss": 0.7088, "step": 400 }, { "epoch": 0.04, "grad_norm": 2.5748164419880255, "learning_rate": 8.550106609808104e-06, "loss": 0.7151, "step": 401 }, { "epoch": 0.04, "grad_norm": 2.180971426654698, "learning_rate": 8.571428571428571e-06, "loss": 0.7312, "step": 402 }, { "epoch": 0.04, "grad_norm": 2.0677510461213977, "learning_rate": 8.592750533049042e-06, "loss": 0.8284, "step": 403 }, { "epoch": 0.04, "grad_norm": 2.457968811332488, "learning_rate": 8.614072494669509e-06, "loss": 0.7809, "step": 404 }, { "epoch": 0.04, "grad_norm": 2.1325549592841613, "learning_rate": 8.63539445628998e-06, "loss": 0.7422, "step": 405 }, { "epoch": 0.04, "grad_norm": 2.352667354898509, "learning_rate": 8.656716417910447e-06, "loss": 0.7202, "step": 406 }, { "epoch": 0.04, "grad_norm": 2.454836423396653, "learning_rate": 8.678038379530918e-06, "loss": 0.7573, "step": 407 }, { "epoch": 0.04, "grad_norm": 2.0928649287444405, "learning_rate": 8.699360341151387e-06, "loss": 0.6425, "step": 408 }, { "epoch": 0.04, "grad_norm": 2.217643338268498, "learning_rate": 8.720682302771856e-06, "loss": 0.7481, "step": 409 }, { "epoch": 0.04, "grad_norm": 2.6512235143628575, "learning_rate": 8.742004264392325e-06, "loss": 0.769, "step": 410 }, { "epoch": 0.04, "grad_norm": 2.1590489739934235, "learning_rate": 8.763326226012794e-06, "loss": 0.7268, "step": 411 }, { "epoch": 0.04, "grad_norm": 2.140452060376157, "learning_rate": 8.784648187633263e-06, "loss": 0.8121, "step": 412 }, { "epoch": 0.04, "grad_norm": 1.9136977570571905, "learning_rate": 8.805970149253732e-06, "loss": 0.7507, "step": 413 }, { "epoch": 0.04, "grad_norm": 2.2241428877315044, "learning_rate": 8.827292110874201e-06, "loss": 0.6659, "step": 414 }, { "epoch": 0.04, "grad_norm": 3.036050580524872, "learning_rate": 8.84861407249467e-06, "loss": 0.6196, "step": 415 }, { "epoch": 0.04, "grad_norm": 2.0051181663321525, "learning_rate": 8.86993603411514e-06, "loss": 0.6324, "step": 416 }, { "epoch": 0.04, "grad_norm": 2.5952697819804147, "learning_rate": 8.891257995735608e-06, "loss": 0.6915, "step": 417 }, { "epoch": 0.04, "grad_norm": 2.298646249042025, "learning_rate": 8.912579957356077e-06, "loss": 0.7147, "step": 418 }, { "epoch": 0.04, "grad_norm": 2.217105796156813, "learning_rate": 8.933901918976547e-06, "loss": 0.8181, "step": 419 }, { "epoch": 0.04, "grad_norm": 2.280829952621721, "learning_rate": 8.955223880597016e-06, "loss": 0.8052, "step": 420 }, { "epoch": 0.04, "grad_norm": 2.0059873903755596, "learning_rate": 8.976545842217485e-06, "loss": 0.6908, "step": 421 }, { "epoch": 0.05, "grad_norm": 2.2741874734143277, "learning_rate": 8.997867803837954e-06, "loss": 0.6082, "step": 422 }, { "epoch": 0.05, "grad_norm": 2.2835743447486907, "learning_rate": 9.019189765458423e-06, "loss": 0.7531, "step": 423 }, { "epoch": 0.05, "grad_norm": 2.127968744356948, "learning_rate": 9.040511727078892e-06, "loss": 0.6501, "step": 424 }, { "epoch": 0.05, "grad_norm": 2.1169403081924636, "learning_rate": 9.06183368869936e-06, "loss": 0.7312, "step": 425 }, { "epoch": 0.05, "grad_norm": 2.5264497900629173, "learning_rate": 9.08315565031983e-06, "loss": 0.7451, "step": 426 }, { "epoch": 0.05, "grad_norm": 1.9715121100321042, "learning_rate": 9.104477611940299e-06, "loss": 0.7107, "step": 427 }, { "epoch": 0.05, "grad_norm": 2.458217424132453, "learning_rate": 9.125799573560768e-06, "loss": 0.749, "step": 428 }, { "epoch": 0.05, "grad_norm": 2.003433198051545, "learning_rate": 9.147121535181237e-06, "loss": 0.707, "step": 429 }, { "epoch": 0.05, "grad_norm": 2.463437999694586, "learning_rate": 9.168443496801706e-06, "loss": 0.5937, "step": 430 }, { "epoch": 0.05, "grad_norm": 2.136210788274872, "learning_rate": 9.189765458422175e-06, "loss": 0.7294, "step": 431 }, { "epoch": 0.05, "grad_norm": 1.7898375084601177, "learning_rate": 9.211087420042644e-06, "loss": 0.5953, "step": 432 }, { "epoch": 0.05, "grad_norm": 2.680512361818752, "learning_rate": 9.232409381663113e-06, "loss": 0.7947, "step": 433 }, { "epoch": 0.05, "grad_norm": 2.227306338883224, "learning_rate": 9.253731343283582e-06, "loss": 0.7468, "step": 434 }, { "epoch": 0.05, "grad_norm": 2.1673420570952073, "learning_rate": 9.275053304904051e-06, "loss": 0.7582, "step": 435 }, { "epoch": 0.05, "grad_norm": 2.4113286486499623, "learning_rate": 9.29637526652452e-06, "loss": 0.7066, "step": 436 }, { "epoch": 0.05, "grad_norm": 2.1521017809395957, "learning_rate": 9.31769722814499e-06, "loss": 0.6765, "step": 437 }, { "epoch": 0.05, "grad_norm": 2.057276610343296, "learning_rate": 9.339019189765458e-06, "loss": 0.7401, "step": 438 }, { "epoch": 0.05, "grad_norm": 2.140335998248113, "learning_rate": 9.36034115138593e-06, "loss": 0.7592, "step": 439 }, { "epoch": 0.05, "grad_norm": 2.0559652736056884, "learning_rate": 9.381663113006397e-06, "loss": 0.6791, "step": 440 }, { "epoch": 0.05, "grad_norm": 2.1498297263599544, "learning_rate": 9.402985074626867e-06, "loss": 0.7174, "step": 441 }, { "epoch": 0.05, "grad_norm": 2.4054525722809457, "learning_rate": 9.424307036247335e-06, "loss": 0.8152, "step": 442 }, { "epoch": 0.05, "grad_norm": 2.566702842922497, "learning_rate": 9.445628997867805e-06, "loss": 0.7255, "step": 443 }, { "epoch": 0.05, "grad_norm": 2.0534452555283846, "learning_rate": 9.466950959488273e-06, "loss": 0.6615, "step": 444 }, { "epoch": 0.05, "grad_norm": 2.2542005530535576, "learning_rate": 9.488272921108744e-06, "loss": 0.8465, "step": 445 }, { "epoch": 0.05, "grad_norm": 2.0897553621974763, "learning_rate": 9.509594882729211e-06, "loss": 0.6866, "step": 446 }, { "epoch": 0.05, "grad_norm": 2.129317561633391, "learning_rate": 9.530916844349682e-06, "loss": 0.6933, "step": 447 }, { "epoch": 0.05, "grad_norm": 2.2539657509634234, "learning_rate": 9.552238805970149e-06, "loss": 0.6964, "step": 448 }, { "epoch": 0.05, "grad_norm": 4.6934959424120315, "learning_rate": 9.57356076759062e-06, "loss": 0.6195, "step": 449 }, { "epoch": 0.05, "grad_norm": 2.3579218344771737, "learning_rate": 9.594882729211089e-06, "loss": 0.7779, "step": 450 }, { "epoch": 0.05, "grad_norm": 2.5141922440889783, "learning_rate": 9.616204690831558e-06, "loss": 0.6939, "step": 451 }, { "epoch": 0.05, "grad_norm": 2.1461556204638685, "learning_rate": 9.637526652452027e-06, "loss": 0.6564, "step": 452 }, { "epoch": 0.05, "grad_norm": 2.3599159405423236, "learning_rate": 9.658848614072496e-06, "loss": 0.7499, "step": 453 }, { "epoch": 0.05, "grad_norm": 2.1398571280838694, "learning_rate": 9.680170575692965e-06, "loss": 0.7524, "step": 454 }, { "epoch": 0.05, "grad_norm": 2.463589324168276, "learning_rate": 9.701492537313434e-06, "loss": 0.762, "step": 455 }, { "epoch": 0.05, "grad_norm": 2.1095719399284376, "learning_rate": 9.722814498933903e-06, "loss": 0.7058, "step": 456 }, { "epoch": 0.05, "grad_norm": 2.370028038546173, "learning_rate": 9.744136460554372e-06, "loss": 0.7679, "step": 457 }, { "epoch": 0.05, "grad_norm": 2.2244922386680286, "learning_rate": 9.765458422174841e-06, "loss": 0.7768, "step": 458 }, { "epoch": 0.05, "grad_norm": 2.022873024047102, "learning_rate": 9.78678038379531e-06, "loss": 0.7677, "step": 459 }, { "epoch": 0.05, "grad_norm": 2.6828454125392325, "learning_rate": 9.80810234541578e-06, "loss": 0.6906, "step": 460 }, { "epoch": 0.05, "grad_norm": 2.312956306349923, "learning_rate": 9.829424307036248e-06, "loss": 0.7747, "step": 461 }, { "epoch": 0.05, "grad_norm": 2.3961091377689248, "learning_rate": 9.850746268656717e-06, "loss": 0.7455, "step": 462 }, { "epoch": 0.05, "grad_norm": 2.0977719698836723, "learning_rate": 9.872068230277187e-06, "loss": 0.6449, "step": 463 }, { "epoch": 0.05, "grad_norm": 2.4235380164734646, "learning_rate": 9.893390191897656e-06, "loss": 0.7423, "step": 464 }, { "epoch": 0.05, "grad_norm": 1.770860193371396, "learning_rate": 9.914712153518125e-06, "loss": 0.5824, "step": 465 }, { "epoch": 0.05, "grad_norm": 2.0575565714055357, "learning_rate": 9.936034115138594e-06, "loss": 0.7148, "step": 466 }, { "epoch": 0.05, "grad_norm": 1.2886525971244704, "learning_rate": 9.957356076759063e-06, "loss": 0.5873, "step": 467 }, { "epoch": 0.05, "grad_norm": 2.2994791804871966, "learning_rate": 9.978678038379532e-06, "loss": 0.7906, "step": 468 }, { "epoch": 0.05, "grad_norm": 2.5140759814063376, "learning_rate": 1e-05, "loss": 0.6666, "step": 469 }, { "epoch": 0.05, "grad_norm": 2.1348913181601823, "learning_rate": 9.999999688918369e-06, "loss": 0.5861, "step": 470 }, { "epoch": 0.05, "grad_norm": 2.058424820064007, "learning_rate": 9.999998755673514e-06, "loss": 0.7243, "step": 471 }, { "epoch": 0.05, "grad_norm": 2.0906177838788644, "learning_rate": 9.999997200265549e-06, "loss": 0.7693, "step": 472 }, { "epoch": 0.05, "grad_norm": 2.2007110326702115, "learning_rate": 9.999995022694671e-06, "loss": 0.8085, "step": 473 }, { "epoch": 0.05, "grad_norm": 1.4865042395857748, "learning_rate": 9.99999222296115e-06, "loss": 0.595, "step": 474 }, { "epoch": 0.05, "grad_norm": 2.112286693396701, "learning_rate": 9.999988801065333e-06, "loss": 0.8143, "step": 475 }, { "epoch": 0.05, "grad_norm": 2.111547755295895, "learning_rate": 9.999984757007647e-06, "loss": 0.7746, "step": 476 }, { "epoch": 0.05, "grad_norm": 1.4800024363173365, "learning_rate": 9.999980090788592e-06, "loss": 0.59, "step": 477 }, { "epoch": 0.05, "grad_norm": 2.1706163174092747, "learning_rate": 9.999974802408754e-06, "loss": 0.8034, "step": 478 }, { "epoch": 0.05, "grad_norm": 2.0549748112774933, "learning_rate": 9.999968891868788e-06, "loss": 0.771, "step": 479 }, { "epoch": 0.05, "grad_norm": 2.2317957942875264, "learning_rate": 9.999962359169429e-06, "loss": 0.663, "step": 480 }, { "epoch": 0.05, "grad_norm": 2.2280830678294175, "learning_rate": 9.999955204311492e-06, "loss": 0.8016, "step": 481 }, { "epoch": 0.05, "grad_norm": 1.828546205295456, "learning_rate": 9.999947427295866e-06, "loss": 0.631, "step": 482 }, { "epoch": 0.05, "grad_norm": 1.9299137783191542, "learning_rate": 9.999939028123517e-06, "loss": 0.7037, "step": 483 }, { "epoch": 0.05, "grad_norm": 2.5192995366925306, "learning_rate": 9.999930006795494e-06, "loss": 0.7277, "step": 484 }, { "epoch": 0.05, "grad_norm": 2.191981743517004, "learning_rate": 9.999920363312916e-06, "loss": 0.7331, "step": 485 }, { "epoch": 0.05, "grad_norm": 2.4020870667430647, "learning_rate": 9.999910097676986e-06, "loss": 0.7168, "step": 486 }, { "epoch": 0.05, "grad_norm": 1.8159347928275944, "learning_rate": 9.999899209888979e-06, "loss": 0.6055, "step": 487 }, { "epoch": 0.05, "grad_norm": 2.0847812066321554, "learning_rate": 9.99988769995025e-06, "loss": 0.7211, "step": 488 }, { "epoch": 0.05, "grad_norm": 2.217079763334724, "learning_rate": 9.999875567862234e-06, "loss": 0.7092, "step": 489 }, { "epoch": 0.05, "grad_norm": 2.1527624690531773, "learning_rate": 9.999862813626435e-06, "loss": 0.8428, "step": 490 }, { "epoch": 0.05, "grad_norm": 2.267081233548912, "learning_rate": 9.999849437244446e-06, "loss": 0.7733, "step": 491 }, { "epoch": 0.05, "grad_norm": 1.9027383149393842, "learning_rate": 9.99983543871793e-06, "loss": 0.7632, "step": 492 }, { "epoch": 0.05, "grad_norm": 2.291247814563774, "learning_rate": 9.999820818048625e-06, "loss": 0.7171, "step": 493 }, { "epoch": 0.05, "grad_norm": 1.92555011309108, "learning_rate": 9.999805575238354e-06, "loss": 0.5944, "step": 494 }, { "epoch": 0.05, "grad_norm": 1.9932854445678063, "learning_rate": 9.999789710289014e-06, "loss": 0.8028, "step": 495 }, { "epoch": 0.05, "grad_norm": 2.116233703841943, "learning_rate": 9.999773223202577e-06, "loss": 0.7322, "step": 496 }, { "epoch": 0.05, "grad_norm": 3.7353400777715464, "learning_rate": 9.999756113981097e-06, "loss": 0.5599, "step": 497 }, { "epoch": 0.05, "grad_norm": 1.9307403238466276, "learning_rate": 9.999738382626697e-06, "loss": 0.8173, "step": 498 }, { "epoch": 0.05, "grad_norm": 2.0901181765264876, "learning_rate": 9.999720029141592e-06, "loss": 0.798, "step": 499 }, { "epoch": 0.05, "grad_norm": 2.27387291611197, "learning_rate": 9.999701053528062e-06, "loss": 0.7244, "step": 500 }, { "epoch": 0.05, "grad_norm": 2.2219721878856844, "learning_rate": 9.999681455788464e-06, "loss": 0.7393, "step": 501 }, { "epoch": 0.05, "grad_norm": 1.979898933036548, "learning_rate": 9.999661235925244e-06, "loss": 0.755, "step": 502 }, { "epoch": 0.05, "grad_norm": 2.065293291389177, "learning_rate": 9.999640393940913e-06, "loss": 0.5865, "step": 503 }, { "epoch": 0.05, "grad_norm": 2.1298398091021857, "learning_rate": 9.999618929838064e-06, "loss": 0.8095, "step": 504 }, { "epoch": 0.05, "grad_norm": 2.0148176793583903, "learning_rate": 9.999596843619373e-06, "loss": 0.7078, "step": 505 }, { "epoch": 0.05, "grad_norm": 2.1670104905559096, "learning_rate": 9.999574135287583e-06, "loss": 0.8384, "step": 506 }, { "epoch": 0.05, "grad_norm": 2.337584870836254, "learning_rate": 9.99955080484552e-06, "loss": 0.752, "step": 507 }, { "epoch": 0.05, "grad_norm": 2.2300567580598787, "learning_rate": 9.99952685229609e-06, "loss": 0.5856, "step": 508 }, { "epoch": 0.05, "grad_norm": 1.9720986404740304, "learning_rate": 9.999502277642272e-06, "loss": 0.6637, "step": 509 }, { "epoch": 0.05, "grad_norm": 2.149344108790718, "learning_rate": 9.999477080887125e-06, "loss": 0.7021, "step": 510 }, { "epoch": 0.05, "grad_norm": 1.3499593991224883, "learning_rate": 9.999451262033782e-06, "loss": 0.5803, "step": 511 }, { "epoch": 0.05, "grad_norm": 2.1489615678675675, "learning_rate": 9.999424821085457e-06, "loss": 0.7878, "step": 512 }, { "epoch": 0.05, "grad_norm": 2.2349661294221352, "learning_rate": 9.99939775804544e-06, "loss": 0.7777, "step": 513 }, { "epoch": 0.05, "grad_norm": 1.445990564392625, "learning_rate": 9.9993700729171e-06, "loss": 0.5751, "step": 514 }, { "epoch": 0.05, "grad_norm": 2.142928780508434, "learning_rate": 9.99934176570388e-06, "loss": 0.7339, "step": 515 }, { "epoch": 0.06, "grad_norm": 2.3532825083388325, "learning_rate": 9.999312836409301e-06, "loss": 0.7182, "step": 516 }, { "epoch": 0.06, "grad_norm": 1.2555795474557712, "learning_rate": 9.999283285036967e-06, "loss": 0.5716, "step": 517 }, { "epoch": 0.06, "grad_norm": 2.263567679968871, "learning_rate": 9.999253111590552e-06, "loss": 0.7323, "step": 518 }, { "epoch": 0.06, "grad_norm": 2.291934827302687, "learning_rate": 9.999222316073811e-06, "loss": 0.7496, "step": 519 }, { "epoch": 0.06, "grad_norm": 2.233093762309112, "learning_rate": 9.999190898490577e-06, "loss": 0.7558, "step": 520 }, { "epoch": 0.06, "grad_norm": 1.3723499466438467, "learning_rate": 9.999158858844758e-06, "loss": 0.5658, "step": 521 }, { "epoch": 0.06, "grad_norm": 2.0860348086949223, "learning_rate": 9.99912619714034e-06, "loss": 0.773, "step": 522 }, { "epoch": 0.06, "grad_norm": 1.1794370646944827, "learning_rate": 9.999092913381392e-06, "loss": 0.5712, "step": 523 }, { "epoch": 0.06, "grad_norm": 2.4603525184940116, "learning_rate": 9.99905900757205e-06, "loss": 0.7018, "step": 524 }, { "epoch": 0.06, "grad_norm": 3.2282086532579934, "learning_rate": 9.999024479716535e-06, "loss": 0.5773, "step": 525 }, { "epoch": 0.06, "grad_norm": 2.281166864936243, "learning_rate": 9.998989329819144e-06, "loss": 0.7203, "step": 526 }, { "epoch": 0.06, "grad_norm": 1.9599869339293756, "learning_rate": 9.998953557884251e-06, "loss": 0.6512, "step": 527 }, { "epoch": 0.06, "grad_norm": 2.0210161885744866, "learning_rate": 9.998917163916306e-06, "loss": 0.7684, "step": 528 }, { "epoch": 0.06, "grad_norm": 1.9913498500832318, "learning_rate": 9.998880147919839e-06, "loss": 0.6611, "step": 529 }, { "epoch": 0.06, "grad_norm": 2.345222783316688, "learning_rate": 9.998842509899456e-06, "loss": 0.6626, "step": 530 }, { "epoch": 0.06, "grad_norm": 2.1428869225830756, "learning_rate": 9.998804249859836e-06, "loss": 0.7251, "step": 531 }, { "epoch": 0.06, "grad_norm": 2.1663559183599435, "learning_rate": 9.998765367805747e-06, "loss": 0.6245, "step": 532 }, { "epoch": 0.06, "grad_norm": 2.2114879932236176, "learning_rate": 9.998725863742022e-06, "loss": 0.6453, "step": 533 }, { "epoch": 0.06, "grad_norm": 1.8784783442460897, "learning_rate": 9.998685737673579e-06, "loss": 0.618, "step": 534 }, { "epoch": 0.06, "grad_norm": 2.1292486043362917, "learning_rate": 9.998644989605409e-06, "loss": 0.6286, "step": 535 }, { "epoch": 0.06, "grad_norm": 1.99424558752577, "learning_rate": 9.998603619542586e-06, "loss": 0.6505, "step": 536 }, { "epoch": 0.06, "grad_norm": 2.153863950808217, "learning_rate": 9.998561627490253e-06, "loss": 0.7507, "step": 537 }, { "epoch": 0.06, "grad_norm": 1.956330564123323, "learning_rate": 9.998519013453637e-06, "loss": 0.7344, "step": 538 }, { "epoch": 0.06, "grad_norm": 1.6360186428301462, "learning_rate": 9.998475777438043e-06, "loss": 0.5994, "step": 539 }, { "epoch": 0.06, "grad_norm": 2.680810266440842, "learning_rate": 9.998431919448847e-06, "loss": 0.7406, "step": 540 }, { "epoch": 0.06, "grad_norm": 2.3004479716558124, "learning_rate": 9.998387439491512e-06, "loss": 0.7619, "step": 541 }, { "epoch": 0.06, "grad_norm": 2.2262051891425934, "learning_rate": 9.998342337571566e-06, "loss": 0.8293, "step": 542 }, { "epoch": 0.06, "grad_norm": 2.2518763583513586, "learning_rate": 9.998296613694626e-06, "loss": 0.801, "step": 543 }, { "epoch": 0.06, "grad_norm": 1.3221272812754887, "learning_rate": 9.998250267866379e-06, "loss": 0.5948, "step": 544 }, { "epoch": 0.06, "grad_norm": 1.9436033073946586, "learning_rate": 9.998203300092593e-06, "loss": 0.6466, "step": 545 }, { "epoch": 0.06, "grad_norm": 1.9186972543248222, "learning_rate": 9.998155710379113e-06, "loss": 0.6989, "step": 546 }, { "epoch": 0.06, "grad_norm": 2.107883666731592, "learning_rate": 9.99810749873186e-06, "loss": 0.7473, "step": 547 }, { "epoch": 0.06, "grad_norm": 2.28667096782876, "learning_rate": 9.998058665156832e-06, "loss": 0.6867, "step": 548 }, { "epoch": 0.06, "grad_norm": 1.998226056959844, "learning_rate": 9.998009209660105e-06, "loss": 0.6895, "step": 549 }, { "epoch": 0.06, "grad_norm": 2.190990569686665, "learning_rate": 9.997959132247836e-06, "loss": 0.6874, "step": 550 }, { "epoch": 0.06, "grad_norm": 2.1303058094331644, "learning_rate": 9.997908432926256e-06, "loss": 0.665, "step": 551 }, { "epoch": 0.06, "grad_norm": 1.987753099339409, "learning_rate": 9.99785711170167e-06, "loss": 0.7754, "step": 552 }, { "epoch": 0.06, "grad_norm": 2.150610273512931, "learning_rate": 9.997805168580468e-06, "loss": 0.722, "step": 553 }, { "epoch": 0.06, "grad_norm": 2.0495067845780643, "learning_rate": 9.997752603569112e-06, "loss": 0.797, "step": 554 }, { "epoch": 0.06, "grad_norm": 2.032343616971857, "learning_rate": 9.99769941667414e-06, "loss": 0.5963, "step": 555 }, { "epoch": 0.06, "grad_norm": 1.9536159256623367, "learning_rate": 9.997645607902175e-06, "loss": 0.658, "step": 556 }, { "epoch": 0.06, "grad_norm": 2.0078671085446436, "learning_rate": 9.997591177259909e-06, "loss": 0.6972, "step": 557 }, { "epoch": 0.06, "grad_norm": 2.4101314250290535, "learning_rate": 9.997536124754117e-06, "loss": 0.6085, "step": 558 }, { "epoch": 0.06, "grad_norm": 2.133946534848276, "learning_rate": 9.997480450391647e-06, "loss": 0.7247, "step": 559 }, { "epoch": 0.06, "grad_norm": 2.481392709647769, "learning_rate": 9.997424154179432e-06, "loss": 0.6976, "step": 560 }, { "epoch": 0.06, "grad_norm": 2.1924767945755597, "learning_rate": 9.99736723612447e-06, "loss": 0.7141, "step": 561 }, { "epoch": 0.06, "grad_norm": 2.5433572405585965, "learning_rate": 9.997309696233848e-06, "loss": 0.6575, "step": 562 }, { "epoch": 0.06, "grad_norm": 1.9680509610519206, "learning_rate": 9.997251534514725e-06, "loss": 0.7856, "step": 563 }, { "epoch": 0.06, "grad_norm": 2.019826433663185, "learning_rate": 9.997192750974337e-06, "loss": 0.7422, "step": 564 }, { "epoch": 0.06, "grad_norm": 2.06398144404219, "learning_rate": 9.99713334562e-06, "loss": 0.6584, "step": 565 }, { "epoch": 0.06, "grad_norm": 1.9039660608496785, "learning_rate": 9.997073318459105e-06, "loss": 0.6126, "step": 566 }, { "epoch": 0.06, "grad_norm": 2.0929058392375803, "learning_rate": 9.997012669499123e-06, "loss": 0.7855, "step": 567 }, { "epoch": 0.06, "grad_norm": 2.168904288729975, "learning_rate": 9.9969513987476e-06, "loss": 0.7926, "step": 568 }, { "epoch": 0.06, "grad_norm": 2.1838325557949623, "learning_rate": 9.996889506212157e-06, "loss": 0.7871, "step": 569 }, { "epoch": 0.06, "grad_norm": 2.2754004003628636, "learning_rate": 9.9968269919005e-06, "loss": 0.7057, "step": 570 }, { "epoch": 0.06, "grad_norm": 2.58913007973236, "learning_rate": 9.996763855820404e-06, "loss": 0.7378, "step": 571 }, { "epoch": 0.06, "grad_norm": 2.6363498261226472, "learning_rate": 9.996700097979728e-06, "loss": 0.6999, "step": 572 }, { "epoch": 0.06, "grad_norm": 2.1700532827060344, "learning_rate": 9.996635718386404e-06, "loss": 0.6638, "step": 573 }, { "epoch": 0.06, "grad_norm": 1.9935011099212747, "learning_rate": 9.996570717048443e-06, "loss": 0.6804, "step": 574 }, { "epoch": 0.06, "grad_norm": 1.953525609266388, "learning_rate": 9.996505093973937e-06, "loss": 0.6941, "step": 575 }, { "epoch": 0.06, "grad_norm": 2.0473557810859333, "learning_rate": 9.996438849171047e-06, "loss": 0.7049, "step": 576 }, { "epoch": 0.06, "grad_norm": 2.049049504555642, "learning_rate": 9.996371982648017e-06, "loss": 0.7732, "step": 577 }, { "epoch": 0.06, "grad_norm": 2.0660461063319246, "learning_rate": 9.996304494413167e-06, "loss": 0.5903, "step": 578 }, { "epoch": 0.06, "grad_norm": 1.7317171065723056, "learning_rate": 9.996236384474895e-06, "loss": 0.606, "step": 579 }, { "epoch": 0.06, "grad_norm": 2.0153569591296336, "learning_rate": 9.99616765284168e-06, "loss": 0.5499, "step": 580 }, { "epoch": 0.06, "grad_norm": 1.690803787588496, "learning_rate": 9.996098299522069e-06, "loss": 0.5619, "step": 581 }, { "epoch": 0.06, "grad_norm": 1.7618724705988527, "learning_rate": 9.996028324524694e-06, "loss": 0.5912, "step": 582 }, { "epoch": 0.06, "grad_norm": 2.127429251908212, "learning_rate": 9.995957727858262e-06, "loss": 0.7157, "step": 583 }, { "epoch": 0.06, "grad_norm": 2.0068642451825514, "learning_rate": 9.995886509531558e-06, "loss": 0.7833, "step": 584 }, { "epoch": 0.06, "grad_norm": 1.9595512719668657, "learning_rate": 9.995814669553444e-06, "loss": 0.766, "step": 585 }, { "epoch": 0.06, "grad_norm": 2.173554023497718, "learning_rate": 9.995742207932858e-06, "loss": 0.7361, "step": 586 }, { "epoch": 0.06, "grad_norm": 1.934652376912064, "learning_rate": 9.995669124678818e-06, "loss": 0.6198, "step": 587 }, { "epoch": 0.06, "grad_norm": 2.399775482715586, "learning_rate": 9.995595419800417e-06, "loss": 0.7285, "step": 588 }, { "epoch": 0.06, "grad_norm": 2.073446867873034, "learning_rate": 9.995521093306829e-06, "loss": 0.8366, "step": 589 }, { "epoch": 0.06, "grad_norm": 2.430570324610234, "learning_rate": 9.995446145207297e-06, "loss": 0.8691, "step": 590 }, { "epoch": 0.06, "grad_norm": 2.0377556574858593, "learning_rate": 9.995370575511151e-06, "loss": 0.7025, "step": 591 }, { "epoch": 0.06, "grad_norm": 1.9144824022168099, "learning_rate": 9.995294384227793e-06, "loss": 0.7908, "step": 592 }, { "epoch": 0.06, "grad_norm": 3.2726945771692, "learning_rate": 9.995217571366707e-06, "loss": 0.6165, "step": 593 }, { "epoch": 0.06, "grad_norm": 2.2734658872260898, "learning_rate": 9.995140136937446e-06, "loss": 0.666, "step": 594 }, { "epoch": 0.06, "grad_norm": 2.040432810681011, "learning_rate": 9.995062080949648e-06, "loss": 0.6387, "step": 595 }, { "epoch": 0.06, "grad_norm": 2.4014302801798135, "learning_rate": 9.994983403413026e-06, "loss": 0.7535, "step": 596 }, { "epoch": 0.06, "grad_norm": 2.1019464201280003, "learning_rate": 9.994904104337368e-06, "loss": 0.7371, "step": 597 }, { "epoch": 0.06, "grad_norm": 1.3062170336297445, "learning_rate": 9.994824183732546e-06, "loss": 0.5708, "step": 598 }, { "epoch": 0.06, "grad_norm": 2.027120074782287, "learning_rate": 9.994743641608498e-06, "loss": 0.6528, "step": 599 }, { "epoch": 0.06, "grad_norm": 2.1522525898630374, "learning_rate": 9.994662477975253e-06, "loss": 0.8286, "step": 600 }, { "epoch": 0.06, "grad_norm": 2.2163952338513937, "learning_rate": 9.994580692842903e-06, "loss": 0.6992, "step": 601 }, { "epoch": 0.06, "grad_norm": 2.1219047037881027, "learning_rate": 9.994498286221632e-06, "loss": 0.7042, "step": 602 }, { "epoch": 0.06, "grad_norm": 6.637830224891093, "learning_rate": 9.99441525812169e-06, "loss": 0.8653, "step": 603 }, { "epoch": 0.06, "grad_norm": 1.8285942298814684, "learning_rate": 9.99433160855341e-06, "loss": 0.6809, "step": 604 }, { "epoch": 0.06, "grad_norm": 2.241241700047561, "learning_rate": 9.994247337527198e-06, "loss": 0.8052, "step": 605 }, { "epoch": 0.06, "grad_norm": 1.4430911745701815, "learning_rate": 9.994162445053544e-06, "loss": 0.5795, "step": 606 }, { "epoch": 0.06, "grad_norm": 2.294426824444891, "learning_rate": 9.994076931143008e-06, "loss": 0.7521, "step": 607 }, { "epoch": 0.06, "grad_norm": 2.0017918307053986, "learning_rate": 9.993990795806235e-06, "loss": 0.7714, "step": 608 }, { "epoch": 0.06, "grad_norm": 1.913287836974257, "learning_rate": 9.993904039053937e-06, "loss": 0.7364, "step": 609 }, { "epoch": 0.07, "grad_norm": 1.9911550719033342, "learning_rate": 9.993816660896913e-06, "loss": 0.6561, "step": 610 }, { "epoch": 0.07, "grad_norm": 1.9995230363304013, "learning_rate": 9.993728661346038e-06, "loss": 0.7073, "step": 611 }, { "epoch": 0.07, "grad_norm": 1.9336890006056058, "learning_rate": 9.993640040412255e-06, "loss": 0.7712, "step": 612 }, { "epoch": 0.07, "grad_norm": 2.1047945517432374, "learning_rate": 9.9935507981066e-06, "loss": 0.6676, "step": 613 }, { "epoch": 0.07, "grad_norm": 1.9667065143380251, "learning_rate": 9.993460934440172e-06, "loss": 0.6177, "step": 614 }, { "epoch": 0.07, "grad_norm": 2.1167336310180174, "learning_rate": 9.993370449424153e-06, "loss": 0.6476, "step": 615 }, { "epoch": 0.07, "grad_norm": 2.310750607635879, "learning_rate": 9.993279343069805e-06, "loss": 0.5923, "step": 616 }, { "epoch": 0.07, "grad_norm": 1.5416082258963588, "learning_rate": 9.993187615388463e-06, "loss": 0.6007, "step": 617 }, { "epoch": 0.07, "grad_norm": 2.1917121995216404, "learning_rate": 9.99309526639154e-06, "loss": 0.7683, "step": 618 }, { "epoch": 0.07, "grad_norm": 2.1257992154375605, "learning_rate": 9.99300229609053e-06, "loss": 0.6726, "step": 619 }, { "epoch": 0.07, "grad_norm": 2.1117415642364032, "learning_rate": 9.992908704496998e-06, "loss": 0.7283, "step": 620 }, { "epoch": 0.07, "grad_norm": 3.0943573796800585, "learning_rate": 9.992814491622593e-06, "loss": 0.5815, "step": 621 }, { "epoch": 0.07, "grad_norm": 2.0062378259530522, "learning_rate": 9.992719657479037e-06, "loss": 0.751, "step": 622 }, { "epoch": 0.07, "grad_norm": 2.084903066046197, "learning_rate": 9.992624202078131e-06, "loss": 0.6486, "step": 623 }, { "epoch": 0.07, "grad_norm": 2.021442419549133, "learning_rate": 9.99252812543175e-06, "loss": 0.6957, "step": 624 }, { "epoch": 0.07, "grad_norm": 2.0817766737305745, "learning_rate": 9.992431427551853e-06, "loss": 0.7251, "step": 625 }, { "epoch": 0.07, "grad_norm": 2.071923406327063, "learning_rate": 9.99233410845047e-06, "loss": 0.7606, "step": 626 }, { "epoch": 0.07, "grad_norm": 1.9036451192204027, "learning_rate": 9.99223616813971e-06, "loss": 0.6325, "step": 627 }, { "epoch": 0.07, "grad_norm": 2.100329941945002, "learning_rate": 9.992137606631764e-06, "loss": 0.683, "step": 628 }, { "epoch": 0.07, "grad_norm": 2.3814714259034853, "learning_rate": 9.992038423938891e-06, "loss": 0.7117, "step": 629 }, { "epoch": 0.07, "grad_norm": 2.106234397157804, "learning_rate": 9.991938620073438e-06, "loss": 0.7779, "step": 630 }, { "epoch": 0.07, "grad_norm": 2.131907313237941, "learning_rate": 9.991838195047819e-06, "loss": 0.743, "step": 631 }, { "epoch": 0.07, "grad_norm": 1.85501424555443, "learning_rate": 9.99173714887453e-06, "loss": 0.5805, "step": 632 }, { "epoch": 0.07, "grad_norm": 2.2135450790693243, "learning_rate": 9.99163548156615e-06, "loss": 0.7904, "step": 633 }, { "epoch": 0.07, "grad_norm": 1.8976702279057605, "learning_rate": 9.991533193135324e-06, "loss": 0.6577, "step": 634 }, { "epoch": 0.07, "grad_norm": 2.3429332682302135, "learning_rate": 9.991430283594784e-06, "loss": 0.7482, "step": 635 }, { "epoch": 0.07, "grad_norm": 2.1272830261359177, "learning_rate": 9.991326752957333e-06, "loss": 0.5983, "step": 636 }, { "epoch": 0.07, "grad_norm": 1.9427527615446765, "learning_rate": 9.991222601235853e-06, "loss": 0.6602, "step": 637 }, { "epoch": 0.07, "grad_norm": 2.157412733612042, "learning_rate": 9.991117828443306e-06, "loss": 0.8272, "step": 638 }, { "epoch": 0.07, "grad_norm": 2.050803998935102, "learning_rate": 9.991012434592728e-06, "loss": 0.8503, "step": 639 }, { "epoch": 0.07, "grad_norm": 1.833365635711004, "learning_rate": 9.990906419697233e-06, "loss": 0.7243, "step": 640 }, { "epoch": 0.07, "grad_norm": 1.8094860450720394, "learning_rate": 9.990799783770015e-06, "loss": 0.6331, "step": 641 }, { "epoch": 0.07, "grad_norm": 1.87511795548035, "learning_rate": 9.990692526824341e-06, "loss": 0.6359, "step": 642 }, { "epoch": 0.07, "grad_norm": 1.913379701273309, "learning_rate": 9.990584648873555e-06, "loss": 0.5739, "step": 643 }, { "epoch": 0.07, "grad_norm": 1.9131672269045306, "learning_rate": 9.990476149931088e-06, "loss": 0.6628, "step": 644 }, { "epoch": 0.07, "grad_norm": 1.9143622929934117, "learning_rate": 9.990367030010433e-06, "loss": 0.6661, "step": 645 }, { "epoch": 0.07, "grad_norm": 2.1191849486762204, "learning_rate": 9.99025728912517e-06, "loss": 0.8235, "step": 646 }, { "epoch": 0.07, "grad_norm": 2.226411858034136, "learning_rate": 9.990146927288957e-06, "loss": 0.8076, "step": 647 }, { "epoch": 0.07, "grad_norm": 1.3509558202523788, "learning_rate": 9.990035944515523e-06, "loss": 0.5663, "step": 648 }, { "epoch": 0.07, "grad_norm": 2.392633927410714, "learning_rate": 9.989924340818683e-06, "loss": 0.7076, "step": 649 }, { "epoch": 0.07, "grad_norm": 2.0550175958037835, "learning_rate": 9.989812116212319e-06, "loss": 0.6906, "step": 650 }, { "epoch": 0.07, "grad_norm": 2.1418560214403106, "learning_rate": 9.989699270710398e-06, "loss": 0.6702, "step": 651 }, { "epoch": 0.07, "grad_norm": 1.2323932692334831, "learning_rate": 9.989585804326963e-06, "loss": 0.5804, "step": 652 }, { "epoch": 0.07, "grad_norm": 1.1959911527904012, "learning_rate": 9.989471717076128e-06, "loss": 0.567, "step": 653 }, { "epoch": 0.07, "grad_norm": 2.1379099740570844, "learning_rate": 9.989357008972093e-06, "loss": 0.7381, "step": 654 }, { "epoch": 0.07, "grad_norm": 1.9425170265710328, "learning_rate": 9.98924168002913e-06, "loss": 0.599, "step": 655 }, { "epoch": 0.07, "grad_norm": 1.9569473288217318, "learning_rate": 9.989125730261592e-06, "loss": 0.6819, "step": 656 }, { "epoch": 0.07, "grad_norm": 1.9403564214339772, "learning_rate": 9.989009159683905e-06, "loss": 0.7455, "step": 657 }, { "epoch": 0.07, "grad_norm": 1.5136083142393864, "learning_rate": 9.988891968310575e-06, "loss": 0.5599, "step": 658 }, { "epoch": 0.07, "grad_norm": 2.022631646620912, "learning_rate": 9.988774156156183e-06, "loss": 0.7297, "step": 659 }, { "epoch": 0.07, "grad_norm": 2.261699194905261, "learning_rate": 9.988655723235389e-06, "loss": 0.7326, "step": 660 }, { "epoch": 0.07, "grad_norm": 1.9887528628671254, "learning_rate": 9.98853666956293e-06, "loss": 0.7621, "step": 661 }, { "epoch": 0.07, "grad_norm": 2.1677962413146092, "learning_rate": 9.988416995153622e-06, "loss": 0.7885, "step": 662 }, { "epoch": 0.07, "grad_norm": 2.3122900022671997, "learning_rate": 9.988296700022356e-06, "loss": 0.6921, "step": 663 }, { "epoch": 0.07, "grad_norm": 1.6321020444727437, "learning_rate": 9.988175784184097e-06, "loss": 0.5891, "step": 664 }, { "epoch": 0.07, "grad_norm": 1.307184419959274, "learning_rate": 9.988054247653895e-06, "loss": 0.5634, "step": 665 }, { "epoch": 0.07, "grad_norm": 2.1095346202400083, "learning_rate": 9.987932090446872e-06, "loss": 0.7762, "step": 666 }, { "epoch": 0.07, "grad_norm": 2.085476190428995, "learning_rate": 9.987809312578226e-06, "loss": 0.7805, "step": 667 }, { "epoch": 0.07, "grad_norm": 2.1272218138409773, "learning_rate": 9.987685914063238e-06, "loss": 0.7313, "step": 668 }, { "epoch": 0.07, "grad_norm": 2.1514632039349633, "learning_rate": 9.987561894917264e-06, "loss": 0.7474, "step": 669 }, { "epoch": 0.07, "grad_norm": 1.8497719778738677, "learning_rate": 9.987437255155729e-06, "loss": 0.713, "step": 670 }, { "epoch": 0.07, "grad_norm": 1.8597615611697178, "learning_rate": 9.98731199479415e-06, "loss": 0.6519, "step": 671 }, { "epoch": 0.07, "grad_norm": 2.2581133395443334, "learning_rate": 9.98718611384811e-06, "loss": 0.7065, "step": 672 }, { "epoch": 0.07, "grad_norm": 2.2829176881022044, "learning_rate": 9.987059612333271e-06, "loss": 0.6012, "step": 673 }, { "epoch": 0.07, "grad_norm": 2.024498595578416, "learning_rate": 9.986932490265377e-06, "loss": 0.6884, "step": 674 }, { "epoch": 0.07, "grad_norm": 2.322109831476108, "learning_rate": 9.986804747660243e-06, "loss": 0.8803, "step": 675 }, { "epoch": 0.07, "grad_norm": 2.1700685852865234, "learning_rate": 9.98667638453377e-06, "loss": 0.6992, "step": 676 }, { "epoch": 0.07, "grad_norm": 2.0974802311907674, "learning_rate": 9.986547400901924e-06, "loss": 0.6932, "step": 677 }, { "epoch": 0.07, "grad_norm": 1.8931549720001173, "learning_rate": 9.986417796780759e-06, "loss": 0.809, "step": 678 }, { "epoch": 0.07, "grad_norm": 2.1642920805873023, "learning_rate": 9.9862875721864e-06, "loss": 0.7757, "step": 679 }, { "epoch": 0.07, "grad_norm": 1.9272861767266036, "learning_rate": 9.986156727135051e-06, "loss": 0.7277, "step": 680 }, { "epoch": 0.07, "grad_norm": 1.9486194189384864, "learning_rate": 9.986025261642996e-06, "loss": 0.6799, "step": 681 }, { "epoch": 0.07, "grad_norm": 2.272345022261695, "learning_rate": 9.98589317572659e-06, "loss": 0.8434, "step": 682 }, { "epoch": 0.07, "grad_norm": 2.7878893621888485, "learning_rate": 9.985760469402272e-06, "loss": 0.6242, "step": 683 }, { "epoch": 0.07, "grad_norm": 2.095845820755322, "learning_rate": 9.985627142686553e-06, "loss": 0.6653, "step": 684 }, { "epoch": 0.07, "grad_norm": 2.3638055577167187, "learning_rate": 9.985493195596024e-06, "loss": 0.7654, "step": 685 }, { "epoch": 0.07, "grad_norm": 2.062490746159455, "learning_rate": 9.985358628147351e-06, "loss": 0.7398, "step": 686 }, { "epoch": 0.07, "grad_norm": 1.9694974579177487, "learning_rate": 9.985223440357281e-06, "loss": 0.735, "step": 687 }, { "epoch": 0.07, "grad_norm": 1.9152475598811038, "learning_rate": 9.985087632242634e-06, "loss": 0.7278, "step": 688 }, { "epoch": 0.07, "grad_norm": 1.7612131097771835, "learning_rate": 9.98495120382031e-06, "loss": 0.6222, "step": 689 }, { "epoch": 0.07, "grad_norm": 2.1270991313160477, "learning_rate": 9.984814155107286e-06, "loss": 0.6939, "step": 690 }, { "epoch": 0.07, "grad_norm": 1.5320649137375222, "learning_rate": 9.98467648612061e-06, "loss": 0.5869, "step": 691 }, { "epoch": 0.07, "grad_norm": 2.249421075466919, "learning_rate": 9.984538196877421e-06, "loss": 0.6335, "step": 692 }, { "epoch": 0.07, "grad_norm": 2.1669426672121106, "learning_rate": 9.984399287394919e-06, "loss": 0.7051, "step": 693 }, { "epoch": 0.07, "grad_norm": 2.212888433735402, "learning_rate": 9.984259757690394e-06, "loss": 0.7155, "step": 694 }, { "epoch": 0.07, "grad_norm": 2.4015550587088272, "learning_rate": 9.984119607781204e-06, "loss": 0.7098, "step": 695 }, { "epoch": 0.07, "grad_norm": 2.3209412728115586, "learning_rate": 9.983978837684793e-06, "loss": 0.6205, "step": 696 }, { "epoch": 0.07, "grad_norm": 1.8483773618719024, "learning_rate": 9.983837447418673e-06, "loss": 0.7695, "step": 697 }, { "epoch": 0.07, "grad_norm": 2.0307850306665642, "learning_rate": 9.98369543700044e-06, "loss": 0.7387, "step": 698 }, { "epoch": 0.07, "grad_norm": 2.0339487312434312, "learning_rate": 9.983552806447764e-06, "loss": 0.7173, "step": 699 }, { "epoch": 0.07, "grad_norm": 1.9739268801914807, "learning_rate": 9.983409555778393e-06, "loss": 0.7467, "step": 700 }, { "epoch": 0.07, "grad_norm": 2.3206536579438635, "learning_rate": 9.983265685010151e-06, "loss": 0.6889, "step": 701 }, { "epoch": 0.07, "grad_norm": 2.148725441427418, "learning_rate": 9.983121194160943e-06, "loss": 0.7923, "step": 702 }, { "epoch": 0.07, "grad_norm": 1.9699853818196402, "learning_rate": 9.982976083248746e-06, "loss": 0.6291, "step": 703 }, { "epoch": 0.08, "grad_norm": 2.1936785939309056, "learning_rate": 9.982830352291617e-06, "loss": 0.8564, "step": 704 }, { "epoch": 0.08, "grad_norm": 1.9023408623514682, "learning_rate": 9.98268400130769e-06, "loss": 0.7802, "step": 705 }, { "epoch": 0.08, "grad_norm": 2.2620408098523197, "learning_rate": 9.982537030315175e-06, "loss": 0.7311, "step": 706 }, { "epoch": 0.08, "grad_norm": 2.015584384394391, "learning_rate": 9.98238943933236e-06, "loss": 0.7308, "step": 707 }, { "epoch": 0.08, "grad_norm": 1.9672942757193936, "learning_rate": 9.982241228377614e-06, "loss": 0.6735, "step": 708 }, { "epoch": 0.08, "grad_norm": 2.137632860923766, "learning_rate": 9.982092397469372e-06, "loss": 0.6237, "step": 709 }, { "epoch": 0.08, "grad_norm": 2.559792475700103, "learning_rate": 9.981942946626161e-06, "loss": 0.7911, "step": 710 }, { "epoch": 0.08, "grad_norm": 2.0293082115108882, "learning_rate": 9.981792875866572e-06, "loss": 0.6729, "step": 711 }, { "epoch": 0.08, "grad_norm": 1.831414608911045, "learning_rate": 9.981642185209281e-06, "loss": 0.7034, "step": 712 }, { "epoch": 0.08, "grad_norm": 2.0184942107815873, "learning_rate": 9.98149087467304e-06, "loss": 0.6834, "step": 713 }, { "epoch": 0.08, "grad_norm": 1.8240909842325956, "learning_rate": 9.981338944276674e-06, "loss": 0.5807, "step": 714 }, { "epoch": 0.08, "grad_norm": 1.947991431330162, "learning_rate": 9.98118639403909e-06, "loss": 0.7565, "step": 715 }, { "epoch": 0.08, "grad_norm": 1.9439295326948298, "learning_rate": 9.98103322397927e-06, "loss": 0.6664, "step": 716 }, { "epoch": 0.08, "grad_norm": 2.212704982139624, "learning_rate": 9.980879434116275e-06, "loss": 0.6357, "step": 717 }, { "epoch": 0.08, "grad_norm": 1.4274149994082583, "learning_rate": 9.980725024469238e-06, "loss": 0.572, "step": 718 }, { "epoch": 0.08, "grad_norm": 1.9812590717534178, "learning_rate": 9.980569995057375e-06, "loss": 0.6874, "step": 719 }, { "epoch": 0.08, "grad_norm": 1.3471143379497177, "learning_rate": 9.980414345899975e-06, "loss": 0.5803, "step": 720 }, { "epoch": 0.08, "grad_norm": 1.871838069311446, "learning_rate": 9.980258077016408e-06, "loss": 0.7319, "step": 721 }, { "epoch": 0.08, "grad_norm": 1.96661405787852, "learning_rate": 9.980101188426119e-06, "loss": 0.5871, "step": 722 }, { "epoch": 0.08, "grad_norm": 2.118408510064543, "learning_rate": 9.979943680148627e-06, "loss": 0.682, "step": 723 }, { "epoch": 0.08, "grad_norm": 2.160124781612276, "learning_rate": 9.979785552203534e-06, "loss": 0.6755, "step": 724 }, { "epoch": 0.08, "grad_norm": 2.2601701557515526, "learning_rate": 9.979626804610515e-06, "loss": 0.6919, "step": 725 }, { "epoch": 0.08, "grad_norm": 2.250704776006004, "learning_rate": 9.979467437389323e-06, "loss": 0.7609, "step": 726 }, { "epoch": 0.08, "grad_norm": 2.213659880293427, "learning_rate": 9.97930745055979e-06, "loss": 0.804, "step": 727 }, { "epoch": 0.08, "grad_norm": 2.3125522061331627, "learning_rate": 9.979146844141822e-06, "loss": 0.8506, "step": 728 }, { "epoch": 0.08, "grad_norm": 1.9843539238180703, "learning_rate": 9.978985618155407e-06, "loss": 0.7644, "step": 729 }, { "epoch": 0.08, "grad_norm": 1.9600425531614163, "learning_rate": 9.978823772620603e-06, "loss": 0.7423, "step": 730 }, { "epoch": 0.08, "grad_norm": 2.144696908868071, "learning_rate": 9.97866130755755e-06, "loss": 0.5735, "step": 731 }, { "epoch": 0.08, "grad_norm": 3.1330246162176785, "learning_rate": 9.978498222986464e-06, "loss": 0.6162, "step": 732 }, { "epoch": 0.08, "grad_norm": 1.957530245306612, "learning_rate": 9.978334518927637e-06, "loss": 0.6951, "step": 733 }, { "epoch": 0.08, "grad_norm": 1.9942640230434803, "learning_rate": 9.978170195401443e-06, "loss": 0.7413, "step": 734 }, { "epoch": 0.08, "grad_norm": 1.9275272544208994, "learning_rate": 9.978005252428324e-06, "loss": 0.6661, "step": 735 }, { "epoch": 0.08, "grad_norm": 2.2055377820007345, "learning_rate": 9.977839690028809e-06, "loss": 0.734, "step": 736 }, { "epoch": 0.08, "grad_norm": 1.9083210699666606, "learning_rate": 9.977673508223496e-06, "loss": 0.7305, "step": 737 }, { "epoch": 0.08, "grad_norm": 1.6477805487408526, "learning_rate": 9.977506707033065e-06, "loss": 0.579, "step": 738 }, { "epoch": 0.08, "grad_norm": 1.938637550826593, "learning_rate": 9.977339286478271e-06, "loss": 0.6977, "step": 739 }, { "epoch": 0.08, "grad_norm": 2.006251003540572, "learning_rate": 9.977171246579947e-06, "loss": 0.7297, "step": 740 }, { "epoch": 0.08, "grad_norm": 2.0932612198856115, "learning_rate": 9.977002587359002e-06, "loss": 0.6015, "step": 741 }, { "epoch": 0.08, "grad_norm": 1.2891210356507048, "learning_rate": 9.976833308836421e-06, "loss": 0.5984, "step": 742 }, { "epoch": 0.08, "grad_norm": 2.4106542123142765, "learning_rate": 9.976663411033274e-06, "loss": 0.647, "step": 743 }, { "epoch": 0.08, "grad_norm": 2.2512643369242973, "learning_rate": 9.976492893970695e-06, "loss": 0.7656, "step": 744 }, { "epoch": 0.08, "grad_norm": 1.9320332857768847, "learning_rate": 9.976321757669906e-06, "loss": 0.7153, "step": 745 }, { "epoch": 0.08, "grad_norm": 2.05674691415075, "learning_rate": 9.9761500021522e-06, "loss": 0.7081, "step": 746 }, { "epoch": 0.08, "grad_norm": 2.467271200210628, "learning_rate": 9.975977627438949e-06, "loss": 0.7561, "step": 747 }, { "epoch": 0.08, "grad_norm": 1.803131234117888, "learning_rate": 9.975804633551603e-06, "loss": 0.6797, "step": 748 }, { "epoch": 0.08, "grad_norm": 1.3539325661282482, "learning_rate": 9.975631020511687e-06, "loss": 0.5757, "step": 749 }, { "epoch": 0.08, "grad_norm": 1.9904678885844604, "learning_rate": 9.975456788340805e-06, "loss": 0.6381, "step": 750 }, { "epoch": 0.08, "grad_norm": 1.8866196451530965, "learning_rate": 9.975281937060637e-06, "loss": 0.6291, "step": 751 }, { "epoch": 0.08, "grad_norm": 1.2637426072773468, "learning_rate": 9.975106466692942e-06, "loss": 0.5635, "step": 752 }, { "epoch": 0.08, "grad_norm": 2.013065226559289, "learning_rate": 9.97493037725955e-06, "loss": 0.7404, "step": 753 }, { "epoch": 0.08, "grad_norm": 2.5826130133132383, "learning_rate": 9.974753668782376e-06, "loss": 0.6701, "step": 754 }, { "epoch": 0.08, "grad_norm": 2.002372446844306, "learning_rate": 9.974576341283407e-06, "loss": 0.8088, "step": 755 }, { "epoch": 0.08, "grad_norm": 2.071851889190228, "learning_rate": 9.974398394784708e-06, "loss": 0.6311, "step": 756 }, { "epoch": 0.08, "grad_norm": 1.9119455760258455, "learning_rate": 9.974219829308422e-06, "loss": 0.6942, "step": 757 }, { "epoch": 0.08, "grad_norm": 1.9012401677140014, "learning_rate": 9.974040644876768e-06, "loss": 0.5653, "step": 758 }, { "epoch": 0.08, "grad_norm": 2.4954562043102184, "learning_rate": 9.973860841512042e-06, "loss": 0.8313, "step": 759 }, { "epoch": 0.08, "grad_norm": 2.1831003131876647, "learning_rate": 9.973680419236617e-06, "loss": 0.7592, "step": 760 }, { "epoch": 0.08, "grad_norm": 2.365226188889621, "learning_rate": 9.973499378072947e-06, "loss": 0.7629, "step": 761 }, { "epoch": 0.08, "grad_norm": 2.0696480780114483, "learning_rate": 9.973317718043554e-06, "loss": 0.6869, "step": 762 }, { "epoch": 0.08, "grad_norm": 2.2515992070410165, "learning_rate": 9.973135439171047e-06, "loss": 0.7058, "step": 763 }, { "epoch": 0.08, "grad_norm": 2.287946622457778, "learning_rate": 9.972952541478105e-06, "loss": 0.792, "step": 764 }, { "epoch": 0.08, "grad_norm": 2.1457025039025384, "learning_rate": 9.972769024987486e-06, "loss": 0.6892, "step": 765 }, { "epoch": 0.08, "grad_norm": 1.9551359298873103, "learning_rate": 9.972584889722028e-06, "loss": 0.6975, "step": 766 }, { "epoch": 0.08, "grad_norm": 2.4415101683215443, "learning_rate": 9.97240013570464e-06, "loss": 0.5937, "step": 767 }, { "epoch": 0.08, "grad_norm": 1.9598053025990756, "learning_rate": 9.972214762958316e-06, "loss": 0.7056, "step": 768 }, { "epoch": 0.08, "grad_norm": 1.7636673248920527, "learning_rate": 9.972028771506116e-06, "loss": 0.6485, "step": 769 }, { "epoch": 0.08, "grad_norm": 1.8398689015712024, "learning_rate": 9.971842161371191e-06, "loss": 0.6528, "step": 770 }, { "epoch": 0.08, "grad_norm": 2.036998222601364, "learning_rate": 9.971654932576757e-06, "loss": 0.8269, "step": 771 }, { "epoch": 0.08, "grad_norm": 2.2372850540938267, "learning_rate": 9.97146708514611e-06, "loss": 0.6838, "step": 772 }, { "epoch": 0.08, "grad_norm": 2.184524253100954, "learning_rate": 9.97127861910263e-06, "loss": 0.6584, "step": 773 }, { "epoch": 0.08, "grad_norm": 2.0262285471104238, "learning_rate": 9.97108953446976e-06, "loss": 0.6831, "step": 774 }, { "epoch": 0.08, "grad_norm": 2.0327519019115647, "learning_rate": 9.970899831271035e-06, "loss": 0.7139, "step": 775 }, { "epoch": 0.08, "grad_norm": 4.692977519635072, "learning_rate": 9.97070950953006e-06, "loss": 0.7304, "step": 776 }, { "epoch": 0.08, "grad_norm": 2.190948787362846, "learning_rate": 9.970518569270513e-06, "loss": 0.6726, "step": 777 }, { "epoch": 0.08, "grad_norm": 1.883598967551119, "learning_rate": 9.970327010516157e-06, "loss": 0.7445, "step": 778 }, { "epoch": 0.08, "grad_norm": 1.9669665308771074, "learning_rate": 9.970134833290825e-06, "loss": 0.711, "step": 779 }, { "epoch": 0.08, "grad_norm": 2.006137564133702, "learning_rate": 9.969942037618435e-06, "loss": 0.7494, "step": 780 }, { "epoch": 0.08, "grad_norm": 1.9594896655663117, "learning_rate": 9.96974862352297e-06, "loss": 0.7115, "step": 781 }, { "epoch": 0.08, "grad_norm": 2.2458314184537476, "learning_rate": 9.969554591028504e-06, "loss": 0.7228, "step": 782 }, { "epoch": 0.08, "grad_norm": 2.122996726550829, "learning_rate": 9.969359940159178e-06, "loss": 0.6048, "step": 783 }, { "epoch": 0.08, "grad_norm": 1.807588607447363, "learning_rate": 9.969164670939211e-06, "loss": 0.7539, "step": 784 }, { "epoch": 0.08, "grad_norm": 1.8072967615525843, "learning_rate": 9.968968783392905e-06, "loss": 0.6841, "step": 785 }, { "epoch": 0.08, "grad_norm": 1.9750367842149152, "learning_rate": 9.968772277544631e-06, "loss": 0.6508, "step": 786 }, { "epoch": 0.08, "grad_norm": 1.92985685375252, "learning_rate": 9.968575153418844e-06, "loss": 0.6518, "step": 787 }, { "epoch": 0.08, "grad_norm": 1.9415021241965027, "learning_rate": 9.96837741104007e-06, "loss": 0.7101, "step": 788 }, { "epoch": 0.08, "grad_norm": 3.138389303105328, "learning_rate": 9.968179050432916e-06, "loss": 0.7551, "step": 789 }, { "epoch": 0.08, "grad_norm": 2.06912804935792, "learning_rate": 9.967980071622064e-06, "loss": 0.7757, "step": 790 }, { "epoch": 0.08, "grad_norm": 1.9782282676201464, "learning_rate": 9.967780474632274e-06, "loss": 0.7201, "step": 791 }, { "epoch": 0.08, "grad_norm": 1.9235000853711717, "learning_rate": 9.96758025948838e-06, "loss": 0.7161, "step": 792 }, { "epoch": 0.08, "grad_norm": 2.0168975228094927, "learning_rate": 9.9673794262153e-06, "loss": 0.5946, "step": 793 }, { "epoch": 0.08, "grad_norm": 2.190046831374383, "learning_rate": 9.96717797483802e-06, "loss": 0.6504, "step": 794 }, { "epoch": 0.08, "grad_norm": 2.066331690054245, "learning_rate": 9.966975905381611e-06, "loss": 0.7086, "step": 795 }, { "epoch": 0.08, "grad_norm": 2.1870842416292247, "learning_rate": 9.966773217871213e-06, "loss": 0.7502, "step": 796 }, { "epoch": 0.09, "grad_norm": 2.3475366489178966, "learning_rate": 9.966569912332048e-06, "loss": 0.7051, "step": 797 }, { "epoch": 0.09, "grad_norm": 1.9478665436224456, "learning_rate": 9.966365988789414e-06, "loss": 0.6896, "step": 798 }, { "epoch": 0.09, "grad_norm": 1.4535954079611202, "learning_rate": 9.966161447268688e-06, "loss": 0.6, "step": 799 }, { "epoch": 0.09, "grad_norm": 2.0098017989535353, "learning_rate": 9.96595628779532e-06, "loss": 0.7593, "step": 800 }, { "epoch": 0.09, "grad_norm": 1.880906608054262, "learning_rate": 9.965750510394837e-06, "loss": 0.7277, "step": 801 }, { "epoch": 0.09, "grad_norm": 1.1857153751978953, "learning_rate": 9.965544115092848e-06, "loss": 0.5674, "step": 802 }, { "epoch": 0.09, "grad_norm": 2.106034308539044, "learning_rate": 9.965337101915032e-06, "loss": 0.7538, "step": 803 }, { "epoch": 0.09, "grad_norm": 1.9121895893278351, "learning_rate": 9.965129470887148e-06, "loss": 0.671, "step": 804 }, { "epoch": 0.09, "grad_norm": 2.1694459200260203, "learning_rate": 9.964921222035036e-06, "loss": 0.7015, "step": 805 }, { "epoch": 0.09, "grad_norm": 1.50002212868652, "learning_rate": 9.964712355384605e-06, "loss": 0.5565, "step": 806 }, { "epoch": 0.09, "grad_norm": 1.9326774301241596, "learning_rate": 9.964502870961848e-06, "loss": 0.644, "step": 807 }, { "epoch": 0.09, "grad_norm": 2.1034744029894457, "learning_rate": 9.964292768792828e-06, "loss": 0.7933, "step": 808 }, { "epoch": 0.09, "grad_norm": 2.1809279544344395, "learning_rate": 9.964082048903689e-06, "loss": 0.7301, "step": 809 }, { "epoch": 0.09, "grad_norm": 2.1700808097988644, "learning_rate": 9.963870711320655e-06, "loss": 0.7464, "step": 810 }, { "epoch": 0.09, "grad_norm": 2.005971007129935, "learning_rate": 9.963658756070022e-06, "loss": 0.6912, "step": 811 }, { "epoch": 0.09, "grad_norm": 2.011299282363924, "learning_rate": 9.963446183178162e-06, "loss": 0.6697, "step": 812 }, { "epoch": 0.09, "grad_norm": 1.9266537570224225, "learning_rate": 9.96323299267153e-06, "loss": 0.773, "step": 813 }, { "epoch": 0.09, "grad_norm": 2.0791223919065143, "learning_rate": 9.963019184576648e-06, "loss": 0.5803, "step": 814 }, { "epoch": 0.09, "grad_norm": 1.9851044287152388, "learning_rate": 9.962804758920126e-06, "loss": 0.7125, "step": 815 }, { "epoch": 0.09, "grad_norm": 1.9911429202161286, "learning_rate": 9.962589715728642e-06, "loss": 0.7257, "step": 816 }, { "epoch": 0.09, "grad_norm": 1.9876988411934722, "learning_rate": 9.962374055028958e-06, "loss": 0.7587, "step": 817 }, { "epoch": 0.09, "grad_norm": 2.2737985152925733, "learning_rate": 9.962157776847906e-06, "loss": 0.7561, "step": 818 }, { "epoch": 0.09, "grad_norm": 1.8837430212873085, "learning_rate": 9.961940881212398e-06, "loss": 0.6476, "step": 819 }, { "epoch": 0.09, "grad_norm": 2.183936881830148, "learning_rate": 9.961723368149426e-06, "loss": 0.6539, "step": 820 }, { "epoch": 0.09, "grad_norm": 2.0544131147856155, "learning_rate": 9.961505237686054e-06, "loss": 0.6427, "step": 821 }, { "epoch": 0.09, "grad_norm": 1.3949131995680661, "learning_rate": 9.961286489849424e-06, "loss": 0.5806, "step": 822 }, { "epoch": 0.09, "grad_norm": 1.860314273119948, "learning_rate": 9.961067124666755e-06, "loss": 0.5779, "step": 823 }, { "epoch": 0.09, "grad_norm": 1.9573945675024338, "learning_rate": 9.960847142165344e-06, "loss": 0.6635, "step": 824 }, { "epoch": 0.09, "grad_norm": 2.941503920766964, "learning_rate": 9.960626542372564e-06, "loss": 0.6536, "step": 825 }, { "epoch": 0.09, "grad_norm": 1.893696492005455, "learning_rate": 9.960405325315865e-06, "loss": 0.7218, "step": 826 }, { "epoch": 0.09, "grad_norm": 1.2482565658463098, "learning_rate": 9.960183491022775e-06, "loss": 0.5901, "step": 827 }, { "epoch": 0.09, "grad_norm": 2.029356069724932, "learning_rate": 9.959961039520893e-06, "loss": 0.7505, "step": 828 }, { "epoch": 0.09, "grad_norm": 1.9529747446169994, "learning_rate": 9.959737970837905e-06, "loss": 0.6669, "step": 829 }, { "epoch": 0.09, "grad_norm": 2.068190441238179, "learning_rate": 9.959514285001564e-06, "loss": 0.7604, "step": 830 }, { "epoch": 0.09, "grad_norm": 1.7806790223991478, "learning_rate": 9.959289982039704e-06, "loss": 0.7501, "step": 831 }, { "epoch": 0.09, "grad_norm": 1.2894435140863645, "learning_rate": 9.959065061980237e-06, "loss": 0.5932, "step": 832 }, { "epoch": 0.09, "grad_norm": 1.9511644451551509, "learning_rate": 9.958839524851152e-06, "loss": 0.6962, "step": 833 }, { "epoch": 0.09, "grad_norm": 1.9801217679824539, "learning_rate": 9.958613370680507e-06, "loss": 0.6834, "step": 834 }, { "epoch": 0.09, "grad_norm": 1.939714976402363, "learning_rate": 9.95838659949645e-06, "loss": 0.6901, "step": 835 }, { "epoch": 0.09, "grad_norm": 2.1239436256454867, "learning_rate": 9.958159211327197e-06, "loss": 0.7179, "step": 836 }, { "epoch": 0.09, "grad_norm": 1.553093124626845, "learning_rate": 9.95793120620104e-06, "loss": 0.586, "step": 837 }, { "epoch": 0.09, "grad_norm": 2.0187676377826915, "learning_rate": 9.95770258414635e-06, "loss": 0.6163, "step": 838 }, { "epoch": 0.09, "grad_norm": 1.818306398255729, "learning_rate": 9.957473345191578e-06, "loss": 0.6194, "step": 839 }, { "epoch": 0.09, "grad_norm": 1.9799832731532732, "learning_rate": 9.957243489365246e-06, "loss": 0.7678, "step": 840 }, { "epoch": 0.09, "grad_norm": 1.8910856142622416, "learning_rate": 9.95701301669596e-06, "loss": 0.6977, "step": 841 }, { "epoch": 0.09, "grad_norm": 1.764942749824653, "learning_rate": 9.956781927212394e-06, "loss": 0.7287, "step": 842 }, { "epoch": 0.09, "grad_norm": 2.05372873625905, "learning_rate": 9.956550220943305e-06, "loss": 0.6527, "step": 843 }, { "epoch": 0.09, "grad_norm": 1.6191609862880678, "learning_rate": 9.956317897917523e-06, "loss": 0.5839, "step": 844 }, { "epoch": 0.09, "grad_norm": 2.0570053994959965, "learning_rate": 9.956084958163959e-06, "loss": 0.7275, "step": 845 }, { "epoch": 0.09, "grad_norm": 2.12687978997014, "learning_rate": 9.955851401711598e-06, "loss": 0.7786, "step": 846 }, { "epoch": 0.09, "grad_norm": 2.419409974476829, "learning_rate": 9.9556172285895e-06, "loss": 0.7233, "step": 847 }, { "epoch": 0.09, "grad_norm": 1.2669899387195107, "learning_rate": 9.955382438826805e-06, "loss": 0.5651, "step": 848 }, { "epoch": 0.09, "grad_norm": 2.1031983553668625, "learning_rate": 9.955147032452728e-06, "loss": 0.7858, "step": 849 }, { "epoch": 0.09, "grad_norm": 1.293847647549828, "learning_rate": 9.954911009496561e-06, "loss": 0.565, "step": 850 }, { "epoch": 0.09, "grad_norm": 2.04216113102852, "learning_rate": 9.954674369987676e-06, "loss": 0.5988, "step": 851 }, { "epoch": 0.09, "grad_norm": 1.9708814930680758, "learning_rate": 9.954437113955515e-06, "loss": 0.6332, "step": 852 }, { "epoch": 0.09, "grad_norm": 2.0106287477784215, "learning_rate": 9.954199241429604e-06, "loss": 0.7319, "step": 853 }, { "epoch": 0.09, "grad_norm": 2.208112382170846, "learning_rate": 9.953960752439537e-06, "loss": 0.7637, "step": 854 }, { "epoch": 0.09, "grad_norm": 2.03840883965434, "learning_rate": 9.953721647014994e-06, "loss": 0.6205, "step": 855 }, { "epoch": 0.09, "grad_norm": 2.0448535660787863, "learning_rate": 9.953481925185726e-06, "loss": 0.7341, "step": 856 }, { "epoch": 0.09, "grad_norm": 1.8245402546880618, "learning_rate": 9.953241586981563e-06, "loss": 0.7053, "step": 857 }, { "epoch": 0.09, "grad_norm": 1.9158666124943733, "learning_rate": 9.95300063243241e-06, "loss": 0.687, "step": 858 }, { "epoch": 0.09, "grad_norm": 2.1383069655722107, "learning_rate": 9.95275906156825e-06, "loss": 0.7777, "step": 859 }, { "epoch": 0.09, "grad_norm": 1.8852773508891008, "learning_rate": 9.952516874419142e-06, "loss": 0.6989, "step": 860 }, { "epoch": 0.09, "grad_norm": 1.9365178890972148, "learning_rate": 9.952274071015224e-06, "loss": 0.6269, "step": 861 }, { "epoch": 0.09, "grad_norm": 1.8445518976600077, "learning_rate": 9.952030651386705e-06, "loss": 0.6634, "step": 862 }, { "epoch": 0.09, "grad_norm": 2.0032291410027248, "learning_rate": 9.951786615563876e-06, "loss": 0.7145, "step": 863 }, { "epoch": 0.09, "grad_norm": 1.985835250824167, "learning_rate": 9.951541963577105e-06, "loss": 0.6592, "step": 864 }, { "epoch": 0.09, "grad_norm": 1.486941319579158, "learning_rate": 9.951296695456833e-06, "loss": 0.5759, "step": 865 }, { "epoch": 0.09, "grad_norm": 1.973220467891377, "learning_rate": 9.951050811233578e-06, "loss": 0.7293, "step": 866 }, { "epoch": 0.09, "grad_norm": 2.176338625338852, "learning_rate": 9.950804310937938e-06, "loss": 0.6672, "step": 867 }, { "epoch": 0.09, "grad_norm": 2.0873120361041644, "learning_rate": 9.950557194600587e-06, "loss": 0.6939, "step": 868 }, { "epoch": 0.09, "grad_norm": 2.155052143239552, "learning_rate": 9.95030946225227e-06, "loss": 0.7204, "step": 869 }, { "epoch": 0.09, "grad_norm": 2.126437473930114, "learning_rate": 9.950061113923817e-06, "loss": 0.7747, "step": 870 }, { "epoch": 0.09, "grad_norm": 1.9576723687001039, "learning_rate": 9.949812149646128e-06, "loss": 0.6628, "step": 871 }, { "epoch": 0.09, "grad_norm": 1.6614711222747989, "learning_rate": 9.949562569450184e-06, "loss": 0.5759, "step": 872 }, { "epoch": 0.09, "grad_norm": 1.9308667142738394, "learning_rate": 9.94931237336704e-06, "loss": 0.8066, "step": 873 }, { "epoch": 0.09, "grad_norm": 2.2555118848379188, "learning_rate": 9.949061561427828e-06, "loss": 0.6847, "step": 874 }, { "epoch": 0.09, "grad_norm": 1.9314068619899856, "learning_rate": 9.94881013366376e-06, "loss": 0.65, "step": 875 }, { "epoch": 0.09, "grad_norm": 1.2859002621915272, "learning_rate": 9.948558090106118e-06, "loss": 0.577, "step": 876 }, { "epoch": 0.09, "grad_norm": 1.8343524294261944, "learning_rate": 9.948305430786267e-06, "loss": 0.6663, "step": 877 }, { "epoch": 0.09, "grad_norm": 1.27413886353906, "learning_rate": 9.948052155735646e-06, "loss": 0.5671, "step": 878 }, { "epoch": 0.09, "grad_norm": 1.9329949031784248, "learning_rate": 9.947798264985771e-06, "loss": 0.7477, "step": 879 }, { "epoch": 0.09, "grad_norm": 1.8350205719622865, "learning_rate": 9.947543758568232e-06, "loss": 0.6675, "step": 880 }, { "epoch": 0.09, "grad_norm": 2.049017749705895, "learning_rate": 9.947288636514698e-06, "loss": 0.6804, "step": 881 }, { "epoch": 0.09, "grad_norm": 2.2655192668260327, "learning_rate": 9.947032898856919e-06, "loss": 0.8159, "step": 882 }, { "epoch": 0.09, "grad_norm": 1.9121519579763797, "learning_rate": 9.94677654562671e-06, "loss": 0.4883, "step": 883 }, { "epoch": 0.09, "grad_norm": 2.4395973188709585, "learning_rate": 9.946519576855977e-06, "loss": 0.7293, "step": 884 }, { "epoch": 0.09, "grad_norm": 2.260831118007597, "learning_rate": 9.94626199257669e-06, "loss": 0.7526, "step": 885 }, { "epoch": 0.09, "grad_norm": 1.80117002770644, "learning_rate": 9.946003792820905e-06, "loss": 0.5586, "step": 886 }, { "epoch": 0.09, "grad_norm": 1.766625518259006, "learning_rate": 9.945744977620746e-06, "loss": 0.6205, "step": 887 }, { "epoch": 0.09, "grad_norm": 1.9655994887125887, "learning_rate": 9.945485547008423e-06, "loss": 0.7833, "step": 888 }, { "epoch": 0.09, "grad_norm": 1.7434404422813297, "learning_rate": 9.945225501016213e-06, "loss": 0.6344, "step": 889 }, { "epoch": 0.09, "grad_norm": 1.7481374716356062, "learning_rate": 9.944964839676477e-06, "loss": 0.6894, "step": 890 }, { "epoch": 0.1, "grad_norm": 1.8176523302689798, "learning_rate": 9.944703563021648e-06, "loss": 0.6502, "step": 891 }, { "epoch": 0.1, "grad_norm": 2.004294240652657, "learning_rate": 9.94444167108424e-06, "loss": 0.6388, "step": 892 }, { "epoch": 0.1, "grad_norm": 1.9647069079247585, "learning_rate": 9.944179163896836e-06, "loss": 0.6653, "step": 893 }, { "epoch": 0.1, "grad_norm": 1.5461969697837032, "learning_rate": 9.943916041492107e-06, "loss": 0.575, "step": 894 }, { "epoch": 0.1, "grad_norm": 2.22323381433167, "learning_rate": 9.94365230390279e-06, "loss": 0.7681, "step": 895 }, { "epoch": 0.1, "grad_norm": 1.8998692376958821, "learning_rate": 9.943387951161702e-06, "loss": 0.7469, "step": 896 }, { "epoch": 0.1, "grad_norm": 2.086551583069632, "learning_rate": 9.943122983301741e-06, "loss": 0.6981, "step": 897 }, { "epoch": 0.1, "grad_norm": 1.9536848721695246, "learning_rate": 9.942857400355874e-06, "loss": 0.6746, "step": 898 }, { "epoch": 0.1, "grad_norm": 1.915576041501433, "learning_rate": 9.942591202357148e-06, "loss": 0.7611, "step": 899 }, { "epoch": 0.1, "grad_norm": 1.364021134309292, "learning_rate": 9.94232438933869e-06, "loss": 0.5942, "step": 900 }, { "epoch": 0.1, "grad_norm": 1.8294385717738884, "learning_rate": 9.942056961333696e-06, "loss": 0.6488, "step": 901 }, { "epoch": 0.1, "grad_norm": 2.0144998684366797, "learning_rate": 9.941788918375445e-06, "loss": 0.7137, "step": 902 }, { "epoch": 0.1, "grad_norm": 1.8401864763994462, "learning_rate": 9.941520260497293e-06, "loss": 0.6927, "step": 903 }, { "epoch": 0.1, "grad_norm": 2.0097762078773203, "learning_rate": 9.941250987732666e-06, "loss": 0.7142, "step": 904 }, { "epoch": 0.1, "grad_norm": 1.8890714132205273, "learning_rate": 9.940981100115071e-06, "loss": 0.6837, "step": 905 }, { "epoch": 0.1, "grad_norm": 2.041126033579686, "learning_rate": 9.940710597678091e-06, "loss": 0.6598, "step": 906 }, { "epoch": 0.1, "grad_norm": 1.9660795411746788, "learning_rate": 9.940439480455386e-06, "loss": 0.7457, "step": 907 }, { "epoch": 0.1, "grad_norm": 2.143386623832964, "learning_rate": 9.940167748480693e-06, "loss": 0.6951, "step": 908 }, { "epoch": 0.1, "grad_norm": 2.0067135962369353, "learning_rate": 9.93989540178782e-06, "loss": 0.7059, "step": 909 }, { "epoch": 0.1, "grad_norm": 1.8220722349802303, "learning_rate": 9.939622440410662e-06, "loss": 0.6973, "step": 910 }, { "epoch": 0.1, "grad_norm": 2.010374325160988, "learning_rate": 9.939348864383178e-06, "loss": 0.6585, "step": 911 }, { "epoch": 0.1, "grad_norm": 2.875720078345201, "learning_rate": 9.939074673739413e-06, "loss": 0.7384, "step": 912 }, { "epoch": 0.1, "grad_norm": 1.8907160301271748, "learning_rate": 9.938799868513487e-06, "loss": 0.624, "step": 913 }, { "epoch": 0.1, "grad_norm": 1.8995600000054556, "learning_rate": 9.93852444873959e-06, "loss": 0.7035, "step": 914 }, { "epoch": 0.1, "grad_norm": 2.029031209686484, "learning_rate": 9.938248414451997e-06, "loss": 0.73, "step": 915 }, { "epoch": 0.1, "grad_norm": 1.800337655998664, "learning_rate": 9.937971765685054e-06, "loss": 0.7336, "step": 916 }, { "epoch": 0.1, "grad_norm": 2.048644285170949, "learning_rate": 9.937694502473189e-06, "loss": 0.694, "step": 917 }, { "epoch": 0.1, "grad_norm": 1.916332241748504, "learning_rate": 9.937416624850897e-06, "loss": 0.5679, "step": 918 }, { "epoch": 0.1, "grad_norm": 1.9503941564167693, "learning_rate": 9.937138132852758e-06, "loss": 0.726, "step": 919 }, { "epoch": 0.1, "grad_norm": 1.7357780538305856, "learning_rate": 9.936859026513423e-06, "loss": 0.6987, "step": 920 }, { "epoch": 0.1, "grad_norm": 1.9836512302752924, "learning_rate": 9.936579305867626e-06, "loss": 0.6031, "step": 921 }, { "epoch": 0.1, "grad_norm": 1.9129468283991689, "learning_rate": 9.93629897095017e-06, "loss": 0.6942, "step": 922 }, { "epoch": 0.1, "grad_norm": 1.7109833132937449, "learning_rate": 9.93601802179594e-06, "loss": 0.5749, "step": 923 }, { "epoch": 0.1, "grad_norm": 1.8711908538362967, "learning_rate": 9.935736458439893e-06, "loss": 0.6866, "step": 924 }, { "epoch": 0.1, "grad_norm": 1.3582192709770602, "learning_rate": 9.935454280917067e-06, "loss": 0.5714, "step": 925 }, { "epoch": 0.1, "grad_norm": 1.8848765171642992, "learning_rate": 9.935171489262575e-06, "loss": 0.7623, "step": 926 }, { "epoch": 0.1, "grad_norm": 1.7549797668129477, "learning_rate": 9.9348880835116e-06, "loss": 0.6968, "step": 927 }, { "epoch": 0.1, "grad_norm": 1.9686542009178487, "learning_rate": 9.934604063699413e-06, "loss": 0.7196, "step": 928 }, { "epoch": 0.1, "grad_norm": 1.875174874306256, "learning_rate": 9.934319429861353e-06, "loss": 0.6942, "step": 929 }, { "epoch": 0.1, "grad_norm": 1.8909187353221806, "learning_rate": 9.934034182032835e-06, "loss": 0.6843, "step": 930 }, { "epoch": 0.1, "grad_norm": 1.9074927253587837, "learning_rate": 9.933748320249357e-06, "loss": 0.7063, "step": 931 }, { "epoch": 0.1, "grad_norm": 1.7165779862491144, "learning_rate": 9.933461844546488e-06, "loss": 0.7235, "step": 932 }, { "epoch": 0.1, "grad_norm": 2.026549340024964, "learning_rate": 9.933174754959877e-06, "loss": 0.6733, "step": 933 }, { "epoch": 0.1, "grad_norm": 1.957450032518651, "learning_rate": 9.932887051525243e-06, "loss": 0.7106, "step": 934 }, { "epoch": 0.1, "grad_norm": 1.9578558112482305, "learning_rate": 9.93259873427839e-06, "loss": 0.7308, "step": 935 }, { "epoch": 0.1, "grad_norm": 2.0359320597671235, "learning_rate": 9.932309803255192e-06, "loss": 0.7446, "step": 936 }, { "epoch": 0.1, "grad_norm": 1.9314899166828192, "learning_rate": 9.932020258491601e-06, "loss": 0.7315, "step": 937 }, { "epoch": 0.1, "grad_norm": 2.1937137495410672, "learning_rate": 9.93173010002365e-06, "loss": 0.6579, "step": 938 }, { "epoch": 0.1, "grad_norm": 1.7967415208646527, "learning_rate": 9.931439327887436e-06, "loss": 0.6257, "step": 939 }, { "epoch": 0.1, "grad_norm": 1.9995339984620413, "learning_rate": 9.931147942119149e-06, "loss": 0.7286, "step": 940 }, { "epoch": 0.1, "grad_norm": 2.0906891098607465, "learning_rate": 9.93085594275504e-06, "loss": 0.7374, "step": 941 }, { "epoch": 0.1, "grad_norm": 1.9354501979533265, "learning_rate": 9.930563329831448e-06, "loss": 0.6604, "step": 942 }, { "epoch": 0.1, "grad_norm": 1.8164024536807182, "learning_rate": 9.930270103384783e-06, "loss": 0.6615, "step": 943 }, { "epoch": 0.1, "grad_norm": 1.8302877141688194, "learning_rate": 9.92997626345153e-06, "loss": 0.6876, "step": 944 }, { "epoch": 0.1, "grad_norm": 1.9391248774091867, "learning_rate": 9.929681810068252e-06, "loss": 0.723, "step": 945 }, { "epoch": 0.1, "grad_norm": 2.2854809216447265, "learning_rate": 9.929386743271591e-06, "loss": 0.7219, "step": 946 }, { "epoch": 0.1, "grad_norm": 2.0803385566869887, "learning_rate": 9.929091063098264e-06, "loss": 0.753, "step": 947 }, { "epoch": 0.1, "grad_norm": 1.8769520773415693, "learning_rate": 9.92879476958506e-06, "loss": 0.6698, "step": 948 }, { "epoch": 0.1, "grad_norm": 3.0338984130413382, "learning_rate": 9.928497862768848e-06, "loss": 0.6133, "step": 949 }, { "epoch": 0.1, "grad_norm": 2.031369733803435, "learning_rate": 9.928200342686573e-06, "loss": 0.683, "step": 950 }, { "epoch": 0.1, "grad_norm": 1.9121917737759306, "learning_rate": 9.927902209375258e-06, "loss": 0.6706, "step": 951 }, { "epoch": 0.1, "grad_norm": 1.5398526806875403, "learning_rate": 9.927603462871999e-06, "loss": 0.5589, "step": 952 }, { "epoch": 0.1, "grad_norm": 1.926857273983394, "learning_rate": 9.92730410321397e-06, "loss": 0.8163, "step": 953 }, { "epoch": 0.1, "grad_norm": 3.5068458384049706, "learning_rate": 9.927004130438423e-06, "loss": 0.6586, "step": 954 }, { "epoch": 0.1, "grad_norm": 1.9278614835729058, "learning_rate": 9.92670354458268e-06, "loss": 0.7044, "step": 955 }, { "epoch": 0.1, "grad_norm": 2.0421542180059484, "learning_rate": 9.926402345684147e-06, "loss": 0.5966, "step": 956 }, { "epoch": 0.1, "grad_norm": 2.4799028559445557, "learning_rate": 9.926100533780304e-06, "loss": 0.7221, "step": 957 }, { "epoch": 0.1, "grad_norm": 1.9928578796849754, "learning_rate": 9.925798108908705e-06, "loss": 0.657, "step": 958 }, { "epoch": 0.1, "grad_norm": 1.9410689513497479, "learning_rate": 9.925495071106979e-06, "loss": 0.6789, "step": 959 }, { "epoch": 0.1, "grad_norm": 2.516525809540818, "learning_rate": 9.925191420412836e-06, "loss": 0.6031, "step": 960 }, { "epoch": 0.1, "grad_norm": 1.932592464077483, "learning_rate": 9.924887156864061e-06, "loss": 0.6312, "step": 961 }, { "epoch": 0.1, "grad_norm": 2.052584271243782, "learning_rate": 9.924582280498514e-06, "loss": 0.6575, "step": 962 }, { "epoch": 0.1, "grad_norm": 1.5864506591112706, "learning_rate": 9.924276791354131e-06, "loss": 0.582, "step": 963 }, { "epoch": 0.1, "grad_norm": 1.9981394765498213, "learning_rate": 9.923970689468922e-06, "loss": 0.7274, "step": 964 }, { "epoch": 0.1, "grad_norm": 1.9558972807348234, "learning_rate": 9.923663974880982e-06, "loss": 0.7455, "step": 965 }, { "epoch": 0.1, "grad_norm": 2.186883302929937, "learning_rate": 9.923356647628471e-06, "loss": 0.7631, "step": 966 }, { "epoch": 0.1, "grad_norm": 2.4301464397100925, "learning_rate": 9.923048707749634e-06, "loss": 0.7261, "step": 967 }, { "epoch": 0.1, "grad_norm": 2.03369318956468, "learning_rate": 9.922740155282786e-06, "loss": 0.7837, "step": 968 }, { "epoch": 0.1, "grad_norm": 1.9354296124543051, "learning_rate": 9.922430990266326e-06, "loss": 0.733, "step": 969 }, { "epoch": 0.1, "grad_norm": 2.13882100530349, "learning_rate": 9.922121212738717e-06, "loss": 0.6927, "step": 970 }, { "epoch": 0.1, "grad_norm": 2.0047547780420274, "learning_rate": 9.921810822738509e-06, "loss": 0.6247, "step": 971 }, { "epoch": 0.1, "grad_norm": 1.8115122257951366, "learning_rate": 9.921499820304327e-06, "loss": 0.5767, "step": 972 }, { "epoch": 0.1, "grad_norm": 2.24469124210294, "learning_rate": 9.921188205474866e-06, "loss": 0.774, "step": 973 }, { "epoch": 0.1, "grad_norm": 1.8300500874822017, "learning_rate": 9.920875978288904e-06, "loss": 0.7531, "step": 974 }, { "epoch": 0.1, "grad_norm": 1.9012945648917925, "learning_rate": 9.920563138785291e-06, "loss": 0.7384, "step": 975 }, { "epoch": 0.1, "grad_norm": 1.8306690170124877, "learning_rate": 9.920249687002953e-06, "loss": 0.5666, "step": 976 }, { "epoch": 0.1, "grad_norm": 1.7457977065773942, "learning_rate": 9.919935622980895e-06, "loss": 0.6765, "step": 977 }, { "epoch": 0.1, "grad_norm": 1.9278634956711513, "learning_rate": 9.919620946758197e-06, "loss": 0.7025, "step": 978 }, { "epoch": 0.1, "grad_norm": 2.166289595980555, "learning_rate": 9.919305658374016e-06, "loss": 0.7082, "step": 979 }, { "epoch": 0.1, "grad_norm": 2.236197763986314, "learning_rate": 9.918989757867584e-06, "loss": 0.747, "step": 980 }, { "epoch": 0.1, "grad_norm": 1.569836586103569, "learning_rate": 9.918673245278205e-06, "loss": 0.5812, "step": 981 }, { "epoch": 0.1, "grad_norm": 1.8656833514731779, "learning_rate": 9.918356120645269e-06, "loss": 0.6862, "step": 982 }, { "epoch": 0.1, "grad_norm": 1.8385215417858827, "learning_rate": 9.918038384008234e-06, "loss": 0.7312, "step": 983 }, { "epoch": 0.1, "grad_norm": 1.781369823040621, "learning_rate": 9.917720035406638e-06, "loss": 0.6231, "step": 984 }, { "epoch": 0.11, "grad_norm": 2.1382941604994836, "learning_rate": 9.917401074880091e-06, "loss": 0.8087, "step": 985 }, { "epoch": 0.11, "grad_norm": 1.670298417061977, "learning_rate": 9.917081502468286e-06, "loss": 0.5768, "step": 986 }, { "epoch": 0.11, "grad_norm": 1.8302615113233287, "learning_rate": 9.916761318210988e-06, "loss": 0.6685, "step": 987 }, { "epoch": 0.11, "grad_norm": 1.7682347058845456, "learning_rate": 9.916440522148036e-06, "loss": 0.71, "step": 988 }, { "epoch": 0.11, "grad_norm": 1.3045888762554116, "learning_rate": 9.916119114319348e-06, "loss": 0.5689, "step": 989 }, { "epoch": 0.11, "grad_norm": 2.007081779309069, "learning_rate": 9.915797094764919e-06, "loss": 0.6655, "step": 990 }, { "epoch": 0.11, "grad_norm": 1.9687661002794195, "learning_rate": 9.915474463524817e-06, "loss": 0.7861, "step": 991 }, { "epoch": 0.11, "grad_norm": 1.9347585933015765, "learning_rate": 9.915151220639188e-06, "loss": 0.6805, "step": 992 }, { "epoch": 0.11, "grad_norm": 1.4758140924230898, "learning_rate": 9.914827366148256e-06, "loss": 0.5783, "step": 993 }, { "epoch": 0.11, "grad_norm": 2.0505240432042173, "learning_rate": 9.914502900092318e-06, "loss": 0.738, "step": 994 }, { "epoch": 0.11, "grad_norm": 2.0317083555885724, "learning_rate": 9.914177822511748e-06, "loss": 0.7546, "step": 995 }, { "epoch": 0.11, "grad_norm": 2.090014713235605, "learning_rate": 9.913852133446995e-06, "loss": 0.7306, "step": 996 }, { "epoch": 0.11, "grad_norm": 2.1859586601793493, "learning_rate": 9.913525832938588e-06, "loss": 0.7022, "step": 997 }, { "epoch": 0.11, "grad_norm": 2.1145029311609673, "learning_rate": 9.913198921027128e-06, "loss": 0.5942, "step": 998 }, { "epoch": 0.11, "grad_norm": 1.2889611701485963, "learning_rate": 9.912871397753293e-06, "loss": 0.5814, "step": 999 }, { "epoch": 0.11, "grad_norm": 1.9029234738799066, "learning_rate": 9.912543263157837e-06, "loss": 0.7214, "step": 1000 }, { "epoch": 0.11, "grad_norm": 1.282132657865543, "learning_rate": 9.912214517281592e-06, "loss": 0.595, "step": 1001 }, { "epoch": 0.11, "grad_norm": 1.981867860858088, "learning_rate": 9.911885160165466e-06, "loss": 0.6955, "step": 1002 }, { "epoch": 0.11, "grad_norm": 2.027611730946998, "learning_rate": 9.911555191850439e-06, "loss": 0.6802, "step": 1003 }, { "epoch": 0.11, "grad_norm": 1.9944241224893418, "learning_rate": 9.911224612377572e-06, "loss": 0.7528, "step": 1004 }, { "epoch": 0.11, "grad_norm": 2.1289621306071345, "learning_rate": 9.910893421787999e-06, "loss": 0.7307, "step": 1005 }, { "epoch": 0.11, "grad_norm": 1.8140205764613233, "learning_rate": 9.910561620122932e-06, "loss": 0.7218, "step": 1006 }, { "epoch": 0.11, "grad_norm": 1.7185826810897067, "learning_rate": 9.910229207423654e-06, "loss": 0.6437, "step": 1007 }, { "epoch": 0.11, "grad_norm": 1.7844350510924598, "learning_rate": 9.909896183731533e-06, "loss": 0.6695, "step": 1008 }, { "epoch": 0.11, "grad_norm": 1.8723833290256655, "learning_rate": 9.909562549088005e-06, "loss": 0.6566, "step": 1009 }, { "epoch": 0.11, "grad_norm": 2.013513583205812, "learning_rate": 9.909228303534585e-06, "loss": 0.7512, "step": 1010 }, { "epoch": 0.11, "grad_norm": 2.012806002550236, "learning_rate": 9.908893447112869e-06, "loss": 0.7707, "step": 1011 }, { "epoch": 0.11, "grad_norm": 1.8327438471830064, "learning_rate": 9.908557979864518e-06, "loss": 0.7486, "step": 1012 }, { "epoch": 0.11, "grad_norm": 1.8050949135937022, "learning_rate": 9.908221901831275e-06, "loss": 0.6331, "step": 1013 }, { "epoch": 0.11, "grad_norm": 2.0177193345955127, "learning_rate": 9.907885213054962e-06, "loss": 0.7158, "step": 1014 }, { "epoch": 0.11, "grad_norm": 1.759654160524063, "learning_rate": 9.907547913577475e-06, "loss": 0.6308, "step": 1015 }, { "epoch": 0.11, "grad_norm": 1.7360199226903215, "learning_rate": 9.907210003440781e-06, "loss": 0.6496, "step": 1016 }, { "epoch": 0.11, "grad_norm": 2.188457628683241, "learning_rate": 9.906871482686933e-06, "loss": 0.577, "step": 1017 }, { "epoch": 0.11, "grad_norm": 2.1764475947251967, "learning_rate": 9.906532351358047e-06, "loss": 0.7578, "step": 1018 }, { "epoch": 0.11, "grad_norm": 1.410609035679558, "learning_rate": 9.906192609496328e-06, "loss": 0.5613, "step": 1019 }, { "epoch": 0.11, "grad_norm": 2.022933930979579, "learning_rate": 9.905852257144046e-06, "loss": 0.6633, "step": 1020 }, { "epoch": 0.11, "grad_norm": 2.0668874112821976, "learning_rate": 9.905511294343557e-06, "loss": 0.7712, "step": 1021 }, { "epoch": 0.11, "grad_norm": 2.057931020595363, "learning_rate": 9.905169721137285e-06, "loss": 0.7391, "step": 1022 }, { "epoch": 0.11, "grad_norm": 1.7962901766624568, "learning_rate": 9.904827537567731e-06, "loss": 0.5644, "step": 1023 }, { "epoch": 0.11, "grad_norm": 1.8651694857233392, "learning_rate": 9.904484743677476e-06, "loss": 0.6891, "step": 1024 }, { "epoch": 0.11, "grad_norm": 2.418700242757065, "learning_rate": 9.904141339509177e-06, "loss": 0.6683, "step": 1025 }, { "epoch": 0.11, "grad_norm": 1.7761025917513806, "learning_rate": 9.903797325105562e-06, "loss": 0.6848, "step": 1026 }, { "epoch": 0.11, "grad_norm": 1.9974070639632837, "learning_rate": 9.903452700509437e-06, "loss": 0.7564, "step": 1027 }, { "epoch": 0.11, "grad_norm": 2.0272114413683573, "learning_rate": 9.903107465763686e-06, "loss": 0.7539, "step": 1028 }, { "epoch": 0.11, "grad_norm": 1.8755963248990544, "learning_rate": 9.902761620911267e-06, "loss": 0.7771, "step": 1029 }, { "epoch": 0.11, "grad_norm": 1.8504262132334046, "learning_rate": 9.902415165995215e-06, "loss": 0.6575, "step": 1030 }, { "epoch": 0.11, "grad_norm": 1.8656589779242017, "learning_rate": 9.902068101058639e-06, "loss": 0.6699, "step": 1031 }, { "epoch": 0.11, "grad_norm": 1.8462684446468653, "learning_rate": 9.901720426144727e-06, "loss": 0.6759, "step": 1032 }, { "epoch": 0.11, "grad_norm": 1.79645477548539, "learning_rate": 9.90137214129674e-06, "loss": 0.6061, "step": 1033 }, { "epoch": 0.11, "grad_norm": 1.7091850829519826, "learning_rate": 9.901023246558015e-06, "loss": 0.5892, "step": 1034 }, { "epoch": 0.11, "grad_norm": 1.7886007184092905, "learning_rate": 9.900673741971969e-06, "loss": 0.7052, "step": 1035 }, { "epoch": 0.11, "grad_norm": 2.2992988229939915, "learning_rate": 9.900323627582088e-06, "loss": 0.7144, "step": 1036 }, { "epoch": 0.11, "grad_norm": 1.8393193687179574, "learning_rate": 9.89997290343194e-06, "loss": 0.7144, "step": 1037 }, { "epoch": 0.11, "grad_norm": 2.041125456047106, "learning_rate": 9.899621569565166e-06, "loss": 0.7372, "step": 1038 }, { "epoch": 0.11, "grad_norm": 2.4232526743394254, "learning_rate": 9.899269626025482e-06, "loss": 0.6577, "step": 1039 }, { "epoch": 0.11, "grad_norm": 1.7865601961207078, "learning_rate": 9.898917072856686e-06, "loss": 0.6103, "step": 1040 }, { "epoch": 0.11, "grad_norm": 2.024262093306313, "learning_rate": 9.89856391010264e-06, "loss": 0.7018, "step": 1041 }, { "epoch": 0.11, "grad_norm": 2.0953377563970546, "learning_rate": 9.898210137807295e-06, "loss": 0.7599, "step": 1042 }, { "epoch": 0.11, "grad_norm": 2.1726507445793533, "learning_rate": 9.897855756014669e-06, "loss": 0.8102, "step": 1043 }, { "epoch": 0.11, "grad_norm": 1.7684949804348247, "learning_rate": 9.89750076476886e-06, "loss": 0.8323, "step": 1044 }, { "epoch": 0.11, "grad_norm": 2.3983913238121235, "learning_rate": 9.89714516411404e-06, "loss": 0.673, "step": 1045 }, { "epoch": 0.11, "grad_norm": 2.2178908510906363, "learning_rate": 9.896788954094456e-06, "loss": 0.725, "step": 1046 }, { "epoch": 0.11, "grad_norm": 2.4508010417056476, "learning_rate": 9.896432134754432e-06, "loss": 0.6766, "step": 1047 }, { "epoch": 0.11, "grad_norm": 1.7738472743682026, "learning_rate": 9.896074706138372e-06, "loss": 0.5717, "step": 1048 }, { "epoch": 0.11, "grad_norm": 1.9340161601357928, "learning_rate": 9.895716668290746e-06, "loss": 0.7412, "step": 1049 }, { "epoch": 0.11, "grad_norm": 1.8709432244607314, "learning_rate": 9.895358021256113e-06, "loss": 0.777, "step": 1050 }, { "epoch": 0.11, "grad_norm": 2.0257832395409734, "learning_rate": 9.894998765079093e-06, "loss": 0.7581, "step": 1051 }, { "epoch": 0.11, "grad_norm": 1.750313364116904, "learning_rate": 9.894638899804391e-06, "loss": 0.723, "step": 1052 }, { "epoch": 0.11, "grad_norm": 2.1174556606020123, "learning_rate": 9.89427842547679e-06, "loss": 0.6384, "step": 1053 }, { "epoch": 0.11, "grad_norm": 2.0752634192209696, "learning_rate": 9.893917342141141e-06, "loss": 0.799, "step": 1054 }, { "epoch": 0.11, "grad_norm": 1.945032182092935, "learning_rate": 9.893555649842374e-06, "loss": 0.7173, "step": 1055 }, { "epoch": 0.11, "grad_norm": 1.5149126334137737, "learning_rate": 9.8931933486255e-06, "loss": 0.578, "step": 1056 }, { "epoch": 0.11, "grad_norm": 2.0069927971302475, "learning_rate": 9.892830438535596e-06, "loss": 0.7286, "step": 1057 }, { "epoch": 0.11, "grad_norm": 1.8343084788860764, "learning_rate": 9.892466919617822e-06, "loss": 0.7218, "step": 1058 }, { "epoch": 0.11, "grad_norm": 2.0930664292886103, "learning_rate": 9.892102791917411e-06, "loss": 0.729, "step": 1059 }, { "epoch": 0.11, "grad_norm": 2.157425921695639, "learning_rate": 9.891738055479673e-06, "loss": 0.6458, "step": 1060 }, { "epoch": 0.11, "grad_norm": 1.8090400329012988, "learning_rate": 9.891372710349995e-06, "loss": 0.6574, "step": 1061 }, { "epoch": 0.11, "grad_norm": 1.8977893037319264, "learning_rate": 9.891006756573834e-06, "loss": 0.6121, "step": 1062 }, { "epoch": 0.11, "grad_norm": 1.8531852070833517, "learning_rate": 9.890640194196727e-06, "loss": 0.6924, "step": 1063 }, { "epoch": 0.11, "grad_norm": 1.7224245319638707, "learning_rate": 9.89027302326429e-06, "loss": 0.6329, "step": 1064 }, { "epoch": 0.11, "grad_norm": 3.162216231666514, "learning_rate": 9.889905243822209e-06, "loss": 0.6887, "step": 1065 }, { "epoch": 0.11, "grad_norm": 1.838592229232448, "learning_rate": 9.889536855916247e-06, "loss": 0.6435, "step": 1066 }, { "epoch": 0.11, "grad_norm": 2.0938723323450894, "learning_rate": 9.889167859592243e-06, "loss": 0.7416, "step": 1067 }, { "epoch": 0.11, "grad_norm": 1.8917862824018499, "learning_rate": 9.888798254896114e-06, "loss": 0.7205, "step": 1068 }, { "epoch": 0.11, "grad_norm": 1.8341928840698354, "learning_rate": 9.888428041873851e-06, "loss": 0.6728, "step": 1069 }, { "epoch": 0.11, "grad_norm": 2.195202316826438, "learning_rate": 9.888057220571518e-06, "loss": 0.823, "step": 1070 }, { "epoch": 0.11, "grad_norm": 2.249471900151845, "learning_rate": 9.887685791035261e-06, "loss": 0.7655, "step": 1071 }, { "epoch": 0.11, "grad_norm": 1.9447277294857879, "learning_rate": 9.887313753311296e-06, "loss": 0.6815, "step": 1072 }, { "epoch": 0.11, "grad_norm": 2.0888986458223457, "learning_rate": 9.886941107445915e-06, "loss": 0.6243, "step": 1073 }, { "epoch": 0.11, "grad_norm": 1.916850075617966, "learning_rate": 9.88656785348549e-06, "loss": 0.7679, "step": 1074 }, { "epoch": 0.11, "grad_norm": 1.8181363066497542, "learning_rate": 9.886193991476466e-06, "loss": 0.6015, "step": 1075 }, { "epoch": 0.11, "grad_norm": 2.167579094313577, "learning_rate": 9.88581952146536e-06, "loss": 0.6994, "step": 1076 }, { "epoch": 0.11, "grad_norm": 2.0165368326520428, "learning_rate": 9.885444443498771e-06, "loss": 0.77, "step": 1077 }, { "epoch": 0.11, "grad_norm": 1.9823476488619958, "learning_rate": 9.885068757623374e-06, "loss": 0.7399, "step": 1078 }, { "epoch": 0.12, "grad_norm": 1.571111182702907, "learning_rate": 9.88469246388591e-06, "loss": 0.5742, "step": 1079 }, { "epoch": 0.12, "grad_norm": 2.062835621338316, "learning_rate": 9.884315562333207e-06, "loss": 0.6955, "step": 1080 }, { "epoch": 0.12, "grad_norm": 2.198329290139248, "learning_rate": 9.883938053012161e-06, "loss": 0.7383, "step": 1081 }, { "epoch": 0.12, "grad_norm": 2.0668143595451776, "learning_rate": 9.883559935969749e-06, "loss": 0.666, "step": 1082 }, { "epoch": 0.12, "grad_norm": 1.8600065731174904, "learning_rate": 9.88318121125302e-06, "loss": 0.6278, "step": 1083 }, { "epoch": 0.12, "grad_norm": 1.9071347911668686, "learning_rate": 9.8828018789091e-06, "loss": 0.7641, "step": 1084 }, { "epoch": 0.12, "grad_norm": 1.55277098194175, "learning_rate": 9.882421938985192e-06, "loss": 0.5887, "step": 1085 }, { "epoch": 0.12, "grad_norm": 2.1023594428489703, "learning_rate": 9.882041391528568e-06, "loss": 0.7507, "step": 1086 }, { "epoch": 0.12, "grad_norm": 2.0114235649142547, "learning_rate": 9.881660236586585e-06, "loss": 0.7485, "step": 1087 }, { "epoch": 0.12, "grad_norm": 2.073247932544052, "learning_rate": 9.881278474206669e-06, "loss": 0.7291, "step": 1088 }, { "epoch": 0.12, "grad_norm": 1.8659267886296693, "learning_rate": 9.880896104436326e-06, "loss": 0.6674, "step": 1089 }, { "epoch": 0.12, "grad_norm": 1.8471914188252256, "learning_rate": 9.880513127323133e-06, "loss": 0.7397, "step": 1090 }, { "epoch": 0.12, "grad_norm": 1.7799182100601656, "learning_rate": 9.880129542914744e-06, "loss": 0.6997, "step": 1091 }, { "epoch": 0.12, "grad_norm": 1.6762031125518808, "learning_rate": 9.879745351258893e-06, "loss": 0.5757, "step": 1092 }, { "epoch": 0.12, "grad_norm": 1.2983531909362245, "learning_rate": 9.879360552403383e-06, "loss": 0.5578, "step": 1093 }, { "epoch": 0.12, "grad_norm": 1.9753717393708492, "learning_rate": 9.878975146396096e-06, "loss": 0.7236, "step": 1094 }, { "epoch": 0.12, "grad_norm": 1.971290454791585, "learning_rate": 9.87858913328499e-06, "loss": 0.7637, "step": 1095 }, { "epoch": 0.12, "grad_norm": 2.101887725623431, "learning_rate": 9.878202513118097e-06, "loss": 0.7305, "step": 1096 }, { "epoch": 0.12, "grad_norm": 1.9113102174108476, "learning_rate": 9.877815285943527e-06, "loss": 0.6939, "step": 1097 }, { "epoch": 0.12, "grad_norm": 1.7808291435893235, "learning_rate": 9.87742745180946e-06, "loss": 0.6409, "step": 1098 }, { "epoch": 0.12, "grad_norm": 1.8331840087844902, "learning_rate": 9.87703901076416e-06, "loss": 0.6981, "step": 1099 }, { "epoch": 0.12, "grad_norm": 1.8874226086172639, "learning_rate": 9.876649962855957e-06, "loss": 0.7008, "step": 1100 }, { "epoch": 0.12, "grad_norm": 1.8983871468674451, "learning_rate": 9.876260308133264e-06, "loss": 0.7832, "step": 1101 }, { "epoch": 0.12, "grad_norm": 1.7991108608896953, "learning_rate": 9.875870046644564e-06, "loss": 0.7268, "step": 1102 }, { "epoch": 0.12, "grad_norm": 1.9488739076505195, "learning_rate": 9.875479178438424e-06, "loss": 0.7855, "step": 1103 }, { "epoch": 0.12, "grad_norm": 1.8695182736978835, "learning_rate": 9.875087703563475e-06, "loss": 0.619, "step": 1104 }, { "epoch": 0.12, "grad_norm": 2.1147696201514132, "learning_rate": 9.874695622068432e-06, "loss": 0.614, "step": 1105 }, { "epoch": 0.12, "grad_norm": 2.2358828925381635, "learning_rate": 9.874302934002082e-06, "loss": 0.7611, "step": 1106 }, { "epoch": 0.12, "grad_norm": 1.9485152886956123, "learning_rate": 9.873909639413288e-06, "loss": 0.7324, "step": 1107 }, { "epoch": 0.12, "grad_norm": 1.687534951722709, "learning_rate": 9.873515738350989e-06, "loss": 0.6728, "step": 1108 }, { "epoch": 0.12, "grad_norm": 2.1344241392679972, "learning_rate": 9.8731212308642e-06, "loss": 0.6856, "step": 1109 }, { "epoch": 0.12, "grad_norm": 1.9616247619786598, "learning_rate": 9.87272611700201e-06, "loss": 0.6718, "step": 1110 }, { "epoch": 0.12, "grad_norm": 3.5951746272676104, "learning_rate": 9.872330396813583e-06, "loss": 0.6186, "step": 1111 }, { "epoch": 0.12, "grad_norm": 1.7538809116357224, "learning_rate": 9.87193407034816e-06, "loss": 0.7318, "step": 1112 }, { "epoch": 0.12, "grad_norm": 2.073773802080402, "learning_rate": 9.87153713765506e-06, "loss": 0.7373, "step": 1113 }, { "epoch": 0.12, "grad_norm": 1.821428167744318, "learning_rate": 9.87113959878367e-06, "loss": 0.6712, "step": 1114 }, { "epoch": 0.12, "grad_norm": 1.743376085926891, "learning_rate": 9.870741453783459e-06, "loss": 0.6818, "step": 1115 }, { "epoch": 0.12, "grad_norm": 1.927775611195236, "learning_rate": 9.870342702703969e-06, "loss": 0.6854, "step": 1116 }, { "epoch": 0.12, "grad_norm": 1.9364077474060248, "learning_rate": 9.869943345594815e-06, "loss": 0.7581, "step": 1117 }, { "epoch": 0.12, "grad_norm": 2.489412223938706, "learning_rate": 9.869543382505695e-06, "loss": 0.8355, "step": 1118 }, { "epoch": 0.12, "grad_norm": 2.2601158301095885, "learning_rate": 9.869142813486375e-06, "loss": 0.7751, "step": 1119 }, { "epoch": 0.12, "grad_norm": 1.9100987878582096, "learning_rate": 9.868741638586697e-06, "loss": 0.7304, "step": 1120 }, { "epoch": 0.12, "grad_norm": 1.9721439402131538, "learning_rate": 9.868339857856583e-06, "loss": 0.6015, "step": 1121 }, { "epoch": 0.12, "grad_norm": 2.037075325542874, "learning_rate": 9.867937471346027e-06, "loss": 0.7256, "step": 1122 }, { "epoch": 0.12, "grad_norm": 1.8645865434044824, "learning_rate": 9.8675344791051e-06, "loss": 0.7251, "step": 1123 }, { "epoch": 0.12, "grad_norm": 1.9503534555329998, "learning_rate": 9.867130881183945e-06, "loss": 0.8167, "step": 1124 }, { "epoch": 0.12, "grad_norm": 1.9705201004667032, "learning_rate": 9.866726677632784e-06, "loss": 0.7361, "step": 1125 }, { "epoch": 0.12, "grad_norm": 1.9312756398099806, "learning_rate": 9.866321868501914e-06, "loss": 0.7585, "step": 1126 }, { "epoch": 0.12, "grad_norm": 1.9337353010296359, "learning_rate": 9.865916453841704e-06, "loss": 0.7159, "step": 1127 }, { "epoch": 0.12, "grad_norm": 1.8270132647361805, "learning_rate": 9.865510433702604e-06, "loss": 0.716, "step": 1128 }, { "epoch": 0.12, "grad_norm": 2.1112567961147013, "learning_rate": 9.865103808135132e-06, "loss": 0.7454, "step": 1129 }, { "epoch": 0.12, "grad_norm": 1.952132071128556, "learning_rate": 9.86469657718989e-06, "loss": 0.5855, "step": 1130 }, { "epoch": 0.12, "grad_norm": 1.6260427446197172, "learning_rate": 9.864288740917548e-06, "loss": 0.593, "step": 1131 }, { "epoch": 0.12, "grad_norm": 1.7688785757207794, "learning_rate": 9.863880299368856e-06, "loss": 0.7697, "step": 1132 }, { "epoch": 0.12, "grad_norm": 2.0095141403008174, "learning_rate": 9.863471252594636e-06, "loss": 0.8023, "step": 1133 }, { "epoch": 0.12, "grad_norm": 2.1168874264664534, "learning_rate": 9.863061600645786e-06, "loss": 0.7413, "step": 1134 }, { "epoch": 0.12, "grad_norm": 1.9107289154360276, "learning_rate": 9.862651343573283e-06, "loss": 0.7469, "step": 1135 }, { "epoch": 0.12, "grad_norm": 2.1470571896473567, "learning_rate": 9.862240481428173e-06, "loss": 0.6908, "step": 1136 }, { "epoch": 0.12, "grad_norm": 2.1835129897059806, "learning_rate": 9.861829014261584e-06, "loss": 0.7253, "step": 1137 }, { "epoch": 0.12, "grad_norm": 1.8631432300164208, "learning_rate": 9.861416942124714e-06, "loss": 0.6871, "step": 1138 }, { "epoch": 0.12, "grad_norm": 2.0833703380851993, "learning_rate": 9.861004265068839e-06, "loss": 0.7659, "step": 1139 }, { "epoch": 0.12, "grad_norm": 1.7825779039583463, "learning_rate": 9.860590983145307e-06, "loss": 0.6536, "step": 1140 }, { "epoch": 0.12, "grad_norm": 2.1531667741800344, "learning_rate": 9.860177096405547e-06, "loss": 0.7241, "step": 1141 }, { "epoch": 0.12, "grad_norm": 1.894519203315901, "learning_rate": 9.85976260490106e-06, "loss": 0.7599, "step": 1142 }, { "epoch": 0.12, "grad_norm": 1.7100295410203696, "learning_rate": 9.859347508683418e-06, "loss": 0.6652, "step": 1143 }, { "epoch": 0.12, "grad_norm": 2.131758730036725, "learning_rate": 9.85893180780428e-06, "loss": 0.6904, "step": 1144 }, { "epoch": 0.12, "grad_norm": 1.8465879368858262, "learning_rate": 9.858515502315364e-06, "loss": 0.6834, "step": 1145 }, { "epoch": 0.12, "grad_norm": 1.675481957603573, "learning_rate": 9.858098592268479e-06, "loss": 0.7151, "step": 1146 }, { "epoch": 0.12, "grad_norm": 1.6695598350237997, "learning_rate": 9.8576810777155e-06, "loss": 0.5794, "step": 1147 }, { "epoch": 0.12, "grad_norm": 1.8982148244488253, "learning_rate": 9.857262958708377e-06, "loss": 0.7107, "step": 1148 }, { "epoch": 0.12, "grad_norm": 4.653541226810674, "learning_rate": 9.856844235299141e-06, "loss": 0.6043, "step": 1149 }, { "epoch": 0.12, "grad_norm": 1.9985882995686473, "learning_rate": 9.856424907539894e-06, "loss": 0.6363, "step": 1150 }, { "epoch": 0.12, "grad_norm": 2.660419410808726, "learning_rate": 9.856004975482813e-06, "loss": 0.7492, "step": 1151 }, { "epoch": 0.12, "grad_norm": 2.094762812511159, "learning_rate": 9.85558443918015e-06, "loss": 0.7142, "step": 1152 }, { "epoch": 0.12, "grad_norm": 1.7891641767006303, "learning_rate": 9.855163298684238e-06, "loss": 0.5817, "step": 1153 }, { "epoch": 0.12, "grad_norm": 1.9764396599092842, "learning_rate": 9.854741554047477e-06, "loss": 0.7792, "step": 1154 }, { "epoch": 0.12, "grad_norm": 2.1811180146730917, "learning_rate": 9.854319205322347e-06, "loss": 0.7188, "step": 1155 }, { "epoch": 0.12, "grad_norm": 1.9735499234562783, "learning_rate": 9.853896252561403e-06, "loss": 0.7535, "step": 1156 }, { "epoch": 0.12, "grad_norm": 1.843537155434031, "learning_rate": 9.85347269581727e-06, "loss": 0.5524, "step": 1157 }, { "epoch": 0.12, "grad_norm": 1.9067718338217368, "learning_rate": 9.853048535142658e-06, "loss": 0.6011, "step": 1158 }, { "epoch": 0.12, "grad_norm": 1.8844624636839615, "learning_rate": 9.852623770590342e-06, "loss": 0.6944, "step": 1159 }, { "epoch": 0.12, "grad_norm": 1.6315364148472764, "learning_rate": 9.852198402213178e-06, "loss": 0.6791, "step": 1160 }, { "epoch": 0.12, "grad_norm": 1.9254700404004361, "learning_rate": 9.851772430064097e-06, "loss": 0.6452, "step": 1161 }, { "epoch": 0.12, "grad_norm": 1.8322162111699545, "learning_rate": 9.851345854196101e-06, "loss": 0.6529, "step": 1162 }, { "epoch": 0.12, "grad_norm": 1.778401618828545, "learning_rate": 9.850918674662273e-06, "loss": 0.7074, "step": 1163 }, { "epoch": 0.12, "grad_norm": 2.1876724993060304, "learning_rate": 9.850490891515768e-06, "loss": 0.6733, "step": 1164 }, { "epoch": 0.12, "grad_norm": 2.0748461761688226, "learning_rate": 9.850062504809814e-06, "loss": 0.5884, "step": 1165 }, { "epoch": 0.12, "grad_norm": 2.2514212000406344, "learning_rate": 9.849633514597716e-06, "loss": 0.7663, "step": 1166 }, { "epoch": 0.12, "grad_norm": 1.9483231996491384, "learning_rate": 9.849203920932857e-06, "loss": 0.6342, "step": 1167 }, { "epoch": 0.12, "grad_norm": 2.086305840507267, "learning_rate": 9.848773723868691e-06, "loss": 0.6951, "step": 1168 }, { "epoch": 0.12, "grad_norm": 2.067544733170661, "learning_rate": 9.84834292345875e-06, "loss": 0.6868, "step": 1169 }, { "epoch": 0.12, "grad_norm": 2.1757728618039582, "learning_rate": 9.847911519756634e-06, "loss": 0.7911, "step": 1170 }, { "epoch": 0.12, "grad_norm": 2.003669206846316, "learning_rate": 9.847479512816031e-06, "loss": 0.742, "step": 1171 }, { "epoch": 0.13, "grad_norm": 2.0908539798844408, "learning_rate": 9.847046902690696e-06, "loss": 0.7545, "step": 1172 }, { "epoch": 0.13, "grad_norm": 1.718942976304231, "learning_rate": 9.846613689434455e-06, "loss": 0.6281, "step": 1173 }, { "epoch": 0.13, "grad_norm": 1.973474003795294, "learning_rate": 9.846179873101216e-06, "loss": 0.6856, "step": 1174 }, { "epoch": 0.13, "grad_norm": 2.199896160836224, "learning_rate": 9.845745453744961e-06, "loss": 0.6898, "step": 1175 }, { "epoch": 0.13, "grad_norm": 1.920872923203301, "learning_rate": 9.845310431419746e-06, "loss": 0.7566, "step": 1176 }, { "epoch": 0.13, "grad_norm": 1.3448515009312285, "learning_rate": 9.844874806179701e-06, "loss": 0.5921, "step": 1177 }, { "epoch": 0.13, "grad_norm": 1.7409250576180915, "learning_rate": 9.844438578079033e-06, "loss": 0.6664, "step": 1178 }, { "epoch": 0.13, "grad_norm": 1.957046931041937, "learning_rate": 9.844001747172022e-06, "loss": 0.7218, "step": 1179 }, { "epoch": 0.13, "grad_norm": 1.8786521676346843, "learning_rate": 9.843564313513025e-06, "loss": 0.6975, "step": 1180 }, { "epoch": 0.13, "grad_norm": 1.869109052865043, "learning_rate": 9.843126277156472e-06, "loss": 0.6122, "step": 1181 }, { "epoch": 0.13, "grad_norm": 1.6162861314512873, "learning_rate": 9.84268763815687e-06, "loss": 0.6326, "step": 1182 }, { "epoch": 0.13, "grad_norm": 1.8947767826807616, "learning_rate": 9.842248396568798e-06, "loss": 0.7526, "step": 1183 }, { "epoch": 0.13, "grad_norm": 2.1652222352677692, "learning_rate": 9.841808552446914e-06, "loss": 0.5969, "step": 1184 }, { "epoch": 0.13, "grad_norm": 1.8637517249980293, "learning_rate": 9.84136810584595e-06, "loss": 0.7352, "step": 1185 }, { "epoch": 0.13, "grad_norm": 2.1223605015771922, "learning_rate": 9.840927056820708e-06, "loss": 0.7505, "step": 1186 }, { "epoch": 0.13, "grad_norm": 1.7976513505486629, "learning_rate": 9.840485405426073e-06, "loss": 0.7093, "step": 1187 }, { "epoch": 0.13, "grad_norm": 1.819088778614165, "learning_rate": 9.840043151717e-06, "loss": 0.6338, "step": 1188 }, { "epoch": 0.13, "grad_norm": 1.8633709159891727, "learning_rate": 9.839600295748518e-06, "loss": 0.6895, "step": 1189 }, { "epoch": 0.13, "grad_norm": 1.7542692867607792, "learning_rate": 9.839156837575731e-06, "loss": 0.711, "step": 1190 }, { "epoch": 0.13, "grad_norm": 2.029640015634501, "learning_rate": 9.838712777253827e-06, "loss": 0.7075, "step": 1191 }, { "epoch": 0.13, "grad_norm": 1.9316947684678862, "learning_rate": 9.838268114838055e-06, "loss": 0.6453, "step": 1192 }, { "epoch": 0.13, "grad_norm": 1.9737053732854417, "learning_rate": 9.837822850383746e-06, "loss": 0.8135, "step": 1193 }, { "epoch": 0.13, "grad_norm": 1.7406899391822386, "learning_rate": 9.837376983946309e-06, "loss": 0.6565, "step": 1194 }, { "epoch": 0.13, "grad_norm": 1.919188789261364, "learning_rate": 9.836930515581221e-06, "loss": 0.6873, "step": 1195 }, { "epoch": 0.13, "grad_norm": 2.0018225849000757, "learning_rate": 9.83648344534404e-06, "loss": 0.628, "step": 1196 }, { "epoch": 0.13, "grad_norm": 1.956036815174723, "learning_rate": 9.836035773290394e-06, "loss": 0.8559, "step": 1197 }, { "epoch": 0.13, "grad_norm": 1.7308980927610769, "learning_rate": 9.835587499475987e-06, "loss": 0.6088, "step": 1198 }, { "epoch": 0.13, "grad_norm": 2.1144165321211434, "learning_rate": 9.835138623956603e-06, "loss": 0.7332, "step": 1199 }, { "epoch": 0.13, "grad_norm": 2.1073635465243727, "learning_rate": 9.834689146788092e-06, "loss": 0.669, "step": 1200 }, { "epoch": 0.13, "grad_norm": 1.6921987687531084, "learning_rate": 9.834239068026388e-06, "loss": 0.6642, "step": 1201 }, { "epoch": 0.13, "grad_norm": 1.8220836906957656, "learning_rate": 9.833788387727495e-06, "loss": 0.6413, "step": 1202 }, { "epoch": 0.13, "grad_norm": 1.9124403006707325, "learning_rate": 9.833337105947487e-06, "loss": 0.7473, "step": 1203 }, { "epoch": 0.13, "grad_norm": 1.7253676115075405, "learning_rate": 9.832885222742524e-06, "loss": 0.5807, "step": 1204 }, { "epoch": 0.13, "grad_norm": 2.030307507150616, "learning_rate": 9.832432738168834e-06, "loss": 0.7097, "step": 1205 }, { "epoch": 0.13, "grad_norm": 2.226895523226811, "learning_rate": 9.831979652282718e-06, "loss": 0.6809, "step": 1206 }, { "epoch": 0.13, "grad_norm": 1.3659382176950157, "learning_rate": 9.831525965140557e-06, "loss": 0.5691, "step": 1207 }, { "epoch": 0.13, "grad_norm": 1.2925770903974583, "learning_rate": 9.831071676798805e-06, "loss": 0.5643, "step": 1208 }, { "epoch": 0.13, "grad_norm": 2.0556726677793282, "learning_rate": 9.83061678731399e-06, "loss": 0.7053, "step": 1209 }, { "epoch": 0.13, "grad_norm": 1.8105308074833437, "learning_rate": 9.830161296742712e-06, "loss": 0.7016, "step": 1210 }, { "epoch": 0.13, "grad_norm": 2.037918556991115, "learning_rate": 9.829705205141653e-06, "loss": 0.7275, "step": 1211 }, { "epoch": 0.13, "grad_norm": 1.8685276262633082, "learning_rate": 9.829248512567563e-06, "loss": 0.6719, "step": 1212 }, { "epoch": 0.13, "grad_norm": 2.1409465373575944, "learning_rate": 9.82879121907727e-06, "loss": 0.7562, "step": 1213 }, { "epoch": 0.13, "grad_norm": 1.9978217306510804, "learning_rate": 9.82833332472768e-06, "loss": 0.61, "step": 1214 }, { "epoch": 0.13, "grad_norm": 1.9293736109189046, "learning_rate": 9.827874829575766e-06, "loss": 0.7165, "step": 1215 }, { "epoch": 0.13, "grad_norm": 1.9885860231798351, "learning_rate": 9.827415733678578e-06, "loss": 0.7011, "step": 1216 }, { "epoch": 0.13, "grad_norm": 1.9984324341902233, "learning_rate": 9.826956037093247e-06, "loss": 0.6309, "step": 1217 }, { "epoch": 0.13, "grad_norm": 1.964721262099103, "learning_rate": 9.826495739876972e-06, "loss": 0.6379, "step": 1218 }, { "epoch": 0.13, "grad_norm": 1.9199049081877377, "learning_rate": 9.82603484208703e-06, "loss": 0.6093, "step": 1219 }, { "epoch": 0.13, "grad_norm": 2.1034578782513242, "learning_rate": 9.82557334378077e-06, "loss": 0.7274, "step": 1220 }, { "epoch": 0.13, "grad_norm": 2.345725341933322, "learning_rate": 9.825111245015619e-06, "loss": 0.7719, "step": 1221 }, { "epoch": 0.13, "grad_norm": 1.9782790453235797, "learning_rate": 9.824648545849077e-06, "loss": 0.7955, "step": 1222 }, { "epoch": 0.13, "grad_norm": 1.7005902527367065, "learning_rate": 9.824185246338718e-06, "loss": 0.6811, "step": 1223 }, { "epoch": 0.13, "grad_norm": 1.9108631464708596, "learning_rate": 9.823721346542192e-06, "loss": 0.7179, "step": 1224 }, { "epoch": 0.13, "grad_norm": 1.8417977913967636, "learning_rate": 9.823256846517225e-06, "loss": 0.6241, "step": 1225 }, { "epoch": 0.13, "grad_norm": 2.9418723522828425, "learning_rate": 9.822791746321613e-06, "loss": 0.7129, "step": 1226 }, { "epoch": 0.13, "grad_norm": 1.7750802514717023, "learning_rate": 9.822326046013232e-06, "loss": 0.7141, "step": 1227 }, { "epoch": 0.13, "grad_norm": 1.8055956223970067, "learning_rate": 9.821859745650028e-06, "loss": 0.709, "step": 1228 }, { "epoch": 0.13, "grad_norm": 2.042793925494707, "learning_rate": 9.821392845290028e-06, "loss": 0.694, "step": 1229 }, { "epoch": 0.13, "grad_norm": 2.1398994752606164, "learning_rate": 9.820925344991325e-06, "loss": 0.7034, "step": 1230 }, { "epoch": 0.13, "grad_norm": 2.1755240078731863, "learning_rate": 9.820457244812094e-06, "loss": 0.5681, "step": 1231 }, { "epoch": 0.13, "grad_norm": 2.038746015623077, "learning_rate": 9.819988544810582e-06, "loss": 0.6382, "step": 1232 }, { "epoch": 0.13, "grad_norm": 1.8238764595385195, "learning_rate": 9.819519245045109e-06, "loss": 0.6892, "step": 1233 }, { "epoch": 0.13, "grad_norm": 1.9845818824513592, "learning_rate": 9.819049345574072e-06, "loss": 0.6689, "step": 1234 }, { "epoch": 0.13, "grad_norm": 1.957660733067925, "learning_rate": 9.818578846455941e-06, "loss": 0.6518, "step": 1235 }, { "epoch": 0.13, "grad_norm": 1.8409523356904087, "learning_rate": 9.818107747749264e-06, "loss": 0.6923, "step": 1236 }, { "epoch": 0.13, "grad_norm": 2.325203279674646, "learning_rate": 9.81763604951266e-06, "loss": 0.6982, "step": 1237 }, { "epoch": 0.13, "grad_norm": 1.8995602653230252, "learning_rate": 9.817163751804821e-06, "loss": 0.7217, "step": 1238 }, { "epoch": 0.13, "grad_norm": 2.111305966083386, "learning_rate": 9.816690854684519e-06, "loss": 0.7698, "step": 1239 }, { "epoch": 0.13, "grad_norm": 1.712093082276381, "learning_rate": 9.816217358210598e-06, "loss": 0.6481, "step": 1240 }, { "epoch": 0.13, "grad_norm": 1.6890985407410286, "learning_rate": 9.815743262441977e-06, "loss": 0.5635, "step": 1241 }, { "epoch": 0.13, "grad_norm": 1.7736650971729055, "learning_rate": 9.815268567437646e-06, "loss": 0.6731, "step": 1242 }, { "epoch": 0.13, "grad_norm": 1.8472378623281902, "learning_rate": 9.814793273256673e-06, "loss": 0.7163, "step": 1243 }, { "epoch": 0.13, "grad_norm": 1.8986857976229417, "learning_rate": 9.814317379958202e-06, "loss": 0.662, "step": 1244 }, { "epoch": 0.13, "grad_norm": 1.2756437147702961, "learning_rate": 9.813840887601452e-06, "loss": 0.5975, "step": 1245 }, { "epoch": 0.13, "grad_norm": 1.8570949030290282, "learning_rate": 9.813363796245708e-06, "loss": 0.6772, "step": 1246 }, { "epoch": 0.13, "grad_norm": 2.145640586465494, "learning_rate": 9.81288610595034e-06, "loss": 0.7561, "step": 1247 }, { "epoch": 0.13, "grad_norm": 1.9473867223734826, "learning_rate": 9.812407816774788e-06, "loss": 0.6852, "step": 1248 }, { "epoch": 0.13, "grad_norm": 2.5998464255279887, "learning_rate": 9.811928928778564e-06, "loss": 0.7794, "step": 1249 }, { "epoch": 0.13, "grad_norm": 1.6172646325921975, "learning_rate": 9.81144944202126e-06, "loss": 0.6239, "step": 1250 }, { "epoch": 0.13, "grad_norm": 1.9525630739780555, "learning_rate": 9.810969356562539e-06, "loss": 0.6819, "step": 1251 }, { "epoch": 0.13, "grad_norm": 1.9574656199350373, "learning_rate": 9.81048867246214e-06, "loss": 0.7521, "step": 1252 }, { "epoch": 0.13, "grad_norm": 1.9262384077985992, "learning_rate": 9.810007389779875e-06, "loss": 0.5907, "step": 1253 }, { "epoch": 0.13, "grad_norm": 2.060903624535282, "learning_rate": 9.80952550857563e-06, "loss": 0.7141, "step": 1254 }, { "epoch": 0.13, "grad_norm": 1.9270175220000927, "learning_rate": 9.809043028909371e-06, "loss": 0.6927, "step": 1255 }, { "epoch": 0.13, "grad_norm": 2.1775531295813493, "learning_rate": 9.80855995084113e-06, "loss": 0.5787, "step": 1256 }, { "epoch": 0.13, "grad_norm": 2.1410089815171074, "learning_rate": 9.80807627443102e-06, "loss": 0.708, "step": 1257 }, { "epoch": 0.13, "grad_norm": 1.9214083716307757, "learning_rate": 9.807591999739225e-06, "loss": 0.7162, "step": 1258 }, { "epoch": 0.13, "grad_norm": 1.8489071183553123, "learning_rate": 9.807107126826005e-06, "loss": 0.6418, "step": 1259 }, { "epoch": 0.13, "grad_norm": 2.011997892379692, "learning_rate": 9.806621655751692e-06, "loss": 0.7169, "step": 1260 }, { "epoch": 0.13, "grad_norm": 1.7897327169987176, "learning_rate": 9.806135586576697e-06, "loss": 0.593, "step": 1261 }, { "epoch": 0.13, "grad_norm": 2.0935557569166874, "learning_rate": 9.805648919361505e-06, "loss": 0.6506, "step": 1262 }, { "epoch": 0.13, "grad_norm": 2.045103548570946, "learning_rate": 9.805161654166668e-06, "loss": 0.7267, "step": 1263 }, { "epoch": 0.13, "grad_norm": 2.289808639680483, "learning_rate": 9.804673791052822e-06, "loss": 0.7764, "step": 1264 }, { "epoch": 0.13, "grad_norm": 1.8758234962356322, "learning_rate": 9.804185330080668e-06, "loss": 0.7728, "step": 1265 }, { "epoch": 0.14, "grad_norm": 2.0075403782118197, "learning_rate": 9.803696271310994e-06, "loss": 0.6039, "step": 1266 }, { "epoch": 0.14, "grad_norm": 1.6212915633997882, "learning_rate": 9.803206614804647e-06, "loss": 0.6637, "step": 1267 }, { "epoch": 0.14, "grad_norm": 2.2245425774697556, "learning_rate": 9.802716360622564e-06, "loss": 0.6814, "step": 1268 }, { "epoch": 0.14, "grad_norm": 1.734472005205641, "learning_rate": 9.802225508825742e-06, "loss": 0.6303, "step": 1269 }, { "epoch": 0.14, "grad_norm": 2.235106327947789, "learning_rate": 9.801734059475261e-06, "loss": 0.6832, "step": 1270 }, { "epoch": 0.14, "grad_norm": 1.7430071421527409, "learning_rate": 9.801242012632275e-06, "loss": 0.6404, "step": 1271 }, { "epoch": 0.14, "grad_norm": 1.9762266445997858, "learning_rate": 9.80074936835801e-06, "loss": 0.6743, "step": 1272 }, { "epoch": 0.14, "grad_norm": 2.0056117076990145, "learning_rate": 9.800256126713765e-06, "loss": 0.7696, "step": 1273 }, { "epoch": 0.14, "grad_norm": 1.6429836968119107, "learning_rate": 9.79976228776092e-06, "loss": 0.6479, "step": 1274 }, { "epoch": 0.14, "grad_norm": 1.9967231392415936, "learning_rate": 9.799267851560919e-06, "loss": 0.7102, "step": 1275 }, { "epoch": 0.14, "grad_norm": 2.2599236341699376, "learning_rate": 9.798772818175289e-06, "loss": 0.6347, "step": 1276 }, { "epoch": 0.14, "grad_norm": 2.0496718470262474, "learning_rate": 9.79827718766563e-06, "loss": 0.6942, "step": 1277 }, { "epoch": 0.14, "grad_norm": 1.7487714510767849, "learning_rate": 9.79778096009361e-06, "loss": 0.7312, "step": 1278 }, { "epoch": 0.14, "grad_norm": 2.320703568753947, "learning_rate": 9.79728413552098e-06, "loss": 0.662, "step": 1279 }, { "epoch": 0.14, "grad_norm": 2.21119031756161, "learning_rate": 9.796786714009561e-06, "loss": 0.6025, "step": 1280 }, { "epoch": 0.14, "grad_norm": 1.9895176801965326, "learning_rate": 9.796288695621246e-06, "loss": 0.654, "step": 1281 }, { "epoch": 0.14, "grad_norm": 2.012580391463377, "learning_rate": 9.795790080418006e-06, "loss": 0.7377, "step": 1282 }, { "epoch": 0.14, "grad_norm": 1.8069061469499963, "learning_rate": 9.795290868461885e-06, "loss": 0.6439, "step": 1283 }, { "epoch": 0.14, "grad_norm": 2.017026712033188, "learning_rate": 9.794791059815e-06, "loss": 0.6733, "step": 1284 }, { "epoch": 0.14, "grad_norm": 1.8533724508684402, "learning_rate": 9.794290654539546e-06, "loss": 0.6748, "step": 1285 }, { "epoch": 0.14, "grad_norm": 1.7704129954517362, "learning_rate": 9.79378965269779e-06, "loss": 0.6313, "step": 1286 }, { "epoch": 0.14, "grad_norm": 1.8486693511055339, "learning_rate": 9.793288054352068e-06, "loss": 0.6933, "step": 1287 }, { "epoch": 0.14, "grad_norm": 1.815692476206939, "learning_rate": 9.792785859564803e-06, "loss": 0.7973, "step": 1288 }, { "epoch": 0.14, "grad_norm": 1.8496092297103952, "learning_rate": 9.792283068398478e-06, "loss": 0.8363, "step": 1289 }, { "epoch": 0.14, "grad_norm": 1.7682717293620056, "learning_rate": 9.791779680915659e-06, "loss": 0.6029, "step": 1290 }, { "epoch": 0.14, "grad_norm": 2.3341671930594035, "learning_rate": 9.791275697178982e-06, "loss": 0.6574, "step": 1291 }, { "epoch": 0.14, "grad_norm": 1.9002938404297773, "learning_rate": 9.790771117251163e-06, "loss": 0.7543, "step": 1292 }, { "epoch": 0.14, "grad_norm": 2.019911655036872, "learning_rate": 9.790265941194985e-06, "loss": 0.7378, "step": 1293 }, { "epoch": 0.14, "grad_norm": 1.9393327472704418, "learning_rate": 9.78976016907331e-06, "loss": 0.5815, "step": 1294 }, { "epoch": 0.14, "grad_norm": 1.8636282094877497, "learning_rate": 9.78925380094907e-06, "loss": 0.7125, "step": 1295 }, { "epoch": 0.14, "grad_norm": 1.991947451455036, "learning_rate": 9.788746836885278e-06, "loss": 0.7253, "step": 1296 }, { "epoch": 0.14, "grad_norm": 1.9442870210295284, "learning_rate": 9.788239276945014e-06, "loss": 0.6939, "step": 1297 }, { "epoch": 0.14, "grad_norm": 1.945640663537922, "learning_rate": 9.787731121191435e-06, "loss": 0.7333, "step": 1298 }, { "epoch": 0.14, "grad_norm": 2.060248438310587, "learning_rate": 9.787222369687772e-06, "loss": 0.6788, "step": 1299 }, { "epoch": 0.14, "grad_norm": 1.866064095849111, "learning_rate": 9.786713022497332e-06, "loss": 0.7361, "step": 1300 }, { "epoch": 0.14, "grad_norm": 1.4593852143282526, "learning_rate": 9.786203079683492e-06, "loss": 0.5788, "step": 1301 }, { "epoch": 0.14, "grad_norm": 2.0083777785705355, "learning_rate": 9.785692541309708e-06, "loss": 0.6958, "step": 1302 }, { "epoch": 0.14, "grad_norm": 1.781470832575545, "learning_rate": 9.785181407439508e-06, "loss": 0.758, "step": 1303 }, { "epoch": 0.14, "grad_norm": 1.9342186125225889, "learning_rate": 9.78466967813649e-06, "loss": 0.6691, "step": 1304 }, { "epoch": 0.14, "grad_norm": 2.01373268953987, "learning_rate": 9.784157353464334e-06, "loss": 0.751, "step": 1305 }, { "epoch": 0.14, "grad_norm": 1.9146610521860707, "learning_rate": 9.783644433486786e-06, "loss": 0.7133, "step": 1306 }, { "epoch": 0.14, "grad_norm": 1.6569635335880646, "learning_rate": 9.783130918267674e-06, "loss": 0.6165, "step": 1307 }, { "epoch": 0.14, "grad_norm": 2.1054378532107267, "learning_rate": 9.782616807870893e-06, "loss": 0.6428, "step": 1308 }, { "epoch": 0.14, "grad_norm": 1.6252900487438457, "learning_rate": 9.782102102360416e-06, "loss": 0.6296, "step": 1309 }, { "epoch": 0.14, "grad_norm": 1.8639912570996344, "learning_rate": 9.781586801800292e-06, "loss": 0.6725, "step": 1310 }, { "epoch": 0.14, "grad_norm": 2.89191647095448, "learning_rate": 9.781070906254636e-06, "loss": 0.7909, "step": 1311 }, { "epoch": 0.14, "grad_norm": 1.8626960331349869, "learning_rate": 9.780554415787645e-06, "loss": 0.6931, "step": 1312 }, { "epoch": 0.14, "grad_norm": 1.937233225846913, "learning_rate": 9.780037330463588e-06, "loss": 0.6939, "step": 1313 }, { "epoch": 0.14, "grad_norm": 2.0977953659639805, "learning_rate": 9.779519650346807e-06, "loss": 0.7042, "step": 1314 }, { "epoch": 0.14, "grad_norm": 1.6271498613530966, "learning_rate": 9.779001375501718e-06, "loss": 0.6504, "step": 1315 }, { "epoch": 0.14, "grad_norm": 2.0324574371038593, "learning_rate": 9.778482505992809e-06, "loss": 0.7068, "step": 1316 }, { "epoch": 0.14, "grad_norm": 1.9670287528641843, "learning_rate": 9.777963041884648e-06, "loss": 0.6945, "step": 1317 }, { "epoch": 0.14, "grad_norm": 1.843554582397711, "learning_rate": 9.777442983241871e-06, "loss": 0.6666, "step": 1318 }, { "epoch": 0.14, "grad_norm": 1.8161556772424985, "learning_rate": 9.776922330129192e-06, "loss": 0.6085, "step": 1319 }, { "epoch": 0.14, "grad_norm": 2.05043317807712, "learning_rate": 9.776401082611396e-06, "loss": 0.606, "step": 1320 }, { "epoch": 0.14, "grad_norm": 1.833172709598415, "learning_rate": 9.775879240753342e-06, "loss": 0.5727, "step": 1321 }, { "epoch": 0.14, "grad_norm": 1.8452774065022248, "learning_rate": 9.775356804619967e-06, "loss": 0.6936, "step": 1322 }, { "epoch": 0.14, "grad_norm": 1.838184812265879, "learning_rate": 9.774833774276278e-06, "loss": 0.5309, "step": 1323 }, { "epoch": 0.14, "grad_norm": 1.7611389427217903, "learning_rate": 9.774310149787354e-06, "loss": 0.6169, "step": 1324 }, { "epoch": 0.14, "grad_norm": 1.8268497950964302, "learning_rate": 9.773785931218358e-06, "loss": 0.5909, "step": 1325 }, { "epoch": 0.14, "grad_norm": 1.8005114166257772, "learning_rate": 9.773261118634511e-06, "loss": 0.5848, "step": 1326 }, { "epoch": 0.14, "grad_norm": 1.9386083139810428, "learning_rate": 9.772735712101125e-06, "loss": 0.6679, "step": 1327 }, { "epoch": 0.14, "grad_norm": 1.969524330904239, "learning_rate": 9.772209711683573e-06, "loss": 0.6356, "step": 1328 }, { "epoch": 0.14, "grad_norm": 2.1187166277359935, "learning_rate": 9.771683117447308e-06, "loss": 0.7865, "step": 1329 }, { "epoch": 0.14, "grad_norm": 2.031763627817917, "learning_rate": 9.771155929457856e-06, "loss": 0.6548, "step": 1330 }, { "epoch": 0.14, "grad_norm": 1.8432021311625082, "learning_rate": 9.770628147780816e-06, "loss": 0.5721, "step": 1331 }, { "epoch": 0.14, "grad_norm": 2.102408206990477, "learning_rate": 9.77009977248186e-06, "loss": 0.7618, "step": 1332 }, { "epoch": 0.14, "grad_norm": 2.0581673852034097, "learning_rate": 9.769570803626735e-06, "loss": 0.6769, "step": 1333 }, { "epoch": 0.14, "grad_norm": 1.8890355481619938, "learning_rate": 9.769041241281265e-06, "loss": 0.785, "step": 1334 }, { "epoch": 0.14, "grad_norm": 1.8418602966151827, "learning_rate": 9.768511085511344e-06, "loss": 0.707, "step": 1335 }, { "epoch": 0.14, "grad_norm": 1.7662277495694891, "learning_rate": 9.767980336382939e-06, "loss": 0.7276, "step": 1336 }, { "epoch": 0.14, "grad_norm": 2.0759909035371056, "learning_rate": 9.767448993962091e-06, "loss": 0.7692, "step": 1337 }, { "epoch": 0.14, "grad_norm": 1.8607664109610857, "learning_rate": 9.76691705831492e-06, "loss": 0.7423, "step": 1338 }, { "epoch": 0.14, "grad_norm": 1.2822340715251623, "learning_rate": 9.766384529507615e-06, "loss": 0.5923, "step": 1339 }, { "epoch": 0.14, "grad_norm": 1.8979273951882412, "learning_rate": 9.76585140760644e-06, "loss": 0.7013, "step": 1340 }, { "epoch": 0.14, "grad_norm": 1.8756848190827502, "learning_rate": 9.765317692677731e-06, "loss": 0.6687, "step": 1341 }, { "epoch": 0.14, "grad_norm": 1.8602715873627254, "learning_rate": 9.764783384787903e-06, "loss": 0.6442, "step": 1342 }, { "epoch": 0.14, "grad_norm": 1.8950158262105696, "learning_rate": 9.764248484003439e-06, "loss": 0.7141, "step": 1343 }, { "epoch": 0.14, "grad_norm": 2.242658684489952, "learning_rate": 9.763712990390897e-06, "loss": 0.6892, "step": 1344 }, { "epoch": 0.14, "grad_norm": 2.0027178680206865, "learning_rate": 9.763176904016914e-06, "loss": 0.6867, "step": 1345 }, { "epoch": 0.14, "grad_norm": 1.8837893881954209, "learning_rate": 9.762640224948193e-06, "loss": 0.75, "step": 1346 }, { "epoch": 0.14, "grad_norm": 1.8963990253167329, "learning_rate": 9.762102953251514e-06, "loss": 0.6925, "step": 1347 }, { "epoch": 0.14, "grad_norm": 1.8677342567599746, "learning_rate": 9.761565088993734e-06, "loss": 0.6824, "step": 1348 }, { "epoch": 0.14, "grad_norm": 2.075083510519705, "learning_rate": 9.761026632241778e-06, "loss": 0.7433, "step": 1349 }, { "epoch": 0.14, "grad_norm": 1.571204159301885, "learning_rate": 9.760487583062651e-06, "loss": 0.5792, "step": 1350 }, { "epoch": 0.14, "grad_norm": 1.3558320749266088, "learning_rate": 9.759947941523426e-06, "loss": 0.57, "step": 1351 }, { "epoch": 0.14, "grad_norm": 2.0504199225331687, "learning_rate": 9.759407707691251e-06, "loss": 0.795, "step": 1352 }, { "epoch": 0.14, "grad_norm": 1.9073367862461228, "learning_rate": 9.758866881633351e-06, "loss": 0.6673, "step": 1353 }, { "epoch": 0.14, "grad_norm": 2.0647951619519676, "learning_rate": 9.758325463417022e-06, "loss": 0.7675, "step": 1354 }, { "epoch": 0.14, "grad_norm": 2.1349295067275573, "learning_rate": 9.757783453109635e-06, "loss": 0.7904, "step": 1355 }, { "epoch": 0.14, "grad_norm": 1.7055100249281892, "learning_rate": 9.757240850778632e-06, "loss": 0.7329, "step": 1356 }, { "epoch": 0.14, "grad_norm": 2.34827228036844, "learning_rate": 9.756697656491528e-06, "loss": 0.7609, "step": 1357 }, { "epoch": 0.14, "grad_norm": 1.8896063564932104, "learning_rate": 9.75615387031592e-06, "loss": 0.6819, "step": 1358 }, { "epoch": 0.14, "grad_norm": 1.9994852922626174, "learning_rate": 9.755609492319467e-06, "loss": 0.632, "step": 1359 }, { "epoch": 0.15, "grad_norm": 2.063136427563976, "learning_rate": 9.755064522569913e-06, "loss": 0.7103, "step": 1360 }, { "epoch": 0.15, "grad_norm": 1.8102187396537475, "learning_rate": 9.754518961135065e-06, "loss": 0.6612, "step": 1361 }, { "epoch": 0.15, "grad_norm": 1.9760657803197368, "learning_rate": 9.753972808082812e-06, "loss": 0.6859, "step": 1362 }, { "epoch": 0.15, "grad_norm": 1.9422144885357648, "learning_rate": 9.753426063481113e-06, "loss": 0.7232, "step": 1363 }, { "epoch": 0.15, "grad_norm": 1.6665499198045732, "learning_rate": 9.752878727397998e-06, "loss": 0.5755, "step": 1364 }, { "epoch": 0.15, "grad_norm": 1.6646510124184901, "learning_rate": 9.752330799901578e-06, "loss": 0.7013, "step": 1365 }, { "epoch": 0.15, "grad_norm": 1.9134989750693985, "learning_rate": 9.751782281060027e-06, "loss": 0.7269, "step": 1366 }, { "epoch": 0.15, "grad_norm": 1.9932946543256909, "learning_rate": 9.751233170941606e-06, "loss": 0.7635, "step": 1367 }, { "epoch": 0.15, "grad_norm": 1.7502493718345624, "learning_rate": 9.750683469614637e-06, "loss": 0.714, "step": 1368 }, { "epoch": 0.15, "grad_norm": 1.8249457060896617, "learning_rate": 9.750133177147522e-06, "loss": 0.6919, "step": 1369 }, { "epoch": 0.15, "grad_norm": 1.8380954957642568, "learning_rate": 9.749582293608734e-06, "loss": 0.632, "step": 1370 }, { "epoch": 0.15, "grad_norm": 1.8698082289138864, "learning_rate": 9.749030819066824e-06, "loss": 0.6555, "step": 1371 }, { "epoch": 0.15, "grad_norm": 2.4286538268801405, "learning_rate": 9.748478753590414e-06, "loss": 0.612, "step": 1372 }, { "epoch": 0.15, "grad_norm": 2.3263713612438113, "learning_rate": 9.747926097248195e-06, "loss": 0.7629, "step": 1373 }, { "epoch": 0.15, "grad_norm": 1.819651343474728, "learning_rate": 9.747372850108937e-06, "loss": 0.7235, "step": 1374 }, { "epoch": 0.15, "grad_norm": 1.977306678511227, "learning_rate": 9.746819012241483e-06, "loss": 0.7314, "step": 1375 }, { "epoch": 0.15, "grad_norm": 1.8044974734621135, "learning_rate": 9.746264583714748e-06, "loss": 0.6666, "step": 1376 }, { "epoch": 0.15, "grad_norm": 2.0133208300119376, "learning_rate": 9.74570956459772e-06, "loss": 0.7926, "step": 1377 }, { "epoch": 0.15, "grad_norm": 1.9197083876860366, "learning_rate": 9.745153954959464e-06, "loss": 0.6517, "step": 1378 }, { "epoch": 0.15, "grad_norm": 1.847179335585213, "learning_rate": 9.744597754869113e-06, "loss": 0.7518, "step": 1379 }, { "epoch": 0.15, "grad_norm": 2.4071473559167575, "learning_rate": 9.744040964395877e-06, "loss": 0.6857, "step": 1380 }, { "epoch": 0.15, "grad_norm": 1.8184180367810876, "learning_rate": 9.743483583609042e-06, "loss": 0.7301, "step": 1381 }, { "epoch": 0.15, "grad_norm": 1.8485861850365117, "learning_rate": 9.74292561257796e-06, "loss": 0.6517, "step": 1382 }, { "epoch": 0.15, "grad_norm": 1.8568355499696554, "learning_rate": 9.742367051372065e-06, "loss": 0.6483, "step": 1383 }, { "epoch": 0.15, "grad_norm": 1.8248805321808175, "learning_rate": 9.741807900060858e-06, "loss": 0.8109, "step": 1384 }, { "epoch": 0.15, "grad_norm": 2.047046942399166, "learning_rate": 9.741248158713917e-06, "loss": 0.7378, "step": 1385 }, { "epoch": 0.15, "grad_norm": 1.7743564106881584, "learning_rate": 9.74068782740089e-06, "loss": 0.6008, "step": 1386 }, { "epoch": 0.15, "grad_norm": 1.886274892373141, "learning_rate": 9.7401269061915e-06, "loss": 0.7664, "step": 1387 }, { "epoch": 0.15, "grad_norm": 1.9534117116636, "learning_rate": 9.739565395155546e-06, "loss": 0.747, "step": 1388 }, { "epoch": 0.15, "grad_norm": 1.7806324715721762, "learning_rate": 9.739003294362898e-06, "loss": 0.6793, "step": 1389 }, { "epoch": 0.15, "grad_norm": 1.9190015752827283, "learning_rate": 9.7384406038835e-06, "loss": 0.5611, "step": 1390 }, { "epoch": 0.15, "grad_norm": 1.7983382919808983, "learning_rate": 9.737877323787368e-06, "loss": 0.6992, "step": 1391 }, { "epoch": 0.15, "grad_norm": 1.7786553022670455, "learning_rate": 9.737313454144592e-06, "loss": 0.7409, "step": 1392 }, { "epoch": 0.15, "grad_norm": 1.8045112113439388, "learning_rate": 9.736748995025338e-06, "loss": 0.7043, "step": 1393 }, { "epoch": 0.15, "grad_norm": 1.9065544200317495, "learning_rate": 9.736183946499842e-06, "loss": 0.5963, "step": 1394 }, { "epoch": 0.15, "grad_norm": 1.8360235793930046, "learning_rate": 9.735618308638414e-06, "loss": 0.7489, "step": 1395 }, { "epoch": 0.15, "grad_norm": 1.3440855784847614, "learning_rate": 9.735052081511439e-06, "loss": 0.5764, "step": 1396 }, { "epoch": 0.15, "grad_norm": 2.119130536218619, "learning_rate": 9.734485265189374e-06, "loss": 0.7608, "step": 1397 }, { "epoch": 0.15, "grad_norm": 1.4202572815183994, "learning_rate": 9.733917859742746e-06, "loss": 0.5573, "step": 1398 }, { "epoch": 0.15, "grad_norm": 2.0754033713215683, "learning_rate": 9.733349865242163e-06, "loss": 0.6871, "step": 1399 }, { "epoch": 0.15, "grad_norm": 2.0740681574840614, "learning_rate": 9.7327812817583e-06, "loss": 0.6154, "step": 1400 }, { "epoch": 0.15, "grad_norm": 1.7612031745488848, "learning_rate": 9.732212109361909e-06, "loss": 0.6164, "step": 1401 }, { "epoch": 0.15, "grad_norm": 2.134672115236177, "learning_rate": 9.731642348123813e-06, "loss": 0.6778, "step": 1402 }, { "epoch": 0.15, "grad_norm": 1.735970326692552, "learning_rate": 9.731071998114907e-06, "loss": 0.64, "step": 1403 }, { "epoch": 0.15, "grad_norm": 1.9195744293475139, "learning_rate": 9.730501059406163e-06, "loss": 0.6124, "step": 1404 }, { "epoch": 0.15, "grad_norm": 1.9791361871791537, "learning_rate": 9.729929532068626e-06, "loss": 0.6006, "step": 1405 }, { "epoch": 0.15, "grad_norm": 1.9340049685981646, "learning_rate": 9.729357416173409e-06, "loss": 0.66, "step": 1406 }, { "epoch": 0.15, "grad_norm": 1.6923032768895994, "learning_rate": 9.728784711791701e-06, "loss": 0.5818, "step": 1407 }, { "epoch": 0.15, "grad_norm": 2.02749164382107, "learning_rate": 9.728211418994772e-06, "loss": 0.7182, "step": 1408 }, { "epoch": 0.15, "grad_norm": 1.8932870527975845, "learning_rate": 9.727637537853954e-06, "loss": 0.7328, "step": 1409 }, { "epoch": 0.15, "grad_norm": 2.0731224291097345, "learning_rate": 9.727063068440654e-06, "loss": 0.7004, "step": 1410 }, { "epoch": 0.15, "grad_norm": 1.9376156327631546, "learning_rate": 9.726488010826358e-06, "loss": 0.7439, "step": 1411 }, { "epoch": 0.15, "grad_norm": 2.0502161711122433, "learning_rate": 9.725912365082624e-06, "loss": 0.6679, "step": 1412 }, { "epoch": 0.15, "grad_norm": 1.3779152881642232, "learning_rate": 9.725336131281076e-06, "loss": 0.5684, "step": 1413 }, { "epoch": 0.15, "grad_norm": 2.0091447147255432, "learning_rate": 9.72475930949342e-06, "loss": 0.6547, "step": 1414 }, { "epoch": 0.15, "grad_norm": 2.043863049124698, "learning_rate": 9.724181899791428e-06, "loss": 0.7059, "step": 1415 }, { "epoch": 0.15, "grad_norm": 2.0201373374654166, "learning_rate": 9.723603902246954e-06, "loss": 0.6679, "step": 1416 }, { "epoch": 0.15, "grad_norm": 1.9195804992491479, "learning_rate": 9.723025316931914e-06, "loss": 0.7588, "step": 1417 }, { "epoch": 0.15, "grad_norm": 2.154716303373637, "learning_rate": 9.722446143918307e-06, "loss": 0.5322, "step": 1418 }, { "epoch": 0.15, "grad_norm": 1.8630352184452827, "learning_rate": 9.7218663832782e-06, "loss": 0.6897, "step": 1419 }, { "epoch": 0.15, "grad_norm": 2.086512696074193, "learning_rate": 9.721286035083732e-06, "loss": 0.6261, "step": 1420 }, { "epoch": 0.15, "grad_norm": 1.9214849059744985, "learning_rate": 9.720705099407121e-06, "loss": 0.7369, "step": 1421 }, { "epoch": 0.15, "grad_norm": 1.796731685202934, "learning_rate": 9.72012357632065e-06, "loss": 0.7175, "step": 1422 }, { "epoch": 0.15, "grad_norm": 2.4158228391866556, "learning_rate": 9.719541465896684e-06, "loss": 0.7679, "step": 1423 }, { "epoch": 0.15, "grad_norm": 1.679479559587306, "learning_rate": 9.718958768207654e-06, "loss": 0.6367, "step": 1424 }, { "epoch": 0.15, "grad_norm": 1.9350117442595254, "learning_rate": 9.718375483326066e-06, "loss": 0.7112, "step": 1425 }, { "epoch": 0.15, "grad_norm": 1.8349012128679154, "learning_rate": 9.717791611324502e-06, "loss": 0.7427, "step": 1426 }, { "epoch": 0.15, "grad_norm": 1.7600108551960045, "learning_rate": 9.717207152275612e-06, "loss": 0.6699, "step": 1427 }, { "epoch": 0.15, "grad_norm": 1.953003721903906, "learning_rate": 9.716622106252124e-06, "loss": 0.7159, "step": 1428 }, { "epoch": 0.15, "grad_norm": 2.235848664047906, "learning_rate": 9.716036473326836e-06, "loss": 0.7043, "step": 1429 }, { "epoch": 0.15, "grad_norm": 1.9280213677505447, "learning_rate": 9.71545025357262e-06, "loss": 0.6903, "step": 1430 }, { "epoch": 0.15, "grad_norm": 1.8908375941281093, "learning_rate": 9.714863447062422e-06, "loss": 0.7381, "step": 1431 }, { "epoch": 0.15, "grad_norm": 1.6594333057540738, "learning_rate": 9.714276053869256e-06, "loss": 0.7154, "step": 1432 }, { "epoch": 0.15, "grad_norm": 1.8548707206090456, "learning_rate": 9.713688074066218e-06, "loss": 0.762, "step": 1433 }, { "epoch": 0.15, "grad_norm": 2.188753831764548, "learning_rate": 9.713099507726467e-06, "loss": 0.7831, "step": 1434 }, { "epoch": 0.15, "grad_norm": 2.015290094408064, "learning_rate": 9.712510354923245e-06, "loss": 0.8858, "step": 1435 }, { "epoch": 0.15, "grad_norm": 1.861983790018766, "learning_rate": 9.711920615729858e-06, "loss": 0.6448, "step": 1436 }, { "epoch": 0.15, "grad_norm": 1.7269554269577754, "learning_rate": 9.71133029021969e-06, "loss": 0.6858, "step": 1437 }, { "epoch": 0.15, "grad_norm": 1.8934279064750623, "learning_rate": 9.710739378466196e-06, "loss": 0.6722, "step": 1438 }, { "epoch": 0.15, "grad_norm": 1.9873284788659158, "learning_rate": 9.710147880542906e-06, "loss": 0.7139, "step": 1439 }, { "epoch": 0.15, "grad_norm": 2.0325929458609453, "learning_rate": 9.709555796523422e-06, "loss": 0.7131, "step": 1440 }, { "epoch": 0.15, "grad_norm": 1.695005132690052, "learning_rate": 9.708963126481416e-06, "loss": 0.7122, "step": 1441 }, { "epoch": 0.15, "grad_norm": 1.7766569326452735, "learning_rate": 9.708369870490637e-06, "loss": 0.7052, "step": 1442 }, { "epoch": 0.15, "grad_norm": 1.5187727619638267, "learning_rate": 9.707776028624907e-06, "loss": 0.5486, "step": 1443 }, { "epoch": 0.15, "grad_norm": 1.8303792849729186, "learning_rate": 9.707181600958118e-06, "loss": 0.7112, "step": 1444 }, { "epoch": 0.15, "grad_norm": 1.8474806651684417, "learning_rate": 9.706586587564236e-06, "loss": 0.7175, "step": 1445 }, { "epoch": 0.15, "grad_norm": 1.2025253502114774, "learning_rate": 9.7059909885173e-06, "loss": 0.5647, "step": 1446 }, { "epoch": 0.15, "grad_norm": 1.9189668466614245, "learning_rate": 9.705394803891422e-06, "loss": 0.7928, "step": 1447 }, { "epoch": 0.15, "grad_norm": 1.6801212976606694, "learning_rate": 9.704798033760787e-06, "loss": 0.6722, "step": 1448 }, { "epoch": 0.15, "grad_norm": 1.305153818141345, "learning_rate": 9.704200678199653e-06, "loss": 0.571, "step": 1449 }, { "epoch": 0.15, "grad_norm": 1.759190129366583, "learning_rate": 9.703602737282351e-06, "loss": 0.705, "step": 1450 }, { "epoch": 0.15, "grad_norm": 1.7317734237512648, "learning_rate": 9.703004211083283e-06, "loss": 0.6754, "step": 1451 }, { "epoch": 0.15, "grad_norm": 1.1370205458075378, "learning_rate": 9.702405099676925e-06, "loss": 0.5325, "step": 1452 }, { "epoch": 0.15, "grad_norm": 1.6972944306816904, "learning_rate": 9.70180540313783e-06, "loss": 0.587, "step": 1453 }, { "epoch": 0.16, "grad_norm": 1.9076912632156269, "learning_rate": 9.701205121540614e-06, "loss": 0.8391, "step": 1454 }, { "epoch": 0.16, "grad_norm": 1.8810570430670388, "learning_rate": 9.700604254959974e-06, "loss": 0.6887, "step": 1455 }, { "epoch": 0.16, "grad_norm": 1.9054146731510093, "learning_rate": 9.700002803470679e-06, "loss": 0.6685, "step": 1456 }, { "epoch": 0.16, "grad_norm": 1.959875314815094, "learning_rate": 9.699400767147566e-06, "loss": 0.6472, "step": 1457 }, { "epoch": 0.16, "grad_norm": 2.058557390230747, "learning_rate": 9.698798146065554e-06, "loss": 0.7829, "step": 1458 }, { "epoch": 0.16, "grad_norm": 1.893330512629069, "learning_rate": 9.698194940299621e-06, "loss": 0.6779, "step": 1459 }, { "epoch": 0.16, "grad_norm": 1.627675326082267, "learning_rate": 9.69759114992483e-06, "loss": 0.5771, "step": 1460 }, { "epoch": 0.16, "grad_norm": 1.6695525512937524, "learning_rate": 9.696986775016313e-06, "loss": 0.6809, "step": 1461 }, { "epoch": 0.16, "grad_norm": 2.054899589553269, "learning_rate": 9.69638181564927e-06, "loss": 0.7629, "step": 1462 }, { "epoch": 0.16, "grad_norm": 1.342749566120878, "learning_rate": 9.695776271898983e-06, "loss": 0.5621, "step": 1463 }, { "epoch": 0.16, "grad_norm": 1.640574495677777, "learning_rate": 9.695170143840795e-06, "loss": 0.6061, "step": 1464 }, { "epoch": 0.16, "grad_norm": 1.872578395712246, "learning_rate": 9.694563431550133e-06, "loss": 0.6851, "step": 1465 }, { "epoch": 0.16, "grad_norm": 2.3436555465106252, "learning_rate": 9.69395613510249e-06, "loss": 0.6848, "step": 1466 }, { "epoch": 0.16, "grad_norm": 1.8123007888410332, "learning_rate": 9.693348254573435e-06, "loss": 0.6775, "step": 1467 }, { "epoch": 0.16, "grad_norm": 1.7666587877507391, "learning_rate": 9.692739790038607e-06, "loss": 0.6953, "step": 1468 }, { "epoch": 0.16, "grad_norm": 1.8619295994190597, "learning_rate": 9.69213074157372e-06, "loss": 0.7258, "step": 1469 }, { "epoch": 0.16, "grad_norm": 1.8842906626446612, "learning_rate": 9.691521109254558e-06, "loss": 0.6422, "step": 1470 }, { "epoch": 0.16, "grad_norm": 1.8080926466884475, "learning_rate": 9.69091089315698e-06, "loss": 0.7103, "step": 1471 }, { "epoch": 0.16, "grad_norm": 1.9174995744906773, "learning_rate": 9.690300093356915e-06, "loss": 0.6751, "step": 1472 }, { "epoch": 0.16, "grad_norm": 1.8405333959012693, "learning_rate": 9.689688709930368e-06, "loss": 0.6019, "step": 1473 }, { "epoch": 0.16, "grad_norm": 2.1162133441082918, "learning_rate": 9.689076742953416e-06, "loss": 0.6851, "step": 1474 }, { "epoch": 0.16, "grad_norm": 1.8217336826417767, "learning_rate": 9.688464192502207e-06, "loss": 0.6294, "step": 1475 }, { "epoch": 0.16, "grad_norm": 1.8997549686970019, "learning_rate": 9.68785105865296e-06, "loss": 0.8302, "step": 1476 }, { "epoch": 0.16, "grad_norm": 1.789500277192089, "learning_rate": 9.687237341481972e-06, "loss": 0.6726, "step": 1477 }, { "epoch": 0.16, "grad_norm": 1.6282250144286332, "learning_rate": 9.686623041065608e-06, "loss": 0.5612, "step": 1478 }, { "epoch": 0.16, "grad_norm": 1.9676337074189618, "learning_rate": 9.686008157480308e-06, "loss": 0.6819, "step": 1479 }, { "epoch": 0.16, "grad_norm": 1.9666236721822015, "learning_rate": 9.685392690802582e-06, "loss": 0.6943, "step": 1480 }, { "epoch": 0.16, "grad_norm": 1.8622985531773035, "learning_rate": 9.684776641109014e-06, "loss": 0.7222, "step": 1481 }, { "epoch": 0.16, "grad_norm": 1.6707306484069429, "learning_rate": 9.684160008476264e-06, "loss": 0.6793, "step": 1482 }, { "epoch": 0.16, "grad_norm": 1.8436579796465429, "learning_rate": 9.683542792981059e-06, "loss": 0.6081, "step": 1483 }, { "epoch": 0.16, "grad_norm": 1.9189483221114911, "learning_rate": 9.6829249947002e-06, "loss": 0.7044, "step": 1484 }, { "epoch": 0.16, "grad_norm": 2.0263313961795295, "learning_rate": 9.682306613710561e-06, "loss": 0.6612, "step": 1485 }, { "epoch": 0.16, "grad_norm": 2.014966247667118, "learning_rate": 9.681687650089092e-06, "loss": 0.6233, "step": 1486 }, { "epoch": 0.16, "grad_norm": 2.3250719577654833, "learning_rate": 9.681068103912808e-06, "loss": 0.7789, "step": 1487 }, { "epoch": 0.16, "grad_norm": 1.8571891484054082, "learning_rate": 9.680447975258804e-06, "loss": 0.733, "step": 1488 }, { "epoch": 0.16, "grad_norm": 1.953501412752444, "learning_rate": 9.679827264204242e-06, "loss": 0.6301, "step": 1489 }, { "epoch": 0.16, "grad_norm": 1.7618253807618824, "learning_rate": 9.679205970826363e-06, "loss": 0.6495, "step": 1490 }, { "epoch": 0.16, "grad_norm": 1.8518321759936356, "learning_rate": 9.678584095202468e-06, "loss": 0.674, "step": 1491 }, { "epoch": 0.16, "grad_norm": 2.3914674373867273, "learning_rate": 9.677961637409948e-06, "loss": 0.7607, "step": 1492 }, { "epoch": 0.16, "grad_norm": 1.9331178651709382, "learning_rate": 9.67733859752625e-06, "loss": 0.6113, "step": 1493 }, { "epoch": 0.16, "grad_norm": 1.7956183925783267, "learning_rate": 9.676714975628905e-06, "loss": 0.7543, "step": 1494 }, { "epoch": 0.16, "grad_norm": 1.7200991542970645, "learning_rate": 9.67609077179551e-06, "loss": 0.6532, "step": 1495 }, { "epoch": 0.16, "grad_norm": 1.7313326254456278, "learning_rate": 9.675465986103736e-06, "loss": 0.5726, "step": 1496 }, { "epoch": 0.16, "grad_norm": 1.732574528039196, "learning_rate": 9.674840618631327e-06, "loss": 0.6722, "step": 1497 }, { "epoch": 0.16, "grad_norm": 1.2477132376804112, "learning_rate": 9.6742146694561e-06, "loss": 0.5851, "step": 1498 }, { "epoch": 0.16, "grad_norm": 1.5598849782989082, "learning_rate": 9.673588138655941e-06, "loss": 0.6208, "step": 1499 }, { "epoch": 0.16, "grad_norm": 1.9110439337275245, "learning_rate": 9.672961026308815e-06, "loss": 0.6967, "step": 1500 }, { "epoch": 0.16, "grad_norm": 1.781116593399272, "learning_rate": 9.672333332492752e-06, "loss": 0.7121, "step": 1501 }, { "epoch": 0.16, "grad_norm": 1.8262310698988764, "learning_rate": 9.671705057285858e-06, "loss": 0.6603, "step": 1502 }, { "epoch": 0.16, "grad_norm": 1.9144232309584737, "learning_rate": 9.671076200766315e-06, "loss": 0.666, "step": 1503 }, { "epoch": 0.16, "grad_norm": 1.7624728568967027, "learning_rate": 9.670446763012365e-06, "loss": 0.6102, "step": 1504 }, { "epoch": 0.16, "grad_norm": 1.8595307706486788, "learning_rate": 9.669816744102338e-06, "loss": 0.6675, "step": 1505 }, { "epoch": 0.16, "grad_norm": 2.3551817899580882, "learning_rate": 9.669186144114627e-06, "loss": 0.6066, "step": 1506 }, { "epoch": 0.16, "grad_norm": 1.7551199930190313, "learning_rate": 9.668554963127698e-06, "loss": 0.692, "step": 1507 }, { "epoch": 0.16, "grad_norm": 1.8707526278283013, "learning_rate": 9.66792320122009e-06, "loss": 0.6861, "step": 1508 }, { "epoch": 0.16, "grad_norm": 1.906361978624436, "learning_rate": 9.667290858470417e-06, "loss": 0.6504, "step": 1509 }, { "epoch": 0.16, "grad_norm": 1.9531458533478474, "learning_rate": 9.666657934957363e-06, "loss": 0.6918, "step": 1510 }, { "epoch": 0.16, "grad_norm": 1.8653758859893799, "learning_rate": 9.666024430759682e-06, "loss": 0.7232, "step": 1511 }, { "epoch": 0.16, "grad_norm": 1.9379745925280822, "learning_rate": 9.665390345956204e-06, "loss": 0.7578, "step": 1512 }, { "epoch": 0.16, "grad_norm": 1.8944144667283684, "learning_rate": 9.664755680625832e-06, "loss": 0.6724, "step": 1513 }, { "epoch": 0.16, "grad_norm": 2.0085339142583636, "learning_rate": 9.664120434847534e-06, "loss": 0.7023, "step": 1514 }, { "epoch": 0.16, "grad_norm": 1.7607167169413778, "learning_rate": 9.663484608700359e-06, "loss": 0.7515, "step": 1515 }, { "epoch": 0.16, "grad_norm": 1.7176753551727337, "learning_rate": 9.662848202263426e-06, "loss": 0.7317, "step": 1516 }, { "epoch": 0.16, "grad_norm": 1.858163805017489, "learning_rate": 9.66221121561592e-06, "loss": 0.698, "step": 1517 }, { "epoch": 0.16, "grad_norm": 1.777183491678476, "learning_rate": 9.661573648837107e-06, "loss": 0.6668, "step": 1518 }, { "epoch": 0.16, "grad_norm": 1.751301018148878, "learning_rate": 9.66093550200632e-06, "loss": 0.7788, "step": 1519 }, { "epoch": 0.16, "grad_norm": 1.8340711282145248, "learning_rate": 9.660296775202965e-06, "loss": 0.6726, "step": 1520 }, { "epoch": 0.16, "grad_norm": 1.7515699056849678, "learning_rate": 9.659657468506518e-06, "loss": 0.5883, "step": 1521 }, { "epoch": 0.16, "grad_norm": 2.0483272835216755, "learning_rate": 9.659017581996533e-06, "loss": 0.835, "step": 1522 }, { "epoch": 0.16, "grad_norm": 1.9255396022443825, "learning_rate": 9.658377115752633e-06, "loss": 0.7942, "step": 1523 }, { "epoch": 0.16, "grad_norm": 1.9245195976801759, "learning_rate": 9.657736069854512e-06, "loss": 0.7316, "step": 1524 }, { "epoch": 0.16, "grad_norm": 1.9890827552321484, "learning_rate": 9.657094444381934e-06, "loss": 0.723, "step": 1525 }, { "epoch": 0.16, "grad_norm": 1.5594297946133195, "learning_rate": 9.656452239414744e-06, "loss": 0.62, "step": 1526 }, { "epoch": 0.16, "grad_norm": 1.7785776609078356, "learning_rate": 9.655809455032848e-06, "loss": 0.6889, "step": 1527 }, { "epoch": 0.16, "grad_norm": 1.8027654167700065, "learning_rate": 9.655166091316232e-06, "loss": 0.7654, "step": 1528 }, { "epoch": 0.16, "grad_norm": 1.921414154641048, "learning_rate": 9.654522148344952e-06, "loss": 0.6826, "step": 1529 }, { "epoch": 0.16, "grad_norm": 1.7000761576034105, "learning_rate": 9.653877626199133e-06, "loss": 0.581, "step": 1530 }, { "epoch": 0.16, "grad_norm": 1.8773517734099325, "learning_rate": 9.653232524958978e-06, "loss": 0.7014, "step": 1531 }, { "epoch": 0.16, "grad_norm": 1.9153098924833665, "learning_rate": 9.652586844704755e-06, "loss": 0.7876, "step": 1532 }, { "epoch": 0.16, "grad_norm": 1.7370785240765074, "learning_rate": 9.65194058551681e-06, "loss": 0.7313, "step": 1533 }, { "epoch": 0.16, "grad_norm": 1.9453511406553907, "learning_rate": 9.651293747475559e-06, "loss": 0.7819, "step": 1534 }, { "epoch": 0.16, "grad_norm": 1.863948251936242, "learning_rate": 9.650646330661488e-06, "loss": 0.685, "step": 1535 }, { "epoch": 0.16, "grad_norm": 1.9772075344834406, "learning_rate": 9.649998335155159e-06, "loss": 0.6426, "step": 1536 }, { "epoch": 0.16, "grad_norm": 1.4445828782221992, "learning_rate": 9.6493497610372e-06, "loss": 0.5759, "step": 1537 }, { "epoch": 0.16, "grad_norm": 1.812556156414905, "learning_rate": 9.648700608388322e-06, "loss": 0.6105, "step": 1538 }, { "epoch": 0.16, "grad_norm": 1.7579047354335386, "learning_rate": 9.648050877289293e-06, "loss": 0.6789, "step": 1539 }, { "epoch": 0.16, "grad_norm": 1.262277484516034, "learning_rate": 9.647400567820964e-06, "loss": 0.5879, "step": 1540 }, { "epoch": 0.16, "grad_norm": 1.9195004787591001, "learning_rate": 9.646749680064255e-06, "loss": 0.6698, "step": 1541 }, { "epoch": 0.16, "grad_norm": 2.2924063816066838, "learning_rate": 9.646098214100158e-06, "loss": 0.6938, "step": 1542 }, { "epoch": 0.16, "grad_norm": 1.6489091634660626, "learning_rate": 9.645446170009736e-06, "loss": 0.6742, "step": 1543 }, { "epoch": 0.16, "grad_norm": 1.4088855539088203, "learning_rate": 9.644793547874123e-06, "loss": 0.572, "step": 1544 }, { "epoch": 0.16, "grad_norm": 1.8074828705195813, "learning_rate": 9.644140347774529e-06, "loss": 0.7164, "step": 1545 }, { "epoch": 0.16, "grad_norm": 1.8310463365061806, "learning_rate": 9.643486569792233e-06, "loss": 0.6554, "step": 1546 }, { "epoch": 0.17, "grad_norm": 1.6661140307047637, "learning_rate": 9.642832214008586e-06, "loss": 0.6077, "step": 1547 }, { "epoch": 0.17, "grad_norm": 1.748350975860403, "learning_rate": 9.64217728050501e-06, "loss": 0.6755, "step": 1548 }, { "epoch": 0.17, "grad_norm": 1.9249394932726815, "learning_rate": 9.641521769363002e-06, "loss": 0.6934, "step": 1549 }, { "epoch": 0.17, "grad_norm": 1.2992009529891513, "learning_rate": 9.640865680664128e-06, "loss": 0.5678, "step": 1550 }, { "epoch": 0.17, "grad_norm": 1.7194213072598334, "learning_rate": 9.640209014490028e-06, "loss": 0.631, "step": 1551 }, { "epoch": 0.17, "grad_norm": 1.619859035027445, "learning_rate": 9.639551770922411e-06, "loss": 0.5639, "step": 1552 }, { "epoch": 0.17, "grad_norm": 1.231301259230318, "learning_rate": 9.63889395004306e-06, "loss": 0.5585, "step": 1553 }, { "epoch": 0.17, "grad_norm": 1.1996972523800002, "learning_rate": 9.63823555193383e-06, "loss": 0.5619, "step": 1554 }, { "epoch": 0.17, "grad_norm": 1.1733484547869315, "learning_rate": 9.637576576676646e-06, "loss": 0.5715, "step": 1555 }, { "epoch": 0.17, "grad_norm": 1.6396454393170479, "learning_rate": 9.636917024353507e-06, "loss": 0.6391, "step": 1556 }, { "epoch": 0.17, "grad_norm": 1.34458021024111, "learning_rate": 9.636256895046484e-06, "loss": 0.5523, "step": 1557 }, { "epoch": 0.17, "grad_norm": 1.1781092076962703, "learning_rate": 9.635596188837717e-06, "loss": 0.5475, "step": 1558 }, { "epoch": 0.17, "grad_norm": 2.2027498608853135, "learning_rate": 9.63493490580942e-06, "loss": 0.7116, "step": 1559 }, { "epoch": 0.17, "grad_norm": 1.8454517414145506, "learning_rate": 9.634273046043879e-06, "loss": 0.7576, "step": 1560 }, { "epoch": 0.17, "grad_norm": 1.4467409087049499, "learning_rate": 9.633610609623447e-06, "loss": 0.5499, "step": 1561 }, { "epoch": 0.17, "grad_norm": 1.878966856373861, "learning_rate": 9.63294759663056e-06, "loss": 0.7083, "step": 1562 }, { "epoch": 0.17, "grad_norm": 1.3195502162577764, "learning_rate": 9.63228400714771e-06, "loss": 0.5999, "step": 1563 }, { "epoch": 0.17, "grad_norm": 1.2430860308580818, "learning_rate": 9.631619841257477e-06, "loss": 0.5531, "step": 1564 }, { "epoch": 0.17, "grad_norm": 1.8744042028201853, "learning_rate": 9.630955099042499e-06, "loss": 0.7288, "step": 1565 }, { "epoch": 0.17, "grad_norm": 1.8972149552652162, "learning_rate": 9.630289780585493e-06, "loss": 0.689, "step": 1566 }, { "epoch": 0.17, "grad_norm": 1.7234815958709364, "learning_rate": 9.62962388596925e-06, "loss": 0.6845, "step": 1567 }, { "epoch": 0.17, "grad_norm": 1.776577510497923, "learning_rate": 9.628957415276625e-06, "loss": 0.5906, "step": 1568 }, { "epoch": 0.17, "grad_norm": 1.7532874651898898, "learning_rate": 9.628290368590551e-06, "loss": 0.758, "step": 1569 }, { "epoch": 0.17, "grad_norm": 1.6428176038727331, "learning_rate": 9.627622745994028e-06, "loss": 0.5673, "step": 1570 }, { "epoch": 0.17, "grad_norm": 1.8069844328211109, "learning_rate": 9.626954547570133e-06, "loss": 0.8009, "step": 1571 }, { "epoch": 0.17, "grad_norm": 1.9254567177083646, "learning_rate": 9.62628577340201e-06, "loss": 0.7673, "step": 1572 }, { "epoch": 0.17, "grad_norm": 2.176865577073114, "learning_rate": 9.625616423572876e-06, "loss": 0.7813, "step": 1573 }, { "epoch": 0.17, "grad_norm": 1.3383554898826657, "learning_rate": 9.624946498166022e-06, "loss": 0.5707, "step": 1574 }, { "epoch": 0.17, "grad_norm": 1.3352997450244048, "learning_rate": 9.624275997264805e-06, "loss": 0.5456, "step": 1575 }, { "epoch": 0.17, "grad_norm": 1.9731816742692874, "learning_rate": 9.62360492095266e-06, "loss": 0.6033, "step": 1576 }, { "epoch": 0.17, "grad_norm": 1.9264777336437848, "learning_rate": 9.62293326931309e-06, "loss": 0.672, "step": 1577 }, { "epoch": 0.17, "grad_norm": 2.2968409305005095, "learning_rate": 9.622261042429672e-06, "loss": 0.7339, "step": 1578 }, { "epoch": 0.17, "grad_norm": 1.976820533930865, "learning_rate": 9.62158824038605e-06, "loss": 0.633, "step": 1579 }, { "epoch": 0.17, "grad_norm": 1.6420290519986604, "learning_rate": 9.620914863265944e-06, "loss": 0.6519, "step": 1580 }, { "epoch": 0.17, "grad_norm": 1.9556509329635254, "learning_rate": 9.620240911153144e-06, "loss": 0.7325, "step": 1581 }, { "epoch": 0.17, "grad_norm": 1.8578132231708684, "learning_rate": 9.619566384131514e-06, "loss": 0.6746, "step": 1582 }, { "epoch": 0.17, "grad_norm": 1.8978637335355206, "learning_rate": 9.618891282284984e-06, "loss": 0.6443, "step": 1583 }, { "epoch": 0.17, "grad_norm": 1.8244612926668868, "learning_rate": 9.618215605697558e-06, "loss": 0.6621, "step": 1584 }, { "epoch": 0.17, "grad_norm": 1.8962294367816315, "learning_rate": 9.617539354453314e-06, "loss": 0.6205, "step": 1585 }, { "epoch": 0.17, "grad_norm": 1.9971770681695367, "learning_rate": 9.616862528636402e-06, "loss": 0.7242, "step": 1586 }, { "epoch": 0.17, "grad_norm": 2.044907373314334, "learning_rate": 9.616185128331036e-06, "loss": 0.7395, "step": 1587 }, { "epoch": 0.17, "grad_norm": 1.7512056764301072, "learning_rate": 9.615507153621513e-06, "loss": 0.6565, "step": 1588 }, { "epoch": 0.17, "grad_norm": 2.5494982793806997, "learning_rate": 9.61482860459219e-06, "loss": 0.5938, "step": 1589 }, { "epoch": 0.17, "grad_norm": 2.0195230845682444, "learning_rate": 9.614149481327501e-06, "loss": 0.5605, "step": 1590 }, { "epoch": 0.17, "grad_norm": 1.948346266394502, "learning_rate": 9.613469783911954e-06, "loss": 0.7115, "step": 1591 }, { "epoch": 0.17, "grad_norm": 1.9355295019981562, "learning_rate": 9.612789512430126e-06, "loss": 0.6785, "step": 1592 }, { "epoch": 0.17, "grad_norm": 1.858742481007541, "learning_rate": 9.61210866696666e-06, "loss": 0.7723, "step": 1593 }, { "epoch": 0.17, "grad_norm": 1.9256937237432354, "learning_rate": 9.61142724760628e-06, "loss": 0.7108, "step": 1594 }, { "epoch": 0.17, "grad_norm": 2.427416643762972, "learning_rate": 9.610745254433777e-06, "loss": 0.5738, "step": 1595 }, { "epoch": 0.17, "grad_norm": 1.7884312957562767, "learning_rate": 9.61006268753401e-06, "loss": 0.8002, "step": 1596 }, { "epoch": 0.17, "grad_norm": 1.8537737169747401, "learning_rate": 9.609379546991913e-06, "loss": 0.732, "step": 1597 }, { "epoch": 0.17, "grad_norm": 1.7635163320457725, "learning_rate": 9.608695832892492e-06, "loss": 0.5619, "step": 1598 }, { "epoch": 0.17, "grad_norm": 1.8673356299478867, "learning_rate": 9.608011545320825e-06, "loss": 0.6833, "step": 1599 }, { "epoch": 0.17, "grad_norm": 1.857366203956114, "learning_rate": 9.60732668436206e-06, "loss": 0.6406, "step": 1600 }, { "epoch": 0.17, "grad_norm": 2.4994677598148147, "learning_rate": 9.606641250101412e-06, "loss": 0.6642, "step": 1601 }, { "epoch": 0.17, "grad_norm": 1.974285538877628, "learning_rate": 9.605955242624173e-06, "loss": 0.704, "step": 1602 }, { "epoch": 0.17, "grad_norm": 2.152962744128212, "learning_rate": 9.605268662015707e-06, "loss": 0.7745, "step": 1603 }, { "epoch": 0.17, "grad_norm": 1.8229902197413972, "learning_rate": 9.604581508361444e-06, "loss": 0.673, "step": 1604 }, { "epoch": 0.17, "grad_norm": 2.1855997690343503, "learning_rate": 9.60389378174689e-06, "loss": 0.697, "step": 1605 }, { "epoch": 0.17, "grad_norm": 1.9786134797630384, "learning_rate": 9.603205482257623e-06, "loss": 0.7758, "step": 1606 }, { "epoch": 0.17, "grad_norm": 1.924399530996352, "learning_rate": 9.602516609979285e-06, "loss": 0.7798, "step": 1607 }, { "epoch": 0.17, "grad_norm": 1.8722594105523325, "learning_rate": 9.601827164997597e-06, "loss": 0.7611, "step": 1608 }, { "epoch": 0.17, "grad_norm": 1.66940302162068, "learning_rate": 9.601137147398347e-06, "loss": 0.6897, "step": 1609 }, { "epoch": 0.17, "grad_norm": 1.7769025708905062, "learning_rate": 9.600446557267399e-06, "loss": 0.6885, "step": 1610 }, { "epoch": 0.17, "grad_norm": 1.7537678002651673, "learning_rate": 9.599755394690681e-06, "loss": 0.6559, "step": 1611 }, { "epoch": 0.17, "grad_norm": 1.8217155341157047, "learning_rate": 9.5990636597542e-06, "loss": 0.6372, "step": 1612 }, { "epoch": 0.17, "grad_norm": 1.8933643261346522, "learning_rate": 9.598371352544027e-06, "loss": 0.6523, "step": 1613 }, { "epoch": 0.17, "grad_norm": 1.7898917862082502, "learning_rate": 9.597678473146309e-06, "loss": 0.6664, "step": 1614 }, { "epoch": 0.17, "grad_norm": 1.9075476613682394, "learning_rate": 9.596985021647262e-06, "loss": 0.664, "step": 1615 }, { "epoch": 0.17, "grad_norm": 1.930166530803293, "learning_rate": 9.596290998133177e-06, "loss": 0.771, "step": 1616 }, { "epoch": 0.17, "grad_norm": 1.8194121931544787, "learning_rate": 9.595596402690411e-06, "loss": 0.7588, "step": 1617 }, { "epoch": 0.17, "grad_norm": 2.0151654650507758, "learning_rate": 9.594901235405392e-06, "loss": 0.6934, "step": 1618 }, { "epoch": 0.17, "grad_norm": 1.9611554989821067, "learning_rate": 9.594205496364625e-06, "loss": 0.7302, "step": 1619 }, { "epoch": 0.17, "grad_norm": 1.903245574841541, "learning_rate": 9.59350918565468e-06, "loss": 0.672, "step": 1620 }, { "epoch": 0.17, "grad_norm": 1.7145622836808125, "learning_rate": 9.592812303362206e-06, "loss": 0.6611, "step": 1621 }, { "epoch": 0.17, "grad_norm": 2.0027510276087725, "learning_rate": 9.592114849573911e-06, "loss": 0.6916, "step": 1622 }, { "epoch": 0.17, "grad_norm": 1.8524799708494577, "learning_rate": 9.591416824376586e-06, "loss": 0.6834, "step": 1623 }, { "epoch": 0.17, "grad_norm": 1.8133833191817703, "learning_rate": 9.590718227857086e-06, "loss": 0.6514, "step": 1624 }, { "epoch": 0.17, "grad_norm": 2.1227111813991213, "learning_rate": 9.590019060102339e-06, "loss": 0.7177, "step": 1625 }, { "epoch": 0.17, "grad_norm": 2.0772582619155506, "learning_rate": 9.589319321199346e-06, "loss": 0.6623, "step": 1626 }, { "epoch": 0.17, "grad_norm": 1.9595516398580906, "learning_rate": 9.588619011235176e-06, "loss": 0.5823, "step": 1627 }, { "epoch": 0.17, "grad_norm": 2.3674623391517855, "learning_rate": 9.587918130296969e-06, "loss": 0.7657, "step": 1628 }, { "epoch": 0.17, "grad_norm": 1.8881807522096858, "learning_rate": 9.58721667847194e-06, "loss": 0.7473, "step": 1629 }, { "epoch": 0.17, "grad_norm": 1.9051311473908035, "learning_rate": 9.586514655847373e-06, "loss": 0.7112, "step": 1630 }, { "epoch": 0.17, "grad_norm": 1.9667060961617868, "learning_rate": 9.58581206251062e-06, "loss": 0.6634, "step": 1631 }, { "epoch": 0.17, "grad_norm": 2.0004884247907184, "learning_rate": 9.58510889854911e-06, "loss": 0.6982, "step": 1632 }, { "epoch": 0.17, "grad_norm": 1.787531001621629, "learning_rate": 9.584405164050334e-06, "loss": 0.6522, "step": 1633 }, { "epoch": 0.17, "grad_norm": 1.9501088393417887, "learning_rate": 9.583700859101865e-06, "loss": 0.6838, "step": 1634 }, { "epoch": 0.17, "grad_norm": 2.368024623143559, "learning_rate": 9.582995983791337e-06, "loss": 0.5576, "step": 1635 }, { "epoch": 0.17, "grad_norm": 1.8797478714607445, "learning_rate": 9.582290538206465e-06, "loss": 0.6736, "step": 1636 }, { "epoch": 0.17, "grad_norm": 2.251365415466192, "learning_rate": 9.581584522435025e-06, "loss": 0.6718, "step": 1637 }, { "epoch": 0.17, "grad_norm": 2.022780087151841, "learning_rate": 9.580877936564869e-06, "loss": 0.7388, "step": 1638 }, { "epoch": 0.17, "grad_norm": 2.270986803626415, "learning_rate": 9.580170780683921e-06, "loss": 0.7233, "step": 1639 }, { "epoch": 0.17, "grad_norm": 1.8570364316752366, "learning_rate": 9.579463054880173e-06, "loss": 0.6753, "step": 1640 }, { "epoch": 0.18, "grad_norm": 1.8745859229950188, "learning_rate": 9.57875475924169e-06, "loss": 0.6001, "step": 1641 }, { "epoch": 0.18, "grad_norm": 1.801851393332587, "learning_rate": 9.578045893856607e-06, "loss": 0.7294, "step": 1642 }, { "epoch": 0.18, "grad_norm": 1.8524281364409416, "learning_rate": 9.57733645881313e-06, "loss": 0.7279, "step": 1643 }, { "epoch": 0.18, "grad_norm": 1.7276103475190812, "learning_rate": 9.576626454199536e-06, "loss": 0.6266, "step": 1644 }, { "epoch": 0.18, "grad_norm": 1.9028315001953546, "learning_rate": 9.57591588010417e-06, "loss": 0.7192, "step": 1645 }, { "epoch": 0.18, "grad_norm": 1.3882141167243767, "learning_rate": 9.575204736615455e-06, "loss": 0.6025, "step": 1646 }, { "epoch": 0.18, "grad_norm": 1.809148268996464, "learning_rate": 9.574493023821879e-06, "loss": 0.68, "step": 1647 }, { "epoch": 0.18, "grad_norm": 1.8370325184841445, "learning_rate": 9.573780741812e-06, "loss": 0.6689, "step": 1648 }, { "epoch": 0.18, "grad_norm": 1.815343823202121, "learning_rate": 9.573067890674451e-06, "loss": 0.7388, "step": 1649 }, { "epoch": 0.18, "grad_norm": 2.1937421738405605, "learning_rate": 9.572354470497936e-06, "loss": 0.7052, "step": 1650 }, { "epoch": 0.18, "grad_norm": 1.854603971749436, "learning_rate": 9.571640481371222e-06, "loss": 0.7254, "step": 1651 }, { "epoch": 0.18, "grad_norm": 1.6327684060393945, "learning_rate": 9.570925923383159e-06, "loss": 0.6338, "step": 1652 }, { "epoch": 0.18, "grad_norm": 2.0660586457960415, "learning_rate": 9.570210796622658e-06, "loss": 0.7068, "step": 1653 }, { "epoch": 0.18, "grad_norm": 1.6413351318998297, "learning_rate": 9.569495101178706e-06, "loss": 0.5723, "step": 1654 }, { "epoch": 0.18, "grad_norm": 1.3470512541795898, "learning_rate": 9.568778837140355e-06, "loss": 0.5391, "step": 1655 }, { "epoch": 0.18, "grad_norm": 1.973469031495307, "learning_rate": 9.568062004596736e-06, "loss": 0.6166, "step": 1656 }, { "epoch": 0.18, "grad_norm": 2.1681899850567503, "learning_rate": 9.567344603637043e-06, "loss": 0.6495, "step": 1657 }, { "epoch": 0.18, "grad_norm": 1.9501270724792878, "learning_rate": 9.566626634350547e-06, "loss": 0.7204, "step": 1658 }, { "epoch": 0.18, "grad_norm": 1.9287040335018533, "learning_rate": 9.565908096826584e-06, "loss": 0.6591, "step": 1659 }, { "epoch": 0.18, "grad_norm": 1.9607975235982766, "learning_rate": 9.565188991154566e-06, "loss": 0.7127, "step": 1660 }, { "epoch": 0.18, "grad_norm": 1.7891829746514132, "learning_rate": 9.564469317423974e-06, "loss": 0.713, "step": 1661 }, { "epoch": 0.18, "grad_norm": 1.9195250646116648, "learning_rate": 9.563749075724354e-06, "loss": 0.7403, "step": 1662 }, { "epoch": 0.18, "grad_norm": 1.7993461011950707, "learning_rate": 9.563028266145333e-06, "loss": 0.714, "step": 1663 }, { "epoch": 0.18, "grad_norm": 1.900435112275794, "learning_rate": 9.5623068887766e-06, "loss": 0.7315, "step": 1664 }, { "epoch": 0.18, "grad_norm": 2.6291539529403396, "learning_rate": 9.561584943707919e-06, "loss": 0.7672, "step": 1665 }, { "epoch": 0.18, "grad_norm": 2.0698454831598556, "learning_rate": 9.560862431029124e-06, "loss": 0.737, "step": 1666 }, { "epoch": 0.18, "grad_norm": 1.7777827975207314, "learning_rate": 9.560139350830119e-06, "loss": 0.6536, "step": 1667 }, { "epoch": 0.18, "grad_norm": 1.9359853715262951, "learning_rate": 9.559415703200876e-06, "loss": 0.7133, "step": 1668 }, { "epoch": 0.18, "grad_norm": 4.215870500756726, "learning_rate": 9.558691488231443e-06, "loss": 0.5688, "step": 1669 }, { "epoch": 0.18, "grad_norm": 1.781687157096329, "learning_rate": 9.557966706011939e-06, "loss": 0.6739, "step": 1670 }, { "epoch": 0.18, "grad_norm": 1.6790303005790221, "learning_rate": 9.557241356632546e-06, "loss": 0.646, "step": 1671 }, { "epoch": 0.18, "grad_norm": 2.091534260822834, "learning_rate": 9.55651544018352e-06, "loss": 0.6471, "step": 1672 }, { "epoch": 0.18, "grad_norm": 2.0033499271729247, "learning_rate": 9.555788956755193e-06, "loss": 0.7384, "step": 1673 }, { "epoch": 0.18, "grad_norm": 2.2602498580552046, "learning_rate": 9.55506190643796e-06, "loss": 0.6375, "step": 1674 }, { "epoch": 0.18, "grad_norm": 3.3395293814917286, "learning_rate": 9.554334289322295e-06, "loss": 0.6176, "step": 1675 }, { "epoch": 0.18, "grad_norm": 1.966793366027715, "learning_rate": 9.553606105498732e-06, "loss": 0.6711, "step": 1676 }, { "epoch": 0.18, "grad_norm": 1.7539774270063693, "learning_rate": 9.552877355057882e-06, "loss": 0.6396, "step": 1677 }, { "epoch": 0.18, "grad_norm": 1.9771233459645476, "learning_rate": 9.552148038090425e-06, "loss": 0.6365, "step": 1678 }, { "epoch": 0.18, "grad_norm": 2.0487060995246025, "learning_rate": 9.551418154687112e-06, "loss": 0.7058, "step": 1679 }, { "epoch": 0.18, "grad_norm": 1.8230459374561487, "learning_rate": 9.550687704938765e-06, "loss": 0.7282, "step": 1680 }, { "epoch": 0.18, "grad_norm": 1.793172344239564, "learning_rate": 9.54995668893628e-06, "loss": 0.6065, "step": 1681 }, { "epoch": 0.18, "grad_norm": 1.8368032636798306, "learning_rate": 9.54922510677061e-06, "loss": 0.6543, "step": 1682 }, { "epoch": 0.18, "grad_norm": 1.6846421232266413, "learning_rate": 9.548492958532795e-06, "loss": 0.6361, "step": 1683 }, { "epoch": 0.18, "grad_norm": 1.96968123885379, "learning_rate": 9.547760244313935e-06, "loss": 0.6997, "step": 1684 }, { "epoch": 0.18, "grad_norm": 1.3811197567960203, "learning_rate": 9.547026964205207e-06, "loss": 0.5789, "step": 1685 }, { "epoch": 0.18, "grad_norm": 1.818242296170776, "learning_rate": 9.54629311829785e-06, "loss": 0.7231, "step": 1686 }, { "epoch": 0.18, "grad_norm": 1.9479668317353418, "learning_rate": 9.54555870668318e-06, "loss": 0.755, "step": 1687 }, { "epoch": 0.18, "grad_norm": 1.710210722931027, "learning_rate": 9.544823729452584e-06, "loss": 0.6794, "step": 1688 }, { "epoch": 0.18, "grad_norm": 1.3383711439936197, "learning_rate": 9.544088186697515e-06, "loss": 0.5653, "step": 1689 }, { "epoch": 0.18, "grad_norm": 1.6073142992175533, "learning_rate": 9.5433520785095e-06, "loss": 0.6422, "step": 1690 }, { "epoch": 0.18, "grad_norm": 1.9381932293375677, "learning_rate": 9.542615404980135e-06, "loss": 0.7807, "step": 1691 }, { "epoch": 0.18, "grad_norm": 2.0786829417518273, "learning_rate": 9.541878166201084e-06, "loss": 0.7157, "step": 1692 }, { "epoch": 0.18, "grad_norm": 1.9378823098815765, "learning_rate": 9.541140362264086e-06, "loss": 0.7127, "step": 1693 }, { "epoch": 0.18, "grad_norm": 2.1501458105076394, "learning_rate": 9.540401993260946e-06, "loss": 0.6695, "step": 1694 }, { "epoch": 0.18, "grad_norm": 2.0282909882669977, "learning_rate": 9.539663059283543e-06, "loss": 0.8236, "step": 1695 }, { "epoch": 0.18, "grad_norm": 1.9101264787591925, "learning_rate": 9.538923560423822e-06, "loss": 0.5877, "step": 1696 }, { "epoch": 0.18, "grad_norm": 1.5777805723375624, "learning_rate": 9.538183496773805e-06, "loss": 0.5433, "step": 1697 }, { "epoch": 0.18, "grad_norm": 1.7705335559078064, "learning_rate": 9.537442868425575e-06, "loss": 0.7005, "step": 1698 }, { "epoch": 0.18, "grad_norm": 1.9677697251790185, "learning_rate": 9.536701675471297e-06, "loss": 0.7587, "step": 1699 }, { "epoch": 0.18, "grad_norm": 1.9760213177433106, "learning_rate": 9.535959918003191e-06, "loss": 0.6074, "step": 1700 }, { "epoch": 0.18, "grad_norm": 1.841678681921242, "learning_rate": 9.535217596113561e-06, "loss": 0.6248, "step": 1701 }, { "epoch": 0.18, "grad_norm": 2.0546525273709806, "learning_rate": 9.534474709894778e-06, "loss": 0.673, "step": 1702 }, { "epoch": 0.18, "grad_norm": 2.5889056859260595, "learning_rate": 9.533731259439276e-06, "loss": 0.698, "step": 1703 }, { "epoch": 0.18, "grad_norm": 1.674140619492669, "learning_rate": 9.532987244839569e-06, "loss": 0.6863, "step": 1704 }, { "epoch": 0.18, "grad_norm": 1.9405374365025199, "learning_rate": 9.532242666188233e-06, "loss": 0.5909, "step": 1705 }, { "epoch": 0.18, "grad_norm": 1.8871643759810277, "learning_rate": 9.531497523577923e-06, "loss": 0.6975, "step": 1706 }, { "epoch": 0.18, "grad_norm": 1.7613510718290994, "learning_rate": 9.530751817101354e-06, "loss": 0.6849, "step": 1707 }, { "epoch": 0.18, "grad_norm": 1.7709409453057858, "learning_rate": 9.530005546851319e-06, "loss": 0.5623, "step": 1708 }, { "epoch": 0.18, "grad_norm": 3.2820431240256243, "learning_rate": 9.529258712920676e-06, "loss": 0.6104, "step": 1709 }, { "epoch": 0.18, "grad_norm": 2.801375170779831, "learning_rate": 9.528511315402358e-06, "loss": 0.5944, "step": 1710 }, { "epoch": 0.18, "grad_norm": 2.0609541192098395, "learning_rate": 9.527763354389365e-06, "loss": 0.7998, "step": 1711 }, { "epoch": 0.18, "grad_norm": 2.1760692130735975, "learning_rate": 9.527014829974768e-06, "loss": 0.7603, "step": 1712 }, { "epoch": 0.18, "grad_norm": 2.42329205270534, "learning_rate": 9.526265742251705e-06, "loss": 0.6052, "step": 1713 }, { "epoch": 0.18, "grad_norm": 1.9981620722524376, "learning_rate": 9.525516091313392e-06, "loss": 0.7042, "step": 1714 }, { "epoch": 0.18, "grad_norm": 2.147611573603305, "learning_rate": 9.524765877253106e-06, "loss": 0.6447, "step": 1715 }, { "epoch": 0.18, "grad_norm": 1.9274798055364277, "learning_rate": 9.524015100164199e-06, "loss": 0.5831, "step": 1716 }, { "epoch": 0.18, "grad_norm": 2.0841718774790166, "learning_rate": 9.523263760140094e-06, "loss": 0.5617, "step": 1717 }, { "epoch": 0.18, "grad_norm": 1.9513853002017323, "learning_rate": 9.522511857274281e-06, "loss": 0.6292, "step": 1718 }, { "epoch": 0.18, "grad_norm": 2.048845909568649, "learning_rate": 9.52175939166032e-06, "loss": 0.6426, "step": 1719 }, { "epoch": 0.18, "grad_norm": 2.1283024950491956, "learning_rate": 9.521006363391845e-06, "loss": 0.6018, "step": 1720 }, { "epoch": 0.18, "grad_norm": 2.018408115760628, "learning_rate": 9.520252772562556e-06, "loss": 0.7879, "step": 1721 }, { "epoch": 0.18, "grad_norm": 2.0442425350444866, "learning_rate": 9.519498619266222e-06, "loss": 0.7895, "step": 1722 }, { "epoch": 0.18, "grad_norm": 2.099410163327942, "learning_rate": 9.518743903596688e-06, "loss": 0.6517, "step": 1723 }, { "epoch": 0.18, "grad_norm": 2.0280340513550303, "learning_rate": 9.517988625647862e-06, "loss": 0.7019, "step": 1724 }, { "epoch": 0.18, "grad_norm": 1.765039946992357, "learning_rate": 9.517232785513729e-06, "loss": 0.7487, "step": 1725 }, { "epoch": 0.18, "grad_norm": 1.8461563237822072, "learning_rate": 9.516476383288338e-06, "loss": 0.7084, "step": 1726 }, { "epoch": 0.18, "grad_norm": 2.0355775043822253, "learning_rate": 9.51571941906581e-06, "loss": 0.7507, "step": 1727 }, { "epoch": 0.18, "grad_norm": 1.7571325270296767, "learning_rate": 9.514961892940335e-06, "loss": 0.6484, "step": 1728 }, { "epoch": 0.18, "grad_norm": 2.108930305710261, "learning_rate": 9.514203805006177e-06, "loss": 0.7234, "step": 1729 }, { "epoch": 0.18, "grad_norm": 1.8068466311784155, "learning_rate": 9.513445155357663e-06, "loss": 0.6077, "step": 1730 }, { "epoch": 0.18, "grad_norm": 1.7766333702423054, "learning_rate": 9.512685944089198e-06, "loss": 0.663, "step": 1731 }, { "epoch": 0.18, "grad_norm": 1.9301934572440793, "learning_rate": 9.51192617129525e-06, "loss": 0.6933, "step": 1732 }, { "epoch": 0.18, "grad_norm": 2.2326135588650025, "learning_rate": 9.51116583707036e-06, "loss": 0.6058, "step": 1733 }, { "epoch": 0.18, "grad_norm": 1.7974968801098583, "learning_rate": 9.51040494150914e-06, "loss": 0.6681, "step": 1734 }, { "epoch": 0.19, "grad_norm": 1.9848040445711874, "learning_rate": 9.509643484706268e-06, "loss": 0.6883, "step": 1735 }, { "epoch": 0.19, "grad_norm": 2.067037806703393, "learning_rate": 9.508881466756494e-06, "loss": 0.7476, "step": 1736 }, { "epoch": 0.19, "grad_norm": 1.4534672803491817, "learning_rate": 9.50811888775464e-06, "loss": 0.5825, "step": 1737 }, { "epoch": 0.19, "grad_norm": 1.9287876788866254, "learning_rate": 9.507355747795592e-06, "loss": 0.6497, "step": 1738 }, { "epoch": 0.19, "grad_norm": 1.2389159671019574, "learning_rate": 9.506592046974316e-06, "loss": 0.5885, "step": 1739 }, { "epoch": 0.19, "grad_norm": 1.6710060292085822, "learning_rate": 9.505827785385835e-06, "loss": 0.6041, "step": 1740 }, { "epoch": 0.19, "grad_norm": 1.1862847282033846, "learning_rate": 9.505062963125251e-06, "loss": 0.5656, "step": 1741 }, { "epoch": 0.19, "grad_norm": 1.8833800005290802, "learning_rate": 9.504297580287734e-06, "loss": 0.723, "step": 1742 }, { "epoch": 0.19, "grad_norm": 1.9157513924957208, "learning_rate": 9.50353163696852e-06, "loss": 0.7426, "step": 1743 }, { "epoch": 0.19, "grad_norm": 1.3327208442199825, "learning_rate": 9.502765133262917e-06, "loss": 0.573, "step": 1744 }, { "epoch": 0.19, "grad_norm": 1.7036230241329469, "learning_rate": 9.501998069266305e-06, "loss": 0.6556, "step": 1745 }, { "epoch": 0.19, "grad_norm": 1.92586191022713, "learning_rate": 9.501230445074131e-06, "loss": 0.7639, "step": 1746 }, { "epoch": 0.19, "grad_norm": 1.3111597970072446, "learning_rate": 9.500462260781915e-06, "loss": 0.5309, "step": 1747 }, { "epoch": 0.19, "grad_norm": 2.0714448263906116, "learning_rate": 9.49969351648524e-06, "loss": 0.6509, "step": 1748 }, { "epoch": 0.19, "grad_norm": 1.195148097252327, "learning_rate": 9.498924212279766e-06, "loss": 0.5641, "step": 1749 }, { "epoch": 0.19, "grad_norm": 1.6402951445694514, "learning_rate": 9.498154348261217e-06, "loss": 0.697, "step": 1750 }, { "epoch": 0.19, "grad_norm": 1.6571409252086242, "learning_rate": 9.497383924525391e-06, "loss": 0.6589, "step": 1751 }, { "epoch": 0.19, "grad_norm": 1.8190691571340272, "learning_rate": 9.496612941168155e-06, "loss": 0.6571, "step": 1752 }, { "epoch": 0.19, "grad_norm": 1.795743601562123, "learning_rate": 9.495841398285443e-06, "loss": 0.6217, "step": 1753 }, { "epoch": 0.19, "grad_norm": 1.6496411326880682, "learning_rate": 9.495069295973258e-06, "loss": 0.5918, "step": 1754 }, { "epoch": 0.19, "grad_norm": 1.930031023111735, "learning_rate": 9.494296634327679e-06, "loss": 0.7063, "step": 1755 }, { "epoch": 0.19, "grad_norm": 1.8394000033330737, "learning_rate": 9.493523413444848e-06, "loss": 0.6533, "step": 1756 }, { "epoch": 0.19, "grad_norm": 1.7902468319173932, "learning_rate": 9.49274963342098e-06, "loss": 0.6212, "step": 1757 }, { "epoch": 0.19, "grad_norm": 1.8156524332721098, "learning_rate": 9.491975294352355e-06, "loss": 0.6398, "step": 1758 }, { "epoch": 0.19, "grad_norm": 1.5621479415899644, "learning_rate": 9.49120039633533e-06, "loss": 0.6442, "step": 1759 }, { "epoch": 0.19, "grad_norm": 1.9151189506902355, "learning_rate": 9.490424939466326e-06, "loss": 0.5897, "step": 1760 }, { "epoch": 0.19, "grad_norm": 1.7820491002672096, "learning_rate": 9.489648923841837e-06, "loss": 0.6737, "step": 1761 }, { "epoch": 0.19, "grad_norm": 1.9613240895282904, "learning_rate": 9.488872349558422e-06, "loss": 0.669, "step": 1762 }, { "epoch": 0.19, "grad_norm": 1.7888933682195187, "learning_rate": 9.488095216712713e-06, "loss": 0.6818, "step": 1763 }, { "epoch": 0.19, "grad_norm": 1.7035542756910627, "learning_rate": 9.487317525401411e-06, "loss": 0.59, "step": 1764 }, { "epoch": 0.19, "grad_norm": 1.9367065858523043, "learning_rate": 9.486539275721288e-06, "loss": 0.7238, "step": 1765 }, { "epoch": 0.19, "grad_norm": 1.6861781845119463, "learning_rate": 9.48576046776918e-06, "loss": 0.5895, "step": 1766 }, { "epoch": 0.19, "grad_norm": 1.82473527991064, "learning_rate": 9.484981101641998e-06, "loss": 0.629, "step": 1767 }, { "epoch": 0.19, "grad_norm": 1.8626978084640322, "learning_rate": 9.484201177436722e-06, "loss": 0.5481, "step": 1768 }, { "epoch": 0.19, "grad_norm": 1.956751535227598, "learning_rate": 9.4834206952504e-06, "loss": 0.7257, "step": 1769 }, { "epoch": 0.19, "grad_norm": 2.122473219867544, "learning_rate": 9.482639655180145e-06, "loss": 0.7478, "step": 1770 }, { "epoch": 0.19, "grad_norm": 1.8682184048574262, "learning_rate": 9.48185805732315e-06, "loss": 0.6414, "step": 1771 }, { "epoch": 0.19, "grad_norm": 1.9247598451115815, "learning_rate": 9.481075901776668e-06, "loss": 0.7164, "step": 1772 }, { "epoch": 0.19, "grad_norm": 1.8166618961219907, "learning_rate": 9.480293188638024e-06, "loss": 0.6662, "step": 1773 }, { "epoch": 0.19, "grad_norm": 1.7746389975382797, "learning_rate": 9.479509918004614e-06, "loss": 0.7569, "step": 1774 }, { "epoch": 0.19, "grad_norm": 1.7642776697795197, "learning_rate": 9.478726089973902e-06, "loss": 0.6666, "step": 1775 }, { "epoch": 0.19, "grad_norm": 1.641465000193083, "learning_rate": 9.477941704643424e-06, "loss": 0.5925, "step": 1776 }, { "epoch": 0.19, "grad_norm": 1.619287679665088, "learning_rate": 9.47715676211078e-06, "loss": 0.5642, "step": 1777 }, { "epoch": 0.19, "grad_norm": 1.9118115974552554, "learning_rate": 9.476371262473645e-06, "loss": 0.6217, "step": 1778 }, { "epoch": 0.19, "grad_norm": 2.2351225170345046, "learning_rate": 9.47558520582976e-06, "loss": 0.7835, "step": 1779 }, { "epoch": 0.19, "grad_norm": 1.8371064034115698, "learning_rate": 9.474798592276935e-06, "loss": 0.7005, "step": 1780 }, { "epoch": 0.19, "grad_norm": 2.1151776486135563, "learning_rate": 9.474011421913052e-06, "loss": 0.7855, "step": 1781 }, { "epoch": 0.19, "grad_norm": 1.9076150466925366, "learning_rate": 9.47322369483606e-06, "loss": 0.6324, "step": 1782 }, { "epoch": 0.19, "grad_norm": 1.304252220417805, "learning_rate": 9.472435411143979e-06, "loss": 0.5766, "step": 1783 }, { "epoch": 0.19, "grad_norm": 1.874598817516213, "learning_rate": 9.471646570934894e-06, "loss": 0.6652, "step": 1784 }, { "epoch": 0.19, "grad_norm": 1.7464043580717865, "learning_rate": 9.470857174306967e-06, "loss": 0.6713, "step": 1785 }, { "epoch": 0.19, "grad_norm": 1.9734270048836386, "learning_rate": 9.470067221358421e-06, "loss": 0.7557, "step": 1786 }, { "epoch": 0.19, "grad_norm": 2.0651431688109785, "learning_rate": 9.469276712187554e-06, "loss": 0.588, "step": 1787 }, { "epoch": 0.19, "grad_norm": 1.8335911396424047, "learning_rate": 9.468485646892731e-06, "loss": 0.7383, "step": 1788 }, { "epoch": 0.19, "grad_norm": 1.9373865065241542, "learning_rate": 9.467694025572385e-06, "loss": 0.7016, "step": 1789 }, { "epoch": 0.19, "grad_norm": 1.9401769429693425, "learning_rate": 9.46690184832502e-06, "loss": 0.7694, "step": 1790 }, { "epoch": 0.19, "grad_norm": 1.954112599289977, "learning_rate": 9.46610911524921e-06, "loss": 0.729, "step": 1791 }, { "epoch": 0.19, "grad_norm": 1.6101759571847691, "learning_rate": 9.465315826443596e-06, "loss": 0.5893, "step": 1792 }, { "epoch": 0.19, "grad_norm": 1.8971880445509266, "learning_rate": 9.464521982006888e-06, "loss": 0.7335, "step": 1793 }, { "epoch": 0.19, "grad_norm": 1.8166159423386878, "learning_rate": 9.46372758203787e-06, "loss": 0.7383, "step": 1794 }, { "epoch": 0.19, "grad_norm": 1.7913435623009413, "learning_rate": 9.462932626635386e-06, "loss": 0.7115, "step": 1795 }, { "epoch": 0.19, "grad_norm": 1.9964502748142245, "learning_rate": 9.462137115898356e-06, "loss": 0.6451, "step": 1796 }, { "epoch": 0.19, "grad_norm": 1.9913975547002762, "learning_rate": 9.461341049925771e-06, "loss": 0.7697, "step": 1797 }, { "epoch": 0.19, "grad_norm": 2.082405943653714, "learning_rate": 9.460544428816683e-06, "loss": 0.6903, "step": 1798 }, { "epoch": 0.19, "grad_norm": 1.8390198612266553, "learning_rate": 9.459747252670222e-06, "loss": 0.6809, "step": 1799 }, { "epoch": 0.19, "grad_norm": 2.060281661834327, "learning_rate": 9.458949521585578e-06, "loss": 0.6259, "step": 1800 }, { "epoch": 0.19, "grad_norm": 1.561961623074151, "learning_rate": 9.458151235662018e-06, "loss": 0.5834, "step": 1801 }, { "epoch": 0.19, "grad_norm": 1.63995876968034, "learning_rate": 9.457352394998876e-06, "loss": 0.6273, "step": 1802 }, { "epoch": 0.19, "grad_norm": 1.835788197063427, "learning_rate": 9.45655299969555e-06, "loss": 0.6612, "step": 1803 }, { "epoch": 0.19, "grad_norm": 1.7653083728475083, "learning_rate": 9.455753049851512e-06, "loss": 0.6858, "step": 1804 }, { "epoch": 0.19, "grad_norm": 4.195224249854875, "learning_rate": 9.454952545566305e-06, "loss": 0.7677, "step": 1805 }, { "epoch": 0.19, "grad_norm": 1.286560313960254, "learning_rate": 9.454151486939535e-06, "loss": 0.5518, "step": 1806 }, { "epoch": 0.19, "grad_norm": 1.7291175591364814, "learning_rate": 9.45334987407088e-06, "loss": 0.6882, "step": 1807 }, { "epoch": 0.19, "grad_norm": 1.953611796258017, "learning_rate": 9.452547707060087e-06, "loss": 0.7403, "step": 1808 }, { "epoch": 0.19, "grad_norm": 1.6382301695975878, "learning_rate": 9.451744986006973e-06, "loss": 0.7132, "step": 1809 }, { "epoch": 0.19, "grad_norm": 1.3230628464042669, "learning_rate": 9.45094171101142e-06, "loss": 0.5901, "step": 1810 }, { "epoch": 0.19, "grad_norm": 1.7766352260790825, "learning_rate": 9.450137882173385e-06, "loss": 0.6801, "step": 1811 }, { "epoch": 0.19, "grad_norm": 1.9213232029934435, "learning_rate": 9.449333499592888e-06, "loss": 0.7295, "step": 1812 }, { "epoch": 0.19, "grad_norm": 2.072302640937305, "learning_rate": 9.44852856337002e-06, "loss": 0.7486, "step": 1813 }, { "epoch": 0.19, "grad_norm": 1.8144825011938748, "learning_rate": 9.447723073604945e-06, "loss": 0.7294, "step": 1814 }, { "epoch": 0.19, "grad_norm": 1.9331925303552884, "learning_rate": 9.446917030397889e-06, "loss": 0.6735, "step": 1815 }, { "epoch": 0.19, "grad_norm": 1.854727070504025, "learning_rate": 9.446110433849152e-06, "loss": 0.6304, "step": 1816 }, { "epoch": 0.19, "grad_norm": 1.9071417177252767, "learning_rate": 9.4453032840591e-06, "loss": 0.7329, "step": 1817 }, { "epoch": 0.19, "grad_norm": 1.7824306827755048, "learning_rate": 9.444495581128169e-06, "loss": 0.69, "step": 1818 }, { "epoch": 0.19, "grad_norm": 1.787111794685336, "learning_rate": 9.44368732515686e-06, "loss": 0.5905, "step": 1819 }, { "epoch": 0.19, "grad_norm": 1.8272761515215987, "learning_rate": 9.442878516245753e-06, "loss": 0.6482, "step": 1820 }, { "epoch": 0.19, "grad_norm": 1.8142602146180786, "learning_rate": 9.442069154495487e-06, "loss": 0.5892, "step": 1821 }, { "epoch": 0.19, "grad_norm": 1.812700371242439, "learning_rate": 9.441259240006772e-06, "loss": 0.6816, "step": 1822 }, { "epoch": 0.19, "grad_norm": 2.2250282533149544, "learning_rate": 9.44044877288039e-06, "loss": 0.6777, "step": 1823 }, { "epoch": 0.19, "grad_norm": 1.8342019167121861, "learning_rate": 9.439637753217187e-06, "loss": 0.6655, "step": 1824 }, { "epoch": 0.19, "grad_norm": 1.8903524561900709, "learning_rate": 9.438826181118083e-06, "loss": 0.7342, "step": 1825 }, { "epoch": 0.19, "grad_norm": 1.7359184991269447, "learning_rate": 9.438014056684063e-06, "loss": 0.5613, "step": 1826 }, { "epoch": 0.19, "grad_norm": 1.9743571059512421, "learning_rate": 9.437201380016181e-06, "loss": 0.6888, "step": 1827 }, { "epoch": 0.19, "grad_norm": 1.8167940160585407, "learning_rate": 9.436388151215561e-06, "loss": 0.7086, "step": 1828 }, { "epoch": 0.2, "grad_norm": 1.7249213129731162, "learning_rate": 9.435574370383397e-06, "loss": 0.6106, "step": 1829 }, { "epoch": 0.2, "grad_norm": 2.0100577516692213, "learning_rate": 9.434760037620947e-06, "loss": 0.7102, "step": 1830 }, { "epoch": 0.2, "grad_norm": 1.7666452492796878, "learning_rate": 9.433945153029542e-06, "loss": 0.6519, "step": 1831 }, { "epoch": 0.2, "grad_norm": 1.8323777778538395, "learning_rate": 9.433129716710581e-06, "loss": 0.6718, "step": 1832 }, { "epoch": 0.2, "grad_norm": 2.0058688847294905, "learning_rate": 9.43231372876553e-06, "loss": 0.7521, "step": 1833 }, { "epoch": 0.2, "grad_norm": 1.7646208782707302, "learning_rate": 9.431497189295922e-06, "loss": 0.5623, "step": 1834 }, { "epoch": 0.2, "grad_norm": 1.9123463289537668, "learning_rate": 9.430680098403366e-06, "loss": 0.7068, "step": 1835 }, { "epoch": 0.2, "grad_norm": 1.1900242818200129, "learning_rate": 9.429862456189533e-06, "loss": 0.5429, "step": 1836 }, { "epoch": 0.2, "grad_norm": 1.835903369073257, "learning_rate": 9.429044262756164e-06, "loss": 0.8426, "step": 1837 }, { "epoch": 0.2, "grad_norm": 1.8963539758902206, "learning_rate": 9.428225518205068e-06, "loss": 0.7277, "step": 1838 }, { "epoch": 0.2, "grad_norm": 1.954932924517611, "learning_rate": 9.427406222638125e-06, "loss": 0.7244, "step": 1839 }, { "epoch": 0.2, "grad_norm": 2.0268987546638964, "learning_rate": 9.42658637615728e-06, "loss": 0.6932, "step": 1840 }, { "epoch": 0.2, "grad_norm": 1.7129663749476305, "learning_rate": 9.425765978864552e-06, "loss": 0.6437, "step": 1841 }, { "epoch": 0.2, "grad_norm": 1.6549805969945772, "learning_rate": 9.424945030862023e-06, "loss": 0.7091, "step": 1842 }, { "epoch": 0.2, "grad_norm": 1.9565165959996171, "learning_rate": 9.424123532251846e-06, "loss": 0.7285, "step": 1843 }, { "epoch": 0.2, "grad_norm": 1.6952539646063494, "learning_rate": 9.423301483136244e-06, "loss": 0.6274, "step": 1844 }, { "epoch": 0.2, "grad_norm": 2.0587571067113144, "learning_rate": 9.422478883617503e-06, "loss": 0.5754, "step": 1845 }, { "epoch": 0.2, "grad_norm": 1.9362652358161112, "learning_rate": 9.421655733797985e-06, "loss": 0.622, "step": 1846 }, { "epoch": 0.2, "grad_norm": 2.042121305030732, "learning_rate": 9.420832033780115e-06, "loss": 0.7476, "step": 1847 }, { "epoch": 0.2, "grad_norm": 1.342704137796963, "learning_rate": 9.420007783666387e-06, "loss": 0.5458, "step": 1848 }, { "epoch": 0.2, "grad_norm": 1.857311694227123, "learning_rate": 9.419182983559368e-06, "loss": 0.6965, "step": 1849 }, { "epoch": 0.2, "grad_norm": 1.9118447951318105, "learning_rate": 9.418357633561688e-06, "loss": 0.71, "step": 1850 }, { "epoch": 0.2, "grad_norm": 2.031883096037389, "learning_rate": 9.417531733776046e-06, "loss": 0.7331, "step": 1851 }, { "epoch": 0.2, "grad_norm": 1.6188009138581918, "learning_rate": 9.416705284305212e-06, "loss": 0.5726, "step": 1852 }, { "epoch": 0.2, "grad_norm": 1.5294057233822491, "learning_rate": 9.415878285252025e-06, "loss": 0.5615, "step": 1853 }, { "epoch": 0.2, "grad_norm": 1.6538504358428738, "learning_rate": 9.415050736719392e-06, "loss": 0.655, "step": 1854 }, { "epoch": 0.2, "grad_norm": 1.7036198560958757, "learning_rate": 9.414222638810279e-06, "loss": 0.6311, "step": 1855 }, { "epoch": 0.2, "grad_norm": 2.4942941486358814, "learning_rate": 9.413393991627737e-06, "loss": 0.8021, "step": 1856 }, { "epoch": 0.2, "grad_norm": 1.7023947758893057, "learning_rate": 9.412564795274874e-06, "loss": 0.6647, "step": 1857 }, { "epoch": 0.2, "grad_norm": 2.6734109942237057, "learning_rate": 9.41173504985487e-06, "loss": 0.6062, "step": 1858 }, { "epoch": 0.2, "grad_norm": 1.7064630220651724, "learning_rate": 9.410904755470968e-06, "loss": 0.5934, "step": 1859 }, { "epoch": 0.2, "grad_norm": 2.227606336904033, "learning_rate": 9.410073912226489e-06, "loss": 0.6804, "step": 1860 }, { "epoch": 0.2, "grad_norm": 1.8179345288817221, "learning_rate": 9.409242520224813e-06, "loss": 0.6542, "step": 1861 }, { "epoch": 0.2, "grad_norm": 1.680338323376156, "learning_rate": 9.408410579569396e-06, "loss": 0.6191, "step": 1862 }, { "epoch": 0.2, "grad_norm": 1.7950035357784413, "learning_rate": 9.407578090363758e-06, "loss": 0.6055, "step": 1863 }, { "epoch": 0.2, "grad_norm": 2.0252050073015475, "learning_rate": 9.406745052711483e-06, "loss": 0.6788, "step": 1864 }, { "epoch": 0.2, "grad_norm": 1.896068459880187, "learning_rate": 9.405911466716236e-06, "loss": 0.6635, "step": 1865 }, { "epoch": 0.2, "grad_norm": 2.6937524491975227, "learning_rate": 9.405077332481736e-06, "loss": 0.5818, "step": 1866 }, { "epoch": 0.2, "grad_norm": 1.7083139778325078, "learning_rate": 9.404242650111777e-06, "loss": 0.7514, "step": 1867 }, { "epoch": 0.2, "grad_norm": 1.810118995654297, "learning_rate": 9.403407419710226e-06, "loss": 0.7454, "step": 1868 }, { "epoch": 0.2, "grad_norm": 2.0895544082891937, "learning_rate": 9.402571641381006e-06, "loss": 0.6298, "step": 1869 }, { "epoch": 0.2, "grad_norm": 1.8440586396158987, "learning_rate": 9.40173531522812e-06, "loss": 0.7474, "step": 1870 }, { "epoch": 0.2, "grad_norm": 1.7847184631247037, "learning_rate": 9.400898441355633e-06, "loss": 0.6639, "step": 1871 }, { "epoch": 0.2, "grad_norm": 1.9060065617291997, "learning_rate": 9.40006101986768e-06, "loss": 0.6857, "step": 1872 }, { "epoch": 0.2, "grad_norm": 1.935463123302034, "learning_rate": 9.39922305086846e-06, "loss": 0.7478, "step": 1873 }, { "epoch": 0.2, "grad_norm": 1.8886763710199888, "learning_rate": 9.398384534462248e-06, "loss": 0.6316, "step": 1874 }, { "epoch": 0.2, "grad_norm": 1.9987667986247597, "learning_rate": 9.397545470753383e-06, "loss": 0.6461, "step": 1875 }, { "epoch": 0.2, "grad_norm": 1.9125000419895117, "learning_rate": 9.396705859846267e-06, "loss": 0.7311, "step": 1876 }, { "epoch": 0.2, "grad_norm": 1.6606722220508279, "learning_rate": 9.39586570184538e-06, "loss": 0.612, "step": 1877 }, { "epoch": 0.2, "grad_norm": 2.1154544618954754, "learning_rate": 9.395024996855262e-06, "loss": 0.6945, "step": 1878 }, { "epoch": 0.2, "grad_norm": 1.960421888591534, "learning_rate": 9.394183744980526e-06, "loss": 0.6311, "step": 1879 }, { "epoch": 0.2, "grad_norm": 1.6505982320308465, "learning_rate": 9.393341946325852e-06, "loss": 0.575, "step": 1880 }, { "epoch": 0.2, "grad_norm": 1.8936526616821587, "learning_rate": 9.392499600995984e-06, "loss": 0.6998, "step": 1881 }, { "epoch": 0.2, "grad_norm": 1.9752370714536203, "learning_rate": 9.39165670909574e-06, "loss": 0.6938, "step": 1882 }, { "epoch": 0.2, "grad_norm": 1.7173924296604874, "learning_rate": 9.390813270730001e-06, "loss": 0.6736, "step": 1883 }, { "epoch": 0.2, "grad_norm": 1.7092525300754213, "learning_rate": 9.389969286003722e-06, "loss": 0.642, "step": 1884 }, { "epoch": 0.2, "grad_norm": 1.7128452289406249, "learning_rate": 9.389124755021918e-06, "loss": 0.7079, "step": 1885 }, { "epoch": 0.2, "grad_norm": 1.936436093369331, "learning_rate": 9.38827967788968e-06, "loss": 0.7064, "step": 1886 }, { "epoch": 0.2, "grad_norm": 1.6900743361533983, "learning_rate": 9.387434054712161e-06, "loss": 0.6719, "step": 1887 }, { "epoch": 0.2, "grad_norm": 1.9965784881629967, "learning_rate": 9.386587885594583e-06, "loss": 0.6859, "step": 1888 }, { "epoch": 0.2, "grad_norm": 1.8416580193489747, "learning_rate": 9.385741170642239e-06, "loss": 0.613, "step": 1889 }, { "epoch": 0.2, "grad_norm": 1.9366700976492632, "learning_rate": 9.384893909960488e-06, "loss": 0.7148, "step": 1890 }, { "epoch": 0.2, "grad_norm": 1.8463476505991945, "learning_rate": 9.384046103654758e-06, "loss": 0.6603, "step": 1891 }, { "epoch": 0.2, "grad_norm": 1.8428893092002343, "learning_rate": 9.38319775183054e-06, "loss": 0.7285, "step": 1892 }, { "epoch": 0.2, "grad_norm": 1.865458665268008, "learning_rate": 9.382348854593399e-06, "loss": 0.7131, "step": 1893 }, { "epoch": 0.2, "grad_norm": 1.9572717482812196, "learning_rate": 9.381499412048966e-06, "loss": 0.6406, "step": 1894 }, { "epoch": 0.2, "grad_norm": 1.7831006895222115, "learning_rate": 9.38064942430294e-06, "loss": 0.6084, "step": 1895 }, { "epoch": 0.2, "grad_norm": 2.034168257048498, "learning_rate": 9.379798891461085e-06, "loss": 0.5759, "step": 1896 }, { "epoch": 0.2, "grad_norm": 1.8618705186064979, "learning_rate": 9.378947813629235e-06, "loss": 0.7455, "step": 1897 }, { "epoch": 0.2, "grad_norm": 1.8366523721490469, "learning_rate": 9.378096190913295e-06, "loss": 0.7525, "step": 1898 }, { "epoch": 0.2, "grad_norm": 1.8042413034196498, "learning_rate": 9.377244023419232e-06, "loss": 0.6349, "step": 1899 }, { "epoch": 0.2, "grad_norm": 1.3280517228512538, "learning_rate": 9.376391311253084e-06, "loss": 0.5613, "step": 1900 }, { "epoch": 0.2, "grad_norm": 1.8706454436316566, "learning_rate": 9.375538054520957e-06, "loss": 0.6443, "step": 1901 }, { "epoch": 0.2, "grad_norm": 1.9993960288090267, "learning_rate": 9.374684253329021e-06, "loss": 0.6453, "step": 1902 }, { "epoch": 0.2, "grad_norm": 1.84516854587555, "learning_rate": 9.37382990778352e-06, "loss": 0.6121, "step": 1903 }, { "epoch": 0.2, "grad_norm": 1.858604518345387, "learning_rate": 9.372975017990764e-06, "loss": 0.7149, "step": 1904 }, { "epoch": 0.2, "grad_norm": 1.7257702681053135, "learning_rate": 9.372119584057125e-06, "loss": 0.7311, "step": 1905 }, { "epoch": 0.2, "grad_norm": 2.026633452461654, "learning_rate": 9.371263606089047e-06, "loss": 0.7422, "step": 1906 }, { "epoch": 0.2, "grad_norm": 1.8569246933734986, "learning_rate": 9.370407084193046e-06, "loss": 0.6506, "step": 1907 }, { "epoch": 0.2, "grad_norm": 1.9226040147545094, "learning_rate": 9.369550018475697e-06, "loss": 0.7094, "step": 1908 }, { "epoch": 0.2, "grad_norm": 1.8628269823611865, "learning_rate": 9.368692409043649e-06, "loss": 0.6363, "step": 1909 }, { "epoch": 0.2, "grad_norm": 1.7590325903453876, "learning_rate": 9.367834256003618e-06, "loss": 0.634, "step": 1910 }, { "epoch": 0.2, "grad_norm": 1.7961917238212388, "learning_rate": 9.366975559462382e-06, "loss": 0.5787, "step": 1911 }, { "epoch": 0.2, "grad_norm": 1.865510513612434, "learning_rate": 9.366116319526795e-06, "loss": 0.6525, "step": 1912 }, { "epoch": 0.2, "grad_norm": 1.2039916742901011, "learning_rate": 9.36525653630377e-06, "loss": 0.5592, "step": 1913 }, { "epoch": 0.2, "grad_norm": 2.104630529792118, "learning_rate": 9.364396209900298e-06, "loss": 0.6339, "step": 1914 }, { "epoch": 0.2, "grad_norm": 2.0323200756794852, "learning_rate": 9.36353534042343e-06, "loss": 0.7085, "step": 1915 }, { "epoch": 0.2, "grad_norm": 1.9971289036934148, "learning_rate": 9.36267392798028e-06, "loss": 0.7254, "step": 1916 }, { "epoch": 0.2, "grad_norm": 2.0269646389618927, "learning_rate": 9.361811972678043e-06, "loss": 0.6388, "step": 1917 }, { "epoch": 0.2, "grad_norm": 2.07427156981128, "learning_rate": 9.360949474623973e-06, "loss": 0.6471, "step": 1918 }, { "epoch": 0.2, "grad_norm": 1.7328493985305806, "learning_rate": 9.360086433925391e-06, "loss": 0.6156, "step": 1919 }, { "epoch": 0.2, "grad_norm": 1.7178211867423627, "learning_rate": 9.359222850689689e-06, "loss": 0.7563, "step": 1920 }, { "epoch": 0.2, "grad_norm": 2.035203833216075, "learning_rate": 9.358358725024324e-06, "loss": 0.7543, "step": 1921 }, { "epoch": 0.21, "grad_norm": 2.1500144070654383, "learning_rate": 9.357494057036822e-06, "loss": 0.7779, "step": 1922 }, { "epoch": 0.21, "grad_norm": 1.7739952749871801, "learning_rate": 9.356628846834777e-06, "loss": 0.6139, "step": 1923 }, { "epoch": 0.21, "grad_norm": 2.107252750631275, "learning_rate": 9.355763094525848e-06, "loss": 0.7244, "step": 1924 }, { "epoch": 0.21, "grad_norm": 1.9046527892073855, "learning_rate": 9.354896800217761e-06, "loss": 0.7981, "step": 1925 }, { "epoch": 0.21, "grad_norm": 1.94491275334351, "learning_rate": 9.354029964018316e-06, "loss": 0.6664, "step": 1926 }, { "epoch": 0.21, "grad_norm": 1.8650020822423734, "learning_rate": 9.353162586035372e-06, "loss": 0.7089, "step": 1927 }, { "epoch": 0.21, "grad_norm": 1.8346831907575016, "learning_rate": 9.35229466637686e-06, "loss": 0.8005, "step": 1928 }, { "epoch": 0.21, "grad_norm": 1.7888146702872552, "learning_rate": 9.351426205150778e-06, "loss": 0.6208, "step": 1929 }, { "epoch": 0.21, "grad_norm": 1.779440508158106, "learning_rate": 9.35055720246519e-06, "loss": 0.6306, "step": 1930 }, { "epoch": 0.21, "grad_norm": 1.8634506696054192, "learning_rate": 9.34968765842823e-06, "loss": 0.6764, "step": 1931 }, { "epoch": 0.21, "grad_norm": 1.8831125819916916, "learning_rate": 9.348817573148096e-06, "loss": 0.6997, "step": 1932 }, { "epoch": 0.21, "grad_norm": 1.922113145806042, "learning_rate": 9.347946946733055e-06, "loss": 0.8463, "step": 1933 }, { "epoch": 0.21, "grad_norm": 1.7464004884960895, "learning_rate": 9.347075779291444e-06, "loss": 0.7541, "step": 1934 }, { "epoch": 0.21, "grad_norm": 1.7328619297528538, "learning_rate": 9.346204070931662e-06, "loss": 0.6595, "step": 1935 }, { "epoch": 0.21, "grad_norm": 2.0458737327474754, "learning_rate": 9.345331821762178e-06, "loss": 0.6076, "step": 1936 }, { "epoch": 0.21, "grad_norm": 2.035992753086892, "learning_rate": 9.34445903189153e-06, "loss": 0.7265, "step": 1937 }, { "epoch": 0.21, "grad_norm": 1.9994222125072105, "learning_rate": 9.343585701428321e-06, "loss": 0.7651, "step": 1938 }, { "epoch": 0.21, "grad_norm": 1.599790738146068, "learning_rate": 9.34271183048122e-06, "loss": 0.5766, "step": 1939 }, { "epoch": 0.21, "grad_norm": 2.020870076359163, "learning_rate": 9.341837419158967e-06, "loss": 0.7114, "step": 1940 }, { "epoch": 0.21, "grad_norm": 1.9932871151583493, "learning_rate": 9.340962467570368e-06, "loss": 0.6589, "step": 1941 }, { "epoch": 0.21, "grad_norm": 1.8418685725298503, "learning_rate": 9.340086975824292e-06, "loss": 0.6439, "step": 1942 }, { "epoch": 0.21, "grad_norm": 1.8793498248542164, "learning_rate": 9.339210944029683e-06, "loss": 0.6802, "step": 1943 }, { "epoch": 0.21, "grad_norm": 2.1403485664252635, "learning_rate": 9.338334372295546e-06, "loss": 0.6972, "step": 1944 }, { "epoch": 0.21, "grad_norm": 1.9146269586554296, "learning_rate": 9.337457260730955e-06, "loss": 0.7388, "step": 1945 }, { "epoch": 0.21, "grad_norm": 1.8299385583747654, "learning_rate": 9.336579609445051e-06, "loss": 0.6683, "step": 1946 }, { "epoch": 0.21, "grad_norm": 1.6932587727715738, "learning_rate": 9.335701418547044e-06, "loss": 0.6038, "step": 1947 }, { "epoch": 0.21, "grad_norm": 1.8383561969652802, "learning_rate": 9.334822688146208e-06, "loss": 0.651, "step": 1948 }, { "epoch": 0.21, "grad_norm": 2.2168300806510115, "learning_rate": 9.333943418351885e-06, "loss": 0.7749, "step": 1949 }, { "epoch": 0.21, "grad_norm": 2.0572785268524383, "learning_rate": 9.333063609273488e-06, "loss": 0.795, "step": 1950 }, { "epoch": 0.21, "grad_norm": 1.9564033942424468, "learning_rate": 9.332183261020493e-06, "loss": 0.6134, "step": 1951 }, { "epoch": 0.21, "grad_norm": 1.8556784986195618, "learning_rate": 9.331302373702443e-06, "loss": 0.7545, "step": 1952 }, { "epoch": 0.21, "grad_norm": 1.7144246726538928, "learning_rate": 9.330420947428946e-06, "loss": 0.623, "step": 1953 }, { "epoch": 0.21, "grad_norm": 1.9097348908851384, "learning_rate": 9.329538982309687e-06, "loss": 0.6822, "step": 1954 }, { "epoch": 0.21, "grad_norm": 1.863149816008922, "learning_rate": 9.328656478454408e-06, "loss": 0.6323, "step": 1955 }, { "epoch": 0.21, "grad_norm": 2.072413223439683, "learning_rate": 9.32777343597292e-06, "loss": 0.6794, "step": 1956 }, { "epoch": 0.21, "grad_norm": 1.7623777883834222, "learning_rate": 9.326889854975105e-06, "loss": 0.5993, "step": 1957 }, { "epoch": 0.21, "grad_norm": 2.076019131257642, "learning_rate": 9.326005735570906e-06, "loss": 0.5751, "step": 1958 }, { "epoch": 0.21, "grad_norm": 1.906725507828874, "learning_rate": 9.325121077870338e-06, "loss": 0.6208, "step": 1959 }, { "epoch": 0.21, "grad_norm": 2.2392828364758506, "learning_rate": 9.324235881983484e-06, "loss": 0.6993, "step": 1960 }, { "epoch": 0.21, "grad_norm": 1.8317677966903563, "learning_rate": 9.323350148020488e-06, "loss": 0.5801, "step": 1961 }, { "epoch": 0.21, "grad_norm": 1.759306804234246, "learning_rate": 9.322463876091562e-06, "loss": 0.6687, "step": 1962 }, { "epoch": 0.21, "grad_norm": 1.9116911041666944, "learning_rate": 9.321577066306994e-06, "loss": 0.6355, "step": 1963 }, { "epoch": 0.21, "grad_norm": 1.7093367811381595, "learning_rate": 9.320689718777126e-06, "loss": 0.7183, "step": 1964 }, { "epoch": 0.21, "grad_norm": 1.8382783265928293, "learning_rate": 9.319801833612377e-06, "loss": 0.5725, "step": 1965 }, { "epoch": 0.21, "grad_norm": 1.722677773595463, "learning_rate": 9.318913410923225e-06, "loss": 0.6201, "step": 1966 }, { "epoch": 0.21, "grad_norm": 2.0505450697488836, "learning_rate": 9.318024450820222e-06, "loss": 0.6933, "step": 1967 }, { "epoch": 0.21, "grad_norm": 1.8296246140131482, "learning_rate": 9.317134953413982e-06, "loss": 0.691, "step": 1968 }, { "epoch": 0.21, "grad_norm": 3.1712684234092126, "learning_rate": 9.31624491881519e-06, "loss": 0.612, "step": 1969 }, { "epoch": 0.21, "grad_norm": 1.6377045970315898, "learning_rate": 9.315354347134593e-06, "loss": 0.6327, "step": 1970 }, { "epoch": 0.21, "grad_norm": 1.7635331828315135, "learning_rate": 9.314463238483006e-06, "loss": 0.6495, "step": 1971 }, { "epoch": 0.21, "grad_norm": 1.713264454787253, "learning_rate": 9.313571592971316e-06, "loss": 0.5603, "step": 1972 }, { "epoch": 0.21, "grad_norm": 1.698349162144944, "learning_rate": 9.31267941071047e-06, "loss": 0.6737, "step": 1973 }, { "epoch": 0.21, "grad_norm": 1.883983891579887, "learning_rate": 9.311786691811484e-06, "loss": 0.7622, "step": 1974 }, { "epoch": 0.21, "grad_norm": 1.7152382638095345, "learning_rate": 9.310893436385442e-06, "loss": 0.6671, "step": 1975 }, { "epoch": 0.21, "grad_norm": 1.7408064826931589, "learning_rate": 9.309999644543497e-06, "loss": 0.661, "step": 1976 }, { "epoch": 0.21, "grad_norm": 1.6338486784558142, "learning_rate": 9.309105316396863e-06, "loss": 0.6028, "step": 1977 }, { "epoch": 0.21, "grad_norm": 2.0486474760669275, "learning_rate": 9.308210452056823e-06, "loss": 0.7346, "step": 1978 }, { "epoch": 0.21, "grad_norm": 1.8251708752185685, "learning_rate": 9.307315051634728e-06, "loss": 0.7064, "step": 1979 }, { "epoch": 0.21, "grad_norm": 2.6225048040532912, "learning_rate": 9.306419115241997e-06, "loss": 0.7169, "step": 1980 }, { "epoch": 0.21, "grad_norm": 1.8921064107623635, "learning_rate": 9.305522642990113e-06, "loss": 0.843, "step": 1981 }, { "epoch": 0.21, "grad_norm": 1.798930736403929, "learning_rate": 9.304625634990623e-06, "loss": 0.5928, "step": 1982 }, { "epoch": 0.21, "grad_norm": 1.9052265490716813, "learning_rate": 9.303728091355148e-06, "loss": 0.7617, "step": 1983 }, { "epoch": 0.21, "grad_norm": 1.6714944821946849, "learning_rate": 9.302830012195373e-06, "loss": 0.7204, "step": 1984 }, { "epoch": 0.21, "grad_norm": 2.0798568048062296, "learning_rate": 9.301931397623044e-06, "loss": 0.6551, "step": 1985 }, { "epoch": 0.21, "grad_norm": 1.9705953695723157, "learning_rate": 9.301032247749978e-06, "loss": 0.6606, "step": 1986 }, { "epoch": 0.21, "grad_norm": 1.7462276692309302, "learning_rate": 9.300132562688063e-06, "loss": 0.6789, "step": 1987 }, { "epoch": 0.21, "grad_norm": 1.9446792360378422, "learning_rate": 9.299232342549246e-06, "loss": 0.7664, "step": 1988 }, { "epoch": 0.21, "grad_norm": 1.7819187579601083, "learning_rate": 9.298331587445543e-06, "loss": 0.7044, "step": 1989 }, { "epoch": 0.21, "grad_norm": 2.170746570806558, "learning_rate": 9.297430297489042e-06, "loss": 0.6621, "step": 1990 }, { "epoch": 0.21, "grad_norm": 1.7055483708948374, "learning_rate": 9.296528472791888e-06, "loss": 0.629, "step": 1991 }, { "epoch": 0.21, "grad_norm": 2.0783170375069, "learning_rate": 9.295626113466298e-06, "loss": 0.7817, "step": 1992 }, { "epoch": 0.21, "grad_norm": 1.563775370479302, "learning_rate": 9.294723219624558e-06, "loss": 0.5806, "step": 1993 }, { "epoch": 0.21, "grad_norm": 1.9999452955017403, "learning_rate": 9.293819791379016e-06, "loss": 0.7426, "step": 1994 }, { "epoch": 0.21, "grad_norm": 2.1483543585089837, "learning_rate": 9.292915828842086e-06, "loss": 0.7924, "step": 1995 }, { "epoch": 0.21, "grad_norm": 1.9171293977171906, "learning_rate": 9.292011332126254e-06, "loss": 0.7863, "step": 1996 }, { "epoch": 0.21, "grad_norm": 2.3240890940139423, "learning_rate": 9.291106301344065e-06, "loss": 0.7906, "step": 1997 }, { "epoch": 0.21, "grad_norm": 1.9652951582605023, "learning_rate": 9.290200736608136e-06, "loss": 0.6844, "step": 1998 }, { "epoch": 0.21, "grad_norm": 1.2223078357569617, "learning_rate": 9.289294638031152e-06, "loss": 0.5333, "step": 1999 }, { "epoch": 0.21, "grad_norm": 1.8818254010007136, "learning_rate": 9.288388005725858e-06, "loss": 0.5965, "step": 2000 }, { "epoch": 0.21, "grad_norm": 1.9440364678240003, "learning_rate": 9.287480839805067e-06, "loss": 0.6907, "step": 2001 }, { "epoch": 0.21, "grad_norm": 1.7607230007146952, "learning_rate": 9.286573140381663e-06, "loss": 0.6532, "step": 2002 }, { "epoch": 0.21, "grad_norm": 1.5760811125149732, "learning_rate": 9.285664907568593e-06, "loss": 0.5723, "step": 2003 }, { "epoch": 0.21, "grad_norm": 1.735105552439402, "learning_rate": 9.284756141478869e-06, "loss": 0.5734, "step": 2004 }, { "epoch": 0.21, "grad_norm": 2.0185134561585754, "learning_rate": 9.283846842225574e-06, "loss": 0.7375, "step": 2005 }, { "epoch": 0.21, "grad_norm": 1.7176342246880558, "learning_rate": 9.282937009921853e-06, "loss": 0.6258, "step": 2006 }, { "epoch": 0.21, "grad_norm": 1.891948683289419, "learning_rate": 9.28202664468092e-06, "loss": 0.6681, "step": 2007 }, { "epoch": 0.21, "grad_norm": 2.0159799434687344, "learning_rate": 9.28111574661605e-06, "loss": 0.69, "step": 2008 }, { "epoch": 0.21, "grad_norm": 1.979305268205953, "learning_rate": 9.280204315840594e-06, "loss": 0.6752, "step": 2009 }, { "epoch": 0.21, "grad_norm": 1.5022663097869942, "learning_rate": 9.27929235246796e-06, "loss": 0.5606, "step": 2010 }, { "epoch": 0.21, "grad_norm": 1.2908279451803188, "learning_rate": 9.27837985661163e-06, "loss": 0.5743, "step": 2011 }, { "epoch": 0.21, "grad_norm": 1.8011535171999642, "learning_rate": 9.277466828385142e-06, "loss": 0.6673, "step": 2012 }, { "epoch": 0.21, "grad_norm": 1.746874637642245, "learning_rate": 9.276553267902113e-06, "loss": 0.5918, "step": 2013 }, { "epoch": 0.21, "grad_norm": 1.227454440121317, "learning_rate": 9.275639175276216e-06, "loss": 0.5598, "step": 2014 }, { "epoch": 0.21, "grad_norm": 1.6192382605363649, "learning_rate": 9.274724550621194e-06, "loss": 0.6648, "step": 2015 }, { "epoch": 0.22, "grad_norm": 2.099092625991865, "learning_rate": 9.27380939405086e-06, "loss": 0.7376, "step": 2016 }, { "epoch": 0.22, "grad_norm": 1.274170761425221, "learning_rate": 9.272893705679083e-06, "loss": 0.5456, "step": 2017 }, { "epoch": 0.22, "grad_norm": 1.2767833815400853, "learning_rate": 9.27197748561981e-06, "loss": 0.5412, "step": 2018 }, { "epoch": 0.22, "grad_norm": 1.966391984510949, "learning_rate": 9.271060733987045e-06, "loss": 0.7379, "step": 2019 }, { "epoch": 0.22, "grad_norm": 2.3675222398349853, "learning_rate": 9.270143450894864e-06, "loss": 0.6994, "step": 2020 }, { "epoch": 0.22, "grad_norm": 1.869977491585817, "learning_rate": 9.269225636457407e-06, "loss": 0.6883, "step": 2021 }, { "epoch": 0.22, "grad_norm": 1.9464872138403144, "learning_rate": 9.268307290788879e-06, "loss": 0.642, "step": 2022 }, { "epoch": 0.22, "grad_norm": 1.8929174516057954, "learning_rate": 9.267388414003553e-06, "loss": 0.6342, "step": 2023 }, { "epoch": 0.22, "grad_norm": 1.8156249870850816, "learning_rate": 9.266469006215769e-06, "loss": 0.7426, "step": 2024 }, { "epoch": 0.22, "grad_norm": 2.0781132295050084, "learning_rate": 9.265549067539926e-06, "loss": 0.6742, "step": 2025 }, { "epoch": 0.22, "grad_norm": 2.0075866657641512, "learning_rate": 9.264628598090499e-06, "loss": 0.6984, "step": 2026 }, { "epoch": 0.22, "grad_norm": 1.7606507684605215, "learning_rate": 9.263707597982023e-06, "loss": 0.6638, "step": 2027 }, { "epoch": 0.22, "grad_norm": 2.014807711505767, "learning_rate": 9.262786067329101e-06, "loss": 0.6218, "step": 2028 }, { "epoch": 0.22, "grad_norm": 1.8161911832432462, "learning_rate": 9.261864006246402e-06, "loss": 0.6799, "step": 2029 }, { "epoch": 0.22, "grad_norm": 1.9545325435187426, "learning_rate": 9.260941414848659e-06, "loss": 0.7688, "step": 2030 }, { "epoch": 0.22, "grad_norm": 2.5855144061709154, "learning_rate": 9.260018293250674e-06, "loss": 0.5712, "step": 2031 }, { "epoch": 0.22, "grad_norm": 1.672330797214812, "learning_rate": 9.259094641567313e-06, "loss": 0.6644, "step": 2032 }, { "epoch": 0.22, "grad_norm": 1.9882608391183296, "learning_rate": 9.258170459913507e-06, "loss": 0.747, "step": 2033 }, { "epoch": 0.22, "grad_norm": 1.8231713744702316, "learning_rate": 9.257245748404255e-06, "loss": 0.7276, "step": 2034 }, { "epoch": 0.22, "grad_norm": 1.3700719837902067, "learning_rate": 9.256320507154622e-06, "loss": 0.5482, "step": 2035 }, { "epoch": 0.22, "grad_norm": 1.8876982009879792, "learning_rate": 9.255394736279737e-06, "loss": 0.733, "step": 2036 }, { "epoch": 0.22, "grad_norm": 1.8522540158617147, "learning_rate": 9.2544684358948e-06, "loss": 0.7518, "step": 2037 }, { "epoch": 0.22, "grad_norm": 1.8190295712176903, "learning_rate": 9.253541606115069e-06, "loss": 0.6003, "step": 2038 }, { "epoch": 0.22, "grad_norm": 1.530949917058455, "learning_rate": 9.252614247055872e-06, "loss": 0.5101, "step": 2039 }, { "epoch": 0.22, "grad_norm": 1.74861122181759, "learning_rate": 9.251686358832603e-06, "loss": 0.6499, "step": 2040 }, { "epoch": 0.22, "grad_norm": 1.7340741892248892, "learning_rate": 9.250757941560723e-06, "loss": 0.6943, "step": 2041 }, { "epoch": 0.22, "grad_norm": 1.5424927994046702, "learning_rate": 9.249828995355759e-06, "loss": 0.6019, "step": 2042 }, { "epoch": 0.22, "grad_norm": 1.7428140961217147, "learning_rate": 9.2488995203333e-06, "loss": 0.5977, "step": 2043 }, { "epoch": 0.22, "grad_norm": 2.319466059855257, "learning_rate": 9.247969516609002e-06, "loss": 0.6442, "step": 2044 }, { "epoch": 0.22, "grad_norm": 2.014578102351187, "learning_rate": 9.247038984298587e-06, "loss": 0.7626, "step": 2045 }, { "epoch": 0.22, "grad_norm": 2.0357426483991703, "learning_rate": 9.246107923517847e-06, "loss": 0.7749, "step": 2046 }, { "epoch": 0.22, "grad_norm": 2.1011358977704115, "learning_rate": 9.245176334382634e-06, "loss": 0.7375, "step": 2047 }, { "epoch": 0.22, "grad_norm": 1.8084540643584794, "learning_rate": 9.244244217008873e-06, "loss": 0.5822, "step": 2048 }, { "epoch": 0.22, "grad_norm": 1.9374131940674288, "learning_rate": 9.24331157151254e-06, "loss": 0.7425, "step": 2049 }, { "epoch": 0.22, "grad_norm": 2.06671049718341, "learning_rate": 9.242378398009697e-06, "loss": 0.6556, "step": 2050 }, { "epoch": 0.22, "grad_norm": 1.5691747835036944, "learning_rate": 9.241444696616455e-06, "loss": 0.6475, "step": 2051 }, { "epoch": 0.22, "grad_norm": 1.7394368408667165, "learning_rate": 9.240510467448998e-06, "loss": 0.6073, "step": 2052 }, { "epoch": 0.22, "grad_norm": 1.9077224478089534, "learning_rate": 9.239575710623577e-06, "loss": 0.6646, "step": 2053 }, { "epoch": 0.22, "grad_norm": 1.527382123827003, "learning_rate": 9.238640426256503e-06, "loss": 0.6668, "step": 2054 }, { "epoch": 0.22, "grad_norm": 1.7867095091546092, "learning_rate": 9.237704614464157e-06, "loss": 0.633, "step": 2055 }, { "epoch": 0.22, "grad_norm": 1.8770926354276498, "learning_rate": 9.236768275362985e-06, "loss": 0.667, "step": 2056 }, { "epoch": 0.22, "grad_norm": 2.684039981260309, "learning_rate": 9.235831409069497e-06, "loss": 0.59, "step": 2057 }, { "epoch": 0.22, "grad_norm": 1.8632684263486465, "learning_rate": 9.234894015700273e-06, "loss": 0.6922, "step": 2058 }, { "epoch": 0.22, "grad_norm": 1.841858123782224, "learning_rate": 9.233956095371954e-06, "loss": 0.6305, "step": 2059 }, { "epoch": 0.22, "grad_norm": 1.5248204116545638, "learning_rate": 9.233017648201244e-06, "loss": 0.564, "step": 2060 }, { "epoch": 0.22, "grad_norm": 1.1974547352255518, "learning_rate": 9.232078674304922e-06, "loss": 0.5599, "step": 2061 }, { "epoch": 0.22, "grad_norm": 1.7023616791096452, "learning_rate": 9.231139173799823e-06, "loss": 0.578, "step": 2062 }, { "epoch": 0.22, "grad_norm": 2.0691747559179223, "learning_rate": 9.230199146802854e-06, "loss": 0.666, "step": 2063 }, { "epoch": 0.22, "grad_norm": 2.119214385059536, "learning_rate": 9.229258593430985e-06, "loss": 0.8241, "step": 2064 }, { "epoch": 0.22, "grad_norm": 1.8968820927426648, "learning_rate": 9.228317513801249e-06, "loss": 0.7795, "step": 2065 }, { "epoch": 0.22, "grad_norm": 1.8222034385764803, "learning_rate": 9.227375908030748e-06, "loss": 0.686, "step": 2066 }, { "epoch": 0.22, "grad_norm": 1.7201773647761929, "learning_rate": 9.226433776236652e-06, "loss": 0.7058, "step": 2067 }, { "epoch": 0.22, "grad_norm": 1.7346470116674593, "learning_rate": 9.22549111853619e-06, "loss": 0.6652, "step": 2068 }, { "epoch": 0.22, "grad_norm": 1.892137187624878, "learning_rate": 9.224547935046657e-06, "loss": 0.7162, "step": 2069 }, { "epoch": 0.22, "grad_norm": 1.81342080946197, "learning_rate": 9.22360422588542e-06, "loss": 0.7667, "step": 2070 }, { "epoch": 0.22, "grad_norm": 1.8095144461378656, "learning_rate": 9.222659991169906e-06, "loss": 0.6966, "step": 2071 }, { "epoch": 0.22, "grad_norm": 1.7277927319600215, "learning_rate": 9.221715231017609e-06, "loss": 0.7085, "step": 2072 }, { "epoch": 0.22, "grad_norm": 1.8055976920658148, "learning_rate": 9.220769945546086e-06, "loss": 0.7078, "step": 2073 }, { "epoch": 0.22, "grad_norm": 2.027453365211168, "learning_rate": 9.219824134872963e-06, "loss": 0.7259, "step": 2074 }, { "epoch": 0.22, "grad_norm": 2.297057118210341, "learning_rate": 9.218877799115929e-06, "loss": 0.5902, "step": 2075 }, { "epoch": 0.22, "grad_norm": 1.9325146286165478, "learning_rate": 9.217930938392741e-06, "loss": 0.5903, "step": 2076 }, { "epoch": 0.22, "grad_norm": 2.0367207883185783, "learning_rate": 9.216983552821217e-06, "loss": 0.6434, "step": 2077 }, { "epoch": 0.22, "grad_norm": 1.823096999447672, "learning_rate": 9.216035642519243e-06, "loss": 0.6325, "step": 2078 }, { "epoch": 0.22, "grad_norm": 2.1058821662521368, "learning_rate": 9.215087207604769e-06, "loss": 0.755, "step": 2079 }, { "epoch": 0.22, "grad_norm": 1.89618388719891, "learning_rate": 9.214138248195816e-06, "loss": 0.6924, "step": 2080 }, { "epoch": 0.22, "grad_norm": 1.839751660868552, "learning_rate": 9.21318876441046e-06, "loss": 0.6618, "step": 2081 }, { "epoch": 0.22, "grad_norm": 1.9342694653890544, "learning_rate": 9.212238756366851e-06, "loss": 0.6958, "step": 2082 }, { "epoch": 0.22, "grad_norm": 1.7070367884577662, "learning_rate": 9.211288224183202e-06, "loss": 0.6204, "step": 2083 }, { "epoch": 0.22, "grad_norm": 1.844896872313402, "learning_rate": 9.210337167977786e-06, "loss": 0.6875, "step": 2084 }, { "epoch": 0.22, "grad_norm": 1.7162298091229966, "learning_rate": 9.209385587868949e-06, "loss": 0.5757, "step": 2085 }, { "epoch": 0.22, "grad_norm": 3.96368618185046, "learning_rate": 9.208433483975097e-06, "loss": 0.6224, "step": 2086 }, { "epoch": 0.22, "grad_norm": 1.9087101834649436, "learning_rate": 9.207480856414704e-06, "loss": 0.6795, "step": 2087 }, { "epoch": 0.22, "grad_norm": 2.043401823862137, "learning_rate": 9.206527705306308e-06, "loss": 0.6698, "step": 2088 }, { "epoch": 0.22, "grad_norm": 1.8416047636552786, "learning_rate": 9.20557403076851e-06, "loss": 0.7265, "step": 2089 }, { "epoch": 0.22, "grad_norm": 1.7545926620370975, "learning_rate": 9.20461983291998e-06, "loss": 0.6009, "step": 2090 }, { "epoch": 0.22, "grad_norm": 2.0232720246196716, "learning_rate": 9.203665111879451e-06, "loss": 0.5818, "step": 2091 }, { "epoch": 0.22, "grad_norm": 1.7089733626161763, "learning_rate": 9.202709867765722e-06, "loss": 0.5685, "step": 2092 }, { "epoch": 0.22, "grad_norm": 1.9075313977376676, "learning_rate": 9.201754100697655e-06, "loss": 0.7018, "step": 2093 }, { "epoch": 0.22, "grad_norm": 1.885036446887107, "learning_rate": 9.200797810794181e-06, "loss": 0.7122, "step": 2094 }, { "epoch": 0.22, "grad_norm": 1.9313514350688183, "learning_rate": 9.199840998174292e-06, "loss": 0.7748, "step": 2095 }, { "epoch": 0.22, "grad_norm": 1.8287723273878405, "learning_rate": 9.198883662957046e-06, "loss": 0.6692, "step": 2096 }, { "epoch": 0.22, "grad_norm": 1.7442749317566557, "learning_rate": 9.197925805261569e-06, "loss": 0.6192, "step": 2097 }, { "epoch": 0.22, "grad_norm": 2.5610650050824075, "learning_rate": 9.196967425207048e-06, "loss": 0.5851, "step": 2098 }, { "epoch": 0.22, "grad_norm": 2.087305688064861, "learning_rate": 9.196008522912739e-06, "loss": 0.6618, "step": 2099 }, { "epoch": 0.22, "grad_norm": 1.958781994924478, "learning_rate": 9.195049098497958e-06, "loss": 0.7334, "step": 2100 }, { "epoch": 0.22, "grad_norm": 1.6519031665036736, "learning_rate": 9.19408915208209e-06, "loss": 0.6019, "step": 2101 }, { "epoch": 0.22, "grad_norm": 1.7740459481157052, "learning_rate": 9.193128683784584e-06, "loss": 0.6839, "step": 2102 }, { "epoch": 0.22, "grad_norm": 1.7590982314131454, "learning_rate": 9.192167693724952e-06, "loss": 0.6465, "step": 2103 }, { "epoch": 0.22, "grad_norm": 1.6550363451386152, "learning_rate": 9.191206182022775e-06, "loss": 0.6882, "step": 2104 }, { "epoch": 0.22, "grad_norm": 1.6984915433084966, "learning_rate": 9.190244148797696e-06, "loss": 0.6461, "step": 2105 }, { "epoch": 0.22, "grad_norm": 1.852326666449561, "learning_rate": 9.18928159416942e-06, "loss": 0.6479, "step": 2106 }, { "epoch": 0.22, "grad_norm": 1.8175888658408625, "learning_rate": 9.188318518257724e-06, "loss": 0.7387, "step": 2107 }, { "epoch": 0.22, "grad_norm": 1.3924638312070605, "learning_rate": 9.187354921182446e-06, "loss": 0.5699, "step": 2108 }, { "epoch": 0.22, "grad_norm": 1.8693735051108733, "learning_rate": 9.186390803063486e-06, "loss": 0.71, "step": 2109 }, { "epoch": 0.23, "grad_norm": 1.9952584903371402, "learning_rate": 9.185426164020815e-06, "loss": 0.7024, "step": 2110 }, { "epoch": 0.23, "grad_norm": 1.9664346788671292, "learning_rate": 9.184461004174463e-06, "loss": 0.7003, "step": 2111 }, { "epoch": 0.23, "grad_norm": 1.664314253914168, "learning_rate": 9.183495323644529e-06, "loss": 0.64, "step": 2112 }, { "epoch": 0.23, "grad_norm": 1.8554268424963425, "learning_rate": 9.182529122551176e-06, "loss": 0.7707, "step": 2113 }, { "epoch": 0.23, "grad_norm": 1.6310265681248808, "learning_rate": 9.181562401014628e-06, "loss": 0.6343, "step": 2114 }, { "epoch": 0.23, "grad_norm": 1.7493107283295515, "learning_rate": 9.18059515915518e-06, "loss": 0.6875, "step": 2115 }, { "epoch": 0.23, "grad_norm": 1.6778992776869879, "learning_rate": 9.179627397093184e-06, "loss": 0.6105, "step": 2116 }, { "epoch": 0.23, "grad_norm": 1.422529644213089, "learning_rate": 9.178659114949066e-06, "loss": 0.5723, "step": 2117 }, { "epoch": 0.23, "grad_norm": 1.797754303222298, "learning_rate": 9.177690312843311e-06, "loss": 0.5668, "step": 2118 }, { "epoch": 0.23, "grad_norm": 1.6481290016496843, "learning_rate": 9.176720990896468e-06, "loss": 0.6864, "step": 2119 }, { "epoch": 0.23, "grad_norm": 1.9566017066334809, "learning_rate": 9.175751149229152e-06, "loss": 0.6819, "step": 2120 }, { "epoch": 0.23, "grad_norm": 1.873536785307549, "learning_rate": 9.174780787962046e-06, "loss": 0.6241, "step": 2121 }, { "epoch": 0.23, "grad_norm": 1.4863006016781022, "learning_rate": 9.173809907215892e-06, "loss": 0.5726, "step": 2122 }, { "epoch": 0.23, "grad_norm": 2.2153256797505843, "learning_rate": 9.172838507111499e-06, "loss": 0.6894, "step": 2123 }, { "epoch": 0.23, "grad_norm": 1.953917632784726, "learning_rate": 9.171866587769741e-06, "loss": 0.7911, "step": 2124 }, { "epoch": 0.23, "grad_norm": 1.6810535194435705, "learning_rate": 9.170894149311559e-06, "loss": 0.7, "step": 2125 }, { "epoch": 0.23, "grad_norm": 2.155100683901839, "learning_rate": 9.169921191857954e-06, "loss": 0.7221, "step": 2126 }, { "epoch": 0.23, "grad_norm": 2.028621350844731, "learning_rate": 9.168947715529993e-06, "loss": 0.7271, "step": 2127 }, { "epoch": 0.23, "grad_norm": 1.8635684722888737, "learning_rate": 9.16797372044881e-06, "loss": 0.6997, "step": 2128 }, { "epoch": 0.23, "grad_norm": 2.3085554077135537, "learning_rate": 9.1669992067356e-06, "loss": 0.6027, "step": 2129 }, { "epoch": 0.23, "grad_norm": 1.7744514381437426, "learning_rate": 9.166024174511625e-06, "loss": 0.6331, "step": 2130 }, { "epoch": 0.23, "grad_norm": 1.4516012808565135, "learning_rate": 9.165048623898213e-06, "loss": 0.5627, "step": 2131 }, { "epoch": 0.23, "grad_norm": 1.7176190942837082, "learning_rate": 9.164072555016752e-06, "loss": 0.6178, "step": 2132 }, { "epoch": 0.23, "grad_norm": 1.8756829934811718, "learning_rate": 9.163095967988698e-06, "loss": 0.6535, "step": 2133 }, { "epoch": 0.23, "grad_norm": 1.9383695676876038, "learning_rate": 9.162118862935568e-06, "loss": 0.7139, "step": 2134 }, { "epoch": 0.23, "grad_norm": 1.9700053552751238, "learning_rate": 9.161141239978949e-06, "loss": 0.7174, "step": 2135 }, { "epoch": 0.23, "grad_norm": 1.1612841999543093, "learning_rate": 9.160163099240487e-06, "loss": 0.5559, "step": 2136 }, { "epoch": 0.23, "grad_norm": 1.935950322219365, "learning_rate": 9.159184440841895e-06, "loss": 0.707, "step": 2137 }, { "epoch": 0.23, "grad_norm": 1.8905681650515256, "learning_rate": 9.158205264904952e-06, "loss": 0.6078, "step": 2138 }, { "epoch": 0.23, "grad_norm": 1.2192138810805868, "learning_rate": 9.157225571551496e-06, "loss": 0.5677, "step": 2139 }, { "epoch": 0.23, "grad_norm": 1.980367945141541, "learning_rate": 9.156245360903436e-06, "loss": 0.6367, "step": 2140 }, { "epoch": 0.23, "grad_norm": 1.7406946036971809, "learning_rate": 9.155264633082738e-06, "loss": 0.6154, "step": 2141 }, { "epoch": 0.23, "grad_norm": 1.9333654781197662, "learning_rate": 9.154283388211442e-06, "loss": 0.7171, "step": 2142 }, { "epoch": 0.23, "grad_norm": 1.7078186566936078, "learning_rate": 9.153301626411646e-06, "loss": 0.5983, "step": 2143 }, { "epoch": 0.23, "grad_norm": 1.666182495147188, "learning_rate": 9.15231934780551e-06, "loss": 0.63, "step": 2144 }, { "epoch": 0.23, "grad_norm": 1.9766896409502634, "learning_rate": 9.151336552515262e-06, "loss": 0.7348, "step": 2145 }, { "epoch": 0.23, "grad_norm": 1.8276556944926452, "learning_rate": 9.150353240663195e-06, "loss": 0.6705, "step": 2146 }, { "epoch": 0.23, "grad_norm": 1.7076783193787377, "learning_rate": 9.149369412371667e-06, "loss": 0.5813, "step": 2147 }, { "epoch": 0.23, "grad_norm": 2.1172107589677363, "learning_rate": 9.148385067763094e-06, "loss": 0.696, "step": 2148 }, { "epoch": 0.23, "grad_norm": 1.874833166162897, "learning_rate": 9.147400206959966e-06, "loss": 0.5917, "step": 2149 }, { "epoch": 0.23, "grad_norm": 1.8105034166139726, "learning_rate": 9.146414830084827e-06, "loss": 0.7301, "step": 2150 }, { "epoch": 0.23, "grad_norm": 2.1736817203716545, "learning_rate": 9.145428937260292e-06, "loss": 0.6384, "step": 2151 }, { "epoch": 0.23, "grad_norm": 1.8503708781181483, "learning_rate": 9.144442528609039e-06, "loss": 0.7142, "step": 2152 }, { "epoch": 0.23, "grad_norm": 1.9245603629631802, "learning_rate": 9.143455604253808e-06, "loss": 0.6379, "step": 2153 }, { "epoch": 0.23, "grad_norm": 2.054642963818888, "learning_rate": 9.142468164317406e-06, "loss": 0.6318, "step": 2154 }, { "epoch": 0.23, "grad_norm": 1.4012799193978076, "learning_rate": 9.1414802089227e-06, "loss": 0.5366, "step": 2155 }, { "epoch": 0.23, "grad_norm": 1.2526537540523734, "learning_rate": 9.140491738192627e-06, "loss": 0.5778, "step": 2156 }, { "epoch": 0.23, "grad_norm": 2.137430577847528, "learning_rate": 9.139502752250186e-06, "loss": 0.8018, "step": 2157 }, { "epoch": 0.23, "grad_norm": 2.0872871830782533, "learning_rate": 9.138513251218435e-06, "loss": 0.7922, "step": 2158 }, { "epoch": 0.23, "grad_norm": 2.2273790205289483, "learning_rate": 9.137523235220502e-06, "loss": 0.5652, "step": 2159 }, { "epoch": 0.23, "grad_norm": 2.1158837478818238, "learning_rate": 9.136532704379579e-06, "loss": 0.7137, "step": 2160 }, { "epoch": 0.23, "grad_norm": 1.9177715067383536, "learning_rate": 9.135541658818918e-06, "loss": 0.7325, "step": 2161 }, { "epoch": 0.23, "grad_norm": 1.743597745043648, "learning_rate": 9.134550098661839e-06, "loss": 0.6253, "step": 2162 }, { "epoch": 0.23, "grad_norm": 2.087915568516298, "learning_rate": 9.133558024031724e-06, "loss": 0.6479, "step": 2163 }, { "epoch": 0.23, "grad_norm": 1.9565598888559663, "learning_rate": 9.13256543505202e-06, "loss": 0.6623, "step": 2164 }, { "epoch": 0.23, "grad_norm": 2.414633478132087, "learning_rate": 9.131572331846237e-06, "loss": 0.5676, "step": 2165 }, { "epoch": 0.23, "grad_norm": 1.845157018386305, "learning_rate": 9.130578714537948e-06, "loss": 0.6969, "step": 2166 }, { "epoch": 0.23, "grad_norm": 1.985311521745487, "learning_rate": 9.129584583250793e-06, "loss": 0.7742, "step": 2167 }, { "epoch": 0.23, "grad_norm": 1.792717932519866, "learning_rate": 9.128589938108473e-06, "loss": 0.6199, "step": 2168 }, { "epoch": 0.23, "grad_norm": 1.7453265176180623, "learning_rate": 9.127594779234759e-06, "loss": 0.6126, "step": 2169 }, { "epoch": 0.23, "grad_norm": 1.839174484491052, "learning_rate": 9.126599106753476e-06, "loss": 0.7624, "step": 2170 }, { "epoch": 0.23, "grad_norm": 1.8029889631964024, "learning_rate": 9.12560292078852e-06, "loss": 0.6568, "step": 2171 }, { "epoch": 0.23, "grad_norm": 1.6235696279181668, "learning_rate": 9.124606221463849e-06, "loss": 0.7021, "step": 2172 }, { "epoch": 0.23, "grad_norm": 1.9402885397790675, "learning_rate": 9.123609008903485e-06, "loss": 0.6881, "step": 2173 }, { "epoch": 0.23, "grad_norm": 1.7583401209834646, "learning_rate": 9.122611283231512e-06, "loss": 0.6961, "step": 2174 }, { "epoch": 0.23, "grad_norm": 1.6424156301815507, "learning_rate": 9.121613044572085e-06, "loss": 0.6424, "step": 2175 }, { "epoch": 0.23, "grad_norm": 1.6129674011992938, "learning_rate": 9.120614293049412e-06, "loss": 0.5423, "step": 2176 }, { "epoch": 0.23, "grad_norm": 1.7012557361477156, "learning_rate": 9.119615028787771e-06, "loss": 0.6533, "step": 2177 }, { "epoch": 0.23, "grad_norm": 1.7268255439144733, "learning_rate": 9.118615251911507e-06, "loss": 0.6513, "step": 2178 }, { "epoch": 0.23, "grad_norm": 1.830027802536018, "learning_rate": 9.117614962545021e-06, "loss": 0.7141, "step": 2179 }, { "epoch": 0.23, "grad_norm": 2.3089678272104655, "learning_rate": 9.116614160812783e-06, "loss": 0.7177, "step": 2180 }, { "epoch": 0.23, "grad_norm": 2.012637612685895, "learning_rate": 9.115612846839323e-06, "loss": 0.6674, "step": 2181 }, { "epoch": 0.23, "grad_norm": 2.1880330902574405, "learning_rate": 9.114611020749242e-06, "loss": 0.7176, "step": 2182 }, { "epoch": 0.23, "grad_norm": 1.728135906063868, "learning_rate": 9.113608682667197e-06, "loss": 0.6905, "step": 2183 }, { "epoch": 0.23, "grad_norm": 1.8786291240842226, "learning_rate": 9.112605832717911e-06, "loss": 0.6469, "step": 2184 }, { "epoch": 0.23, "grad_norm": 1.856748117218554, "learning_rate": 9.111602471026174e-06, "loss": 0.5649, "step": 2185 }, { "epoch": 0.23, "grad_norm": 2.347602574054247, "learning_rate": 9.110598597716833e-06, "loss": 0.6261, "step": 2186 }, { "epoch": 0.23, "grad_norm": 1.8514951418829075, "learning_rate": 9.109594212914805e-06, "loss": 0.676, "step": 2187 }, { "epoch": 0.23, "grad_norm": 1.965622359347167, "learning_rate": 9.10858931674507e-06, "loss": 0.6895, "step": 2188 }, { "epoch": 0.23, "grad_norm": 1.7186272262780955, "learning_rate": 9.107583909332665e-06, "loss": 0.6427, "step": 2189 }, { "epoch": 0.23, "grad_norm": 1.8522617984603416, "learning_rate": 9.106577990802698e-06, "loss": 0.5809, "step": 2190 }, { "epoch": 0.23, "grad_norm": 1.7139333568253492, "learning_rate": 9.105571561280342e-06, "loss": 0.6856, "step": 2191 }, { "epoch": 0.23, "grad_norm": 1.6419827516776615, "learning_rate": 9.104564620890825e-06, "loss": 0.656, "step": 2192 }, { "epoch": 0.23, "grad_norm": 2.096682476712609, "learning_rate": 9.103557169759444e-06, "loss": 0.6439, "step": 2193 }, { "epoch": 0.23, "grad_norm": 1.763043566627885, "learning_rate": 9.10254920801156e-06, "loss": 0.6519, "step": 2194 }, { "epoch": 0.23, "grad_norm": 1.8532733669821335, "learning_rate": 9.101540735772593e-06, "loss": 0.6031, "step": 2195 }, { "epoch": 0.23, "grad_norm": 2.060271816335249, "learning_rate": 9.100531753168036e-06, "loss": 0.7271, "step": 2196 }, { "epoch": 0.23, "grad_norm": 2.1515812369411855, "learning_rate": 9.099522260323435e-06, "loss": 0.621, "step": 2197 }, { "epoch": 0.23, "grad_norm": 2.3286497982208347, "learning_rate": 9.098512257364402e-06, "loss": 0.6533, "step": 2198 }, { "epoch": 0.23, "grad_norm": 1.8676323881644703, "learning_rate": 9.09750174441662e-06, "loss": 0.6488, "step": 2199 }, { "epoch": 0.23, "grad_norm": 1.7721268026802068, "learning_rate": 9.096490721605826e-06, "loss": 0.596, "step": 2200 }, { "epoch": 0.23, "grad_norm": 1.6622842387860022, "learning_rate": 9.095479189057827e-06, "loss": 0.5543, "step": 2201 }, { "epoch": 0.23, "grad_norm": 1.9298629459669492, "learning_rate": 9.094467146898487e-06, "loss": 0.5807, "step": 2202 }, { "epoch": 0.23, "grad_norm": 1.7573555028723329, "learning_rate": 9.09345459525374e-06, "loss": 0.7103, "step": 2203 }, { "epoch": 0.24, "grad_norm": 1.824872041184545, "learning_rate": 9.092441534249577e-06, "loss": 0.6755, "step": 2204 }, { "epoch": 0.24, "grad_norm": 1.7074953012039544, "learning_rate": 9.091427964012061e-06, "loss": 0.566, "step": 2205 }, { "epoch": 0.24, "grad_norm": 1.912570248894482, "learning_rate": 9.090413884667308e-06, "loss": 0.578, "step": 2206 }, { "epoch": 0.24, "grad_norm": 1.972684771198685, "learning_rate": 9.089399296341507e-06, "loss": 0.7146, "step": 2207 }, { "epoch": 0.24, "grad_norm": 1.7416079883651134, "learning_rate": 9.088384199160904e-06, "loss": 0.6483, "step": 2208 }, { "epoch": 0.24, "grad_norm": 1.868096287346581, "learning_rate": 9.087368593251811e-06, "loss": 0.6701, "step": 2209 }, { "epoch": 0.24, "grad_norm": 1.5744078429212025, "learning_rate": 9.086352478740601e-06, "loss": 0.5716, "step": 2210 }, { "epoch": 0.24, "grad_norm": 1.4465119367664439, "learning_rate": 9.085335855753712e-06, "loss": 0.5732, "step": 2211 }, { "epoch": 0.24, "grad_norm": 30.284984154394664, "learning_rate": 9.084318724417647e-06, "loss": 0.647, "step": 2212 }, { "epoch": 0.24, "grad_norm": 2.2664370484739007, "learning_rate": 9.083301084858969e-06, "loss": 0.6613, "step": 2213 }, { "epoch": 0.24, "grad_norm": 1.7950223076322072, "learning_rate": 9.082282937204302e-06, "loss": 0.6925, "step": 2214 }, { "epoch": 0.24, "grad_norm": 2.6113966433713256, "learning_rate": 9.081264281580346e-06, "loss": 0.7174, "step": 2215 }, { "epoch": 0.24, "grad_norm": 1.6596410404963775, "learning_rate": 9.080245118113847e-06, "loss": 0.5182, "step": 2216 }, { "epoch": 0.24, "grad_norm": 1.6232691241533217, "learning_rate": 9.079225446931625e-06, "loss": 0.7502, "step": 2217 }, { "epoch": 0.24, "grad_norm": 1.9633285706681083, "learning_rate": 9.078205268160559e-06, "loss": 0.6605, "step": 2218 }, { "epoch": 0.24, "grad_norm": 1.579858313886315, "learning_rate": 9.077184581927594e-06, "loss": 0.6238, "step": 2219 }, { "epoch": 0.24, "grad_norm": 1.7688117339127511, "learning_rate": 9.076163388359738e-06, "loss": 0.6588, "step": 2220 }, { "epoch": 0.24, "grad_norm": 2.1855384774817144, "learning_rate": 9.075141687584056e-06, "loss": 0.6804, "step": 2221 }, { "epoch": 0.24, "grad_norm": 2.014580284050796, "learning_rate": 9.074119479727688e-06, "loss": 0.7173, "step": 2222 }, { "epoch": 0.24, "grad_norm": 1.7383038747081032, "learning_rate": 9.073096764917823e-06, "loss": 0.6607, "step": 2223 }, { "epoch": 0.24, "grad_norm": 1.7781584700644897, "learning_rate": 9.072073543281725e-06, "loss": 0.7478, "step": 2224 }, { "epoch": 0.24, "grad_norm": 1.9833604402184764, "learning_rate": 9.071049814946715e-06, "loss": 0.6645, "step": 2225 }, { "epoch": 0.24, "grad_norm": 1.6587548108993977, "learning_rate": 9.070025580040176e-06, "loss": 0.6401, "step": 2226 }, { "epoch": 0.24, "grad_norm": 1.7269086990878604, "learning_rate": 9.069000838689558e-06, "loss": 0.6137, "step": 2227 }, { "epoch": 0.24, "grad_norm": 1.5041380949621457, "learning_rate": 9.067975591022374e-06, "loss": 0.5016, "step": 2228 }, { "epoch": 0.24, "grad_norm": 1.7704152496067718, "learning_rate": 9.066949837166196e-06, "loss": 0.6334, "step": 2229 }, { "epoch": 0.24, "grad_norm": 1.8214844274599606, "learning_rate": 9.065923577248661e-06, "loss": 0.7631, "step": 2230 }, { "epoch": 0.24, "grad_norm": 3.6350348607807446, "learning_rate": 9.06489681139747e-06, "loss": 0.6002, "step": 2231 }, { "epoch": 0.24, "grad_norm": 1.8613969358816511, "learning_rate": 9.063869539740387e-06, "loss": 0.6083, "step": 2232 }, { "epoch": 0.24, "grad_norm": 2.309890392740833, "learning_rate": 9.062841762405236e-06, "loss": 0.7857, "step": 2233 }, { "epoch": 0.24, "grad_norm": 2.056582411367385, "learning_rate": 9.06181347951991e-06, "loss": 0.8613, "step": 2234 }, { "epoch": 0.24, "grad_norm": 1.7576351478535712, "learning_rate": 9.060784691212357e-06, "loss": 0.6169, "step": 2235 }, { "epoch": 0.24, "grad_norm": 1.8064179854661409, "learning_rate": 9.059755397610592e-06, "loss": 0.6717, "step": 2236 }, { "epoch": 0.24, "grad_norm": 1.5845288908004445, "learning_rate": 9.058725598842695e-06, "loss": 0.67, "step": 2237 }, { "epoch": 0.24, "grad_norm": 1.786512426050072, "learning_rate": 9.057695295036806e-06, "loss": 0.7875, "step": 2238 }, { "epoch": 0.24, "grad_norm": 1.7115106121683612, "learning_rate": 9.056664486321126e-06, "loss": 0.7337, "step": 2239 }, { "epoch": 0.24, "grad_norm": 1.7563526393553093, "learning_rate": 9.055633172823927e-06, "loss": 0.6761, "step": 2240 }, { "epoch": 0.24, "grad_norm": 1.782396157538099, "learning_rate": 9.054601354673531e-06, "loss": 0.6189, "step": 2241 }, { "epoch": 0.24, "grad_norm": 1.7337972767097276, "learning_rate": 9.053569031998334e-06, "loss": 0.6598, "step": 2242 }, { "epoch": 0.24, "grad_norm": 2.03624394108782, "learning_rate": 9.052536204926791e-06, "loss": 0.5983, "step": 2243 }, { "epoch": 0.24, "grad_norm": 1.8552326079502113, "learning_rate": 9.051502873587418e-06, "loss": 0.6531, "step": 2244 }, { "epoch": 0.24, "grad_norm": 2.0650560254460832, "learning_rate": 9.050469038108796e-06, "loss": 0.6858, "step": 2245 }, { "epoch": 0.24, "grad_norm": 1.85353872183387, "learning_rate": 9.049434698619566e-06, "loss": 0.6729, "step": 2246 }, { "epoch": 0.24, "grad_norm": 1.7710861346910116, "learning_rate": 9.048399855248435e-06, "loss": 0.6554, "step": 2247 }, { "epoch": 0.24, "grad_norm": 2.7041840770579224, "learning_rate": 9.047364508124173e-06, "loss": 0.7499, "step": 2248 }, { "epoch": 0.24, "grad_norm": 1.8392155611319734, "learning_rate": 9.046328657375608e-06, "loss": 0.7285, "step": 2249 }, { "epoch": 0.24, "grad_norm": 1.823817878107279, "learning_rate": 9.045292303131634e-06, "loss": 0.7243, "step": 2250 }, { "epoch": 0.24, "grad_norm": 1.8189984188924442, "learning_rate": 9.04425544552121e-06, "loss": 0.7182, "step": 2251 }, { "epoch": 0.24, "grad_norm": 1.8445317841124438, "learning_rate": 9.043218084673351e-06, "loss": 0.63, "step": 2252 }, { "epoch": 0.24, "grad_norm": 1.9633401045750667, "learning_rate": 9.042180220717143e-06, "loss": 0.655, "step": 2253 }, { "epoch": 0.24, "grad_norm": 1.9002222174407846, "learning_rate": 9.041141853781727e-06, "loss": 0.6999, "step": 2254 }, { "epoch": 0.24, "grad_norm": 1.8707170053063347, "learning_rate": 9.04010298399631e-06, "loss": 0.6644, "step": 2255 }, { "epoch": 0.24, "grad_norm": 1.6908519447405026, "learning_rate": 9.039063611490163e-06, "loss": 0.6537, "step": 2256 }, { "epoch": 0.24, "grad_norm": 1.7007899497519274, "learning_rate": 9.038023736392616e-06, "loss": 0.5857, "step": 2257 }, { "epoch": 0.24, "grad_norm": 1.9638952691067393, "learning_rate": 9.036983358833065e-06, "loss": 0.7523, "step": 2258 }, { "epoch": 0.24, "grad_norm": 1.7149288048978932, "learning_rate": 9.035942478940964e-06, "loss": 0.6497, "step": 2259 }, { "epoch": 0.24, "grad_norm": 1.800614511397118, "learning_rate": 9.034901096845837e-06, "loss": 0.6825, "step": 2260 }, { "epoch": 0.24, "grad_norm": 1.6137884358896841, "learning_rate": 9.033859212677262e-06, "loss": 0.6861, "step": 2261 }, { "epoch": 0.24, "grad_norm": 1.6377226659852102, "learning_rate": 9.032816826564887e-06, "loss": 0.5871, "step": 2262 }, { "epoch": 0.24, "grad_norm": 1.883145060808369, "learning_rate": 9.031773938638415e-06, "loss": 0.7361, "step": 2263 }, { "epoch": 0.24, "grad_norm": 1.2738695124657329, "learning_rate": 9.030730549027619e-06, "loss": 0.5674, "step": 2264 }, { "epoch": 0.24, "grad_norm": 1.8766980460675942, "learning_rate": 9.029686657862327e-06, "loss": 0.626, "step": 2265 }, { "epoch": 0.24, "grad_norm": 1.741071241033623, "learning_rate": 9.028642265272435e-06, "loss": 0.6119, "step": 2266 }, { "epoch": 0.24, "grad_norm": 1.8831412540176224, "learning_rate": 9.027597371387901e-06, "loss": 0.6975, "step": 2267 }, { "epoch": 0.24, "grad_norm": 1.8528608840261849, "learning_rate": 9.026551976338742e-06, "loss": 0.7176, "step": 2268 }, { "epoch": 0.24, "grad_norm": 1.970007572127345, "learning_rate": 9.02550608025504e-06, "loss": 0.6564, "step": 2269 }, { "epoch": 0.24, "grad_norm": 1.7441927747575134, "learning_rate": 9.02445968326694e-06, "loss": 0.7332, "step": 2270 }, { "epoch": 0.24, "grad_norm": 1.7921699975511318, "learning_rate": 9.023412785504642e-06, "loss": 0.6872, "step": 2271 }, { "epoch": 0.24, "grad_norm": 1.8248012704657617, "learning_rate": 9.022365387098422e-06, "loss": 0.7007, "step": 2272 }, { "epoch": 0.24, "grad_norm": 1.794221161944292, "learning_rate": 9.021317488178606e-06, "loss": 0.6402, "step": 2273 }, { "epoch": 0.24, "grad_norm": 2.3007203091954205, "learning_rate": 9.020269088875588e-06, "loss": 0.5909, "step": 2274 }, { "epoch": 0.24, "grad_norm": 1.9118253986987845, "learning_rate": 9.019220189319824e-06, "loss": 0.7236, "step": 2275 }, { "epoch": 0.24, "grad_norm": 1.8449854324564174, "learning_rate": 9.01817078964183e-06, "loss": 0.6657, "step": 2276 }, { "epoch": 0.24, "grad_norm": 1.8185802534407527, "learning_rate": 9.017120889972185e-06, "loss": 0.6827, "step": 2277 }, { "epoch": 0.24, "grad_norm": 2.0356691146115526, "learning_rate": 9.016070490441532e-06, "loss": 0.6293, "step": 2278 }, { "epoch": 0.24, "grad_norm": 1.2635443255347274, "learning_rate": 9.015019591180577e-06, "loss": 0.5638, "step": 2279 }, { "epoch": 0.24, "grad_norm": 1.938036601320785, "learning_rate": 9.013968192320082e-06, "loss": 0.6981, "step": 2280 }, { "epoch": 0.24, "grad_norm": 1.5974803342782162, "learning_rate": 9.012916293990879e-06, "loss": 0.6219, "step": 2281 }, { "epoch": 0.24, "grad_norm": 2.0637265063627326, "learning_rate": 9.011863896323856e-06, "loss": 0.6847, "step": 2282 }, { "epoch": 0.24, "grad_norm": 1.7575174455479825, "learning_rate": 9.010810999449968e-06, "loss": 0.6741, "step": 2283 }, { "epoch": 0.24, "grad_norm": 2.1374468711291486, "learning_rate": 9.009757603500225e-06, "loss": 0.6855, "step": 2284 }, { "epoch": 0.24, "grad_norm": 1.7967047109507883, "learning_rate": 9.008703708605711e-06, "loss": 0.7571, "step": 2285 }, { "epoch": 0.24, "grad_norm": 1.9336035385732664, "learning_rate": 9.007649314897557e-06, "loss": 0.6378, "step": 2286 }, { "epoch": 0.24, "grad_norm": 1.6886416417613985, "learning_rate": 9.006594422506971e-06, "loss": 0.557, "step": 2287 }, { "epoch": 0.24, "grad_norm": 1.8784739892171478, "learning_rate": 9.005539031565212e-06, "loss": 0.7084, "step": 2288 }, { "epoch": 0.24, "grad_norm": 1.8642124774282454, "learning_rate": 9.004483142203606e-06, "loss": 0.6899, "step": 2289 }, { "epoch": 0.24, "grad_norm": 2.5645918072423406, "learning_rate": 9.003426754553543e-06, "loss": 0.6116, "step": 2290 }, { "epoch": 0.24, "grad_norm": 2.004211236650765, "learning_rate": 9.002369868746466e-06, "loss": 0.7311, "step": 2291 }, { "epoch": 0.24, "grad_norm": 1.7102012695423616, "learning_rate": 9.001312484913891e-06, "loss": 0.6538, "step": 2292 }, { "epoch": 0.24, "grad_norm": 1.794746261983655, "learning_rate": 9.00025460318739e-06, "loss": 0.5676, "step": 2293 }, { "epoch": 0.24, "grad_norm": 1.3744422247603134, "learning_rate": 8.999196223698599e-06, "loss": 0.5296, "step": 2294 }, { "epoch": 0.24, "grad_norm": 1.7650031129614694, "learning_rate": 8.99813734657921e-06, "loss": 0.7334, "step": 2295 }, { "epoch": 0.24, "grad_norm": 1.6970792053357855, "learning_rate": 8.997077971960987e-06, "loss": 0.5732, "step": 2296 }, { "epoch": 0.25, "grad_norm": 1.847046847226363, "learning_rate": 8.996018099975752e-06, "loss": 0.7701, "step": 2297 }, { "epoch": 0.25, "grad_norm": 1.7853918869968748, "learning_rate": 8.994957730755383e-06, "loss": 0.5791, "step": 2298 }, { "epoch": 0.25, "grad_norm": 1.5902134584688588, "learning_rate": 8.993896864431825e-06, "loss": 0.596, "step": 2299 }, { "epoch": 0.25, "grad_norm": 1.738678796277338, "learning_rate": 8.992835501137088e-06, "loss": 0.6609, "step": 2300 }, { "epoch": 0.25, "grad_norm": 2.0016127784630426, "learning_rate": 8.991773641003237e-06, "loss": 0.6239, "step": 2301 }, { "epoch": 0.25, "grad_norm": 1.7454467464421715, "learning_rate": 8.990711284162405e-06, "loss": 0.6923, "step": 2302 }, { "epoch": 0.25, "grad_norm": 1.9409511925447267, "learning_rate": 8.98964843074678e-06, "loss": 0.7137, "step": 2303 }, { "epoch": 0.25, "grad_norm": 1.8819576259877968, "learning_rate": 8.988585080888619e-06, "loss": 0.6524, "step": 2304 }, { "epoch": 0.25, "grad_norm": 1.9342252800823396, "learning_rate": 8.987521234720237e-06, "loss": 0.7866, "step": 2305 }, { "epoch": 0.25, "grad_norm": 1.914353414752681, "learning_rate": 8.986456892374008e-06, "loss": 0.7061, "step": 2306 }, { "epoch": 0.25, "grad_norm": 1.8700067681885177, "learning_rate": 8.985392053982377e-06, "loss": 0.7388, "step": 2307 }, { "epoch": 0.25, "grad_norm": 1.7491044339793786, "learning_rate": 8.984326719677838e-06, "loss": 0.6147, "step": 2308 }, { "epoch": 0.25, "grad_norm": 1.7872922868173322, "learning_rate": 8.983260889592958e-06, "loss": 0.6592, "step": 2309 }, { "epoch": 0.25, "grad_norm": 1.775158171658695, "learning_rate": 8.98219456386036e-06, "loss": 0.5838, "step": 2310 }, { "epoch": 0.25, "grad_norm": 2.25040867406742, "learning_rate": 8.981127742612728e-06, "loss": 0.6099, "step": 2311 }, { "epoch": 0.25, "grad_norm": 1.9139480461382836, "learning_rate": 8.980060425982811e-06, "loss": 0.6506, "step": 2312 }, { "epoch": 0.25, "grad_norm": 2.1457662651523126, "learning_rate": 8.97899261410342e-06, "loss": 0.5767, "step": 2313 }, { "epoch": 0.25, "grad_norm": 2.361963508330204, "learning_rate": 8.97792430710742e-06, "loss": 0.6663, "step": 2314 }, { "epoch": 0.25, "grad_norm": 1.8970466386118485, "learning_rate": 8.976855505127748e-06, "loss": 0.6674, "step": 2315 }, { "epoch": 0.25, "grad_norm": 1.9077252425220397, "learning_rate": 8.975786208297397e-06, "loss": 0.769, "step": 2316 }, { "epoch": 0.25, "grad_norm": 1.7264770120117963, "learning_rate": 8.97471641674942e-06, "loss": 0.6574, "step": 2317 }, { "epoch": 0.25, "grad_norm": 1.9344160915577413, "learning_rate": 8.973646130616938e-06, "loss": 0.7494, "step": 2318 }, { "epoch": 0.25, "grad_norm": 1.809556691246954, "learning_rate": 8.972575350033127e-06, "loss": 0.7116, "step": 2319 }, { "epoch": 0.25, "grad_norm": 1.7694418833893006, "learning_rate": 8.971504075131227e-06, "loss": 0.6762, "step": 2320 }, { "epoch": 0.25, "grad_norm": 1.974091680178075, "learning_rate": 8.970432306044543e-06, "loss": 0.7115, "step": 2321 }, { "epoch": 0.25, "grad_norm": 1.6900143704975952, "learning_rate": 8.969360042906432e-06, "loss": 0.6775, "step": 2322 }, { "epoch": 0.25, "grad_norm": 1.7466203330135202, "learning_rate": 8.968287285850323e-06, "loss": 0.6367, "step": 2323 }, { "epoch": 0.25, "grad_norm": 1.8371722637218775, "learning_rate": 8.967214035009699e-06, "loss": 0.7507, "step": 2324 }, { "epoch": 0.25, "grad_norm": 1.7782455896625382, "learning_rate": 8.966140290518111e-06, "loss": 0.6356, "step": 2325 }, { "epoch": 0.25, "grad_norm": 1.9351839130795532, "learning_rate": 8.965066052509167e-06, "loss": 0.6737, "step": 2326 }, { "epoch": 0.25, "grad_norm": 1.821073918042729, "learning_rate": 8.963991321116534e-06, "loss": 0.6675, "step": 2327 }, { "epoch": 0.25, "grad_norm": 1.7768035936116862, "learning_rate": 8.962916096473948e-06, "loss": 0.5603, "step": 2328 }, { "epoch": 0.25, "grad_norm": 1.7075404694212466, "learning_rate": 8.961840378715199e-06, "loss": 0.6531, "step": 2329 }, { "epoch": 0.25, "grad_norm": 1.258744786947668, "learning_rate": 8.960764167974144e-06, "loss": 0.5399, "step": 2330 }, { "epoch": 0.25, "grad_norm": 1.716458110273871, "learning_rate": 8.959687464384695e-06, "loss": 0.6729, "step": 2331 }, { "epoch": 0.25, "grad_norm": 1.3457616721286134, "learning_rate": 8.958610268080832e-06, "loss": 0.5973, "step": 2332 }, { "epoch": 0.25, "grad_norm": 1.9737083645608666, "learning_rate": 8.957532579196592e-06, "loss": 0.7499, "step": 2333 }, { "epoch": 0.25, "grad_norm": 1.7000791945762002, "learning_rate": 8.956454397866079e-06, "loss": 0.6249, "step": 2334 }, { "epoch": 0.25, "grad_norm": 1.757325932861698, "learning_rate": 8.955375724223447e-06, "loss": 0.6899, "step": 2335 }, { "epoch": 0.25, "grad_norm": 1.7000419236827464, "learning_rate": 8.954296558402922e-06, "loss": 0.675, "step": 2336 }, { "epoch": 0.25, "grad_norm": 1.8554167628943645, "learning_rate": 8.953216900538788e-06, "loss": 0.7498, "step": 2337 }, { "epoch": 0.25, "grad_norm": 1.8755494539899797, "learning_rate": 8.952136750765388e-06, "loss": 0.6993, "step": 2338 }, { "epoch": 0.25, "grad_norm": 2.0649455355937265, "learning_rate": 8.95105610921713e-06, "loss": 0.6645, "step": 2339 }, { "epoch": 0.25, "grad_norm": 1.8110443160942067, "learning_rate": 8.949974976028479e-06, "loss": 0.7239, "step": 2340 }, { "epoch": 0.25, "grad_norm": 1.7379025188461104, "learning_rate": 8.948893351333965e-06, "loss": 0.5428, "step": 2341 }, { "epoch": 0.25, "grad_norm": 1.430159829077057, "learning_rate": 8.947811235268176e-06, "loss": 0.5717, "step": 2342 }, { "epoch": 0.25, "grad_norm": 1.812673661183072, "learning_rate": 8.946728627965762e-06, "loss": 0.5897, "step": 2343 }, { "epoch": 0.25, "grad_norm": 1.702227606799054, "learning_rate": 8.945645529561437e-06, "loss": 0.6458, "step": 2344 }, { "epoch": 0.25, "grad_norm": 1.7550616971488193, "learning_rate": 8.94456194018997e-06, "loss": 0.6636, "step": 2345 }, { "epoch": 0.25, "grad_norm": 1.9365018026545633, "learning_rate": 8.9434778599862e-06, "loss": 0.5821, "step": 2346 }, { "epoch": 0.25, "grad_norm": 1.6269063162785895, "learning_rate": 8.94239328908502e-06, "loss": 0.5604, "step": 2347 }, { "epoch": 0.25, "grad_norm": 1.244684298895421, "learning_rate": 8.941308227621384e-06, "loss": 0.5679, "step": 2348 }, { "epoch": 0.25, "grad_norm": 1.7097739370938507, "learning_rate": 8.940222675730312e-06, "loss": 0.6288, "step": 2349 }, { "epoch": 0.25, "grad_norm": 1.7702763665964014, "learning_rate": 8.939136633546879e-06, "loss": 0.6459, "step": 2350 }, { "epoch": 0.25, "grad_norm": 1.9480090507673078, "learning_rate": 8.938050101206226e-06, "loss": 0.7146, "step": 2351 }, { "epoch": 0.25, "grad_norm": 1.7762969713471541, "learning_rate": 8.936963078843553e-06, "loss": 0.5735, "step": 2352 }, { "epoch": 0.25, "grad_norm": 1.9851201309647803, "learning_rate": 8.935875566594122e-06, "loss": 0.6499, "step": 2353 }, { "epoch": 0.25, "grad_norm": 1.7200659316218336, "learning_rate": 8.934787564593252e-06, "loss": 0.645, "step": 2354 }, { "epoch": 0.25, "grad_norm": 1.775038280752869, "learning_rate": 8.93369907297633e-06, "loss": 0.6511, "step": 2355 }, { "epoch": 0.25, "grad_norm": 1.966622524940903, "learning_rate": 8.932610091878798e-06, "loss": 0.7153, "step": 2356 }, { "epoch": 0.25, "grad_norm": 1.8719932636885308, "learning_rate": 8.931520621436158e-06, "loss": 0.706, "step": 2357 }, { "epoch": 0.25, "grad_norm": 3.332177740677145, "learning_rate": 8.93043066178398e-06, "loss": 0.563, "step": 2358 }, { "epoch": 0.25, "grad_norm": 1.7559791602350752, "learning_rate": 8.929340213057889e-06, "loss": 0.5572, "step": 2359 }, { "epoch": 0.25, "grad_norm": 2.670929442754762, "learning_rate": 8.928249275393572e-06, "loss": 0.5419, "step": 2360 }, { "epoch": 0.25, "grad_norm": 1.7020852105406497, "learning_rate": 8.927157848926778e-06, "loss": 0.5713, "step": 2361 }, { "epoch": 0.25, "grad_norm": 2.0159412914346766, "learning_rate": 8.926065933793316e-06, "loss": 0.7125, "step": 2362 }, { "epoch": 0.25, "grad_norm": 1.2979732534556836, "learning_rate": 8.924973530129054e-06, "loss": 0.5664, "step": 2363 }, { "epoch": 0.25, "grad_norm": 2.0589615638935417, "learning_rate": 8.923880638069926e-06, "loss": 0.6935, "step": 2364 }, { "epoch": 0.25, "grad_norm": 1.8925020183022248, "learning_rate": 8.92278725775192e-06, "loss": 0.6439, "step": 2365 }, { "epoch": 0.25, "grad_norm": 1.931907883119492, "learning_rate": 8.921693389311092e-06, "loss": 0.6466, "step": 2366 }, { "epoch": 0.25, "grad_norm": 1.8227277805973547, "learning_rate": 8.920599032883553e-06, "loss": 0.7042, "step": 2367 }, { "epoch": 0.25, "grad_norm": 1.9023226512692588, "learning_rate": 8.919504188605476e-06, "loss": 0.6802, "step": 2368 }, { "epoch": 0.25, "grad_norm": 1.7696096434023565, "learning_rate": 8.918408856613094e-06, "loss": 0.7066, "step": 2369 }, { "epoch": 0.25, "grad_norm": 1.7532002721230633, "learning_rate": 8.917313037042708e-06, "loss": 0.7175, "step": 2370 }, { "epoch": 0.25, "grad_norm": 1.8868075260416308, "learning_rate": 8.916216730030667e-06, "loss": 0.689, "step": 2371 }, { "epoch": 0.25, "grad_norm": 1.8571499449797917, "learning_rate": 8.91511993571339e-06, "loss": 0.6595, "step": 2372 }, { "epoch": 0.25, "grad_norm": 1.732179815132586, "learning_rate": 8.914022654227354e-06, "loss": 0.6078, "step": 2373 }, { "epoch": 0.25, "grad_norm": 1.6707232269690233, "learning_rate": 8.912924885709098e-06, "loss": 0.6142, "step": 2374 }, { "epoch": 0.25, "grad_norm": 2.0804279309716276, "learning_rate": 8.91182663029522e-06, "loss": 0.6444, "step": 2375 }, { "epoch": 0.25, "grad_norm": 2.0916863893821596, "learning_rate": 8.910727888122375e-06, "loss": 0.6497, "step": 2376 }, { "epoch": 0.25, "grad_norm": 2.0261119699134125, "learning_rate": 8.909628659327287e-06, "loss": 0.6087, "step": 2377 }, { "epoch": 0.25, "grad_norm": 2.056617329635166, "learning_rate": 8.908528944046735e-06, "loss": 0.5858, "step": 2378 }, { "epoch": 0.25, "grad_norm": 1.7622711937325755, "learning_rate": 8.907428742417557e-06, "loss": 0.6545, "step": 2379 }, { "epoch": 0.25, "grad_norm": 1.908412169850343, "learning_rate": 8.906328054576657e-06, "loss": 0.6955, "step": 2380 }, { "epoch": 0.25, "grad_norm": 2.0466319921447664, "learning_rate": 8.905226880660994e-06, "loss": 0.7193, "step": 2381 }, { "epoch": 0.25, "grad_norm": 1.6561873443834008, "learning_rate": 8.904125220807593e-06, "loss": 0.6334, "step": 2382 }, { "epoch": 0.25, "grad_norm": 2.8873322276715303, "learning_rate": 8.903023075153534e-06, "loss": 0.6048, "step": 2383 }, { "epoch": 0.25, "grad_norm": 2.5042809621584894, "learning_rate": 8.90192044383596e-06, "loss": 0.7303, "step": 2384 }, { "epoch": 0.25, "grad_norm": 1.908043588130302, "learning_rate": 8.900817326992075e-06, "loss": 0.6546, "step": 2385 }, { "epoch": 0.25, "grad_norm": 1.8808298345937229, "learning_rate": 8.899713724759145e-06, "loss": 0.7028, "step": 2386 }, { "epoch": 0.25, "grad_norm": 1.7961524209612105, "learning_rate": 8.898609637274489e-06, "loss": 0.6768, "step": 2387 }, { "epoch": 0.25, "grad_norm": 1.6822956697788898, "learning_rate": 8.897505064675495e-06, "loss": 0.6761, "step": 2388 }, { "epoch": 0.25, "grad_norm": 1.8812174904293906, "learning_rate": 8.896400007099608e-06, "loss": 0.7041, "step": 2389 }, { "epoch": 0.25, "grad_norm": 1.8868160042412085, "learning_rate": 8.895294464684333e-06, "loss": 0.664, "step": 2390 }, { "epoch": 0.26, "grad_norm": 2.5010685580700365, "learning_rate": 8.894188437567235e-06, "loss": 0.6546, "step": 2391 }, { "epoch": 0.26, "grad_norm": 1.6212119391080189, "learning_rate": 8.893081925885937e-06, "loss": 0.6341, "step": 2392 }, { "epoch": 0.26, "grad_norm": 1.790785871766376, "learning_rate": 8.891974929778133e-06, "loss": 0.6861, "step": 2393 }, { "epoch": 0.26, "grad_norm": 1.5762212273845717, "learning_rate": 8.890867449381561e-06, "loss": 0.5615, "step": 2394 }, { "epoch": 0.26, "grad_norm": 1.693350167215357, "learning_rate": 8.889759484834033e-06, "loss": 0.622, "step": 2395 }, { "epoch": 0.26, "grad_norm": 3.6844748725295258, "learning_rate": 8.888651036273415e-06, "loss": 0.7031, "step": 2396 }, { "epoch": 0.26, "grad_norm": 2.0272000114483912, "learning_rate": 8.887542103837633e-06, "loss": 0.7416, "step": 2397 }, { "epoch": 0.26, "grad_norm": 2.050496017546378, "learning_rate": 8.886432687664675e-06, "loss": 0.7025, "step": 2398 }, { "epoch": 0.26, "grad_norm": 1.8494155456252241, "learning_rate": 8.885322787892588e-06, "loss": 0.7069, "step": 2399 }, { "epoch": 0.26, "grad_norm": 1.735477152412518, "learning_rate": 8.88421240465948e-06, "loss": 0.7531, "step": 2400 }, { "epoch": 0.26, "grad_norm": 1.6092484286650048, "learning_rate": 8.883101538103518e-06, "loss": 0.6407, "step": 2401 }, { "epoch": 0.26, "grad_norm": 1.8336658361107447, "learning_rate": 8.881990188362934e-06, "loss": 0.7292, "step": 2402 }, { "epoch": 0.26, "grad_norm": 1.7470417651646621, "learning_rate": 8.880878355576013e-06, "loss": 0.677, "step": 2403 }, { "epoch": 0.26, "grad_norm": 1.6483854252583414, "learning_rate": 8.879766039881104e-06, "loss": 0.6073, "step": 2404 }, { "epoch": 0.26, "grad_norm": 1.6315725305275117, "learning_rate": 8.878653241416614e-06, "loss": 0.6114, "step": 2405 }, { "epoch": 0.26, "grad_norm": 1.7618157596901058, "learning_rate": 8.877539960321013e-06, "loss": 0.6924, "step": 2406 }, { "epoch": 0.26, "grad_norm": 1.7714593448469882, "learning_rate": 8.876426196732831e-06, "loss": 0.7467, "step": 2407 }, { "epoch": 0.26, "grad_norm": 1.976680800058807, "learning_rate": 8.875311950790653e-06, "loss": 0.6853, "step": 2408 }, { "epoch": 0.26, "grad_norm": 1.7887563317864994, "learning_rate": 8.874197222633129e-06, "loss": 0.6458, "step": 2409 }, { "epoch": 0.26, "grad_norm": 1.7942362023607592, "learning_rate": 8.873082012398969e-06, "loss": 0.6058, "step": 2410 }, { "epoch": 0.26, "grad_norm": 1.6197337661536055, "learning_rate": 8.87196632022694e-06, "loss": 0.6688, "step": 2411 }, { "epoch": 0.26, "grad_norm": 1.7271175848831486, "learning_rate": 8.870850146255871e-06, "loss": 0.6449, "step": 2412 }, { "epoch": 0.26, "grad_norm": 1.8410826810751404, "learning_rate": 8.86973349062465e-06, "loss": 0.6273, "step": 2413 }, { "epoch": 0.26, "grad_norm": 1.721957520266157, "learning_rate": 8.868616353472226e-06, "loss": 0.7073, "step": 2414 }, { "epoch": 0.26, "grad_norm": 1.7857669070749707, "learning_rate": 8.867498734937609e-06, "loss": 0.6125, "step": 2415 }, { "epoch": 0.26, "grad_norm": 1.8552815754091234, "learning_rate": 8.866380635159864e-06, "loss": 0.5703, "step": 2416 }, { "epoch": 0.26, "grad_norm": 1.7703780380872132, "learning_rate": 8.865262054278121e-06, "loss": 0.7247, "step": 2417 }, { "epoch": 0.26, "grad_norm": 1.7032371566550903, "learning_rate": 8.864142992431567e-06, "loss": 0.6773, "step": 2418 }, { "epoch": 0.26, "grad_norm": 1.7460973413606016, "learning_rate": 8.863023449759451e-06, "loss": 0.6241, "step": 2419 }, { "epoch": 0.26, "grad_norm": 1.6799725160634342, "learning_rate": 8.861903426401079e-06, "loss": 0.62, "step": 2420 }, { "epoch": 0.26, "grad_norm": 1.7391495466768991, "learning_rate": 8.860782922495821e-06, "loss": 0.6495, "step": 2421 }, { "epoch": 0.26, "grad_norm": 2.4705643698823714, "learning_rate": 8.859661938183104e-06, "loss": 0.6828, "step": 2422 }, { "epoch": 0.26, "grad_norm": 1.7305170844998277, "learning_rate": 8.858540473602412e-06, "loss": 0.7758, "step": 2423 }, { "epoch": 0.26, "grad_norm": 1.4385866845688153, "learning_rate": 8.857418528893293e-06, "loss": 0.5751, "step": 2424 }, { "epoch": 0.26, "grad_norm": 1.5009072751874863, "learning_rate": 8.856296104195357e-06, "loss": 0.6409, "step": 2425 }, { "epoch": 0.26, "grad_norm": 1.8134114577903844, "learning_rate": 8.855173199648267e-06, "loss": 0.6609, "step": 2426 }, { "epoch": 0.26, "grad_norm": 1.6877928914282903, "learning_rate": 8.854049815391748e-06, "loss": 0.8121, "step": 2427 }, { "epoch": 0.26, "grad_norm": 1.7070967831766595, "learning_rate": 8.852925951565588e-06, "loss": 0.6725, "step": 2428 }, { "epoch": 0.26, "grad_norm": 1.8136652033807228, "learning_rate": 8.851801608309632e-06, "loss": 0.5919, "step": 2429 }, { "epoch": 0.26, "grad_norm": 1.582659311640394, "learning_rate": 8.850676785763784e-06, "loss": 0.5663, "step": 2430 }, { "epoch": 0.26, "grad_norm": 1.6779743124431878, "learning_rate": 8.849551484068008e-06, "loss": 0.5916, "step": 2431 }, { "epoch": 0.26, "grad_norm": 1.7918028632004954, "learning_rate": 8.84842570336233e-06, "loss": 0.6373, "step": 2432 }, { "epoch": 0.26, "grad_norm": 2.0137380027702876, "learning_rate": 8.847299443786836e-06, "loss": 0.647, "step": 2433 }, { "epoch": 0.26, "grad_norm": 1.88213573452768, "learning_rate": 8.846172705481665e-06, "loss": 0.6801, "step": 2434 }, { "epoch": 0.26, "grad_norm": 1.8566296124579016, "learning_rate": 8.845045488587022e-06, "loss": 0.5914, "step": 2435 }, { "epoch": 0.26, "grad_norm": 1.9729574570642914, "learning_rate": 8.84391779324317e-06, "loss": 0.5924, "step": 2436 }, { "epoch": 0.26, "grad_norm": 1.8296238384490953, "learning_rate": 8.842789619590431e-06, "loss": 0.6914, "step": 2437 }, { "epoch": 0.26, "grad_norm": 1.8907400911862244, "learning_rate": 8.841660967769184e-06, "loss": 0.7641, "step": 2438 }, { "epoch": 0.26, "grad_norm": 1.750016296510187, "learning_rate": 8.840531837919875e-06, "loss": 0.5835, "step": 2439 }, { "epoch": 0.26, "grad_norm": 1.6410421486274074, "learning_rate": 8.839402230183e-06, "loss": 0.5794, "step": 2440 }, { "epoch": 0.26, "grad_norm": 1.9312717184862294, "learning_rate": 8.838272144699123e-06, "loss": 0.8416, "step": 2441 }, { "epoch": 0.26, "grad_norm": 2.069159181356407, "learning_rate": 8.83714158160886e-06, "loss": 0.7208, "step": 2442 }, { "epoch": 0.26, "grad_norm": 1.4620854733869282, "learning_rate": 8.836010541052894e-06, "loss": 0.5761, "step": 2443 }, { "epoch": 0.26, "grad_norm": 1.7058332356929637, "learning_rate": 8.834879023171958e-06, "loss": 0.6161, "step": 2444 }, { "epoch": 0.26, "grad_norm": 1.8638244475112247, "learning_rate": 8.833747028106855e-06, "loss": 0.6922, "step": 2445 }, { "epoch": 0.26, "grad_norm": 2.245142035152422, "learning_rate": 8.83261455599844e-06, "loss": 0.7306, "step": 2446 }, { "epoch": 0.26, "grad_norm": 1.8732398539936004, "learning_rate": 8.831481606987628e-06, "loss": 0.6565, "step": 2447 }, { "epoch": 0.26, "grad_norm": 2.0875574581633174, "learning_rate": 8.830348181215396e-06, "loss": 0.64, "step": 2448 }, { "epoch": 0.26, "grad_norm": 2.0544392166871006, "learning_rate": 8.829214278822782e-06, "loss": 0.6009, "step": 2449 }, { "epoch": 0.26, "grad_norm": 1.8925467900191046, "learning_rate": 8.828079899950875e-06, "loss": 0.5795, "step": 2450 }, { "epoch": 0.26, "grad_norm": 1.6834741820828731, "learning_rate": 8.826945044740834e-06, "loss": 0.674, "step": 2451 }, { "epoch": 0.26, "grad_norm": 1.4272976006690672, "learning_rate": 8.825809713333868e-06, "loss": 0.5625, "step": 2452 }, { "epoch": 0.26, "grad_norm": 1.987721725867695, "learning_rate": 8.824673905871252e-06, "loss": 0.7098, "step": 2453 }, { "epoch": 0.26, "grad_norm": 2.0053774393533894, "learning_rate": 8.823537622494315e-06, "loss": 0.7327, "step": 2454 }, { "epoch": 0.26, "grad_norm": 1.8338937997408578, "learning_rate": 8.822400863344453e-06, "loss": 0.6424, "step": 2455 }, { "epoch": 0.26, "grad_norm": 2.0413376757708637, "learning_rate": 8.82126362856311e-06, "loss": 0.7222, "step": 2456 }, { "epoch": 0.26, "grad_norm": 1.8384259676267725, "learning_rate": 8.820125918291797e-06, "loss": 0.701, "step": 2457 }, { "epoch": 0.26, "grad_norm": 1.8550631456862923, "learning_rate": 8.818987732672082e-06, "loss": 0.6754, "step": 2458 }, { "epoch": 0.26, "grad_norm": 1.6812458714754848, "learning_rate": 8.817849071845595e-06, "loss": 0.6009, "step": 2459 }, { "epoch": 0.26, "grad_norm": 1.7394528874472996, "learning_rate": 8.816709935954022e-06, "loss": 0.6715, "step": 2460 }, { "epoch": 0.26, "grad_norm": 1.97704475497559, "learning_rate": 8.815570325139105e-06, "loss": 0.7583, "step": 2461 }, { "epoch": 0.26, "grad_norm": 1.4032069050891653, "learning_rate": 8.814430239542652e-06, "loss": 0.5808, "step": 2462 }, { "epoch": 0.26, "grad_norm": 1.9032228286689028, "learning_rate": 8.813289679306526e-06, "loss": 0.559, "step": 2463 }, { "epoch": 0.26, "grad_norm": 1.985344334130491, "learning_rate": 8.81214864457265e-06, "loss": 0.6377, "step": 2464 }, { "epoch": 0.26, "grad_norm": 1.959902160347469, "learning_rate": 8.811007135483008e-06, "loss": 0.702, "step": 2465 }, { "epoch": 0.26, "grad_norm": 1.8116504066648758, "learning_rate": 8.809865152179636e-06, "loss": 0.633, "step": 2466 }, { "epoch": 0.26, "grad_norm": 1.2416934088467106, "learning_rate": 8.80872269480464e-06, "loss": 0.5518, "step": 2467 }, { "epoch": 0.26, "grad_norm": 1.8436403316206063, "learning_rate": 8.807579763500174e-06, "loss": 0.6535, "step": 2468 }, { "epoch": 0.26, "grad_norm": 1.8077006815741639, "learning_rate": 8.806436358408457e-06, "loss": 0.6321, "step": 2469 }, { "epoch": 0.26, "grad_norm": 1.853203954811162, "learning_rate": 8.80529247967177e-06, "loss": 0.7628, "step": 2470 }, { "epoch": 0.26, "grad_norm": 1.9250179441561361, "learning_rate": 8.804148127432443e-06, "loss": 0.6553, "step": 2471 }, { "epoch": 0.26, "grad_norm": 1.9894310594358504, "learning_rate": 8.803003301832876e-06, "loss": 0.7464, "step": 2472 }, { "epoch": 0.26, "grad_norm": 1.8800102789583768, "learning_rate": 8.80185800301552e-06, "loss": 0.6951, "step": 2473 }, { "epoch": 0.26, "grad_norm": 1.7369088442761613, "learning_rate": 8.800712231122887e-06, "loss": 0.6847, "step": 2474 }, { "epoch": 0.26, "grad_norm": 1.7827674938350406, "learning_rate": 8.79956598629755e-06, "loss": 0.7, "step": 2475 }, { "epoch": 0.26, "grad_norm": 1.2937601742610803, "learning_rate": 8.79841926868214e-06, "loss": 0.577, "step": 2476 }, { "epoch": 0.26, "grad_norm": 1.7373252846285858, "learning_rate": 8.797272078419342e-06, "loss": 0.6224, "step": 2477 }, { "epoch": 0.26, "grad_norm": 1.8540509614607628, "learning_rate": 8.796124415651908e-06, "loss": 0.6349, "step": 2478 }, { "epoch": 0.26, "grad_norm": 1.8283719413236752, "learning_rate": 8.794976280522642e-06, "loss": 0.6993, "step": 2479 }, { "epoch": 0.26, "grad_norm": 1.7699279472944762, "learning_rate": 8.793827673174413e-06, "loss": 0.667, "step": 2480 }, { "epoch": 0.26, "grad_norm": 1.7587874911707875, "learning_rate": 8.792678593750141e-06, "loss": 0.6493, "step": 2481 }, { "epoch": 0.26, "grad_norm": 1.6308429255234906, "learning_rate": 8.791529042392813e-06, "loss": 0.6065, "step": 2482 }, { "epoch": 0.26, "grad_norm": 2.026482807344097, "learning_rate": 8.790379019245468e-06, "loss": 0.5869, "step": 2483 }, { "epoch": 0.26, "grad_norm": 1.6004124275140845, "learning_rate": 8.789228524451207e-06, "loss": 0.6145, "step": 2484 }, { "epoch": 0.27, "grad_norm": 1.9669041957955118, "learning_rate": 8.788077558153193e-06, "loss": 0.7224, "step": 2485 }, { "epoch": 0.27, "grad_norm": 1.3440445057796357, "learning_rate": 8.786926120494635e-06, "loss": 0.5596, "step": 2486 }, { "epoch": 0.27, "grad_norm": 1.30572529379402, "learning_rate": 8.785774211618817e-06, "loss": 0.5727, "step": 2487 }, { "epoch": 0.27, "grad_norm": 1.619720633248047, "learning_rate": 8.784621831669072e-06, "loss": 0.5632, "step": 2488 }, { "epoch": 0.27, "grad_norm": 1.8512680411743974, "learning_rate": 8.783468980788793e-06, "loss": 0.6348, "step": 2489 }, { "epoch": 0.27, "grad_norm": 1.846718870134402, "learning_rate": 8.782315659121432e-06, "loss": 0.7508, "step": 2490 }, { "epoch": 0.27, "grad_norm": 1.578023309376675, "learning_rate": 8.7811618668105e-06, "loss": 0.5675, "step": 2491 }, { "epoch": 0.27, "grad_norm": 1.9213916556619297, "learning_rate": 8.78000760399957e-06, "loss": 0.6788, "step": 2492 }, { "epoch": 0.27, "grad_norm": 1.9069966488537942, "learning_rate": 8.778852870832266e-06, "loss": 0.7205, "step": 2493 }, { "epoch": 0.27, "grad_norm": 1.817541241621002, "learning_rate": 8.777697667452273e-06, "loss": 0.6899, "step": 2494 }, { "epoch": 0.27, "grad_norm": 1.2979290723766714, "learning_rate": 8.776541994003342e-06, "loss": 0.5338, "step": 2495 }, { "epoch": 0.27, "grad_norm": 1.8544791163423633, "learning_rate": 8.775385850629271e-06, "loss": 0.7182, "step": 2496 }, { "epoch": 0.27, "grad_norm": 1.9691656877585, "learning_rate": 8.774229237473926e-06, "loss": 0.7038, "step": 2497 }, { "epoch": 0.27, "grad_norm": 1.8996610881323275, "learning_rate": 8.773072154681224e-06, "loss": 0.6707, "step": 2498 }, { "epoch": 0.27, "grad_norm": 1.5978077945888995, "learning_rate": 8.771914602395144e-06, "loss": 0.5845, "step": 2499 }, { "epoch": 0.27, "grad_norm": 1.9926947426789037, "learning_rate": 8.770756580759728e-06, "loss": 0.6657, "step": 2500 }, { "epoch": 0.27, "grad_norm": 1.710376983887937, "learning_rate": 8.769598089919065e-06, "loss": 0.5494, "step": 2501 }, { "epoch": 0.27, "grad_norm": 1.8900750525284455, "learning_rate": 8.768439130017315e-06, "loss": 0.6621, "step": 2502 }, { "epoch": 0.27, "grad_norm": 1.9009332545801063, "learning_rate": 8.767279701198686e-06, "loss": 0.694, "step": 2503 }, { "epoch": 0.27, "grad_norm": 1.771487626203962, "learning_rate": 8.766119803607451e-06, "loss": 0.6642, "step": 2504 }, { "epoch": 0.27, "grad_norm": 1.8650618356124715, "learning_rate": 8.764959437387939e-06, "loss": 0.6757, "step": 2505 }, { "epoch": 0.27, "grad_norm": 1.8688401073384862, "learning_rate": 8.763798602684539e-06, "loss": 0.6587, "step": 2506 }, { "epoch": 0.27, "grad_norm": 1.9688988403210606, "learning_rate": 8.762637299641692e-06, "loss": 0.7343, "step": 2507 }, { "epoch": 0.27, "grad_norm": 1.7511751692579125, "learning_rate": 8.761475528403907e-06, "loss": 0.6497, "step": 2508 }, { "epoch": 0.27, "grad_norm": 1.6009803262743174, "learning_rate": 8.760313289115745e-06, "loss": 0.5486, "step": 2509 }, { "epoch": 0.27, "grad_norm": 1.7035504806035247, "learning_rate": 8.759150581921825e-06, "loss": 0.6686, "step": 2510 }, { "epoch": 0.27, "grad_norm": 1.9038654010450604, "learning_rate": 8.757987406966826e-06, "loss": 0.7345, "step": 2511 }, { "epoch": 0.27, "grad_norm": 1.1465673756892452, "learning_rate": 8.756823764395486e-06, "loss": 0.5517, "step": 2512 }, { "epoch": 0.27, "grad_norm": 1.76530669332024, "learning_rate": 8.755659654352599e-06, "loss": 0.6983, "step": 2513 }, { "epoch": 0.27, "grad_norm": 2.0563745360444785, "learning_rate": 8.75449507698302e-06, "loss": 0.7277, "step": 2514 }, { "epoch": 0.27, "grad_norm": 1.3954315300340399, "learning_rate": 8.75333003243166e-06, "loss": 0.5975, "step": 2515 }, { "epoch": 0.27, "grad_norm": 1.6867877644324563, "learning_rate": 8.752164520843487e-06, "loss": 0.6022, "step": 2516 }, { "epoch": 0.27, "grad_norm": 1.8438540124347582, "learning_rate": 8.75099854236353e-06, "loss": 0.6356, "step": 2517 }, { "epoch": 0.27, "grad_norm": 1.723764484686223, "learning_rate": 8.749832097136874e-06, "loss": 0.7356, "step": 2518 }, { "epoch": 0.27, "grad_norm": 1.7609436533374305, "learning_rate": 8.748665185308665e-06, "loss": 0.6918, "step": 2519 }, { "epoch": 0.27, "grad_norm": 1.853019414287174, "learning_rate": 8.7474978070241e-06, "loss": 0.669, "step": 2520 }, { "epoch": 0.27, "grad_norm": 1.9738002633723521, "learning_rate": 8.746329962428446e-06, "loss": 0.7342, "step": 2521 }, { "epoch": 0.27, "grad_norm": 1.972509159242629, "learning_rate": 8.745161651667016e-06, "loss": 0.6908, "step": 2522 }, { "epoch": 0.27, "grad_norm": 1.3536947456019142, "learning_rate": 8.743992874885187e-06, "loss": 0.5757, "step": 2523 }, { "epoch": 0.27, "grad_norm": 1.7011020070828953, "learning_rate": 8.742823632228392e-06, "loss": 0.6602, "step": 2524 }, { "epoch": 0.27, "grad_norm": 1.7736855397148032, "learning_rate": 8.741653923842126e-06, "loss": 0.6112, "step": 2525 }, { "epoch": 0.27, "grad_norm": 1.7313566076245135, "learning_rate": 8.740483749871938e-06, "loss": 0.6365, "step": 2526 }, { "epoch": 0.27, "grad_norm": 1.9273367096663103, "learning_rate": 8.739313110463432e-06, "loss": 0.7305, "step": 2527 }, { "epoch": 0.27, "grad_norm": 1.7678004077034462, "learning_rate": 8.738142005762279e-06, "loss": 0.6353, "step": 2528 }, { "epoch": 0.27, "grad_norm": 1.7707419506353073, "learning_rate": 8.736970435914201e-06, "loss": 0.5948, "step": 2529 }, { "epoch": 0.27, "grad_norm": 1.7502442495408896, "learning_rate": 8.735798401064976e-06, "loss": 0.6551, "step": 2530 }, { "epoch": 0.27, "grad_norm": 2.207957483755071, "learning_rate": 8.734625901360449e-06, "loss": 0.726, "step": 2531 }, { "epoch": 0.27, "grad_norm": 1.826240244475983, "learning_rate": 8.733452936946513e-06, "loss": 0.6566, "step": 2532 }, { "epoch": 0.27, "grad_norm": 1.7347160017683454, "learning_rate": 8.732279507969128e-06, "loss": 0.6279, "step": 2533 }, { "epoch": 0.27, "grad_norm": 1.5138205471263844, "learning_rate": 8.7311056145743e-06, "loss": 0.6605, "step": 2534 }, { "epoch": 0.27, "grad_norm": 1.9091235777678386, "learning_rate": 8.729931256908106e-06, "loss": 0.6137, "step": 2535 }, { "epoch": 0.27, "grad_norm": 1.7763778911852763, "learning_rate": 8.72875643511667e-06, "loss": 0.6608, "step": 2536 }, { "epoch": 0.27, "grad_norm": 1.8324367059941424, "learning_rate": 8.72758114934618e-06, "loss": 0.7695, "step": 2537 }, { "epoch": 0.27, "grad_norm": 1.9520241461326804, "learning_rate": 8.72640539974288e-06, "loss": 0.6781, "step": 2538 }, { "epoch": 0.27, "grad_norm": 1.6694091106694773, "learning_rate": 8.725229186453072e-06, "loss": 0.5486, "step": 2539 }, { "epoch": 0.27, "grad_norm": 1.9520063045817866, "learning_rate": 8.724052509623113e-06, "loss": 0.7061, "step": 2540 }, { "epoch": 0.27, "grad_norm": 1.770579325008354, "learning_rate": 8.722875369399424e-06, "loss": 0.5761, "step": 2541 }, { "epoch": 0.27, "grad_norm": 1.8305759758233702, "learning_rate": 8.721697765928477e-06, "loss": 0.6344, "step": 2542 }, { "epoch": 0.27, "grad_norm": 1.7524603170055617, "learning_rate": 8.720519699356804e-06, "loss": 0.6559, "step": 2543 }, { "epoch": 0.27, "grad_norm": 1.2931603078046232, "learning_rate": 8.719341169830997e-06, "loss": 0.5221, "step": 2544 }, { "epoch": 0.27, "grad_norm": 1.9892201656486121, "learning_rate": 8.7181621774977e-06, "loss": 0.6459, "step": 2545 }, { "epoch": 0.27, "grad_norm": 1.9577893892466693, "learning_rate": 8.716982722503622e-06, "loss": 0.6241, "step": 2546 }, { "epoch": 0.27, "grad_norm": 1.8683439462623845, "learning_rate": 8.715802804995526e-06, "loss": 0.6861, "step": 2547 }, { "epoch": 0.27, "grad_norm": 1.9229076526221676, "learning_rate": 8.714622425120229e-06, "loss": 0.6928, "step": 2548 }, { "epoch": 0.27, "grad_norm": 1.7871885006568784, "learning_rate": 8.713441583024608e-06, "loss": 0.6899, "step": 2549 }, { "epoch": 0.27, "grad_norm": 1.8231661216421489, "learning_rate": 8.712260278855605e-06, "loss": 0.577, "step": 2550 }, { "epoch": 0.27, "grad_norm": 1.9197622651776423, "learning_rate": 8.711078512760206e-06, "loss": 0.6261, "step": 2551 }, { "epoch": 0.27, "grad_norm": 1.5139036176275513, "learning_rate": 8.709896284885463e-06, "loss": 0.5698, "step": 2552 }, { "epoch": 0.27, "grad_norm": 1.3295243539111843, "learning_rate": 8.708713595378484e-06, "loss": 0.5758, "step": 2553 }, { "epoch": 0.27, "grad_norm": 1.8186890826335984, "learning_rate": 8.707530444386437e-06, "loss": 0.6491, "step": 2554 }, { "epoch": 0.27, "grad_norm": 2.0960231239198746, "learning_rate": 8.70634683205654e-06, "loss": 0.5455, "step": 2555 }, { "epoch": 0.27, "grad_norm": 1.8679289716278416, "learning_rate": 8.705162758536077e-06, "loss": 0.6834, "step": 2556 }, { "epoch": 0.27, "grad_norm": 1.8788351904985374, "learning_rate": 8.703978223972382e-06, "loss": 0.7011, "step": 2557 }, { "epoch": 0.27, "grad_norm": 1.9105786424077256, "learning_rate": 8.702793228512853e-06, "loss": 0.7373, "step": 2558 }, { "epoch": 0.27, "grad_norm": 1.6162002296514228, "learning_rate": 8.70160777230494e-06, "loss": 0.6273, "step": 2559 }, { "epoch": 0.27, "grad_norm": 1.5699444723273008, "learning_rate": 8.700421855496153e-06, "loss": 0.7008, "step": 2560 }, { "epoch": 0.27, "grad_norm": 2.347672109286751, "learning_rate": 8.699235478234059e-06, "loss": 0.5884, "step": 2561 }, { "epoch": 0.27, "grad_norm": 2.150886409146185, "learning_rate": 8.698048640666282e-06, "loss": 0.7791, "step": 2562 }, { "epoch": 0.27, "grad_norm": 2.081926694872289, "learning_rate": 8.696861342940502e-06, "loss": 0.6758, "step": 2563 }, { "epoch": 0.27, "grad_norm": 2.099300438153473, "learning_rate": 8.69567358520446e-06, "loss": 0.6802, "step": 2564 }, { "epoch": 0.27, "grad_norm": 1.9226178584947957, "learning_rate": 8.69448536760595e-06, "loss": 0.5809, "step": 2565 }, { "epoch": 0.27, "grad_norm": 1.8521268168791745, "learning_rate": 8.693296690292827e-06, "loss": 0.7311, "step": 2566 }, { "epoch": 0.27, "grad_norm": 1.3128348263110161, "learning_rate": 8.692107553412998e-06, "loss": 0.5469, "step": 2567 }, { "epoch": 0.27, "grad_norm": 1.7334057629110802, "learning_rate": 8.690917957114435e-06, "loss": 0.7085, "step": 2568 }, { "epoch": 0.27, "grad_norm": 1.8305431564427324, "learning_rate": 8.689727901545157e-06, "loss": 0.704, "step": 2569 }, { "epoch": 0.27, "grad_norm": 1.956196539782985, "learning_rate": 8.688537386853252e-06, "loss": 0.6205, "step": 2570 }, { "epoch": 0.27, "grad_norm": 1.9854761892436803, "learning_rate": 8.687346413186854e-06, "loss": 0.6887, "step": 2571 }, { "epoch": 0.27, "grad_norm": 1.9422231010124482, "learning_rate": 8.686154980694161e-06, "loss": 0.6446, "step": 2572 }, { "epoch": 0.27, "grad_norm": 1.799220448870738, "learning_rate": 8.684963089523427e-06, "loss": 0.5349, "step": 2573 }, { "epoch": 0.27, "grad_norm": 2.2677032103685497, "learning_rate": 8.683770739822958e-06, "loss": 0.6553, "step": 2574 }, { "epoch": 0.27, "grad_norm": 1.7863723938432154, "learning_rate": 8.682577931741127e-06, "loss": 0.5519, "step": 2575 }, { "epoch": 0.27, "grad_norm": 1.9948592687646904, "learning_rate": 8.681384665426355e-06, "loss": 0.7018, "step": 2576 }, { "epoch": 0.27, "grad_norm": 2.0506748490913016, "learning_rate": 8.680190941027124e-06, "loss": 0.6133, "step": 2577 }, { "epoch": 0.27, "grad_norm": 1.8526088394709677, "learning_rate": 8.678996758691971e-06, "loss": 0.5549, "step": 2578 }, { "epoch": 0.28, "grad_norm": 1.8203501866473188, "learning_rate": 8.677802118569495e-06, "loss": 0.76, "step": 2579 }, { "epoch": 0.28, "grad_norm": 1.9921537244341834, "learning_rate": 8.676607020808344e-06, "loss": 0.6624, "step": 2580 }, { "epoch": 0.28, "grad_norm": 1.819903389968583, "learning_rate": 8.675411465557228e-06, "loss": 0.6632, "step": 2581 }, { "epoch": 0.28, "grad_norm": 1.804200659211632, "learning_rate": 8.674215452964916e-06, "loss": 0.6352, "step": 2582 }, { "epoch": 0.28, "grad_norm": 1.7300254794882182, "learning_rate": 8.673018983180228e-06, "loss": 0.6936, "step": 2583 }, { "epoch": 0.28, "grad_norm": 1.7708473896000996, "learning_rate": 8.671822056352044e-06, "loss": 0.6379, "step": 2584 }, { "epoch": 0.28, "grad_norm": 1.8019690610355417, "learning_rate": 8.670624672629302e-06, "loss": 0.6368, "step": 2585 }, { "epoch": 0.28, "grad_norm": 1.8009406975625588, "learning_rate": 8.669426832160997e-06, "loss": 0.7892, "step": 2586 }, { "epoch": 0.28, "grad_norm": 1.5501658468579393, "learning_rate": 8.668228535096176e-06, "loss": 0.5556, "step": 2587 }, { "epoch": 0.28, "grad_norm": 1.6248981668179885, "learning_rate": 8.66702978158395e-06, "loss": 0.6855, "step": 2588 }, { "epoch": 0.28, "grad_norm": 1.2665316403886913, "learning_rate": 8.665830571773478e-06, "loss": 0.5554, "step": 2589 }, { "epoch": 0.28, "grad_norm": 1.7759335216487986, "learning_rate": 8.664630905813987e-06, "loss": 0.7152, "step": 2590 }, { "epoch": 0.28, "grad_norm": 1.2041771599900415, "learning_rate": 8.66343078385475e-06, "loss": 0.5382, "step": 2591 }, { "epoch": 0.28, "grad_norm": 2.2654383087426004, "learning_rate": 8.662230206045103e-06, "loss": 0.7108, "step": 2592 }, { "epoch": 0.28, "grad_norm": 1.8306934495231804, "learning_rate": 8.661029172534436e-06, "loss": 0.6549, "step": 2593 }, { "epoch": 0.28, "grad_norm": 1.5619669857300629, "learning_rate": 8.6598276834722e-06, "loss": 0.602, "step": 2594 }, { "epoch": 0.28, "grad_norm": 1.9327750874263678, "learning_rate": 8.658625739007897e-06, "loss": 0.7104, "step": 2595 }, { "epoch": 0.28, "grad_norm": 1.8514842483877785, "learning_rate": 8.65742333929109e-06, "loss": 0.7017, "step": 2596 }, { "epoch": 0.28, "grad_norm": 1.9067872453396082, "learning_rate": 8.656220484471392e-06, "loss": 0.6716, "step": 2597 }, { "epoch": 0.28, "grad_norm": 1.8351065226676686, "learning_rate": 8.655017174698481e-06, "loss": 0.7037, "step": 2598 }, { "epoch": 0.28, "grad_norm": 1.6967212960884366, "learning_rate": 8.65381341012209e-06, "loss": 0.6772, "step": 2599 }, { "epoch": 0.28, "grad_norm": 1.98361534237915, "learning_rate": 8.652609190892002e-06, "loss": 0.6461, "step": 2600 }, { "epoch": 0.28, "grad_norm": 2.0339867662398374, "learning_rate": 8.651404517158064e-06, "loss": 0.7535, "step": 2601 }, { "epoch": 0.28, "grad_norm": 2.165260999593777, "learning_rate": 8.650199389070178e-06, "loss": 0.6841, "step": 2602 }, { "epoch": 0.28, "grad_norm": 1.7882313864094315, "learning_rate": 8.648993806778297e-06, "loss": 0.7442, "step": 2603 }, { "epoch": 0.28, "grad_norm": 1.9652155707244332, "learning_rate": 8.647787770432439e-06, "loss": 0.6962, "step": 2604 }, { "epoch": 0.28, "grad_norm": 1.9456709384880844, "learning_rate": 8.64658128018267e-06, "loss": 0.7099, "step": 2605 }, { "epoch": 0.28, "grad_norm": 1.8452954403042086, "learning_rate": 8.645374336179122e-06, "loss": 0.6521, "step": 2606 }, { "epoch": 0.28, "grad_norm": 1.7411173676968204, "learning_rate": 8.644166938571975e-06, "loss": 0.6136, "step": 2607 }, { "epoch": 0.28, "grad_norm": 1.6123229351409774, "learning_rate": 8.642959087511471e-06, "loss": 0.5707, "step": 2608 }, { "epoch": 0.28, "grad_norm": 1.6741509700225399, "learning_rate": 8.641750783147902e-06, "loss": 0.6412, "step": 2609 }, { "epoch": 0.28, "grad_norm": 2.6977083139533833, "learning_rate": 8.640542025631625e-06, "loss": 0.6241, "step": 2610 }, { "epoch": 0.28, "grad_norm": 2.10567174309463, "learning_rate": 8.639332815113046e-06, "loss": 0.7753, "step": 2611 }, { "epoch": 0.28, "grad_norm": 1.6239966484233188, "learning_rate": 8.63812315174263e-06, "loss": 0.586, "step": 2612 }, { "epoch": 0.28, "grad_norm": 1.5717652727972402, "learning_rate": 8.636913035670901e-06, "loss": 0.5298, "step": 2613 }, { "epoch": 0.28, "grad_norm": 1.7407361739680893, "learning_rate": 8.635702467048434e-06, "loss": 0.654, "step": 2614 }, { "epoch": 0.28, "grad_norm": 1.7868120226684079, "learning_rate": 8.634491446025868e-06, "loss": 0.6902, "step": 2615 }, { "epoch": 0.28, "grad_norm": 1.8732269360455225, "learning_rate": 8.63327997275389e-06, "loss": 0.715, "step": 2616 }, { "epoch": 0.28, "grad_norm": 1.9418245302623263, "learning_rate": 8.632068047383246e-06, "loss": 0.6945, "step": 2617 }, { "epoch": 0.28, "grad_norm": 1.6845433628351674, "learning_rate": 8.63085567006474e-06, "loss": 0.6417, "step": 2618 }, { "epoch": 0.28, "grad_norm": 1.8223448321174665, "learning_rate": 8.629642840949232e-06, "loss": 0.5747, "step": 2619 }, { "epoch": 0.28, "grad_norm": 1.7922296335889911, "learning_rate": 8.628429560187638e-06, "loss": 0.6302, "step": 2620 }, { "epoch": 0.28, "grad_norm": 2.078152077261566, "learning_rate": 8.62721582793093e-06, "loss": 0.6751, "step": 2621 }, { "epoch": 0.28, "grad_norm": 1.837699845562609, "learning_rate": 8.626001644330134e-06, "loss": 0.6614, "step": 2622 }, { "epoch": 0.28, "grad_norm": 1.150524489521407, "learning_rate": 8.624787009536334e-06, "loss": 0.5633, "step": 2623 }, { "epoch": 0.28, "grad_norm": 1.7533547837154455, "learning_rate": 8.623571923700673e-06, "loss": 0.5905, "step": 2624 }, { "epoch": 0.28, "grad_norm": 1.1820283388608697, "learning_rate": 8.622356386974346e-06, "loss": 0.5739, "step": 2625 }, { "epoch": 0.28, "grad_norm": 2.037603194422014, "learning_rate": 8.621140399508604e-06, "loss": 0.6418, "step": 2626 }, { "epoch": 0.28, "grad_norm": 1.7529553682769152, "learning_rate": 8.619923961454757e-06, "loss": 0.725, "step": 2627 }, { "epoch": 0.28, "grad_norm": 1.149334226342297, "learning_rate": 8.61870707296417e-06, "loss": 0.5338, "step": 2628 }, { "epoch": 0.28, "grad_norm": 1.712876406600509, "learning_rate": 8.61748973418826e-06, "loss": 0.5732, "step": 2629 }, { "epoch": 0.28, "grad_norm": 1.610347569643792, "learning_rate": 8.61627194527851e-06, "loss": 0.6254, "step": 2630 }, { "epoch": 0.28, "grad_norm": 2.1134229086650995, "learning_rate": 8.615053706386448e-06, "loss": 0.6964, "step": 2631 }, { "epoch": 0.28, "grad_norm": 1.5416935046599043, "learning_rate": 8.613835017663664e-06, "loss": 0.61, "step": 2632 }, { "epoch": 0.28, "grad_norm": 1.5310022390587317, "learning_rate": 8.612615879261803e-06, "loss": 0.6425, "step": 2633 }, { "epoch": 0.28, "grad_norm": 1.9996477376103794, "learning_rate": 8.611396291332565e-06, "loss": 0.6387, "step": 2634 }, { "epoch": 0.28, "grad_norm": 1.9670360961884974, "learning_rate": 8.610176254027707e-06, "loss": 0.6546, "step": 2635 }, { "epoch": 0.28, "grad_norm": 1.6559395360755809, "learning_rate": 8.608955767499042e-06, "loss": 0.633, "step": 2636 }, { "epoch": 0.28, "grad_norm": 1.8045924862708937, "learning_rate": 8.607734831898437e-06, "loss": 0.632, "step": 2637 }, { "epoch": 0.28, "grad_norm": 1.6439641651728927, "learning_rate": 8.606513447377817e-06, "loss": 0.5799, "step": 2638 }, { "epoch": 0.28, "grad_norm": 2.0279186312591087, "learning_rate": 8.605291614089164e-06, "loss": 0.7916, "step": 2639 }, { "epoch": 0.28, "grad_norm": 1.8250485391395506, "learning_rate": 8.60406933218451e-06, "loss": 0.7233, "step": 2640 }, { "epoch": 0.28, "grad_norm": 1.9379743816447799, "learning_rate": 8.60284660181595e-06, "loss": 0.7504, "step": 2641 }, { "epoch": 0.28, "grad_norm": 2.0072554360074695, "learning_rate": 8.60162342313563e-06, "loss": 0.6649, "step": 2642 }, { "epoch": 0.28, "grad_norm": 1.9047200369386557, "learning_rate": 8.600399796295754e-06, "loss": 0.7036, "step": 2643 }, { "epoch": 0.28, "grad_norm": 1.8018290043598957, "learning_rate": 8.59917572144858e-06, "loss": 0.7677, "step": 2644 }, { "epoch": 0.28, "grad_norm": 1.8032984307314008, "learning_rate": 8.597951198746424e-06, "loss": 0.6866, "step": 2645 }, { "epoch": 0.28, "grad_norm": 1.5399390693987924, "learning_rate": 8.596726228341656e-06, "loss": 0.5866, "step": 2646 }, { "epoch": 0.28, "grad_norm": 1.8315413569987338, "learning_rate": 8.595500810386705e-06, "loss": 0.6154, "step": 2647 }, { "epoch": 0.28, "grad_norm": 1.2747434026497435, "learning_rate": 8.594274945034048e-06, "loss": 0.584, "step": 2648 }, { "epoch": 0.28, "grad_norm": 1.7492423587321646, "learning_rate": 8.593048632436229e-06, "loss": 0.678, "step": 2649 }, { "epoch": 0.28, "grad_norm": 1.8770824487236148, "learning_rate": 8.591821872745834e-06, "loss": 0.704, "step": 2650 }, { "epoch": 0.28, "grad_norm": 1.832681669756683, "learning_rate": 8.59059466611552e-06, "loss": 0.668, "step": 2651 }, { "epoch": 0.28, "grad_norm": 1.8531889195097648, "learning_rate": 8.589367012697984e-06, "loss": 0.6337, "step": 2652 }, { "epoch": 0.28, "grad_norm": 1.842783327706929, "learning_rate": 8.588138912645989e-06, "loss": 0.7026, "step": 2653 }, { "epoch": 0.28, "grad_norm": 1.869765812999309, "learning_rate": 8.586910366112353e-06, "loss": 0.63, "step": 2654 }, { "epoch": 0.28, "grad_norm": 1.9157503115142192, "learning_rate": 8.585681373249945e-06, "loss": 0.5765, "step": 2655 }, { "epoch": 0.28, "grad_norm": 1.529225346899084, "learning_rate": 8.584451934211692e-06, "loss": 0.5697, "step": 2656 }, { "epoch": 0.28, "grad_norm": 1.9236596620503508, "learning_rate": 8.583222049150577e-06, "loss": 0.7084, "step": 2657 }, { "epoch": 0.28, "grad_norm": 1.674087836665629, "learning_rate": 8.581991718219638e-06, "loss": 0.6276, "step": 2658 }, { "epoch": 0.28, "grad_norm": 1.7392452301226884, "learning_rate": 8.580760941571968e-06, "loss": 0.5676, "step": 2659 }, { "epoch": 0.28, "grad_norm": 1.623102041367945, "learning_rate": 8.579529719360715e-06, "loss": 0.6052, "step": 2660 }, { "epoch": 0.28, "grad_norm": 1.2957488718956711, "learning_rate": 8.578298051739085e-06, "loss": 0.5656, "step": 2661 }, { "epoch": 0.28, "grad_norm": 1.7644845824115083, "learning_rate": 8.577065938860335e-06, "loss": 0.6047, "step": 2662 }, { "epoch": 0.28, "grad_norm": 1.7078758493815662, "learning_rate": 8.575833380877782e-06, "loss": 0.608, "step": 2663 }, { "epoch": 0.28, "grad_norm": 1.6754502825161075, "learning_rate": 8.574600377944798e-06, "loss": 0.6441, "step": 2664 }, { "epoch": 0.28, "grad_norm": 1.9103821582815588, "learning_rate": 8.573366930214807e-06, "loss": 0.6764, "step": 2665 }, { "epoch": 0.28, "grad_norm": 1.9078552619776445, "learning_rate": 8.572133037841287e-06, "loss": 0.7287, "step": 2666 }, { "epoch": 0.28, "grad_norm": 1.6883440160329517, "learning_rate": 8.570898700977781e-06, "loss": 0.5554, "step": 2667 }, { "epoch": 0.28, "grad_norm": 1.8004335198969466, "learning_rate": 8.569663919777876e-06, "loss": 0.6887, "step": 2668 }, { "epoch": 0.28, "grad_norm": 1.3033760482598558, "learning_rate": 8.56842869439522e-06, "loss": 0.5387, "step": 2669 }, { "epoch": 0.28, "grad_norm": 1.9874727442924442, "learning_rate": 8.567193024983517e-06, "loss": 0.6453, "step": 2670 }, { "epoch": 0.28, "grad_norm": 1.7012695217455527, "learning_rate": 8.565956911696524e-06, "loss": 0.6074, "step": 2671 }, { "epoch": 0.29, "grad_norm": 1.860765724271203, "learning_rate": 8.564720354688053e-06, "loss": 0.6831, "step": 2672 }, { "epoch": 0.29, "grad_norm": 1.2033508487179743, "learning_rate": 8.563483354111972e-06, "loss": 0.5521, "step": 2673 }, { "epoch": 0.29, "grad_norm": 1.7872713001691418, "learning_rate": 8.562245910122205e-06, "loss": 0.645, "step": 2674 }, { "epoch": 0.29, "grad_norm": 1.5976248608369554, "learning_rate": 8.56100802287273e-06, "loss": 0.6237, "step": 2675 }, { "epoch": 0.29, "grad_norm": 1.8020543797471744, "learning_rate": 8.559769692517582e-06, "loss": 0.6497, "step": 2676 }, { "epoch": 0.29, "grad_norm": 1.8994734615740596, "learning_rate": 8.558530919210847e-06, "loss": 0.7125, "step": 2677 }, { "epoch": 0.29, "grad_norm": 1.7976447844434877, "learning_rate": 8.557291703106671e-06, "loss": 0.6041, "step": 2678 }, { "epoch": 0.29, "grad_norm": 1.7277786862977245, "learning_rate": 8.556052044359254e-06, "loss": 0.633, "step": 2679 }, { "epoch": 0.29, "grad_norm": 1.7410277125208962, "learning_rate": 8.554811943122847e-06, "loss": 0.5766, "step": 2680 }, { "epoch": 0.29, "grad_norm": 1.7462252065568218, "learning_rate": 8.55357139955176e-06, "loss": 0.5973, "step": 2681 }, { "epoch": 0.29, "grad_norm": 1.7602967294374219, "learning_rate": 8.552330413800357e-06, "loss": 0.6013, "step": 2682 }, { "epoch": 0.29, "grad_norm": 1.7537988030964626, "learning_rate": 8.55108898602306e-06, "loss": 0.5794, "step": 2683 }, { "epoch": 0.29, "grad_norm": 1.6346751762787588, "learning_rate": 8.549847116374339e-06, "loss": 0.5673, "step": 2684 }, { "epoch": 0.29, "grad_norm": 2.066529144982327, "learning_rate": 8.548604805008725e-06, "loss": 0.6789, "step": 2685 }, { "epoch": 0.29, "grad_norm": 1.7610084757025777, "learning_rate": 8.547362052080802e-06, "loss": 0.6068, "step": 2686 }, { "epoch": 0.29, "grad_norm": 1.965448328520437, "learning_rate": 8.54611885774521e-06, "loss": 0.7168, "step": 2687 }, { "epoch": 0.29, "grad_norm": 1.6046709776480432, "learning_rate": 8.54487522215664e-06, "loss": 0.663, "step": 2688 }, { "epoch": 0.29, "grad_norm": 1.8384920026998892, "learning_rate": 8.543631145469845e-06, "loss": 0.7324, "step": 2689 }, { "epoch": 0.29, "grad_norm": 1.9236704396186077, "learning_rate": 8.542386627839628e-06, "loss": 0.6479, "step": 2690 }, { "epoch": 0.29, "grad_norm": 1.7504331056889153, "learning_rate": 8.541141669420845e-06, "loss": 0.6333, "step": 2691 }, { "epoch": 0.29, "grad_norm": 1.945110326753644, "learning_rate": 8.53989627036841e-06, "loss": 0.5932, "step": 2692 }, { "epoch": 0.29, "grad_norm": 1.7332876287703514, "learning_rate": 8.538650430837295e-06, "loss": 0.6262, "step": 2693 }, { "epoch": 0.29, "grad_norm": 1.8464572469923666, "learning_rate": 8.537404150982518e-06, "loss": 0.6317, "step": 2694 }, { "epoch": 0.29, "grad_norm": 1.8417444675101344, "learning_rate": 8.53615743095916e-06, "loss": 0.7377, "step": 2695 }, { "epoch": 0.29, "grad_norm": 1.8851395733273062, "learning_rate": 8.534910270922352e-06, "loss": 0.6479, "step": 2696 }, { "epoch": 0.29, "grad_norm": 2.082696427750965, "learning_rate": 8.533662671027282e-06, "loss": 0.7049, "step": 2697 }, { "epoch": 0.29, "grad_norm": 2.5757139389161074, "learning_rate": 8.532414631429193e-06, "loss": 0.6276, "step": 2698 }, { "epoch": 0.29, "grad_norm": 1.758881010714808, "learning_rate": 8.53116615228338e-06, "loss": 0.7939, "step": 2699 }, { "epoch": 0.29, "grad_norm": 1.8710361937809774, "learning_rate": 8.529917233745199e-06, "loss": 0.6996, "step": 2700 }, { "epoch": 0.29, "grad_norm": 1.7918090149934205, "learning_rate": 8.528667875970052e-06, "loss": 0.7217, "step": 2701 }, { "epoch": 0.29, "grad_norm": 1.717847240507791, "learning_rate": 8.5274180791134e-06, "loss": 0.5716, "step": 2702 }, { "epoch": 0.29, "grad_norm": 1.7559597686585207, "learning_rate": 8.526167843330762e-06, "loss": 0.6189, "step": 2703 }, { "epoch": 0.29, "grad_norm": 1.9076487715172492, "learning_rate": 8.524917168777703e-06, "loss": 0.6093, "step": 2704 }, { "epoch": 0.29, "grad_norm": 2.0225493709387545, "learning_rate": 8.523666055609852e-06, "loss": 0.6525, "step": 2705 }, { "epoch": 0.29, "grad_norm": 1.8341522812332143, "learning_rate": 8.522414503982886e-06, "loss": 0.6729, "step": 2706 }, { "epoch": 0.29, "grad_norm": 3.0657609343363337, "learning_rate": 8.52116251405254e-06, "loss": 0.5749, "step": 2707 }, { "epoch": 0.29, "grad_norm": 2.3179435756110784, "learning_rate": 8.519910085974603e-06, "loss": 0.6626, "step": 2708 }, { "epoch": 0.29, "grad_norm": 1.7592434307464349, "learning_rate": 8.518657219904916e-06, "loss": 0.6574, "step": 2709 }, { "epoch": 0.29, "grad_norm": 2.2627709620802827, "learning_rate": 8.517403915999378e-06, "loss": 0.6853, "step": 2710 }, { "epoch": 0.29, "grad_norm": 1.7887386402012369, "learning_rate": 8.516150174413941e-06, "loss": 0.6931, "step": 2711 }, { "epoch": 0.29, "grad_norm": 1.833277108496748, "learning_rate": 8.514895995304611e-06, "loss": 0.7378, "step": 2712 }, { "epoch": 0.29, "grad_norm": 2.007783081654383, "learning_rate": 8.513641378827447e-06, "loss": 0.6563, "step": 2713 }, { "epoch": 0.29, "grad_norm": 1.9946515366685889, "learning_rate": 8.512386325138567e-06, "loss": 0.6472, "step": 2714 }, { "epoch": 0.29, "grad_norm": 1.5996043137798428, "learning_rate": 8.511130834394141e-06, "loss": 0.6602, "step": 2715 }, { "epoch": 0.29, "grad_norm": 1.711148160268334, "learning_rate": 8.50987490675039e-06, "loss": 0.7088, "step": 2716 }, { "epoch": 0.29, "grad_norm": 1.742664285152398, "learning_rate": 8.508618542363593e-06, "loss": 0.5642, "step": 2717 }, { "epoch": 0.29, "grad_norm": 1.6668679753583973, "learning_rate": 8.507361741390086e-06, "loss": 0.5789, "step": 2718 }, { "epoch": 0.29, "grad_norm": 1.8026575196050187, "learning_rate": 8.506104503986253e-06, "loss": 0.6757, "step": 2719 }, { "epoch": 0.29, "grad_norm": 1.7920976859475537, "learning_rate": 8.504846830308537e-06, "loss": 0.7016, "step": 2720 }, { "epoch": 0.29, "grad_norm": 1.9319219087329513, "learning_rate": 8.503588720513433e-06, "loss": 0.6168, "step": 2721 }, { "epoch": 0.29, "grad_norm": 1.8277728439837613, "learning_rate": 8.50233017475749e-06, "loss": 0.6422, "step": 2722 }, { "epoch": 0.29, "grad_norm": 1.3266130013190043, "learning_rate": 8.501071193197315e-06, "loss": 0.5495, "step": 2723 }, { "epoch": 0.29, "grad_norm": 1.874170339269225, "learning_rate": 8.499811775989561e-06, "loss": 0.6388, "step": 2724 }, { "epoch": 0.29, "grad_norm": 1.7391848187317935, "learning_rate": 8.498551923290947e-06, "loss": 0.7781, "step": 2725 }, { "epoch": 0.29, "grad_norm": 1.8446484194915085, "learning_rate": 8.497291635258235e-06, "loss": 0.6706, "step": 2726 }, { "epoch": 0.29, "grad_norm": 1.7658003078032372, "learning_rate": 8.49603091204825e-06, "loss": 0.668, "step": 2727 }, { "epoch": 0.29, "grad_norm": 1.8327879293330651, "learning_rate": 8.494769753817865e-06, "loss": 0.6335, "step": 2728 }, { "epoch": 0.29, "grad_norm": 1.7778530207566001, "learning_rate": 8.493508160724008e-06, "loss": 0.6826, "step": 2729 }, { "epoch": 0.29, "grad_norm": 1.6812078254658755, "learning_rate": 8.492246132923665e-06, "loss": 0.6325, "step": 2730 }, { "epoch": 0.29, "grad_norm": 1.7830663563770017, "learning_rate": 8.49098367057387e-06, "loss": 0.6475, "step": 2731 }, { "epoch": 0.29, "grad_norm": 1.6975855393609576, "learning_rate": 8.489720773831717e-06, "loss": 0.6838, "step": 2732 }, { "epoch": 0.29, "grad_norm": 1.8542566893419623, "learning_rate": 8.488457442854354e-06, "loss": 0.6154, "step": 2733 }, { "epoch": 0.29, "grad_norm": 1.4253223834119444, "learning_rate": 8.487193677798976e-06, "loss": 0.5631, "step": 2734 }, { "epoch": 0.29, "grad_norm": 1.9346943449856902, "learning_rate": 8.485929478822838e-06, "loss": 0.7719, "step": 2735 }, { "epoch": 0.29, "grad_norm": 1.932347836657873, "learning_rate": 8.48466484608325e-06, "loss": 0.723, "step": 2736 }, { "epoch": 0.29, "grad_norm": 1.18712950398638, "learning_rate": 8.483399779737572e-06, "loss": 0.5397, "step": 2737 }, { "epoch": 0.29, "grad_norm": 1.889455683813056, "learning_rate": 8.482134279943218e-06, "loss": 0.6933, "step": 2738 }, { "epoch": 0.29, "grad_norm": 1.7301956963922867, "learning_rate": 8.480868346857659e-06, "loss": 0.6678, "step": 2739 }, { "epoch": 0.29, "grad_norm": 1.7267849884792559, "learning_rate": 8.479601980638417e-06, "loss": 0.6926, "step": 2740 }, { "epoch": 0.29, "grad_norm": 2.009303949537342, "learning_rate": 8.478335181443072e-06, "loss": 0.689, "step": 2741 }, { "epoch": 0.29, "grad_norm": 1.8163142632243392, "learning_rate": 8.477067949429254e-06, "loss": 0.6276, "step": 2742 }, { "epoch": 0.29, "grad_norm": 1.3929723135128653, "learning_rate": 8.475800284754648e-06, "loss": 0.5546, "step": 2743 }, { "epoch": 0.29, "grad_norm": 2.0384796759950174, "learning_rate": 8.474532187576992e-06, "loss": 0.5556, "step": 2744 }, { "epoch": 0.29, "grad_norm": 1.1368729574395329, "learning_rate": 8.47326365805408e-06, "loss": 0.5573, "step": 2745 }, { "epoch": 0.29, "grad_norm": 1.6757251428723652, "learning_rate": 8.471994696343758e-06, "loss": 0.6841, "step": 2746 }, { "epoch": 0.29, "grad_norm": 1.6554991432689479, "learning_rate": 8.470725302603925e-06, "loss": 0.6196, "step": 2747 }, { "epoch": 0.29, "grad_norm": 1.7456756283484158, "learning_rate": 8.469455476992536e-06, "loss": 0.6822, "step": 2748 }, { "epoch": 0.29, "grad_norm": 1.7566183329918819, "learning_rate": 8.4681852196676e-06, "loss": 0.579, "step": 2749 }, { "epoch": 0.29, "grad_norm": 1.6014777463723795, "learning_rate": 8.466914530787178e-06, "loss": 0.651, "step": 2750 }, { "epoch": 0.29, "grad_norm": 1.7970129189736292, "learning_rate": 8.465643410509383e-06, "loss": 0.6238, "step": 2751 }, { "epoch": 0.29, "grad_norm": 1.6669224313240627, "learning_rate": 8.464371858992385e-06, "loss": 0.5817, "step": 2752 }, { "epoch": 0.29, "grad_norm": 1.7665187850387756, "learning_rate": 8.46309987639441e-06, "loss": 0.7361, "step": 2753 }, { "epoch": 0.29, "grad_norm": 1.855617619106816, "learning_rate": 8.461827462873729e-06, "loss": 0.7462, "step": 2754 }, { "epoch": 0.29, "grad_norm": 1.8561197479610667, "learning_rate": 8.460554618588674e-06, "loss": 0.5546, "step": 2755 }, { "epoch": 0.29, "grad_norm": 1.553170148954793, "learning_rate": 8.45928134369763e-06, "loss": 0.6045, "step": 2756 }, { "epoch": 0.29, "grad_norm": 1.8438349630650837, "learning_rate": 8.458007638359032e-06, "loss": 0.7021, "step": 2757 }, { "epoch": 0.29, "grad_norm": 1.7435287633494594, "learning_rate": 8.45673350273137e-06, "loss": 0.6964, "step": 2758 }, { "epoch": 0.29, "grad_norm": 1.9018121582580143, "learning_rate": 8.455458936973188e-06, "loss": 0.6019, "step": 2759 }, { "epoch": 0.29, "grad_norm": 1.8205786568432687, "learning_rate": 8.454183941243085e-06, "loss": 0.6865, "step": 2760 }, { "epoch": 0.29, "grad_norm": 1.7476951475767266, "learning_rate": 8.452908515699713e-06, "loss": 0.6962, "step": 2761 }, { "epoch": 0.29, "grad_norm": 2.091518506950981, "learning_rate": 8.451632660501774e-06, "loss": 0.6822, "step": 2762 }, { "epoch": 0.29, "grad_norm": 2.0934166289964105, "learning_rate": 8.450356375808028e-06, "loss": 0.7202, "step": 2763 }, { "epoch": 0.29, "grad_norm": 1.6280189204899573, "learning_rate": 8.449079661777286e-06, "loss": 0.6129, "step": 2764 }, { "epoch": 0.29, "grad_norm": 2.1940919138315054, "learning_rate": 8.447802518568411e-06, "loss": 0.6595, "step": 2765 }, { "epoch": 0.3, "grad_norm": 2.00101356958702, "learning_rate": 8.446524946340323e-06, "loss": 0.5435, "step": 2766 }, { "epoch": 0.3, "grad_norm": 1.97994890584122, "learning_rate": 8.445246945251997e-06, "loss": 0.6339, "step": 2767 }, { "epoch": 0.3, "grad_norm": 1.7974894980133609, "learning_rate": 8.443968515462452e-06, "loss": 0.6224, "step": 2768 }, { "epoch": 0.3, "grad_norm": 1.7000860122938886, "learning_rate": 8.442689657130769e-06, "loss": 0.5867, "step": 2769 }, { "epoch": 0.3, "grad_norm": 1.684693984280307, "learning_rate": 8.44141037041608e-06, "loss": 0.624, "step": 2770 }, { "epoch": 0.3, "grad_norm": 2.006070230783546, "learning_rate": 8.440130655477572e-06, "loss": 0.6632, "step": 2771 }, { "epoch": 0.3, "grad_norm": 1.7802790737755707, "learning_rate": 8.43885051247448e-06, "loss": 0.6931, "step": 2772 }, { "epoch": 0.3, "grad_norm": 1.368591706472793, "learning_rate": 8.437569941566097e-06, "loss": 0.5605, "step": 2773 }, { "epoch": 0.3, "grad_norm": 1.2679300452718927, "learning_rate": 8.436288942911767e-06, "loss": 0.5636, "step": 2774 }, { "epoch": 0.3, "grad_norm": 1.672384283030594, "learning_rate": 8.43500751667089e-06, "loss": 0.6174, "step": 2775 }, { "epoch": 0.3, "grad_norm": 1.7909934285783184, "learning_rate": 8.433725663002914e-06, "loss": 0.7166, "step": 2776 }, { "epoch": 0.3, "grad_norm": 1.7708578770454282, "learning_rate": 8.432443382067348e-06, "loss": 0.6326, "step": 2777 }, { "epoch": 0.3, "grad_norm": 1.9075693508751255, "learning_rate": 8.431160674023746e-06, "loss": 0.7243, "step": 2778 }, { "epoch": 0.3, "grad_norm": 1.7380214623771488, "learning_rate": 8.42987753903172e-06, "loss": 0.5514, "step": 2779 }, { "epoch": 0.3, "grad_norm": 1.7147022648494996, "learning_rate": 8.428593977250932e-06, "loss": 0.6141, "step": 2780 }, { "epoch": 0.3, "grad_norm": 2.178822439100278, "learning_rate": 8.427309988841102e-06, "loss": 0.7163, "step": 2781 }, { "epoch": 0.3, "grad_norm": 1.7895113718713123, "learning_rate": 8.426025573962e-06, "loss": 0.655, "step": 2782 }, { "epoch": 0.3, "grad_norm": 1.3947117306369952, "learning_rate": 8.424740732773446e-06, "loss": 0.5556, "step": 2783 }, { "epoch": 0.3, "grad_norm": 1.7134311738288373, "learning_rate": 8.42345546543532e-06, "loss": 0.6121, "step": 2784 }, { "epoch": 0.3, "grad_norm": 1.826778993770942, "learning_rate": 8.422169772107547e-06, "loss": 0.7191, "step": 2785 }, { "epoch": 0.3, "grad_norm": 1.7732443066640302, "learning_rate": 8.420883652950114e-06, "loss": 0.6433, "step": 2786 }, { "epoch": 0.3, "grad_norm": 1.8088091588013695, "learning_rate": 8.419597108123054e-06, "loss": 0.714, "step": 2787 }, { "epoch": 0.3, "grad_norm": 1.7368179849681786, "learning_rate": 8.418310137786454e-06, "loss": 0.5796, "step": 2788 }, { "epoch": 0.3, "grad_norm": 1.6417154049127913, "learning_rate": 8.417022742100455e-06, "loss": 0.5638, "step": 2789 }, { "epoch": 0.3, "grad_norm": 1.786307094273412, "learning_rate": 8.415734921225254e-06, "loss": 0.5745, "step": 2790 }, { "epoch": 0.3, "grad_norm": 2.0144924613862063, "learning_rate": 8.414446675321096e-06, "loss": 0.6191, "step": 2791 }, { "epoch": 0.3, "grad_norm": 1.6435575616024773, "learning_rate": 8.41315800454828e-06, "loss": 0.6055, "step": 2792 }, { "epoch": 0.3, "grad_norm": 1.7198849170994543, "learning_rate": 8.411868909067163e-06, "loss": 0.6753, "step": 2793 }, { "epoch": 0.3, "grad_norm": 1.6058527427138116, "learning_rate": 8.410579389038145e-06, "loss": 0.6492, "step": 2794 }, { "epoch": 0.3, "grad_norm": 1.680795857983982, "learning_rate": 8.409289444621684e-06, "loss": 0.6887, "step": 2795 }, { "epoch": 0.3, "grad_norm": 1.7954399349307923, "learning_rate": 8.407999075978297e-06, "loss": 0.6324, "step": 2796 }, { "epoch": 0.3, "grad_norm": 1.8169720521228063, "learning_rate": 8.406708283268545e-06, "loss": 0.5602, "step": 2797 }, { "epoch": 0.3, "grad_norm": 1.9956158306108178, "learning_rate": 8.405417066653044e-06, "loss": 0.7527, "step": 2798 }, { "epoch": 0.3, "grad_norm": 1.7409083505235132, "learning_rate": 8.404125426292465e-06, "loss": 0.6879, "step": 2799 }, { "epoch": 0.3, "grad_norm": 1.7126778261548479, "learning_rate": 8.402833362347529e-06, "loss": 0.6884, "step": 2800 }, { "epoch": 0.3, "grad_norm": 1.7763557953988127, "learning_rate": 8.40154087497901e-06, "loss": 0.6135, "step": 2801 }, { "epoch": 0.3, "grad_norm": 1.7833674455566701, "learning_rate": 8.400247964347739e-06, "loss": 0.6934, "step": 2802 }, { "epoch": 0.3, "grad_norm": 1.8521266621466999, "learning_rate": 8.398954630614594e-06, "loss": 0.5979, "step": 2803 }, { "epoch": 0.3, "grad_norm": 1.7868934152790987, "learning_rate": 8.397660873940507e-06, "loss": 0.6503, "step": 2804 }, { "epoch": 0.3, "grad_norm": 1.6971749975031074, "learning_rate": 8.396366694486466e-06, "loss": 0.6261, "step": 2805 }, { "epoch": 0.3, "grad_norm": 1.696368182632704, "learning_rate": 8.395072092413507e-06, "loss": 0.6152, "step": 2806 }, { "epoch": 0.3, "grad_norm": 3.2556178708495396, "learning_rate": 8.393777067882724e-06, "loss": 0.7312, "step": 2807 }, { "epoch": 0.3, "grad_norm": 1.7481633244530992, "learning_rate": 8.392481621055258e-06, "loss": 0.5661, "step": 2808 }, { "epoch": 0.3, "grad_norm": 1.9201983781256342, "learning_rate": 8.391185752092303e-06, "loss": 0.6775, "step": 2809 }, { "epoch": 0.3, "grad_norm": 2.0154863137692547, "learning_rate": 8.38988946115511e-06, "loss": 0.725, "step": 2810 }, { "epoch": 0.3, "grad_norm": 1.5716802403643195, "learning_rate": 8.388592748404979e-06, "loss": 0.5517, "step": 2811 }, { "epoch": 0.3, "grad_norm": 1.860385072405061, "learning_rate": 8.387295614003263e-06, "loss": 0.7578, "step": 2812 }, { "epoch": 0.3, "grad_norm": 1.7004064099278804, "learning_rate": 8.385998058111371e-06, "loss": 0.6791, "step": 2813 }, { "epoch": 0.3, "grad_norm": 2.035533092341801, "learning_rate": 8.384700080890758e-06, "loss": 0.5984, "step": 2814 }, { "epoch": 0.3, "grad_norm": 1.7297318290898935, "learning_rate": 8.383401682502936e-06, "loss": 0.6471, "step": 2815 }, { "epoch": 0.3, "grad_norm": 1.7931713300440264, "learning_rate": 8.382102863109468e-06, "loss": 0.7113, "step": 2816 }, { "epoch": 0.3, "grad_norm": 1.2860257540620637, "learning_rate": 8.380803622871967e-06, "loss": 0.5516, "step": 2817 }, { "epoch": 0.3, "grad_norm": 1.8992538171694306, "learning_rate": 8.379503961952106e-06, "loss": 0.5934, "step": 2818 }, { "epoch": 0.3, "grad_norm": 1.7445694987716243, "learning_rate": 8.378203880511601e-06, "loss": 0.6615, "step": 2819 }, { "epoch": 0.3, "grad_norm": 1.860174003364864, "learning_rate": 8.376903378712226e-06, "loss": 0.6847, "step": 2820 }, { "epoch": 0.3, "grad_norm": 1.9758365935636628, "learning_rate": 8.375602456715808e-06, "loss": 0.6706, "step": 2821 }, { "epoch": 0.3, "grad_norm": 1.7745218252479238, "learning_rate": 8.37430111468422e-06, "loss": 0.5988, "step": 2822 }, { "epoch": 0.3, "grad_norm": 1.9939558166390647, "learning_rate": 8.372999352779397e-06, "loss": 0.6789, "step": 2823 }, { "epoch": 0.3, "grad_norm": 2.304753392874021, "learning_rate": 8.371697171163314e-06, "loss": 0.7333, "step": 2824 }, { "epoch": 0.3, "grad_norm": 1.7508069925945189, "learning_rate": 8.370394569998009e-06, "loss": 0.6423, "step": 2825 }, { "epoch": 0.3, "grad_norm": 1.597611792290609, "learning_rate": 8.369091549445568e-06, "loss": 0.5772, "step": 2826 }, { "epoch": 0.3, "grad_norm": 1.8935888778625445, "learning_rate": 8.367788109668127e-06, "loss": 0.5945, "step": 2827 }, { "epoch": 0.3, "grad_norm": 1.7612183334656377, "learning_rate": 8.36648425082788e-06, "loss": 0.7324, "step": 2828 }, { "epoch": 0.3, "grad_norm": 1.988838968026341, "learning_rate": 8.365179973087067e-06, "loss": 0.7767, "step": 2829 }, { "epoch": 0.3, "grad_norm": 1.6959074872213846, "learning_rate": 8.363875276607985e-06, "loss": 0.6204, "step": 2830 }, { "epoch": 0.3, "grad_norm": 1.735613803425044, "learning_rate": 8.36257016155298e-06, "loss": 0.603, "step": 2831 }, { "epoch": 0.3, "grad_norm": 1.783755862553806, "learning_rate": 8.361264628084447e-06, "loss": 0.682, "step": 2832 }, { "epoch": 0.3, "grad_norm": 2.0673995102688103, "learning_rate": 8.35995867636484e-06, "loss": 0.6752, "step": 2833 }, { "epoch": 0.3, "grad_norm": 1.5546307431504414, "learning_rate": 8.358652306556666e-06, "loss": 0.5628, "step": 2834 }, { "epoch": 0.3, "grad_norm": 1.481470469709809, "learning_rate": 8.357345518822473e-06, "loss": 0.5452, "step": 2835 }, { "epoch": 0.3, "grad_norm": 1.675253020266256, "learning_rate": 8.356038313324872e-06, "loss": 0.6168, "step": 2836 }, { "epoch": 0.3, "grad_norm": 1.8657898268487028, "learning_rate": 8.354730690226522e-06, "loss": 0.6505, "step": 2837 }, { "epoch": 0.3, "grad_norm": 1.14954351853053, "learning_rate": 8.353422649690134e-06, "loss": 0.54, "step": 2838 }, { "epoch": 0.3, "grad_norm": 1.6932663967403896, "learning_rate": 8.352114191878471e-06, "loss": 0.6952, "step": 2839 }, { "epoch": 0.3, "grad_norm": 1.962141448388577, "learning_rate": 8.350805316954345e-06, "loss": 0.6983, "step": 2840 }, { "epoch": 0.3, "grad_norm": 1.728495642283686, "learning_rate": 8.349496025080628e-06, "loss": 0.6326, "step": 2841 }, { "epoch": 0.3, "grad_norm": 1.9968469164740874, "learning_rate": 8.348186316420234e-06, "loss": 0.6895, "step": 2842 }, { "epoch": 0.3, "grad_norm": 1.9898995185729615, "learning_rate": 8.346876191136135e-06, "loss": 0.704, "step": 2843 }, { "epoch": 0.3, "grad_norm": 1.9391151646475746, "learning_rate": 8.345565649391354e-06, "loss": 0.7094, "step": 2844 }, { "epoch": 0.3, "grad_norm": 1.9730405597312435, "learning_rate": 8.344254691348966e-06, "loss": 0.6356, "step": 2845 }, { "epoch": 0.3, "grad_norm": 1.7185910022517399, "learning_rate": 8.342943317172094e-06, "loss": 0.5615, "step": 2846 }, { "epoch": 0.3, "grad_norm": 1.89231794123184, "learning_rate": 8.34163152702392e-06, "loss": 0.7403, "step": 2847 }, { "epoch": 0.3, "grad_norm": 1.283197072761008, "learning_rate": 8.340319321067668e-06, "loss": 0.5553, "step": 2848 }, { "epoch": 0.3, "grad_norm": 1.2658759572857896, "learning_rate": 8.339006699466624e-06, "loss": 0.544, "step": 2849 }, { "epoch": 0.3, "grad_norm": 1.7802037235594155, "learning_rate": 8.33769366238412e-06, "loss": 0.6642, "step": 2850 }, { "epoch": 0.3, "grad_norm": 1.756827624423068, "learning_rate": 8.336380209983539e-06, "loss": 0.5463, "step": 2851 }, { "epoch": 0.3, "grad_norm": 1.334112677419667, "learning_rate": 8.335066342428317e-06, "loss": 0.5367, "step": 2852 }, { "epoch": 0.3, "grad_norm": 1.7222915964841914, "learning_rate": 8.333752059881945e-06, "loss": 0.6577, "step": 2853 }, { "epoch": 0.3, "grad_norm": 2.0449339328695175, "learning_rate": 8.332437362507963e-06, "loss": 0.5893, "step": 2854 }, { "epoch": 0.3, "grad_norm": 1.72142017141372, "learning_rate": 8.331122250469959e-06, "loss": 0.61, "step": 2855 }, { "epoch": 0.3, "grad_norm": 1.7843200485701238, "learning_rate": 8.329806723931575e-06, "loss": 0.6804, "step": 2856 }, { "epoch": 0.3, "grad_norm": 1.7092810174420037, "learning_rate": 8.328490783056509e-06, "loss": 0.6805, "step": 2857 }, { "epoch": 0.3, "grad_norm": 1.8095796885737374, "learning_rate": 8.327174428008509e-06, "loss": 0.6207, "step": 2858 }, { "epoch": 0.3, "grad_norm": 1.6535163909410606, "learning_rate": 8.325857658951367e-06, "loss": 0.5303, "step": 2859 }, { "epoch": 0.31, "grad_norm": 1.933552408889543, "learning_rate": 8.324540476048933e-06, "loss": 0.6422, "step": 2860 }, { "epoch": 0.31, "grad_norm": 1.8992097299735629, "learning_rate": 8.323222879465109e-06, "loss": 0.6463, "step": 2861 }, { "epoch": 0.31, "grad_norm": 1.155150381287324, "learning_rate": 8.321904869363848e-06, "loss": 0.5284, "step": 2862 }, { "epoch": 0.31, "grad_norm": 2.042359333370018, "learning_rate": 8.320586445909151e-06, "loss": 0.6601, "step": 2863 }, { "epoch": 0.31, "grad_norm": 1.7434216970488094, "learning_rate": 8.319267609265076e-06, "loss": 0.6913, "step": 2864 }, { "epoch": 0.31, "grad_norm": 1.6845918011120233, "learning_rate": 8.317948359595729e-06, "loss": 0.6434, "step": 2865 }, { "epoch": 0.31, "grad_norm": 1.7933808340242325, "learning_rate": 8.316628697065265e-06, "loss": 0.6858, "step": 2866 }, { "epoch": 0.31, "grad_norm": 1.737524453826504, "learning_rate": 8.315308621837895e-06, "loss": 0.6078, "step": 2867 }, { "epoch": 0.31, "grad_norm": 1.8592162646277757, "learning_rate": 8.313988134077878e-06, "loss": 0.7207, "step": 2868 }, { "epoch": 0.31, "grad_norm": 1.8086633823229585, "learning_rate": 8.312667233949525e-06, "loss": 0.6423, "step": 2869 }, { "epoch": 0.31, "grad_norm": 1.7174059686789083, "learning_rate": 8.311345921617205e-06, "loss": 0.5634, "step": 2870 }, { "epoch": 0.31, "grad_norm": 1.7084752789630573, "learning_rate": 8.310024197245326e-06, "loss": 0.6889, "step": 2871 }, { "epoch": 0.31, "grad_norm": 1.3475797872826294, "learning_rate": 8.308702060998355e-06, "loss": 0.5782, "step": 2872 }, { "epoch": 0.31, "grad_norm": 1.9039979638957745, "learning_rate": 8.307379513040813e-06, "loss": 0.6954, "step": 2873 }, { "epoch": 0.31, "grad_norm": 1.7350802444899047, "learning_rate": 8.306056553537262e-06, "loss": 0.6714, "step": 2874 }, { "epoch": 0.31, "grad_norm": 1.2432916194550685, "learning_rate": 8.304733182652327e-06, "loss": 0.5382, "step": 2875 }, { "epoch": 0.31, "grad_norm": 1.7911009853847972, "learning_rate": 8.303409400550675e-06, "loss": 0.7693, "step": 2876 }, { "epoch": 0.31, "grad_norm": 1.8485986062257609, "learning_rate": 8.302085207397027e-06, "loss": 0.6945, "step": 2877 }, { "epoch": 0.31, "grad_norm": 1.9661964949367365, "learning_rate": 8.30076060335616e-06, "loss": 0.6307, "step": 2878 }, { "epoch": 0.31, "grad_norm": 1.7952829133205948, "learning_rate": 8.299435588592895e-06, "loss": 0.6151, "step": 2879 }, { "epoch": 0.31, "grad_norm": 1.4723020630479173, "learning_rate": 8.298110163272106e-06, "loss": 0.5541, "step": 2880 }, { "epoch": 0.31, "grad_norm": 1.8507592471724295, "learning_rate": 8.296784327558723e-06, "loss": 0.6332, "step": 2881 }, { "epoch": 0.31, "grad_norm": 1.2022727170643122, "learning_rate": 8.29545808161772e-06, "loss": 0.5596, "step": 2882 }, { "epoch": 0.31, "grad_norm": 1.88596792361233, "learning_rate": 8.294131425614126e-06, "loss": 0.6329, "step": 2883 }, { "epoch": 0.31, "grad_norm": 2.285097492173403, "learning_rate": 8.292804359713021e-06, "loss": 0.6921, "step": 2884 }, { "epoch": 0.31, "grad_norm": 1.737588692947117, "learning_rate": 8.291476884079535e-06, "loss": 0.6521, "step": 2885 }, { "epoch": 0.31, "grad_norm": 1.6122661049258016, "learning_rate": 8.290148998878851e-06, "loss": 0.6088, "step": 2886 }, { "epoch": 0.31, "grad_norm": 1.9896373622783297, "learning_rate": 8.288820704276199e-06, "loss": 0.6637, "step": 2887 }, { "epoch": 0.31, "grad_norm": 1.8442905480798404, "learning_rate": 8.28749200043686e-06, "loss": 0.6649, "step": 2888 }, { "epoch": 0.31, "grad_norm": 1.9640735599096102, "learning_rate": 8.286162887526175e-06, "loss": 0.6899, "step": 2889 }, { "epoch": 0.31, "grad_norm": 1.7727379720229495, "learning_rate": 8.284833365709522e-06, "loss": 0.6335, "step": 2890 }, { "epoch": 0.31, "grad_norm": 1.673772705451235, "learning_rate": 8.283503435152343e-06, "loss": 0.5766, "step": 2891 }, { "epoch": 0.31, "grad_norm": 1.8145045803823652, "learning_rate": 8.282173096020119e-06, "loss": 0.6771, "step": 2892 }, { "epoch": 0.31, "grad_norm": 1.7715472703397088, "learning_rate": 8.280842348478391e-06, "loss": 0.6052, "step": 2893 }, { "epoch": 0.31, "grad_norm": 1.7409096795016905, "learning_rate": 8.27951119269275e-06, "loss": 0.5906, "step": 2894 }, { "epoch": 0.31, "grad_norm": 1.8876268774095384, "learning_rate": 8.278179628828826e-06, "loss": 0.6429, "step": 2895 }, { "epoch": 0.31, "grad_norm": 1.8383428249045775, "learning_rate": 8.27684765705232e-06, "loss": 0.6278, "step": 2896 }, { "epoch": 0.31, "grad_norm": 1.8236171615612013, "learning_rate": 8.275515277528965e-06, "loss": 0.6484, "step": 2897 }, { "epoch": 0.31, "grad_norm": 1.6702852330505846, "learning_rate": 8.274182490424557e-06, "loss": 0.5646, "step": 2898 }, { "epoch": 0.31, "grad_norm": 1.819912964368552, "learning_rate": 8.272849295904937e-06, "loss": 0.6644, "step": 2899 }, { "epoch": 0.31, "grad_norm": 1.809398822231953, "learning_rate": 8.271515694135997e-06, "loss": 0.7081, "step": 2900 }, { "epoch": 0.31, "grad_norm": 1.7344132079714067, "learning_rate": 8.27018168528368e-06, "loss": 0.6997, "step": 2901 }, { "epoch": 0.31, "grad_norm": 2.0090128324139136, "learning_rate": 8.268847269513984e-06, "loss": 0.6928, "step": 2902 }, { "epoch": 0.31, "grad_norm": 1.7499807455895122, "learning_rate": 8.267512446992948e-06, "loss": 0.5808, "step": 2903 }, { "epoch": 0.31, "grad_norm": 1.737631326558025, "learning_rate": 8.266177217886674e-06, "loss": 0.5615, "step": 2904 }, { "epoch": 0.31, "grad_norm": 1.793983534637457, "learning_rate": 8.264841582361304e-06, "loss": 0.6883, "step": 2905 }, { "epoch": 0.31, "grad_norm": 1.677214695261287, "learning_rate": 8.263505540583034e-06, "loss": 0.6483, "step": 2906 }, { "epoch": 0.31, "grad_norm": 1.8408721876699756, "learning_rate": 8.262169092718116e-06, "loss": 0.6832, "step": 2907 }, { "epoch": 0.31, "grad_norm": 1.2435186597793761, "learning_rate": 8.260832238932842e-06, "loss": 0.5557, "step": 2908 }, { "epoch": 0.31, "grad_norm": 1.8341530131910047, "learning_rate": 8.259494979393563e-06, "loss": 0.6388, "step": 2909 }, { "epoch": 0.31, "grad_norm": 1.8519475626149526, "learning_rate": 8.25815731426668e-06, "loss": 0.7166, "step": 2910 }, { "epoch": 0.31, "grad_norm": 1.635435546351988, "learning_rate": 8.256819243718637e-06, "loss": 0.6545, "step": 2911 }, { "epoch": 0.31, "grad_norm": 1.704865097708281, "learning_rate": 8.255480767915938e-06, "loss": 0.7176, "step": 2912 }, { "epoch": 0.31, "grad_norm": 1.9971099434833595, "learning_rate": 8.25414188702513e-06, "loss": 0.6026, "step": 2913 }, { "epoch": 0.31, "grad_norm": 1.4453548918638919, "learning_rate": 8.252802601212816e-06, "loss": 0.5262, "step": 2914 }, { "epoch": 0.31, "grad_norm": 1.691836247547866, "learning_rate": 8.251462910645647e-06, "loss": 0.6986, "step": 2915 }, { "epoch": 0.31, "grad_norm": 1.7119719046917934, "learning_rate": 8.250122815490322e-06, "loss": 0.6997, "step": 2916 }, { "epoch": 0.31, "grad_norm": 1.7017436757249322, "learning_rate": 8.248782315913595e-06, "loss": 0.6448, "step": 2917 }, { "epoch": 0.31, "grad_norm": 1.8445607993648248, "learning_rate": 8.247441412082265e-06, "loss": 0.6852, "step": 2918 }, { "epoch": 0.31, "grad_norm": 1.9137311221009614, "learning_rate": 8.246100104163186e-06, "loss": 0.7316, "step": 2919 }, { "epoch": 0.31, "grad_norm": 1.9188998794473422, "learning_rate": 8.244758392323262e-06, "loss": 0.7273, "step": 2920 }, { "epoch": 0.31, "grad_norm": 1.8242574992653624, "learning_rate": 8.243416276729443e-06, "loss": 0.6344, "step": 2921 }, { "epoch": 0.31, "grad_norm": 1.8691010660604652, "learning_rate": 8.242073757548734e-06, "loss": 0.6992, "step": 2922 }, { "epoch": 0.31, "grad_norm": 1.665323088567825, "learning_rate": 8.240730834948187e-06, "loss": 0.5747, "step": 2923 }, { "epoch": 0.31, "grad_norm": 1.5749459478653305, "learning_rate": 8.239387509094906e-06, "loss": 0.6363, "step": 2924 }, { "epoch": 0.31, "grad_norm": 1.8573344777173613, "learning_rate": 8.238043780156044e-06, "loss": 0.6231, "step": 2925 }, { "epoch": 0.31, "grad_norm": 1.8201318629759005, "learning_rate": 8.236699648298807e-06, "loss": 0.6892, "step": 2926 }, { "epoch": 0.31, "grad_norm": 3.0301222604480813, "learning_rate": 8.235355113690447e-06, "loss": 0.6505, "step": 2927 }, { "epoch": 0.31, "grad_norm": 1.9981258846836458, "learning_rate": 8.234010176498266e-06, "loss": 0.633, "step": 2928 }, { "epoch": 0.31, "grad_norm": 1.8575274611300208, "learning_rate": 8.23266483688962e-06, "loss": 0.6818, "step": 2929 }, { "epoch": 0.31, "grad_norm": 1.6974856928083293, "learning_rate": 8.231319095031915e-06, "loss": 0.5825, "step": 2930 }, { "epoch": 0.31, "grad_norm": 1.4621660175731577, "learning_rate": 8.229972951092604e-06, "loss": 0.5486, "step": 2931 }, { "epoch": 0.31, "grad_norm": 1.3517877408237289, "learning_rate": 8.22862640523919e-06, "loss": 0.5786, "step": 2932 }, { "epoch": 0.31, "grad_norm": 1.8999904333323432, "learning_rate": 8.227279457639227e-06, "loss": 0.7473, "step": 2933 }, { "epoch": 0.31, "grad_norm": 1.7097076996216503, "learning_rate": 8.225932108460322e-06, "loss": 0.6336, "step": 2934 }, { "epoch": 0.31, "grad_norm": 1.8559519319425248, "learning_rate": 8.22458435787013e-06, "loss": 0.6133, "step": 2935 }, { "epoch": 0.31, "grad_norm": 1.6846740362591666, "learning_rate": 8.22323620603635e-06, "loss": 0.5467, "step": 2936 }, { "epoch": 0.31, "grad_norm": 1.8093283165846565, "learning_rate": 8.221887653126739e-06, "loss": 0.6587, "step": 2937 }, { "epoch": 0.31, "grad_norm": 1.4555032812620619, "learning_rate": 8.2205386993091e-06, "loss": 0.5485, "step": 2938 }, { "epoch": 0.31, "grad_norm": 1.9671886444953541, "learning_rate": 8.219189344751289e-06, "loss": 0.6999, "step": 2939 }, { "epoch": 0.31, "grad_norm": 1.6542722392652163, "learning_rate": 8.217839589621208e-06, "loss": 0.6581, "step": 2940 }, { "epoch": 0.31, "grad_norm": 1.2904337385033076, "learning_rate": 8.216489434086813e-06, "loss": 0.5482, "step": 2941 }, { "epoch": 0.31, "grad_norm": 1.9227655530061039, "learning_rate": 8.215138878316104e-06, "loss": 0.6379, "step": 2942 }, { "epoch": 0.31, "grad_norm": 1.3327308843054688, "learning_rate": 8.213787922477137e-06, "loss": 0.5258, "step": 2943 }, { "epoch": 0.31, "grad_norm": 1.7492345052820768, "learning_rate": 8.212436566738013e-06, "loss": 0.6829, "step": 2944 }, { "epoch": 0.31, "grad_norm": 1.8129342530827575, "learning_rate": 8.211084811266886e-06, "loss": 0.553, "step": 2945 }, { "epoch": 0.31, "grad_norm": 1.9675111874743767, "learning_rate": 8.209732656231959e-06, "loss": 0.7152, "step": 2946 }, { "epoch": 0.31, "grad_norm": 1.3271536085252145, "learning_rate": 8.208380101801483e-06, "loss": 0.5274, "step": 2947 }, { "epoch": 0.31, "grad_norm": 1.785088922993912, "learning_rate": 8.20702714814376e-06, "loss": 0.6539, "step": 2948 }, { "epoch": 0.31, "grad_norm": 1.7170293962054595, "learning_rate": 8.205673795427143e-06, "loss": 0.6275, "step": 2949 }, { "epoch": 0.31, "grad_norm": 1.7367966955022125, "learning_rate": 8.204320043820032e-06, "loss": 0.6566, "step": 2950 }, { "epoch": 0.31, "grad_norm": 1.79275117345263, "learning_rate": 8.202965893490877e-06, "loss": 0.6204, "step": 2951 }, { "epoch": 0.31, "grad_norm": 1.238961903225938, "learning_rate": 8.20161134460818e-06, "loss": 0.5441, "step": 2952 }, { "epoch": 0.31, "grad_norm": 1.6923776593794262, "learning_rate": 8.200256397340492e-06, "loss": 0.6296, "step": 2953 }, { "epoch": 0.32, "grad_norm": 1.9435918086279604, "learning_rate": 8.198901051856409e-06, "loss": 0.6671, "step": 2954 }, { "epoch": 0.32, "grad_norm": 1.7475459019349686, "learning_rate": 8.197545308324586e-06, "loss": 0.6658, "step": 2955 }, { "epoch": 0.32, "grad_norm": 1.8456333106601892, "learning_rate": 8.196189166913717e-06, "loss": 0.6886, "step": 2956 }, { "epoch": 0.32, "grad_norm": 1.74693317759642, "learning_rate": 8.19483262779255e-06, "loss": 0.6242, "step": 2957 }, { "epoch": 0.32, "grad_norm": 1.7897093004960005, "learning_rate": 8.193475691129888e-06, "loss": 0.7414, "step": 2958 }, { "epoch": 0.32, "grad_norm": 1.8159779370608786, "learning_rate": 8.192118357094574e-06, "loss": 0.6928, "step": 2959 }, { "epoch": 0.32, "grad_norm": 1.6582589135398476, "learning_rate": 8.190760625855504e-06, "loss": 0.6093, "step": 2960 }, { "epoch": 0.32, "grad_norm": 1.6172161983737223, "learning_rate": 8.189402497581626e-06, "loss": 0.5637, "step": 2961 }, { "epoch": 0.32, "grad_norm": 2.617080496383657, "learning_rate": 8.188043972441934e-06, "loss": 0.6404, "step": 2962 }, { "epoch": 0.32, "grad_norm": 1.8869828613228, "learning_rate": 8.186685050605477e-06, "loss": 0.6289, "step": 2963 }, { "epoch": 0.32, "grad_norm": 1.7480195149388955, "learning_rate": 8.185325732241343e-06, "loss": 0.6716, "step": 2964 }, { "epoch": 0.32, "grad_norm": 1.8006682353197478, "learning_rate": 8.183966017518681e-06, "loss": 0.6152, "step": 2965 }, { "epoch": 0.32, "grad_norm": 1.599448069200546, "learning_rate": 8.182605906606679e-06, "loss": 0.6703, "step": 2966 }, { "epoch": 0.32, "grad_norm": 1.937129901200186, "learning_rate": 8.181245399674585e-06, "loss": 0.665, "step": 2967 }, { "epoch": 0.32, "grad_norm": 1.9431375171053222, "learning_rate": 8.179884496891686e-06, "loss": 0.7577, "step": 2968 }, { "epoch": 0.32, "grad_norm": 1.7293569319302373, "learning_rate": 8.178523198427323e-06, "loss": 0.6359, "step": 2969 }, { "epoch": 0.32, "grad_norm": 1.6452322309400191, "learning_rate": 8.177161504450887e-06, "loss": 0.6255, "step": 2970 }, { "epoch": 0.32, "grad_norm": 1.9522905420420442, "learning_rate": 8.17579941513182e-06, "loss": 0.6447, "step": 2971 }, { "epoch": 0.32, "grad_norm": 1.8119161262916872, "learning_rate": 8.174436930639605e-06, "loss": 0.7227, "step": 2972 }, { "epoch": 0.32, "grad_norm": 1.7793852582219043, "learning_rate": 8.173074051143785e-06, "loss": 0.6636, "step": 2973 }, { "epoch": 0.32, "grad_norm": 1.445248970203924, "learning_rate": 8.171710776813942e-06, "loss": 0.5659, "step": 2974 }, { "epoch": 0.32, "grad_norm": 1.2725658157733555, "learning_rate": 8.170347107819714e-06, "loss": 0.5675, "step": 2975 }, { "epoch": 0.32, "grad_norm": 1.877624348273659, "learning_rate": 8.168983044330785e-06, "loss": 0.6214, "step": 2976 }, { "epoch": 0.32, "grad_norm": 1.8181983426994641, "learning_rate": 8.16761858651689e-06, "loss": 0.6472, "step": 2977 }, { "epoch": 0.32, "grad_norm": 1.3495529985702555, "learning_rate": 8.166253734547814e-06, "loss": 0.5435, "step": 2978 }, { "epoch": 0.32, "grad_norm": 2.047901739118464, "learning_rate": 8.164888488593384e-06, "loss": 0.7014, "step": 2979 }, { "epoch": 0.32, "grad_norm": 1.8217480558040549, "learning_rate": 8.163522848823486e-06, "loss": 0.6535, "step": 2980 }, { "epoch": 0.32, "grad_norm": 1.6307579543206088, "learning_rate": 8.162156815408048e-06, "loss": 0.5418, "step": 2981 }, { "epoch": 0.32, "grad_norm": 1.9254124244388389, "learning_rate": 8.16079038851705e-06, "loss": 0.7777, "step": 2982 }, { "epoch": 0.32, "grad_norm": 1.6636425648332236, "learning_rate": 8.15942356832052e-06, "loss": 0.6141, "step": 2983 }, { "epoch": 0.32, "grad_norm": 1.9570548712186033, "learning_rate": 8.158056354988532e-06, "loss": 0.6374, "step": 2984 }, { "epoch": 0.32, "grad_norm": 1.7719643833412326, "learning_rate": 8.156688748691217e-06, "loss": 0.5635, "step": 2985 }, { "epoch": 0.32, "grad_norm": 1.7005156329264712, "learning_rate": 8.155320749598747e-06, "loss": 0.6671, "step": 2986 }, { "epoch": 0.32, "grad_norm": 1.7066652296834715, "learning_rate": 8.153952357881348e-06, "loss": 0.6671, "step": 2987 }, { "epoch": 0.32, "grad_norm": 1.8974446786690764, "learning_rate": 8.15258357370929e-06, "loss": 0.6969, "step": 2988 }, { "epoch": 0.32, "grad_norm": 1.963338190693293, "learning_rate": 8.151214397252893e-06, "loss": 0.7687, "step": 2989 }, { "epoch": 0.32, "grad_norm": 1.5332770605519455, "learning_rate": 8.149844828682532e-06, "loss": 0.5869, "step": 2990 }, { "epoch": 0.32, "grad_norm": 2.1981408222156698, "learning_rate": 8.148474868168624e-06, "loss": 0.646, "step": 2991 }, { "epoch": 0.32, "grad_norm": 1.756281896873065, "learning_rate": 8.147104515881635e-06, "loss": 0.7022, "step": 2992 }, { "epoch": 0.32, "grad_norm": 1.9872906194676707, "learning_rate": 8.145733771992084e-06, "loss": 0.641, "step": 2993 }, { "epoch": 0.32, "grad_norm": 1.8791524372157005, "learning_rate": 8.144362636670537e-06, "loss": 0.6176, "step": 2994 }, { "epoch": 0.32, "grad_norm": 2.16782912470526, "learning_rate": 8.142991110087603e-06, "loss": 0.7713, "step": 2995 }, { "epoch": 0.32, "grad_norm": 1.7813758313803394, "learning_rate": 8.141619192413951e-06, "loss": 0.5964, "step": 2996 }, { "epoch": 0.32, "grad_norm": 1.6135202144117313, "learning_rate": 8.140246883820289e-06, "loss": 0.6203, "step": 2997 }, { "epoch": 0.32, "grad_norm": 1.7589178220890558, "learning_rate": 8.138874184477377e-06, "loss": 0.5837, "step": 2998 }, { "epoch": 0.32, "grad_norm": 2.1643482780187933, "learning_rate": 8.137501094556022e-06, "loss": 0.7405, "step": 2999 }, { "epoch": 0.32, "grad_norm": 1.5896507504429722, "learning_rate": 8.136127614227086e-06, "loss": 0.5699, "step": 3000 }, { "epoch": 0.32, "grad_norm": 1.7575634908007631, "learning_rate": 8.134753743661472e-06, "loss": 0.6401, "step": 3001 }, { "epoch": 0.32, "grad_norm": 1.8858503107057418, "learning_rate": 8.133379483030133e-06, "loss": 0.7104, "step": 3002 }, { "epoch": 0.32, "grad_norm": 1.8072358934137407, "learning_rate": 8.132004832504074e-06, "loss": 0.5923, "step": 3003 }, { "epoch": 0.32, "grad_norm": 1.8680963290785055, "learning_rate": 8.130629792254345e-06, "loss": 0.6751, "step": 3004 }, { "epoch": 0.32, "grad_norm": 1.2880215671115078, "learning_rate": 8.129254362452048e-06, "loss": 0.541, "step": 3005 }, { "epoch": 0.32, "grad_norm": 1.2830611142853419, "learning_rate": 8.12787854326833e-06, "loss": 0.5549, "step": 3006 }, { "epoch": 0.32, "grad_norm": 1.9443955753074273, "learning_rate": 8.126502334874387e-06, "loss": 0.6799, "step": 3007 }, { "epoch": 0.32, "grad_norm": 1.2540619789893837, "learning_rate": 8.125125737441466e-06, "loss": 0.5663, "step": 3008 }, { "epoch": 0.32, "grad_norm": 1.7204969119012834, "learning_rate": 8.123748751140858e-06, "loss": 0.6778, "step": 3009 }, { "epoch": 0.32, "grad_norm": 1.9466559504796785, "learning_rate": 8.122371376143909e-06, "loss": 0.6015, "step": 3010 }, { "epoch": 0.32, "grad_norm": 1.4438186857670936, "learning_rate": 8.120993612622005e-06, "loss": 0.5719, "step": 3011 }, { "epoch": 0.32, "grad_norm": 1.9183342325100678, "learning_rate": 8.11961546074659e-06, "loss": 0.6924, "step": 3012 }, { "epoch": 0.32, "grad_norm": 1.625260163210079, "learning_rate": 8.118236920689147e-06, "loss": 0.6205, "step": 3013 }, { "epoch": 0.32, "grad_norm": 1.8432322574706625, "learning_rate": 8.116857992621212e-06, "loss": 0.6934, "step": 3014 }, { "epoch": 0.32, "grad_norm": 1.6355775438352047, "learning_rate": 8.11547867671437e-06, "loss": 0.5795, "step": 3015 }, { "epoch": 0.32, "grad_norm": 1.7588663220877923, "learning_rate": 8.114098973140251e-06, "loss": 0.6764, "step": 3016 }, { "epoch": 0.32, "grad_norm": 1.899912008857857, "learning_rate": 8.112718882070537e-06, "loss": 0.6861, "step": 3017 }, { "epoch": 0.32, "grad_norm": 1.7280334004138882, "learning_rate": 8.111338403676957e-06, "loss": 0.6372, "step": 3018 }, { "epoch": 0.32, "grad_norm": 1.6449704785944037, "learning_rate": 8.109957538131283e-06, "loss": 0.5889, "step": 3019 }, { "epoch": 0.32, "grad_norm": 1.8126626562626262, "learning_rate": 8.108576285605346e-06, "loss": 0.6815, "step": 3020 }, { "epoch": 0.32, "grad_norm": 1.9588508030025944, "learning_rate": 8.107194646271015e-06, "loss": 0.7633, "step": 3021 }, { "epoch": 0.32, "grad_norm": 1.405544856642618, "learning_rate": 8.10581262030021e-06, "loss": 0.5638, "step": 3022 }, { "epoch": 0.32, "grad_norm": 1.7409755394439548, "learning_rate": 8.104430207864906e-06, "loss": 0.6269, "step": 3023 }, { "epoch": 0.32, "grad_norm": 1.9593791311516695, "learning_rate": 8.103047409137114e-06, "loss": 0.6635, "step": 3024 }, { "epoch": 0.32, "grad_norm": 2.3296171011658378, "learning_rate": 8.101664224288904e-06, "loss": 0.6609, "step": 3025 }, { "epoch": 0.32, "grad_norm": 1.6902012561171185, "learning_rate": 8.100280653492385e-06, "loss": 0.6863, "step": 3026 }, { "epoch": 0.32, "grad_norm": 1.762823469231938, "learning_rate": 8.09889669691972e-06, "loss": 0.6693, "step": 3027 }, { "epoch": 0.32, "grad_norm": 1.8080728629367242, "learning_rate": 8.09751235474312e-06, "loss": 0.7188, "step": 3028 }, { "epoch": 0.32, "grad_norm": 1.6602697990466, "learning_rate": 8.096127627134842e-06, "loss": 0.6379, "step": 3029 }, { "epoch": 0.32, "grad_norm": 1.8641916615379863, "learning_rate": 8.094742514267191e-06, "loss": 0.6227, "step": 3030 }, { "epoch": 0.32, "grad_norm": 1.7623701611060438, "learning_rate": 8.093357016312518e-06, "loss": 0.6924, "step": 3031 }, { "epoch": 0.32, "grad_norm": 1.3583057471732678, "learning_rate": 8.091971133443226e-06, "loss": 0.5583, "step": 3032 }, { "epoch": 0.32, "grad_norm": 1.7382601771476618, "learning_rate": 8.090584865831766e-06, "loss": 0.6528, "step": 3033 }, { "epoch": 0.32, "grad_norm": 1.646108268074157, "learning_rate": 8.08919821365063e-06, "loss": 0.6444, "step": 3034 }, { "epoch": 0.32, "grad_norm": 1.7617973785633914, "learning_rate": 8.087811177072369e-06, "loss": 0.7642, "step": 3035 }, { "epoch": 0.32, "grad_norm": 1.971305008773828, "learning_rate": 8.086423756269571e-06, "loss": 0.7487, "step": 3036 }, { "epoch": 0.32, "grad_norm": 1.821071082362171, "learning_rate": 8.08503595141488e-06, "loss": 0.6324, "step": 3037 }, { "epoch": 0.32, "grad_norm": 1.6989691238378146, "learning_rate": 8.08364776268098e-06, "loss": 0.631, "step": 3038 }, { "epoch": 0.32, "grad_norm": 1.5903083301639953, "learning_rate": 8.082259190240608e-06, "loss": 0.5664, "step": 3039 }, { "epoch": 0.32, "grad_norm": 1.975652809364929, "learning_rate": 8.080870234266552e-06, "loss": 0.7523, "step": 3040 }, { "epoch": 0.32, "grad_norm": 1.562997298472747, "learning_rate": 8.079480894931639e-06, "loss": 0.5934, "step": 3041 }, { "epoch": 0.32, "grad_norm": 1.675213801956366, "learning_rate": 8.07809117240875e-06, "loss": 0.6786, "step": 3042 }, { "epoch": 0.32, "grad_norm": 1.914781037848305, "learning_rate": 8.076701066870811e-06, "loss": 0.7284, "step": 3043 }, { "epoch": 0.32, "grad_norm": 1.4579522537576646, "learning_rate": 8.075310578490798e-06, "loss": 0.5649, "step": 3044 }, { "epoch": 0.32, "grad_norm": 1.858393193200626, "learning_rate": 8.073919707441732e-06, "loss": 0.6984, "step": 3045 }, { "epoch": 0.32, "grad_norm": 1.8261435412700118, "learning_rate": 8.072528453896683e-06, "loss": 0.66, "step": 3046 }, { "epoch": 0.33, "grad_norm": 1.6631291962055843, "learning_rate": 8.071136818028767e-06, "loss": 0.5863, "step": 3047 }, { "epoch": 0.33, "grad_norm": 1.5593725634592, "learning_rate": 8.069744800011152e-06, "loss": 0.564, "step": 3048 }, { "epoch": 0.33, "grad_norm": 1.91112890573976, "learning_rate": 8.068352400017048e-06, "loss": 0.6262, "step": 3049 }, { "epoch": 0.33, "grad_norm": 1.6694475796902746, "learning_rate": 8.066959618219714e-06, "loss": 0.7091, "step": 3050 }, { "epoch": 0.33, "grad_norm": 2.1424272742221246, "learning_rate": 8.065566454792462e-06, "loss": 0.6178, "step": 3051 }, { "epoch": 0.33, "grad_norm": 2.024076687640051, "learning_rate": 8.064172909908643e-06, "loss": 0.7445, "step": 3052 }, { "epoch": 0.33, "grad_norm": 1.7840865805968626, "learning_rate": 8.062778983741661e-06, "loss": 0.6509, "step": 3053 }, { "epoch": 0.33, "grad_norm": 1.8132166117172637, "learning_rate": 8.061384676464966e-06, "loss": 0.6022, "step": 3054 }, { "epoch": 0.33, "grad_norm": 1.9998032222556656, "learning_rate": 8.059989988252055e-06, "loss": 0.5996, "step": 3055 }, { "epoch": 0.33, "grad_norm": 1.7479377128881108, "learning_rate": 8.058594919276472e-06, "loss": 0.56, "step": 3056 }, { "epoch": 0.33, "grad_norm": 1.66815770776614, "learning_rate": 8.057199469711811e-06, "loss": 0.5414, "step": 3057 }, { "epoch": 0.33, "grad_norm": 1.8253934992340972, "learning_rate": 8.05580363973171e-06, "loss": 0.6827, "step": 3058 }, { "epoch": 0.33, "grad_norm": 1.6022290886498116, "learning_rate": 8.054407429509858e-06, "loss": 0.5565, "step": 3059 }, { "epoch": 0.33, "grad_norm": 1.9475679749552728, "learning_rate": 8.053010839219986e-06, "loss": 0.6205, "step": 3060 }, { "epoch": 0.33, "grad_norm": 1.780000974302362, "learning_rate": 8.051613869035876e-06, "loss": 0.6495, "step": 3061 }, { "epoch": 0.33, "grad_norm": 1.6648490900449369, "learning_rate": 8.050216519131359e-06, "loss": 0.5737, "step": 3062 }, { "epoch": 0.33, "grad_norm": 1.6663247247539907, "learning_rate": 8.048818789680308e-06, "loss": 0.5743, "step": 3063 }, { "epoch": 0.33, "grad_norm": 1.7692769563187265, "learning_rate": 8.047420680856648e-06, "loss": 0.7257, "step": 3064 }, { "epoch": 0.33, "grad_norm": 1.8208728212336989, "learning_rate": 8.046022192834351e-06, "loss": 0.7016, "step": 3065 }, { "epoch": 0.33, "grad_norm": 1.702135568823915, "learning_rate": 8.044623325787432e-06, "loss": 0.6438, "step": 3066 }, { "epoch": 0.33, "grad_norm": 1.9953421075232922, "learning_rate": 8.043224079889956e-06, "loss": 0.6755, "step": 3067 }, { "epoch": 0.33, "grad_norm": 1.703328396408762, "learning_rate": 8.041824455316036e-06, "loss": 0.6332, "step": 3068 }, { "epoch": 0.33, "grad_norm": 1.6617230047857663, "learning_rate": 8.040424452239828e-06, "loss": 0.5529, "step": 3069 }, { "epoch": 0.33, "grad_norm": 1.384128714688414, "learning_rate": 8.039024070835542e-06, "loss": 0.5512, "step": 3070 }, { "epoch": 0.33, "grad_norm": 1.5725859457137807, "learning_rate": 8.037623311277429e-06, "loss": 0.5849, "step": 3071 }, { "epoch": 0.33, "grad_norm": 1.7121079214873525, "learning_rate": 8.036222173739791e-06, "loss": 0.6083, "step": 3072 }, { "epoch": 0.33, "grad_norm": 2.0560621828429433, "learning_rate": 8.034820658396975e-06, "loss": 0.768, "step": 3073 }, { "epoch": 0.33, "grad_norm": 1.7989478115479445, "learning_rate": 8.033418765423372e-06, "loss": 0.6055, "step": 3074 }, { "epoch": 0.33, "grad_norm": 2.019394401516684, "learning_rate": 8.032016494993426e-06, "loss": 0.6638, "step": 3075 }, { "epoch": 0.33, "grad_norm": 2.171629119877362, "learning_rate": 8.030613847281626e-06, "loss": 0.5883, "step": 3076 }, { "epoch": 0.33, "grad_norm": 1.988608594505692, "learning_rate": 8.029210822462504e-06, "loss": 0.5796, "step": 3077 }, { "epoch": 0.33, "grad_norm": 1.784811891443913, "learning_rate": 8.027807420710645e-06, "loss": 0.6285, "step": 3078 }, { "epoch": 0.33, "grad_norm": 2.922489673808912, "learning_rate": 8.026403642200677e-06, "loss": 0.5796, "step": 3079 }, { "epoch": 0.33, "grad_norm": 2.4498387772228867, "learning_rate": 8.024999487107278e-06, "loss": 0.5878, "step": 3080 }, { "epoch": 0.33, "grad_norm": 1.8858795892645202, "learning_rate": 8.023594955605167e-06, "loss": 0.7041, "step": 3081 }, { "epoch": 0.33, "grad_norm": 2.2330871331630626, "learning_rate": 8.022190047869115e-06, "loss": 0.7228, "step": 3082 }, { "epoch": 0.33, "grad_norm": 2.0282160738720414, "learning_rate": 8.020784764073938e-06, "loss": 0.67, "step": 3083 }, { "epoch": 0.33, "grad_norm": 1.6569762533798271, "learning_rate": 8.0193791043945e-06, "loss": 0.5334, "step": 3084 }, { "epoch": 0.33, "grad_norm": 1.905977846728685, "learning_rate": 8.017973069005714e-06, "loss": 0.6503, "step": 3085 }, { "epoch": 0.33, "grad_norm": 1.7870632430757747, "learning_rate": 8.01656665808253e-06, "loss": 0.5935, "step": 3086 }, { "epoch": 0.33, "grad_norm": 1.8100909108602448, "learning_rate": 8.015159871799957e-06, "loss": 0.598, "step": 3087 }, { "epoch": 0.33, "grad_norm": 1.7703694452460959, "learning_rate": 8.013752710333042e-06, "loss": 0.5748, "step": 3088 }, { "epoch": 0.33, "grad_norm": 2.286061146108286, "learning_rate": 8.012345173856885e-06, "loss": 0.5676, "step": 3089 }, { "epoch": 0.33, "grad_norm": 1.6897969991697164, "learning_rate": 8.010937262546625e-06, "loss": 0.6622, "step": 3090 }, { "epoch": 0.33, "grad_norm": 1.6498594603185126, "learning_rate": 8.009528976577456e-06, "loss": 0.5783, "step": 3091 }, { "epoch": 0.33, "grad_norm": 1.5363093475640117, "learning_rate": 8.008120316124612e-06, "loss": 0.5398, "step": 3092 }, { "epoch": 0.33, "grad_norm": 1.894641359664739, "learning_rate": 8.00671128136338e-06, "loss": 0.6459, "step": 3093 }, { "epoch": 0.33, "grad_norm": 1.8827415907330665, "learning_rate": 8.005301872469086e-06, "loss": 0.6263, "step": 3094 }, { "epoch": 0.33, "grad_norm": 2.0889358017773003, "learning_rate": 8.00389208961711e-06, "loss": 0.7573, "step": 3095 }, { "epoch": 0.33, "grad_norm": 1.7532448526304845, "learning_rate": 8.002481932982871e-06, "loss": 0.5588, "step": 3096 }, { "epoch": 0.33, "grad_norm": 2.0186171724211714, "learning_rate": 8.001071402741843e-06, "loss": 0.7057, "step": 3097 }, { "epoch": 0.33, "grad_norm": 1.625391789921825, "learning_rate": 7.999660499069537e-06, "loss": 0.5403, "step": 3098 }, { "epoch": 0.33, "grad_norm": 1.7452790784628336, "learning_rate": 7.99824922214152e-06, "loss": 0.6259, "step": 3099 }, { "epoch": 0.33, "grad_norm": 1.795074089686999, "learning_rate": 7.996837572133397e-06, "loss": 0.5966, "step": 3100 }, { "epoch": 0.33, "grad_norm": 1.5289872057717213, "learning_rate": 7.995425549220828e-06, "loss": 0.5825, "step": 3101 }, { "epoch": 0.33, "grad_norm": 1.7513635537738224, "learning_rate": 7.994013153579512e-06, "loss": 0.6375, "step": 3102 }, { "epoch": 0.33, "grad_norm": 1.8049169796997107, "learning_rate": 7.992600385385197e-06, "loss": 0.6506, "step": 3103 }, { "epoch": 0.33, "grad_norm": 1.8073380490272077, "learning_rate": 7.991187244813679e-06, "loss": 0.5511, "step": 3104 }, { "epoch": 0.33, "grad_norm": 1.6872454571761597, "learning_rate": 7.989773732040795e-06, "loss": 0.5476, "step": 3105 }, { "epoch": 0.33, "grad_norm": 1.725683764466079, "learning_rate": 7.988359847242438e-06, "loss": 0.6832, "step": 3106 }, { "epoch": 0.33, "grad_norm": 1.5918180876249517, "learning_rate": 7.986945590594535e-06, "loss": 0.5631, "step": 3107 }, { "epoch": 0.33, "grad_norm": 1.7747646377655553, "learning_rate": 7.985530962273071e-06, "loss": 0.6722, "step": 3108 }, { "epoch": 0.33, "grad_norm": 1.8785368475762272, "learning_rate": 7.98411596245407e-06, "loss": 0.7581, "step": 3109 }, { "epoch": 0.33, "grad_norm": 1.7447680159973449, "learning_rate": 7.982700591313603e-06, "loss": 0.6349, "step": 3110 }, { "epoch": 0.33, "grad_norm": 2.0853150451241538, "learning_rate": 7.98128484902779e-06, "loss": 0.6843, "step": 3111 }, { "epoch": 0.33, "grad_norm": 2.143096309694557, "learning_rate": 7.979868735772795e-06, "loss": 0.6926, "step": 3112 }, { "epoch": 0.33, "grad_norm": 2.0171259607555503, "learning_rate": 7.978452251724831e-06, "loss": 0.6899, "step": 3113 }, { "epoch": 0.33, "grad_norm": 1.7377185192943168, "learning_rate": 7.97703539706015e-06, "loss": 0.6461, "step": 3114 }, { "epoch": 0.33, "grad_norm": 1.7884182645539464, "learning_rate": 7.975618171955059e-06, "loss": 0.6923, "step": 3115 }, { "epoch": 0.33, "grad_norm": 2.1244300947749752, "learning_rate": 7.974200576585906e-06, "loss": 0.6987, "step": 3116 }, { "epoch": 0.33, "grad_norm": 2.0397706746202107, "learning_rate": 7.972782611129084e-06, "loss": 0.621, "step": 3117 }, { "epoch": 0.33, "grad_norm": 1.9183229597875138, "learning_rate": 7.971364275761038e-06, "loss": 0.6649, "step": 3118 }, { "epoch": 0.33, "grad_norm": 1.8989271297270427, "learning_rate": 7.969945570658253e-06, "loss": 0.6854, "step": 3119 }, { "epoch": 0.33, "grad_norm": 1.8937100637500393, "learning_rate": 7.968526495997262e-06, "loss": 0.6957, "step": 3120 }, { "epoch": 0.33, "grad_norm": 1.9301184485488847, "learning_rate": 7.967107051954646e-06, "loss": 0.7387, "step": 3121 }, { "epoch": 0.33, "grad_norm": 1.8439730594602113, "learning_rate": 7.965687238707027e-06, "loss": 0.7048, "step": 3122 }, { "epoch": 0.33, "grad_norm": 1.9282646089765492, "learning_rate": 7.96426705643108e-06, "loss": 0.6739, "step": 3123 }, { "epoch": 0.33, "grad_norm": 1.6697340957419202, "learning_rate": 7.962846505303519e-06, "loss": 0.6045, "step": 3124 }, { "epoch": 0.33, "grad_norm": 1.7906483181449269, "learning_rate": 7.96142558550111e-06, "loss": 0.6947, "step": 3125 }, { "epoch": 0.33, "grad_norm": 1.8961570583895935, "learning_rate": 7.960004297200657e-06, "loss": 0.6824, "step": 3126 }, { "epoch": 0.33, "grad_norm": 1.929630398828314, "learning_rate": 7.95858264057902e-06, "loss": 0.7278, "step": 3127 }, { "epoch": 0.33, "grad_norm": 1.9229219015749652, "learning_rate": 7.957160615813095e-06, "loss": 0.7138, "step": 3128 }, { "epoch": 0.33, "grad_norm": 1.86238732094276, "learning_rate": 7.955738223079831e-06, "loss": 0.7235, "step": 3129 }, { "epoch": 0.33, "grad_norm": 1.870122492655283, "learning_rate": 7.954315462556221e-06, "loss": 0.6588, "step": 3130 }, { "epoch": 0.33, "grad_norm": 1.7913231100941212, "learning_rate": 7.952892334419302e-06, "loss": 0.672, "step": 3131 }, { "epoch": 0.33, "grad_norm": 1.8190340849598627, "learning_rate": 7.951468838846154e-06, "loss": 0.6649, "step": 3132 }, { "epoch": 0.33, "grad_norm": 1.851062859567545, "learning_rate": 7.950044976013912e-06, "loss": 0.7251, "step": 3133 }, { "epoch": 0.33, "grad_norm": 1.620928856895891, "learning_rate": 7.948620746099746e-06, "loss": 0.5335, "step": 3134 }, { "epoch": 0.33, "grad_norm": 1.7619296135557443, "learning_rate": 7.947196149280883e-06, "loss": 0.6492, "step": 3135 }, { "epoch": 0.33, "grad_norm": 1.1703611970248708, "learning_rate": 7.945771185734581e-06, "loss": 0.5436, "step": 3136 }, { "epoch": 0.33, "grad_norm": 1.676179439971438, "learning_rate": 7.944345855638157e-06, "loss": 0.6058, "step": 3137 }, { "epoch": 0.33, "grad_norm": 1.28534395607725, "learning_rate": 7.942920159168973e-06, "loss": 0.5498, "step": 3138 }, { "epoch": 0.33, "grad_norm": 1.9922330237168344, "learning_rate": 7.941494096504422e-06, "loss": 0.7538, "step": 3139 }, { "epoch": 0.33, "grad_norm": 1.6498503012153412, "learning_rate": 7.94006766782196e-06, "loss": 0.6108, "step": 3140 }, { "epoch": 0.34, "grad_norm": 1.6947915550696884, "learning_rate": 7.938640873299076e-06, "loss": 0.6076, "step": 3141 }, { "epoch": 0.34, "grad_norm": 1.820140716373658, "learning_rate": 7.937213713113318e-06, "loss": 0.6362, "step": 3142 }, { "epoch": 0.34, "grad_norm": 1.8568160967234484, "learning_rate": 7.935786187442265e-06, "loss": 0.6272, "step": 3143 }, { "epoch": 0.34, "grad_norm": 1.9101877875745719, "learning_rate": 7.934358296463547e-06, "loss": 0.7129, "step": 3144 }, { "epoch": 0.34, "grad_norm": 1.6233218886692922, "learning_rate": 7.932930040354845e-06, "loss": 0.6827, "step": 3145 }, { "epoch": 0.34, "grad_norm": 1.8094546626011654, "learning_rate": 7.931501419293878e-06, "loss": 0.6698, "step": 3146 }, { "epoch": 0.34, "grad_norm": 2.2934084107263337, "learning_rate": 7.930072433458412e-06, "loss": 0.6429, "step": 3147 }, { "epoch": 0.34, "grad_norm": 1.6573710631984715, "learning_rate": 7.928643083026263e-06, "loss": 0.6783, "step": 3148 }, { "epoch": 0.34, "grad_norm": 1.7593385321313315, "learning_rate": 7.927213368175284e-06, "loss": 0.5647, "step": 3149 }, { "epoch": 0.34, "grad_norm": 1.9873119051136419, "learning_rate": 7.925783289083381e-06, "loss": 0.6017, "step": 3150 }, { "epoch": 0.34, "grad_norm": 1.9205597285425249, "learning_rate": 7.924352845928505e-06, "loss": 0.7153, "step": 3151 }, { "epoch": 0.34, "grad_norm": 1.739011454480751, "learning_rate": 7.922922038888645e-06, "loss": 0.5883, "step": 3152 }, { "epoch": 0.34, "grad_norm": 1.9110564142678392, "learning_rate": 7.921490868141843e-06, "loss": 0.624, "step": 3153 }, { "epoch": 0.34, "grad_norm": 1.2438365258113155, "learning_rate": 7.920059333866183e-06, "loss": 0.5401, "step": 3154 }, { "epoch": 0.34, "grad_norm": 1.1659268320028997, "learning_rate": 7.918627436239794e-06, "loss": 0.5433, "step": 3155 }, { "epoch": 0.34, "grad_norm": 2.0025545415881245, "learning_rate": 7.917195175440851e-06, "loss": 0.744, "step": 3156 }, { "epoch": 0.34, "grad_norm": 1.8222175358433454, "learning_rate": 7.915762551647575e-06, "loss": 0.6936, "step": 3157 }, { "epoch": 0.34, "grad_norm": 1.9565000689387952, "learning_rate": 7.91432956503823e-06, "loss": 0.6869, "step": 3158 }, { "epoch": 0.34, "grad_norm": 1.721493904724374, "learning_rate": 7.912896215791128e-06, "loss": 0.6565, "step": 3159 }, { "epoch": 0.34, "grad_norm": 2.054275630324874, "learning_rate": 7.911462504084621e-06, "loss": 0.7048, "step": 3160 }, { "epoch": 0.34, "grad_norm": 1.7383253076622092, "learning_rate": 7.910028430097112e-06, "loss": 0.526, "step": 3161 }, { "epoch": 0.34, "grad_norm": 1.8570402952948046, "learning_rate": 7.908593994007046e-06, "loss": 0.716, "step": 3162 }, { "epoch": 0.34, "grad_norm": 1.478181442753059, "learning_rate": 7.907159195992915e-06, "loss": 0.5429, "step": 3163 }, { "epoch": 0.34, "grad_norm": 2.0875500818576818, "learning_rate": 7.905724036233254e-06, "loss": 0.5463, "step": 3164 }, { "epoch": 0.34, "grad_norm": 2.0406543114051843, "learning_rate": 7.904288514906643e-06, "loss": 0.6197, "step": 3165 }, { "epoch": 0.34, "grad_norm": 1.7184125340579142, "learning_rate": 7.90285263219171e-06, "loss": 0.7389, "step": 3166 }, { "epoch": 0.34, "grad_norm": 2.337686207616654, "learning_rate": 7.90141638826712e-06, "loss": 0.6665, "step": 3167 }, { "epoch": 0.34, "grad_norm": 1.7262209145224177, "learning_rate": 7.899979783311595e-06, "loss": 0.5651, "step": 3168 }, { "epoch": 0.34, "grad_norm": 1.6393638772175307, "learning_rate": 7.898542817503892e-06, "loss": 0.6238, "step": 3169 }, { "epoch": 0.34, "grad_norm": 1.6457666529707016, "learning_rate": 7.897105491022819e-06, "loss": 0.696, "step": 3170 }, { "epoch": 0.34, "grad_norm": 1.9213342507400426, "learning_rate": 7.895667804047224e-06, "loss": 0.6811, "step": 3171 }, { "epoch": 0.34, "grad_norm": 1.5741179202654438, "learning_rate": 7.894229756756004e-06, "loss": 0.53, "step": 3172 }, { "epoch": 0.34, "grad_norm": 1.6026091193199046, "learning_rate": 7.892791349328097e-06, "loss": 0.481, "step": 3173 }, { "epoch": 0.34, "grad_norm": 1.7545788943187757, "learning_rate": 7.891352581942491e-06, "loss": 0.6206, "step": 3174 }, { "epoch": 0.34, "grad_norm": 1.989111590110034, "learning_rate": 7.889913454778211e-06, "loss": 0.6427, "step": 3175 }, { "epoch": 0.34, "grad_norm": 1.7335429862268297, "learning_rate": 7.888473968014335e-06, "loss": 0.6584, "step": 3176 }, { "epoch": 0.34, "grad_norm": 1.8115905743212886, "learning_rate": 7.887034121829981e-06, "loss": 0.7596, "step": 3177 }, { "epoch": 0.34, "grad_norm": 1.8083846179366128, "learning_rate": 7.885593916404314e-06, "loss": 0.6211, "step": 3178 }, { "epoch": 0.34, "grad_norm": 1.854192952361774, "learning_rate": 7.884153351916541e-06, "loss": 0.686, "step": 3179 }, { "epoch": 0.34, "grad_norm": 2.006080921842904, "learning_rate": 7.882712428545916e-06, "loss": 0.6902, "step": 3180 }, { "epoch": 0.34, "grad_norm": 1.8169983267377259, "learning_rate": 7.881271146471737e-06, "loss": 0.7232, "step": 3181 }, { "epoch": 0.34, "grad_norm": 1.6307662121754392, "learning_rate": 7.879829505873346e-06, "loss": 0.5977, "step": 3182 }, { "epoch": 0.34, "grad_norm": 1.6242811914701654, "learning_rate": 7.878387506930132e-06, "loss": 0.5671, "step": 3183 }, { "epoch": 0.34, "grad_norm": 1.8059776304560673, "learning_rate": 7.876945149821525e-06, "loss": 0.6923, "step": 3184 }, { "epoch": 0.34, "grad_norm": 1.4261937074432685, "learning_rate": 7.875502434727e-06, "loss": 0.55, "step": 3185 }, { "epoch": 0.34, "grad_norm": 1.7515473176252614, "learning_rate": 7.874059361826079e-06, "loss": 0.6462, "step": 3186 }, { "epoch": 0.34, "grad_norm": 1.778629520263953, "learning_rate": 7.87261593129833e-06, "loss": 0.7335, "step": 3187 }, { "epoch": 0.34, "grad_norm": 1.9587773036285792, "learning_rate": 7.871172143323362e-06, "loss": 0.704, "step": 3188 }, { "epoch": 0.34, "grad_norm": 1.7682057468211216, "learning_rate": 7.869727998080825e-06, "loss": 0.5557, "step": 3189 }, { "epoch": 0.34, "grad_norm": 1.7548832699126387, "learning_rate": 7.868283495750423e-06, "loss": 0.6933, "step": 3190 }, { "epoch": 0.34, "grad_norm": 1.1628529925698585, "learning_rate": 7.866838636511895e-06, "loss": 0.5495, "step": 3191 }, { "epoch": 0.34, "grad_norm": 1.6869505892862056, "learning_rate": 7.865393420545033e-06, "loss": 0.658, "step": 3192 }, { "epoch": 0.34, "grad_norm": 1.1212370976003099, "learning_rate": 7.863947848029666e-06, "loss": 0.5272, "step": 3193 }, { "epoch": 0.34, "grad_norm": 1.638076526181936, "learning_rate": 7.86250191914567e-06, "loss": 0.6073, "step": 3194 }, { "epoch": 0.34, "grad_norm": 1.7554590877122378, "learning_rate": 7.86105563407297e-06, "loss": 0.655, "step": 3195 }, { "epoch": 0.34, "grad_norm": 1.687905266264669, "learning_rate": 7.859608992991527e-06, "loss": 0.6735, "step": 3196 }, { "epoch": 0.34, "grad_norm": 1.6515244267094389, "learning_rate": 7.858161996081353e-06, "loss": 0.6412, "step": 3197 }, { "epoch": 0.34, "grad_norm": 2.6114343133129605, "learning_rate": 7.856714643522498e-06, "loss": 0.7054, "step": 3198 }, { "epoch": 0.34, "grad_norm": 1.812188920276069, "learning_rate": 7.855266935495064e-06, "loss": 0.5718, "step": 3199 }, { "epoch": 0.34, "grad_norm": 1.7800153472510518, "learning_rate": 7.85381887217919e-06, "loss": 0.6826, "step": 3200 }, { "epoch": 0.34, "grad_norm": 1.5748929810699366, "learning_rate": 7.852370453755064e-06, "loss": 0.5275, "step": 3201 }, { "epoch": 0.34, "grad_norm": 1.8717251442628966, "learning_rate": 7.850921680402917e-06, "loss": 0.6753, "step": 3202 }, { "epoch": 0.34, "grad_norm": 1.841799350544821, "learning_rate": 7.849472552303024e-06, "loss": 0.5841, "step": 3203 }, { "epoch": 0.34, "grad_norm": 1.849992625171743, "learning_rate": 7.848023069635701e-06, "loss": 0.6159, "step": 3204 }, { "epoch": 0.34, "grad_norm": 1.6347116232077652, "learning_rate": 7.846573232581313e-06, "loss": 0.6354, "step": 3205 }, { "epoch": 0.34, "grad_norm": 1.709972851100207, "learning_rate": 7.845123041320266e-06, "loss": 0.5811, "step": 3206 }, { "epoch": 0.34, "grad_norm": 1.8932000826065403, "learning_rate": 7.843672496033013e-06, "loss": 0.683, "step": 3207 }, { "epoch": 0.34, "grad_norm": 1.5447730876072236, "learning_rate": 7.842221596900049e-06, "loss": 0.6499, "step": 3208 }, { "epoch": 0.34, "grad_norm": 1.762623533168464, "learning_rate": 7.840770344101911e-06, "loss": 0.6461, "step": 3209 }, { "epoch": 0.34, "grad_norm": 1.904051634194514, "learning_rate": 7.839318737819184e-06, "loss": 0.6611, "step": 3210 }, { "epoch": 0.34, "grad_norm": 1.7914855652527273, "learning_rate": 7.837866778232496e-06, "loss": 0.6885, "step": 3211 }, { "epoch": 0.34, "grad_norm": 1.7089215814092322, "learning_rate": 7.836414465522518e-06, "loss": 0.6375, "step": 3212 }, { "epoch": 0.34, "grad_norm": 1.7507599572538552, "learning_rate": 7.834961799869962e-06, "loss": 0.6382, "step": 3213 }, { "epoch": 0.34, "grad_norm": 1.6664908079262208, "learning_rate": 7.83350878145559e-06, "loss": 0.6231, "step": 3214 }, { "epoch": 0.34, "grad_norm": 1.9022053221002202, "learning_rate": 7.832055410460202e-06, "loss": 0.5693, "step": 3215 }, { "epoch": 0.34, "grad_norm": 2.054219483265457, "learning_rate": 7.83060168706465e-06, "loss": 0.6779, "step": 3216 }, { "epoch": 0.34, "grad_norm": 1.8201163318387161, "learning_rate": 7.829147611449821e-06, "loss": 0.7339, "step": 3217 }, { "epoch": 0.34, "grad_norm": 1.7074683404100464, "learning_rate": 7.82769318379665e-06, "loss": 0.5874, "step": 3218 }, { "epoch": 0.34, "grad_norm": 1.6259113963237155, "learning_rate": 7.826238404286116e-06, "loss": 0.573, "step": 3219 }, { "epoch": 0.34, "grad_norm": 1.9481498439132394, "learning_rate": 7.824783273099238e-06, "loss": 0.6869, "step": 3220 }, { "epoch": 0.34, "grad_norm": 1.8444863799682603, "learning_rate": 7.823327790417086e-06, "loss": 0.573, "step": 3221 }, { "epoch": 0.34, "grad_norm": 1.7617338267163036, "learning_rate": 7.821871956420769e-06, "loss": 0.5445, "step": 3222 }, { "epoch": 0.34, "grad_norm": 1.675396400545884, "learning_rate": 7.820415771291437e-06, "loss": 0.685, "step": 3223 }, { "epoch": 0.34, "grad_norm": 1.6992690197987976, "learning_rate": 7.81895923521029e-06, "loss": 0.6185, "step": 3224 }, { "epoch": 0.34, "grad_norm": 1.7527726392630143, "learning_rate": 7.817502348358569e-06, "loss": 0.6963, "step": 3225 }, { "epoch": 0.34, "grad_norm": 1.821643669798772, "learning_rate": 7.816045110917556e-06, "loss": 0.7847, "step": 3226 }, { "epoch": 0.34, "grad_norm": 1.828927821432201, "learning_rate": 7.81458752306858e-06, "loss": 0.734, "step": 3227 }, { "epoch": 0.34, "grad_norm": 1.6918215246292692, "learning_rate": 7.813129584993013e-06, "loss": 0.682, "step": 3228 }, { "epoch": 0.34, "grad_norm": 1.7909574052047625, "learning_rate": 7.811671296872269e-06, "loss": 0.6103, "step": 3229 }, { "epoch": 0.34, "grad_norm": 1.741463455727674, "learning_rate": 7.810212658887808e-06, "loss": 0.6692, "step": 3230 }, { "epoch": 0.34, "grad_norm": 1.7410146976512473, "learning_rate": 7.80875367122113e-06, "loss": 0.6309, "step": 3231 }, { "epoch": 0.34, "grad_norm": 1.8092867051388546, "learning_rate": 7.807294334053785e-06, "loss": 0.6068, "step": 3232 }, { "epoch": 0.34, "grad_norm": 1.8086165204222218, "learning_rate": 7.805834647567356e-06, "loss": 0.656, "step": 3233 }, { "epoch": 0.34, "grad_norm": 1.808720649240552, "learning_rate": 7.804374611943481e-06, "loss": 0.6499, "step": 3234 }, { "epoch": 0.35, "grad_norm": 1.9560790721740489, "learning_rate": 7.802914227363834e-06, "loss": 0.68, "step": 3235 }, { "epoch": 0.35, "grad_norm": 1.8090068462253486, "learning_rate": 7.801453494010135e-06, "loss": 0.7281, "step": 3236 }, { "epoch": 0.35, "grad_norm": 1.7322877515959507, "learning_rate": 7.799992412064147e-06, "loss": 0.6821, "step": 3237 }, { "epoch": 0.35, "grad_norm": 1.6081789473369623, "learning_rate": 7.798530981707674e-06, "loss": 0.497, "step": 3238 }, { "epoch": 0.35, "grad_norm": 1.9120147325808396, "learning_rate": 7.79706920312257e-06, "loss": 0.5287, "step": 3239 }, { "epoch": 0.35, "grad_norm": 1.7464664881527068, "learning_rate": 7.795607076490725e-06, "loss": 0.7359, "step": 3240 }, { "epoch": 0.35, "grad_norm": 1.6244014350554545, "learning_rate": 7.794144601994075e-06, "loss": 0.6236, "step": 3241 }, { "epoch": 0.35, "grad_norm": 1.6391060742310677, "learning_rate": 7.792681779814602e-06, "loss": 0.6053, "step": 3242 }, { "epoch": 0.35, "grad_norm": 1.4540134244513763, "learning_rate": 7.791218610134324e-06, "loss": 0.5619, "step": 3243 }, { "epoch": 0.35, "grad_norm": 1.8192907260522357, "learning_rate": 7.789755093135313e-06, "loss": 0.5762, "step": 3244 }, { "epoch": 0.35, "grad_norm": 1.8904272488370777, "learning_rate": 7.788291228999675e-06, "loss": 0.6143, "step": 3245 }, { "epoch": 0.35, "grad_norm": 1.685781410064708, "learning_rate": 7.786827017909563e-06, "loss": 0.6571, "step": 3246 }, { "epoch": 0.35, "grad_norm": 1.99700513751262, "learning_rate": 7.785362460047174e-06, "loss": 0.5647, "step": 3247 }, { "epoch": 0.35, "grad_norm": 1.7799846049442885, "learning_rate": 7.783897555594743e-06, "loss": 0.5895, "step": 3248 }, { "epoch": 0.35, "grad_norm": 1.68701084508509, "learning_rate": 7.782432304734558e-06, "loss": 0.6629, "step": 3249 }, { "epoch": 0.35, "grad_norm": 1.8098730109757866, "learning_rate": 7.780966707648938e-06, "loss": 0.606, "step": 3250 }, { "epoch": 0.35, "grad_norm": 1.5921509015584663, "learning_rate": 7.779500764520257e-06, "loss": 0.6493, "step": 3251 }, { "epoch": 0.35, "grad_norm": 1.6925868667085264, "learning_rate": 7.778034475530919e-06, "loss": 0.5299, "step": 3252 }, { "epoch": 0.35, "grad_norm": 1.7686200967621908, "learning_rate": 7.776567840863385e-06, "loss": 0.6403, "step": 3253 }, { "epoch": 0.35, "grad_norm": 1.5338801771965096, "learning_rate": 7.775100860700149e-06, "loss": 0.61, "step": 3254 }, { "epoch": 0.35, "grad_norm": 1.7788960755870724, "learning_rate": 7.77363353522375e-06, "loss": 0.6199, "step": 3255 }, { "epoch": 0.35, "grad_norm": 1.8175602131833257, "learning_rate": 7.772165864616774e-06, "loss": 0.6192, "step": 3256 }, { "epoch": 0.35, "grad_norm": 1.6212455168313131, "learning_rate": 7.770697849061845e-06, "loss": 0.6263, "step": 3257 }, { "epoch": 0.35, "grad_norm": 1.9702510314359758, "learning_rate": 7.769229488741637e-06, "loss": 0.7254, "step": 3258 }, { "epoch": 0.35, "grad_norm": 1.7321988036674152, "learning_rate": 7.767760783838854e-06, "loss": 0.5854, "step": 3259 }, { "epoch": 0.35, "grad_norm": 1.84954396753189, "learning_rate": 7.766291734536256e-06, "loss": 0.6372, "step": 3260 }, { "epoch": 0.35, "grad_norm": 1.9604752786661874, "learning_rate": 7.76482234101664e-06, "loss": 0.7608, "step": 3261 }, { "epoch": 0.35, "grad_norm": 1.752998004911322, "learning_rate": 7.763352603462846e-06, "loss": 0.6365, "step": 3262 }, { "epoch": 0.35, "grad_norm": 1.7084663262336557, "learning_rate": 7.761882522057758e-06, "loss": 0.7322, "step": 3263 }, { "epoch": 0.35, "grad_norm": 1.8029604168842082, "learning_rate": 7.7604120969843e-06, "loss": 0.6721, "step": 3264 }, { "epoch": 0.35, "grad_norm": 1.9906107363178496, "learning_rate": 7.758941328425445e-06, "loss": 0.6484, "step": 3265 }, { "epoch": 0.35, "grad_norm": 1.8892869868949245, "learning_rate": 7.7574702165642e-06, "loss": 0.6432, "step": 3266 }, { "epoch": 0.35, "grad_norm": 1.4318244031386256, "learning_rate": 7.755998761583623e-06, "loss": 0.5565, "step": 3267 }, { "epoch": 0.35, "grad_norm": 1.702352877475695, "learning_rate": 7.754526963666809e-06, "loss": 0.6327, "step": 3268 }, { "epoch": 0.35, "grad_norm": 2.124198498813597, "learning_rate": 7.753054822996898e-06, "loss": 0.7151, "step": 3269 }, { "epoch": 0.35, "grad_norm": 1.6816271514301024, "learning_rate": 7.751582339757073e-06, "loss": 0.6761, "step": 3270 }, { "epoch": 0.35, "grad_norm": 1.257299839478177, "learning_rate": 7.750109514130558e-06, "loss": 0.5632, "step": 3271 }, { "epoch": 0.35, "grad_norm": 1.710380793135744, "learning_rate": 7.748636346300622e-06, "loss": 0.4971, "step": 3272 }, { "epoch": 0.35, "grad_norm": 1.805938403894929, "learning_rate": 7.747162836450574e-06, "loss": 0.6718, "step": 3273 }, { "epoch": 0.35, "grad_norm": 1.751194062213831, "learning_rate": 7.745688984763767e-06, "loss": 0.6194, "step": 3274 }, { "epoch": 0.35, "grad_norm": 1.8861570262217664, "learning_rate": 7.744214791423597e-06, "loss": 0.6994, "step": 3275 }, { "epoch": 0.35, "grad_norm": 1.7358134240915406, "learning_rate": 7.7427402566135e-06, "loss": 0.6496, "step": 3276 }, { "epoch": 0.35, "grad_norm": 1.9631661488210563, "learning_rate": 7.74126538051696e-06, "loss": 0.6917, "step": 3277 }, { "epoch": 0.35, "grad_norm": 1.7948743550942192, "learning_rate": 7.739790163317496e-06, "loss": 0.7156, "step": 3278 }, { "epoch": 0.35, "grad_norm": 1.7252243185921239, "learning_rate": 7.738314605198673e-06, "loss": 0.709, "step": 3279 }, { "epoch": 0.35, "grad_norm": 1.5548774142295174, "learning_rate": 7.736838706344101e-06, "loss": 0.56, "step": 3280 }, { "epoch": 0.35, "grad_norm": 1.8402925676537958, "learning_rate": 7.73536246693743e-06, "loss": 0.6601, "step": 3281 }, { "epoch": 0.35, "grad_norm": 1.2718430702927934, "learning_rate": 7.73388588716235e-06, "loss": 0.5438, "step": 3282 }, { "epoch": 0.35, "grad_norm": 1.966487430785713, "learning_rate": 7.732408967202598e-06, "loss": 0.6746, "step": 3283 }, { "epoch": 0.35, "grad_norm": 1.7671349505265415, "learning_rate": 7.73093170724195e-06, "loss": 0.7178, "step": 3284 }, { "epoch": 0.35, "grad_norm": 1.716043813361937, "learning_rate": 7.729454107464226e-06, "loss": 0.6259, "step": 3285 }, { "epoch": 0.35, "grad_norm": 1.7313892686239172, "learning_rate": 7.727976168053285e-06, "loss": 0.6451, "step": 3286 }, { "epoch": 0.35, "grad_norm": 1.6892965303496545, "learning_rate": 7.726497889193036e-06, "loss": 0.5491, "step": 3287 }, { "epoch": 0.35, "grad_norm": 1.9881219528357448, "learning_rate": 7.725019271067423e-06, "loss": 0.7099, "step": 3288 }, { "epoch": 0.35, "grad_norm": 1.763405824571493, "learning_rate": 7.723540313860431e-06, "loss": 0.631, "step": 3289 }, { "epoch": 0.35, "grad_norm": 1.95257332854629, "learning_rate": 7.722061017756094e-06, "loss": 0.7114, "step": 3290 }, { "epoch": 0.35, "grad_norm": 1.7663825925656507, "learning_rate": 7.720581382938486e-06, "loss": 0.7236, "step": 3291 }, { "epoch": 0.35, "grad_norm": 1.667455641917024, "learning_rate": 7.719101409591718e-06, "loss": 0.6098, "step": 3292 }, { "epoch": 0.35, "grad_norm": 2.1390874743051906, "learning_rate": 7.717621097899949e-06, "loss": 0.68, "step": 3293 }, { "epoch": 0.35, "grad_norm": 2.1645004682022635, "learning_rate": 7.716140448047378e-06, "loss": 0.6388, "step": 3294 }, { "epoch": 0.35, "grad_norm": 1.6486160475149967, "learning_rate": 7.714659460218247e-06, "loss": 0.6566, "step": 3295 }, { "epoch": 0.35, "grad_norm": 1.7242125722198236, "learning_rate": 7.71317813459684e-06, "loss": 0.6653, "step": 3296 }, { "epoch": 0.35, "grad_norm": 1.7378912118247398, "learning_rate": 7.711696471367477e-06, "loss": 0.5627, "step": 3297 }, { "epoch": 0.35, "grad_norm": 1.9269041767926418, "learning_rate": 7.710214470714533e-06, "loss": 0.5622, "step": 3298 }, { "epoch": 0.35, "grad_norm": 1.6908925956507983, "learning_rate": 7.70873213282241e-06, "loss": 0.6674, "step": 3299 }, { "epoch": 0.35, "grad_norm": 1.3910596914227131, "learning_rate": 7.707249457875563e-06, "loss": 0.5351, "step": 3300 }, { "epoch": 0.35, "grad_norm": 1.7850953946429537, "learning_rate": 7.705766446058486e-06, "loss": 0.583, "step": 3301 }, { "epoch": 0.35, "grad_norm": 1.9147978876697247, "learning_rate": 7.704283097555712e-06, "loss": 0.6814, "step": 3302 }, { "epoch": 0.35, "grad_norm": 1.8027854691375034, "learning_rate": 7.702799412551821e-06, "loss": 0.632, "step": 3303 }, { "epoch": 0.35, "grad_norm": 1.8226868717078968, "learning_rate": 7.701315391231428e-06, "loss": 0.6172, "step": 3304 }, { "epoch": 0.35, "grad_norm": 1.9705355623665106, "learning_rate": 7.699831033779195e-06, "loss": 0.6286, "step": 3305 }, { "epoch": 0.35, "grad_norm": 1.6798207334928832, "learning_rate": 7.698346340379824e-06, "loss": 0.5768, "step": 3306 }, { "epoch": 0.35, "grad_norm": 1.8806136045660433, "learning_rate": 7.696861311218063e-06, "loss": 0.6208, "step": 3307 }, { "epoch": 0.35, "grad_norm": 1.7765677476814812, "learning_rate": 7.695375946478692e-06, "loss": 0.691, "step": 3308 }, { "epoch": 0.35, "grad_norm": 1.9887853860911335, "learning_rate": 7.693890246346545e-06, "loss": 0.5565, "step": 3309 }, { "epoch": 0.35, "grad_norm": 1.7643542428842254, "learning_rate": 7.692404211006489e-06, "loss": 0.6632, "step": 3310 }, { "epoch": 0.35, "grad_norm": 1.9708430454877424, "learning_rate": 7.690917840643433e-06, "loss": 0.6183, "step": 3311 }, { "epoch": 0.35, "grad_norm": 1.5879405236133581, "learning_rate": 7.689431135442333e-06, "loss": 0.51, "step": 3312 }, { "epoch": 0.35, "grad_norm": 1.8568425201963927, "learning_rate": 7.687944095588183e-06, "loss": 0.5842, "step": 3313 }, { "epoch": 0.35, "grad_norm": 1.8402135488875389, "learning_rate": 7.686456721266018e-06, "loss": 0.7296, "step": 3314 }, { "epoch": 0.35, "grad_norm": 1.6194501004845834, "learning_rate": 7.684969012660919e-06, "loss": 0.5702, "step": 3315 }, { "epoch": 0.35, "grad_norm": 1.711994882838156, "learning_rate": 7.683480969958005e-06, "loss": 0.6849, "step": 3316 }, { "epoch": 0.35, "grad_norm": 1.7314455514636369, "learning_rate": 7.681992593342431e-06, "loss": 0.5998, "step": 3317 }, { "epoch": 0.35, "grad_norm": 1.6544155378207324, "learning_rate": 7.680503882999408e-06, "loss": 0.6166, "step": 3318 }, { "epoch": 0.35, "grad_norm": 2.023765598315483, "learning_rate": 7.679014839114176e-06, "loss": 0.7591, "step": 3319 }, { "epoch": 0.35, "grad_norm": 1.6710335247899926, "learning_rate": 7.67752546187202e-06, "loss": 0.6769, "step": 3320 }, { "epoch": 0.35, "grad_norm": 2.0910797784723227, "learning_rate": 7.676035751458269e-06, "loss": 0.7832, "step": 3321 }, { "epoch": 0.35, "grad_norm": 1.6276744903658336, "learning_rate": 7.674545708058292e-06, "loss": 0.7225, "step": 3322 }, { "epoch": 0.35, "grad_norm": 1.779589292511293, "learning_rate": 7.673055331857498e-06, "loss": 0.6721, "step": 3323 }, { "epoch": 0.35, "grad_norm": 1.7097806078891427, "learning_rate": 7.671564623041338e-06, "loss": 0.6455, "step": 3324 }, { "epoch": 0.35, "grad_norm": 1.800439639237566, "learning_rate": 7.670073581795304e-06, "loss": 0.5508, "step": 3325 }, { "epoch": 0.35, "grad_norm": 1.8402401260254226, "learning_rate": 7.668582208304933e-06, "loss": 0.6643, "step": 3326 }, { "epoch": 0.35, "grad_norm": 1.7463291438786903, "learning_rate": 7.667090502755798e-06, "loss": 0.7124, "step": 3327 }, { "epoch": 0.35, "grad_norm": 1.8024767148406926, "learning_rate": 7.665598465333519e-06, "loss": 0.5955, "step": 3328 }, { "epoch": 0.36, "grad_norm": 1.654025001889264, "learning_rate": 7.66410609622375e-06, "loss": 0.615, "step": 3329 }, { "epoch": 0.36, "grad_norm": 1.7010902427989918, "learning_rate": 7.662613395612195e-06, "loss": 0.6553, "step": 3330 }, { "epoch": 0.36, "grad_norm": 1.7757709497060161, "learning_rate": 7.66112036368459e-06, "loss": 0.6881, "step": 3331 }, { "epoch": 0.36, "grad_norm": 1.7542441031378448, "learning_rate": 7.659627000626724e-06, "loss": 0.6286, "step": 3332 }, { "epoch": 0.36, "grad_norm": 1.8356185394591906, "learning_rate": 7.65813330662441e-06, "loss": 0.6669, "step": 3333 }, { "epoch": 0.36, "grad_norm": 1.7292845778197863, "learning_rate": 7.65663928186352e-06, "loss": 0.5839, "step": 3334 }, { "epoch": 0.36, "grad_norm": 1.5839549514868283, "learning_rate": 7.655144926529957e-06, "loss": 0.6194, "step": 3335 }, { "epoch": 0.36, "grad_norm": 1.908552844589617, "learning_rate": 7.653650240809667e-06, "loss": 0.5945, "step": 3336 }, { "epoch": 0.36, "grad_norm": 1.6736378816460737, "learning_rate": 7.65215522488864e-06, "loss": 0.5909, "step": 3337 }, { "epoch": 0.36, "grad_norm": 1.9857114021027205, "learning_rate": 7.650659878952902e-06, "loss": 0.6082, "step": 3338 }, { "epoch": 0.36, "grad_norm": 2.0232831112517276, "learning_rate": 7.649164203188524e-06, "loss": 0.558, "step": 3339 }, { "epoch": 0.36, "grad_norm": 1.6548010175441064, "learning_rate": 7.64766819778162e-06, "loss": 0.6626, "step": 3340 }, { "epoch": 0.36, "grad_norm": 1.7392261856417026, "learning_rate": 7.646171862918334e-06, "loss": 0.5904, "step": 3341 }, { "epoch": 0.36, "grad_norm": 1.6925349376947167, "learning_rate": 7.644675198784867e-06, "loss": 0.6128, "step": 3342 }, { "epoch": 0.36, "grad_norm": 1.8888839973279643, "learning_rate": 7.643178205567449e-06, "loss": 0.5776, "step": 3343 }, { "epoch": 0.36, "grad_norm": 1.8943308200487852, "learning_rate": 7.641680883452356e-06, "loss": 0.6588, "step": 3344 }, { "epoch": 0.36, "grad_norm": 2.1531779900709824, "learning_rate": 7.640183232625902e-06, "loss": 0.6682, "step": 3345 }, { "epoch": 0.36, "grad_norm": 2.0150563978663696, "learning_rate": 7.638685253274446e-06, "loss": 0.7242, "step": 3346 }, { "epoch": 0.36, "grad_norm": 1.9317130446495543, "learning_rate": 7.637186945584386e-06, "loss": 0.5607, "step": 3347 }, { "epoch": 0.36, "grad_norm": 1.335330183199838, "learning_rate": 7.635688309742157e-06, "loss": 0.5473, "step": 3348 }, { "epoch": 0.36, "grad_norm": 1.9379896707231918, "learning_rate": 7.634189345934241e-06, "loss": 0.6596, "step": 3349 }, { "epoch": 0.36, "grad_norm": 1.7952043366544588, "learning_rate": 7.632690054347157e-06, "loss": 0.5388, "step": 3350 }, { "epoch": 0.36, "grad_norm": 1.712239868104288, "learning_rate": 7.631190435167466e-06, "loss": 0.6516, "step": 3351 }, { "epoch": 0.36, "grad_norm": 1.8912080517168377, "learning_rate": 7.62969048858177e-06, "loss": 0.6742, "step": 3352 }, { "epoch": 0.36, "grad_norm": 1.6909346527326188, "learning_rate": 7.628190214776712e-06, "loss": 0.569, "step": 3353 }, { "epoch": 0.36, "grad_norm": 2.1672291620085704, "learning_rate": 7.626689613938974e-06, "loss": 0.5852, "step": 3354 }, { "epoch": 0.36, "grad_norm": 1.6766072119559692, "learning_rate": 7.625188686255278e-06, "loss": 0.6249, "step": 3355 }, { "epoch": 0.36, "grad_norm": 1.776238298432095, "learning_rate": 7.623687431912391e-06, "loss": 0.6457, "step": 3356 }, { "epoch": 0.36, "grad_norm": 1.6550262756860208, "learning_rate": 7.622185851097117e-06, "loss": 0.6681, "step": 3357 }, { "epoch": 0.36, "grad_norm": 1.9888725727503036, "learning_rate": 7.6206839439963035e-06, "loss": 0.7091, "step": 3358 }, { "epoch": 0.36, "grad_norm": 1.6942627916574264, "learning_rate": 7.6191817107968325e-06, "loss": 0.6711, "step": 3359 }, { "epoch": 0.36, "grad_norm": 1.8713551828911887, "learning_rate": 7.617679151685635e-06, "loss": 0.6377, "step": 3360 }, { "epoch": 0.36, "grad_norm": 1.6924574639485925, "learning_rate": 7.6161762668496775e-06, "loss": 0.6261, "step": 3361 }, { "epoch": 0.36, "grad_norm": 1.8600059600602552, "learning_rate": 7.614673056475967e-06, "loss": 0.6765, "step": 3362 }, { "epoch": 0.36, "grad_norm": 1.7515902139161177, "learning_rate": 7.6131695207515535e-06, "loss": 0.6558, "step": 3363 }, { "epoch": 0.36, "grad_norm": 1.6634470699043162, "learning_rate": 7.611665659863523e-06, "loss": 0.6395, "step": 3364 }, { "epoch": 0.36, "grad_norm": 1.4357487347359135, "learning_rate": 7.610161473999008e-06, "loss": 0.5486, "step": 3365 }, { "epoch": 0.36, "grad_norm": 1.5715221666300072, "learning_rate": 7.6086569633451765e-06, "loss": 0.6719, "step": 3366 }, { "epoch": 0.36, "grad_norm": 1.821247555755963, "learning_rate": 7.607152128089239e-06, "loss": 0.6648, "step": 3367 }, { "epoch": 0.36, "grad_norm": 1.8732832765495395, "learning_rate": 7.605646968418448e-06, "loss": 0.713, "step": 3368 }, { "epoch": 0.36, "grad_norm": 1.1706680650685288, "learning_rate": 7.604141484520091e-06, "loss": 0.5535, "step": 3369 }, { "epoch": 0.36, "grad_norm": 1.8214927044942162, "learning_rate": 7.6026356765815035e-06, "loss": 0.7218, "step": 3370 }, { "epoch": 0.36, "grad_norm": 1.8002724916339685, "learning_rate": 7.6011295447900534e-06, "loss": 0.6643, "step": 3371 }, { "epoch": 0.36, "grad_norm": 1.7476120507882844, "learning_rate": 7.599623089333154e-06, "loss": 0.7172, "step": 3372 }, { "epoch": 0.36, "grad_norm": 1.6083687461386125, "learning_rate": 7.59811631039826e-06, "loss": 0.6546, "step": 3373 }, { "epoch": 0.36, "grad_norm": 1.9492596929797454, "learning_rate": 7.5966092081728606e-06, "loss": 0.6396, "step": 3374 }, { "epoch": 0.36, "grad_norm": 1.821947812594322, "learning_rate": 7.5951017828444896e-06, "loss": 0.704, "step": 3375 }, { "epoch": 0.36, "grad_norm": 1.7094602315804224, "learning_rate": 7.59359403460072e-06, "loss": 0.5829, "step": 3376 }, { "epoch": 0.36, "grad_norm": 1.5213542831191391, "learning_rate": 7.592085963629165e-06, "loss": 0.5425, "step": 3377 }, { "epoch": 0.36, "grad_norm": 1.8061297063333106, "learning_rate": 7.590577570117476e-06, "loss": 0.67, "step": 3378 }, { "epoch": 0.36, "grad_norm": 1.77953035953973, "learning_rate": 7.589068854253351e-06, "loss": 0.64, "step": 3379 }, { "epoch": 0.36, "grad_norm": 2.0672884948954064, "learning_rate": 7.587559816224519e-06, "loss": 0.6002, "step": 3380 }, { "epoch": 0.36, "grad_norm": 1.6977127722490868, "learning_rate": 7.586050456218756e-06, "loss": 0.6326, "step": 3381 }, { "epoch": 0.36, "grad_norm": 1.8135296477637315, "learning_rate": 7.5845407744238755e-06, "loss": 0.6508, "step": 3382 }, { "epoch": 0.36, "grad_norm": 1.4062009238636308, "learning_rate": 7.583030771027729e-06, "loss": 0.5482, "step": 3383 }, { "epoch": 0.36, "grad_norm": 1.7795096183545664, "learning_rate": 7.5815204462182135e-06, "loss": 0.6624, "step": 3384 }, { "epoch": 0.36, "grad_norm": 1.8254173686437005, "learning_rate": 7.580009800183261e-06, "loss": 0.5684, "step": 3385 }, { "epoch": 0.36, "grad_norm": 1.8448111170022008, "learning_rate": 7.578498833110846e-06, "loss": 0.7085, "step": 3386 }, { "epoch": 0.36, "grad_norm": 1.8593754955200803, "learning_rate": 7.5769875451889795e-06, "loss": 0.6538, "step": 3387 }, { "epoch": 0.36, "grad_norm": 1.2452078747165956, "learning_rate": 7.575475936605719e-06, "loss": 0.5533, "step": 3388 }, { "epoch": 0.36, "grad_norm": 1.6458852890022808, "learning_rate": 7.5739640075491546e-06, "loss": 0.5998, "step": 3389 }, { "epoch": 0.36, "grad_norm": 1.8465390385929177, "learning_rate": 7.572451758207422e-06, "loss": 0.6821, "step": 3390 }, { "epoch": 0.36, "grad_norm": 1.457630973324436, "learning_rate": 7.570939188768693e-06, "loss": 0.5165, "step": 3391 }, { "epoch": 0.36, "grad_norm": 1.5013226461245286, "learning_rate": 7.569426299421181e-06, "loss": 0.5511, "step": 3392 }, { "epoch": 0.36, "grad_norm": 1.7984335207301465, "learning_rate": 7.567913090353138e-06, "loss": 0.6655, "step": 3393 }, { "epoch": 0.36, "grad_norm": 1.6986516935376372, "learning_rate": 7.5663995617528595e-06, "loss": 0.6387, "step": 3394 }, { "epoch": 0.36, "grad_norm": 2.054398386579512, "learning_rate": 7.564885713808674e-06, "loss": 0.6599, "step": 3395 }, { "epoch": 0.36, "grad_norm": 1.7625529154361095, "learning_rate": 7.5633715467089575e-06, "loss": 0.6487, "step": 3396 }, { "epoch": 0.36, "grad_norm": 1.7645118450598831, "learning_rate": 7.56185706064212e-06, "loss": 0.6535, "step": 3397 }, { "epoch": 0.36, "grad_norm": 1.9088603429783777, "learning_rate": 7.560342255796611e-06, "loss": 0.5758, "step": 3398 }, { "epoch": 0.36, "grad_norm": 1.708993375981094, "learning_rate": 7.558827132360924e-06, "loss": 0.6181, "step": 3399 }, { "epoch": 0.36, "grad_norm": 1.7855284471165687, "learning_rate": 7.557311690523589e-06, "loss": 0.7271, "step": 3400 }, { "epoch": 0.36, "grad_norm": 2.2868169657578155, "learning_rate": 7.555795930473178e-06, "loss": 0.6187, "step": 3401 }, { "epoch": 0.36, "grad_norm": 1.6660499083848328, "learning_rate": 7.554279852398299e-06, "loss": 0.6218, "step": 3402 }, { "epoch": 0.36, "grad_norm": 1.7558270674300878, "learning_rate": 7.552763456487604e-06, "loss": 0.6577, "step": 3403 }, { "epoch": 0.36, "grad_norm": 1.7281878304476186, "learning_rate": 7.55124674292978e-06, "loss": 0.6905, "step": 3404 }, { "epoch": 0.36, "grad_norm": 1.946903091027249, "learning_rate": 7.549729711913556e-06, "loss": 0.7402, "step": 3405 }, { "epoch": 0.36, "grad_norm": 1.6533087638170507, "learning_rate": 7.548212363627701e-06, "loss": 0.5661, "step": 3406 }, { "epoch": 0.36, "grad_norm": 1.774553867912558, "learning_rate": 7.54669469826102e-06, "loss": 0.6059, "step": 3407 }, { "epoch": 0.36, "grad_norm": 1.8142545051076586, "learning_rate": 7.545176716002365e-06, "loss": 0.6596, "step": 3408 }, { "epoch": 0.36, "grad_norm": 1.7141323158827133, "learning_rate": 7.543658417040618e-06, "loss": 0.6704, "step": 3409 }, { "epoch": 0.36, "grad_norm": 1.8383495357412616, "learning_rate": 7.542139801564708e-06, "loss": 0.631, "step": 3410 }, { "epoch": 0.36, "grad_norm": 1.9151710108871802, "learning_rate": 7.5406208697636e-06, "loss": 0.7461, "step": 3411 }, { "epoch": 0.36, "grad_norm": 1.75871103758668, "learning_rate": 7.539101621826297e-06, "loss": 0.6181, "step": 3412 }, { "epoch": 0.36, "grad_norm": 1.3602299238294189, "learning_rate": 7.537582057941843e-06, "loss": 0.532, "step": 3413 }, { "epoch": 0.36, "grad_norm": 1.7968873409323358, "learning_rate": 7.536062178299324e-06, "loss": 0.6027, "step": 3414 }, { "epoch": 0.36, "grad_norm": 1.7998952507245436, "learning_rate": 7.534541983087859e-06, "loss": 0.5987, "step": 3415 }, { "epoch": 0.36, "grad_norm": 2.2338844547645453, "learning_rate": 7.533021472496614e-06, "loss": 0.6166, "step": 3416 }, { "epoch": 0.36, "grad_norm": 1.739616843894053, "learning_rate": 7.531500646714788e-06, "loss": 0.608, "step": 3417 }, { "epoch": 0.36, "grad_norm": 1.6649940618360057, "learning_rate": 7.5299795059316215e-06, "loss": 0.6438, "step": 3418 }, { "epoch": 0.36, "grad_norm": 1.7453862088742356, "learning_rate": 7.528458050336393e-06, "loss": 0.5704, "step": 3419 }, { "epoch": 0.36, "grad_norm": 1.8300158436485028, "learning_rate": 7.526936280118424e-06, "loss": 0.684, "step": 3420 }, { "epoch": 0.36, "grad_norm": 1.7906736947748159, "learning_rate": 7.525414195467069e-06, "loss": 0.7405, "step": 3421 }, { "epoch": 0.37, "grad_norm": 1.7596078219365954, "learning_rate": 7.523891796571727e-06, "loss": 0.5935, "step": 3422 }, { "epoch": 0.37, "grad_norm": 1.8181505821648063, "learning_rate": 7.522369083621835e-06, "loss": 0.6576, "step": 3423 }, { "epoch": 0.37, "grad_norm": 1.560105080263446, "learning_rate": 7.520846056806865e-06, "loss": 0.6547, "step": 3424 }, { "epoch": 0.37, "grad_norm": 1.7701641454881678, "learning_rate": 7.5193227163163354e-06, "loss": 0.6025, "step": 3425 }, { "epoch": 0.37, "grad_norm": 2.0492523564189766, "learning_rate": 7.517799062339797e-06, "loss": 0.6173, "step": 3426 }, { "epoch": 0.37, "grad_norm": 1.5779177656365502, "learning_rate": 7.516275095066843e-06, "loss": 0.7303, "step": 3427 }, { "epoch": 0.37, "grad_norm": 1.565126990380978, "learning_rate": 7.514750814687104e-06, "loss": 0.5624, "step": 3428 }, { "epoch": 0.37, "grad_norm": 1.718348342051446, "learning_rate": 7.51322622139025e-06, "loss": 0.6113, "step": 3429 }, { "epoch": 0.37, "grad_norm": 1.696925853430075, "learning_rate": 7.5117013153659905e-06, "loss": 0.715, "step": 3430 }, { "epoch": 0.37, "grad_norm": 1.3787864637558305, "learning_rate": 7.510176096804074e-06, "loss": 0.5568, "step": 3431 }, { "epoch": 0.37, "grad_norm": 2.187960877400775, "learning_rate": 7.508650565894286e-06, "loss": 0.5995, "step": 3432 }, { "epoch": 0.37, "grad_norm": 1.8769236108278682, "learning_rate": 7.507124722826456e-06, "loss": 0.6424, "step": 3433 }, { "epoch": 0.37, "grad_norm": 1.8035257222067838, "learning_rate": 7.505598567790446e-06, "loss": 0.62, "step": 3434 }, { "epoch": 0.37, "grad_norm": 2.146925592270491, "learning_rate": 7.504072100976159e-06, "loss": 0.6639, "step": 3435 }, { "epoch": 0.37, "grad_norm": 2.2179684164193745, "learning_rate": 7.502545322573539e-06, "loss": 0.6512, "step": 3436 }, { "epoch": 0.37, "grad_norm": 2.0102709999000377, "learning_rate": 7.501018232772566e-06, "loss": 0.5931, "step": 3437 }, { "epoch": 0.37, "grad_norm": 2.0167038475356898, "learning_rate": 7.49949083176326e-06, "loss": 0.6918, "step": 3438 }, { "epoch": 0.37, "grad_norm": 1.8028850210145009, "learning_rate": 7.497963119735679e-06, "loss": 0.6541, "step": 3439 }, { "epoch": 0.37, "grad_norm": 1.985389443388676, "learning_rate": 7.496435096879922e-06, "loss": 0.7203, "step": 3440 }, { "epoch": 0.37, "grad_norm": 1.7130510476090879, "learning_rate": 7.494906763386123e-06, "loss": 0.6616, "step": 3441 }, { "epoch": 0.37, "grad_norm": 1.9052656443343945, "learning_rate": 7.4933781194444586e-06, "loss": 0.7001, "step": 3442 }, { "epoch": 0.37, "grad_norm": 1.8021174084381368, "learning_rate": 7.491849165245139e-06, "loss": 0.5948, "step": 3443 }, { "epoch": 0.37, "grad_norm": 1.6551454346018415, "learning_rate": 7.49031990097842e-06, "loss": 0.589, "step": 3444 }, { "epoch": 0.37, "grad_norm": 1.659155020249627, "learning_rate": 7.488790326834588e-06, "loss": 0.572, "step": 3445 }, { "epoch": 0.37, "grad_norm": 1.7337506472686295, "learning_rate": 7.487260443003975e-06, "loss": 0.6185, "step": 3446 }, { "epoch": 0.37, "grad_norm": 2.2009467917910452, "learning_rate": 7.485730249676948e-06, "loss": 0.7211, "step": 3447 }, { "epoch": 0.37, "grad_norm": 1.8941998992308007, "learning_rate": 7.484199747043912e-06, "loss": 0.7589, "step": 3448 }, { "epoch": 0.37, "grad_norm": 1.8813564210680853, "learning_rate": 7.4826689352953104e-06, "loss": 0.6212, "step": 3449 }, { "epoch": 0.37, "grad_norm": 1.6561347408136113, "learning_rate": 7.4811378146216294e-06, "loss": 0.6511, "step": 3450 }, { "epoch": 0.37, "grad_norm": 2.0211900502307683, "learning_rate": 7.479606385213388e-06, "loss": 0.6936, "step": 3451 }, { "epoch": 0.37, "grad_norm": 1.9245039204464647, "learning_rate": 7.478074647261147e-06, "loss": 0.6688, "step": 3452 }, { "epoch": 0.37, "grad_norm": 1.7324759797261233, "learning_rate": 7.476542600955504e-06, "loss": 0.6367, "step": 3453 }, { "epoch": 0.37, "grad_norm": 1.7537708164355053, "learning_rate": 7.475010246487098e-06, "loss": 0.6823, "step": 3454 }, { "epoch": 0.37, "grad_norm": 1.9056589687647336, "learning_rate": 7.4734775840465975e-06, "loss": 0.6619, "step": 3455 }, { "epoch": 0.37, "grad_norm": 1.6802488118900678, "learning_rate": 7.471944613824723e-06, "loss": 0.5606, "step": 3456 }, { "epoch": 0.37, "grad_norm": 1.5107072558431762, "learning_rate": 7.470411336012222e-06, "loss": 0.536, "step": 3457 }, { "epoch": 0.37, "grad_norm": 1.6714175278466303, "learning_rate": 7.468877750799887e-06, "loss": 0.6196, "step": 3458 }, { "epoch": 0.37, "grad_norm": 1.6416147881614669, "learning_rate": 7.467343858378544e-06, "loss": 0.6482, "step": 3459 }, { "epoch": 0.37, "grad_norm": 1.527044336297754, "learning_rate": 7.465809658939059e-06, "loss": 0.5521, "step": 3460 }, { "epoch": 0.37, "grad_norm": 1.860993300327028, "learning_rate": 7.464275152672339e-06, "loss": 0.6495, "step": 3461 }, { "epoch": 0.37, "grad_norm": 1.6471111902246383, "learning_rate": 7.462740339769323e-06, "loss": 0.6154, "step": 3462 }, { "epoch": 0.37, "grad_norm": 1.7176478383013172, "learning_rate": 7.461205220420995e-06, "loss": 0.5964, "step": 3463 }, { "epoch": 0.37, "grad_norm": 1.8042878554330033, "learning_rate": 7.459669794818371e-06, "loss": 0.6715, "step": 3464 }, { "epoch": 0.37, "grad_norm": 1.7336164743757079, "learning_rate": 7.458134063152513e-06, "loss": 0.6283, "step": 3465 }, { "epoch": 0.37, "grad_norm": 1.782564892099691, "learning_rate": 7.456598025614511e-06, "loss": 0.6024, "step": 3466 }, { "epoch": 0.37, "grad_norm": 1.8333170002706007, "learning_rate": 7.455061682395501e-06, "loss": 0.6837, "step": 3467 }, { "epoch": 0.37, "grad_norm": 1.8093718089863706, "learning_rate": 7.4535250336866536e-06, "loss": 0.6557, "step": 3468 }, { "epoch": 0.37, "grad_norm": 1.6686072089988717, "learning_rate": 7.451988079679179e-06, "loss": 0.6479, "step": 3469 }, { "epoch": 0.37, "grad_norm": 1.719233082775883, "learning_rate": 7.450450820564322e-06, "loss": 0.6337, "step": 3470 }, { "epoch": 0.37, "grad_norm": 1.9192253813897322, "learning_rate": 7.44891325653337e-06, "loss": 0.604, "step": 3471 }, { "epoch": 0.37, "grad_norm": 1.7388758225363707, "learning_rate": 7.447375387777645e-06, "loss": 0.7169, "step": 3472 }, { "epoch": 0.37, "grad_norm": 1.8059937099990828, "learning_rate": 7.445837214488508e-06, "loss": 0.5802, "step": 3473 }, { "epoch": 0.37, "grad_norm": 1.6832135298005626, "learning_rate": 7.44429873685736e-06, "loss": 0.6286, "step": 3474 }, { "epoch": 0.37, "grad_norm": 1.7269170769333662, "learning_rate": 7.4427599550756365e-06, "loss": 0.6933, "step": 3475 }, { "epoch": 0.37, "grad_norm": 1.6100342541813948, "learning_rate": 7.441220869334812e-06, "loss": 0.6413, "step": 3476 }, { "epoch": 0.37, "grad_norm": 1.790035292089711, "learning_rate": 7.439681479826399e-06, "loss": 0.6424, "step": 3477 }, { "epoch": 0.37, "grad_norm": 1.7227523565851257, "learning_rate": 7.438141786741947e-06, "loss": 0.5791, "step": 3478 }, { "epoch": 0.37, "grad_norm": 1.7473730578014628, "learning_rate": 7.436601790273046e-06, "loss": 0.7377, "step": 3479 }, { "epoch": 0.37, "grad_norm": 1.7653295137436682, "learning_rate": 7.435061490611321e-06, "loss": 0.6377, "step": 3480 }, { "epoch": 0.37, "grad_norm": 1.6601066909341526, "learning_rate": 7.433520887948435e-06, "loss": 0.6453, "step": 3481 }, { "epoch": 0.37, "grad_norm": 1.61896349173616, "learning_rate": 7.43197998247609e-06, "loss": 0.6202, "step": 3482 }, { "epoch": 0.37, "grad_norm": 1.6148008942771153, "learning_rate": 7.430438774386025e-06, "loss": 0.5472, "step": 3483 }, { "epoch": 0.37, "grad_norm": 1.5377715080094752, "learning_rate": 7.428897263870016e-06, "loss": 0.6054, "step": 3484 }, { "epoch": 0.37, "grad_norm": 1.6555342091301812, "learning_rate": 7.427355451119878e-06, "loss": 0.6354, "step": 3485 }, { "epoch": 0.37, "grad_norm": 1.7682944270439154, "learning_rate": 7.425813336327462e-06, "loss": 0.5966, "step": 3486 }, { "epoch": 0.37, "grad_norm": 1.7897819764489797, "learning_rate": 7.424270919684657e-06, "loss": 0.6394, "step": 3487 }, { "epoch": 0.37, "grad_norm": 1.7665138494004053, "learning_rate": 7.422728201383393e-06, "loss": 0.707, "step": 3488 }, { "epoch": 0.37, "grad_norm": 1.6733580585059586, "learning_rate": 7.42118518161563e-06, "loss": 0.4638, "step": 3489 }, { "epoch": 0.37, "grad_norm": 1.6464544060313218, "learning_rate": 7.419641860573374e-06, "loss": 0.4754, "step": 3490 }, { "epoch": 0.37, "grad_norm": 1.8219039761063351, "learning_rate": 7.418098238448663e-06, "loss": 0.6336, "step": 3491 }, { "epoch": 0.37, "grad_norm": 1.702734872211331, "learning_rate": 7.416554315433574e-06, "loss": 0.5715, "step": 3492 }, { "epoch": 0.37, "grad_norm": 1.9663425681614555, "learning_rate": 7.415010091720221e-06, "loss": 0.6978, "step": 3493 }, { "epoch": 0.37, "grad_norm": 2.1567913691209384, "learning_rate": 7.413465567500756e-06, "loss": 0.6373, "step": 3494 }, { "epoch": 0.37, "grad_norm": 1.5727288717012067, "learning_rate": 7.411920742967369e-06, "loss": 0.5178, "step": 3495 }, { "epoch": 0.37, "grad_norm": 1.7203471928943896, "learning_rate": 7.4103756183122855e-06, "loss": 0.638, "step": 3496 }, { "epoch": 0.37, "grad_norm": 1.8873248991711633, "learning_rate": 7.40883019372777e-06, "loss": 0.6119, "step": 3497 }, { "epoch": 0.37, "grad_norm": 1.8878437416800302, "learning_rate": 7.407284469406124e-06, "loss": 0.6675, "step": 3498 }, { "epoch": 0.37, "grad_norm": 1.9715802343929127, "learning_rate": 7.405738445539687e-06, "loss": 0.647, "step": 3499 }, { "epoch": 0.37, "grad_norm": 1.698590670820137, "learning_rate": 7.4041921223208326e-06, "loss": 0.567, "step": 3500 }, { "epoch": 0.37, "grad_norm": 1.9696261996428182, "learning_rate": 7.402645499941975e-06, "loss": 0.705, "step": 3501 }, { "epoch": 0.37, "grad_norm": 2.1272949358666406, "learning_rate": 7.401098578595566e-06, "loss": 0.6133, "step": 3502 }, { "epoch": 0.37, "grad_norm": 1.6211839989654135, "learning_rate": 7.3995513584740905e-06, "loss": 0.6581, "step": 3503 }, { "epoch": 0.37, "grad_norm": 1.645050717781279, "learning_rate": 7.3980038397700734e-06, "loss": 0.5414, "step": 3504 }, { "epoch": 0.37, "grad_norm": 1.787425187932154, "learning_rate": 7.39645602267608e-06, "loss": 0.5596, "step": 3505 }, { "epoch": 0.37, "grad_norm": 1.4452736185042003, "learning_rate": 7.3949079073847055e-06, "loss": 0.5796, "step": 3506 }, { "epoch": 0.37, "grad_norm": 1.7872888747901197, "learning_rate": 7.393359494088588e-06, "loss": 0.6065, "step": 3507 }, { "epoch": 0.37, "grad_norm": 1.3342770289718229, "learning_rate": 7.3918107829804e-06, "loss": 0.5539, "step": 3508 }, { "epoch": 0.37, "grad_norm": 1.8097266973999873, "learning_rate": 7.390261774252852e-06, "loss": 0.6772, "step": 3509 }, { "epoch": 0.37, "grad_norm": 1.9291775023381739, "learning_rate": 7.388712468098689e-06, "loss": 0.6147, "step": 3510 }, { "epoch": 0.37, "grad_norm": 1.638534866394285, "learning_rate": 7.3871628647107e-06, "loss": 0.6457, "step": 3511 }, { "epoch": 0.37, "grad_norm": 1.6878921255379207, "learning_rate": 7.385612964281701e-06, "loss": 0.5591, "step": 3512 }, { "epoch": 0.37, "grad_norm": 1.813779122459893, "learning_rate": 7.384062767004555e-06, "loss": 0.7047, "step": 3513 }, { "epoch": 0.37, "grad_norm": 1.6684056542631502, "learning_rate": 7.382512273072154e-06, "loss": 0.6044, "step": 3514 }, { "epoch": 0.37, "grad_norm": 1.595358023653135, "learning_rate": 7.3809614826774315e-06, "loss": 0.5641, "step": 3515 }, { "epoch": 0.38, "grad_norm": 1.811224497971499, "learning_rate": 7.379410396013355e-06, "loss": 0.7267, "step": 3516 }, { "epoch": 0.38, "grad_norm": 1.725969758602972, "learning_rate": 7.377859013272932e-06, "loss": 0.555, "step": 3517 }, { "epoch": 0.38, "grad_norm": 1.9167386687967285, "learning_rate": 7.3763073346492056e-06, "loss": 0.6418, "step": 3518 }, { "epoch": 0.38, "grad_norm": 1.5975088383666392, "learning_rate": 7.374755360335253e-06, "loss": 0.5828, "step": 3519 }, { "epoch": 0.38, "grad_norm": 1.8689589162477804, "learning_rate": 7.3732030905241905e-06, "loss": 0.587, "step": 3520 }, { "epoch": 0.38, "grad_norm": 1.8082912888615021, "learning_rate": 7.371650525409173e-06, "loss": 0.6875, "step": 3521 }, { "epoch": 0.38, "grad_norm": 2.053753991238429, "learning_rate": 7.370097665183391e-06, "loss": 0.6723, "step": 3522 }, { "epoch": 0.38, "grad_norm": 1.9415896913832036, "learning_rate": 7.368544510040068e-06, "loss": 0.6808, "step": 3523 }, { "epoch": 0.38, "grad_norm": 2.2042603286871674, "learning_rate": 7.3669910601724704e-06, "loss": 0.5668, "step": 3524 }, { "epoch": 0.38, "grad_norm": 1.6604946857011185, "learning_rate": 7.365437315773896e-06, "loss": 0.6708, "step": 3525 }, { "epoch": 0.38, "grad_norm": 1.7168472886881452, "learning_rate": 7.363883277037681e-06, "loss": 0.5996, "step": 3526 }, { "epoch": 0.38, "grad_norm": 1.917102101075553, "learning_rate": 7.3623289441572e-06, "loss": 0.67, "step": 3527 }, { "epoch": 0.38, "grad_norm": 1.8753452237343708, "learning_rate": 7.360774317325863e-06, "loss": 0.7675, "step": 3528 }, { "epoch": 0.38, "grad_norm": 1.760647815286319, "learning_rate": 7.359219396737113e-06, "loss": 0.5155, "step": 3529 }, { "epoch": 0.38, "grad_norm": 1.6924679852888114, "learning_rate": 7.357664182584437e-06, "loss": 0.5409, "step": 3530 }, { "epoch": 0.38, "grad_norm": 1.6413804847619238, "learning_rate": 7.356108675061353e-06, "loss": 0.6566, "step": 3531 }, { "epoch": 0.38, "grad_norm": 1.376028427184475, "learning_rate": 7.354552874361417e-06, "loss": 0.5324, "step": 3532 }, { "epoch": 0.38, "grad_norm": 1.7287449655076483, "learning_rate": 7.352996780678221e-06, "loss": 0.6094, "step": 3533 }, { "epoch": 0.38, "grad_norm": 1.979301586728071, "learning_rate": 7.351440394205394e-06, "loss": 0.6495, "step": 3534 }, { "epoch": 0.38, "grad_norm": 1.8734438833963873, "learning_rate": 7.349883715136601e-06, "loss": 0.6446, "step": 3535 }, { "epoch": 0.38, "grad_norm": 1.7297344693993308, "learning_rate": 7.348326743665545e-06, "loss": 0.6376, "step": 3536 }, { "epoch": 0.38, "grad_norm": 1.898756752319179, "learning_rate": 7.346769479985961e-06, "loss": 0.7302, "step": 3537 }, { "epoch": 0.38, "grad_norm": 2.144770476064544, "learning_rate": 7.345211924291627e-06, "loss": 0.7338, "step": 3538 }, { "epoch": 0.38, "grad_norm": 1.6318550841803858, "learning_rate": 7.343654076776352e-06, "loss": 0.5759, "step": 3539 }, { "epoch": 0.38, "grad_norm": 1.6750975812405584, "learning_rate": 7.342095937633985e-06, "loss": 0.6321, "step": 3540 }, { "epoch": 0.38, "grad_norm": 1.8027164742610258, "learning_rate": 7.340537507058407e-06, "loss": 0.6205, "step": 3541 }, { "epoch": 0.38, "grad_norm": 1.789540625343184, "learning_rate": 7.338978785243538e-06, "loss": 0.7151, "step": 3542 }, { "epoch": 0.38, "grad_norm": 1.7561507994305472, "learning_rate": 7.337419772383334e-06, "loss": 0.6355, "step": 3543 }, { "epoch": 0.38, "grad_norm": 2.0090688215194663, "learning_rate": 7.335860468671788e-06, "loss": 0.6181, "step": 3544 }, { "epoch": 0.38, "grad_norm": 1.375116308509848, "learning_rate": 7.3343008743029284e-06, "loss": 0.5477, "step": 3545 }, { "epoch": 0.38, "grad_norm": 1.8679002234634747, "learning_rate": 7.332740989470817e-06, "loss": 0.694, "step": 3546 }, { "epoch": 0.38, "grad_norm": 1.8456117469568674, "learning_rate": 7.33118081436956e-06, "loss": 0.6943, "step": 3547 }, { "epoch": 0.38, "grad_norm": 1.6153923396145282, "learning_rate": 7.329620349193287e-06, "loss": 0.5829, "step": 3548 }, { "epoch": 0.38, "grad_norm": 2.1908139210564483, "learning_rate": 7.328059594136177e-06, "loss": 0.6644, "step": 3549 }, { "epoch": 0.38, "grad_norm": 1.62052533134445, "learning_rate": 7.326498549392434e-06, "loss": 0.5943, "step": 3550 }, { "epoch": 0.38, "grad_norm": 1.7210457384781066, "learning_rate": 7.324937215156307e-06, "loss": 0.7147, "step": 3551 }, { "epoch": 0.38, "grad_norm": 1.7781720808172023, "learning_rate": 7.323375591622075e-06, "loss": 0.7167, "step": 3552 }, { "epoch": 0.38, "grad_norm": 1.2535977650400856, "learning_rate": 7.3218136789840556e-06, "loss": 0.5397, "step": 3553 }, { "epoch": 0.38, "grad_norm": 1.5634736980796053, "learning_rate": 7.3202514774366e-06, "loss": 0.5697, "step": 3554 }, { "epoch": 0.38, "grad_norm": 1.783172714394568, "learning_rate": 7.318688987174098e-06, "loss": 0.525, "step": 3555 }, { "epoch": 0.38, "grad_norm": 1.7776796405693893, "learning_rate": 7.317126208390977e-06, "loss": 0.6611, "step": 3556 }, { "epoch": 0.38, "grad_norm": 1.8093055338453623, "learning_rate": 7.315563141281694e-06, "loss": 0.5941, "step": 3557 }, { "epoch": 0.38, "grad_norm": 1.907775596889141, "learning_rate": 7.313999786040748e-06, "loss": 0.8166, "step": 3558 }, { "epoch": 0.38, "grad_norm": 1.8569273352225828, "learning_rate": 7.312436142862669e-06, "loss": 0.7078, "step": 3559 }, { "epoch": 0.38, "grad_norm": 1.7871773962562774, "learning_rate": 7.310872211942028e-06, "loss": 0.6322, "step": 3560 }, { "epoch": 0.38, "grad_norm": 2.0208665025290875, "learning_rate": 7.309307993473427e-06, "loss": 0.6079, "step": 3561 }, { "epoch": 0.38, "grad_norm": 1.7347378622713014, "learning_rate": 7.3077434876515075e-06, "loss": 0.6679, "step": 3562 }, { "epoch": 0.38, "grad_norm": 2.0948100593970955, "learning_rate": 7.3061786946709446e-06, "loss": 0.5905, "step": 3563 }, { "epoch": 0.38, "grad_norm": 1.46089812265525, "learning_rate": 7.304613614726448e-06, "loss": 0.5649, "step": 3564 }, { "epoch": 0.38, "grad_norm": 1.7468938888095662, "learning_rate": 7.303048248012767e-06, "loss": 0.6459, "step": 3565 }, { "epoch": 0.38, "grad_norm": 2.0324030377593703, "learning_rate": 7.301482594724684e-06, "loss": 0.7181, "step": 3566 }, { "epoch": 0.38, "grad_norm": 1.8097459138192242, "learning_rate": 7.299916655057016e-06, "loss": 0.6574, "step": 3567 }, { "epoch": 0.38, "grad_norm": 1.8297126147776028, "learning_rate": 7.2983504292046185e-06, "loss": 0.6632, "step": 3568 }, { "epoch": 0.38, "grad_norm": 1.7738403109425538, "learning_rate": 7.29678391736238e-06, "loss": 0.6222, "step": 3569 }, { "epoch": 0.38, "grad_norm": 1.8000948124889438, "learning_rate": 7.295217119725226e-06, "loss": 0.6006, "step": 3570 }, { "epoch": 0.38, "grad_norm": 1.164560592018446, "learning_rate": 7.293650036488118e-06, "loss": 0.5383, "step": 3571 }, { "epoch": 0.38, "grad_norm": 2.3829505421791906, "learning_rate": 7.292082667846052e-06, "loss": 0.7132, "step": 3572 }, { "epoch": 0.38, "grad_norm": 1.1280430003871027, "learning_rate": 7.290515013994059e-06, "loss": 0.5697, "step": 3573 }, { "epoch": 0.38, "grad_norm": 1.9904235451064376, "learning_rate": 7.288947075127207e-06, "loss": 0.6275, "step": 3574 }, { "epoch": 0.38, "grad_norm": 1.7793609337640308, "learning_rate": 7.2873788514406e-06, "loss": 0.6251, "step": 3575 }, { "epoch": 0.38, "grad_norm": 1.784103758463883, "learning_rate": 7.285810343129375e-06, "loss": 0.5924, "step": 3576 }, { "epoch": 0.38, "grad_norm": 2.2483443869422897, "learning_rate": 7.284241550388703e-06, "loss": 0.5843, "step": 3577 }, { "epoch": 0.38, "grad_norm": 1.8504893761218706, "learning_rate": 7.282672473413798e-06, "loss": 0.7117, "step": 3578 }, { "epoch": 0.38, "grad_norm": 1.8340581301464867, "learning_rate": 7.281103112399902e-06, "loss": 0.5911, "step": 3579 }, { "epoch": 0.38, "grad_norm": 1.869934645474622, "learning_rate": 7.279533467542295e-06, "loss": 0.7367, "step": 3580 }, { "epoch": 0.38, "grad_norm": 1.7775219254996253, "learning_rate": 7.2779635390362916e-06, "loss": 0.7041, "step": 3581 }, { "epoch": 0.38, "grad_norm": 1.6193224654063094, "learning_rate": 7.276393327077242e-06, "loss": 0.5932, "step": 3582 }, { "epoch": 0.38, "grad_norm": 1.730270546260303, "learning_rate": 7.274822831860534e-06, "loss": 0.5801, "step": 3583 }, { "epoch": 0.38, "grad_norm": 1.9159772250401554, "learning_rate": 7.2732520535815854e-06, "loss": 0.6542, "step": 3584 }, { "epoch": 0.38, "grad_norm": 1.971954957575581, "learning_rate": 7.271680992435854e-06, "loss": 0.6468, "step": 3585 }, { "epoch": 0.38, "grad_norm": 1.7200336745584681, "learning_rate": 7.270109648618831e-06, "loss": 0.6417, "step": 3586 }, { "epoch": 0.38, "grad_norm": 1.7355466547883807, "learning_rate": 7.268538022326042e-06, "loss": 0.6177, "step": 3587 }, { "epoch": 0.38, "grad_norm": 1.949959337212933, "learning_rate": 7.26696611375305e-06, "loss": 0.5931, "step": 3588 }, { "epoch": 0.38, "grad_norm": 1.758724988823098, "learning_rate": 7.265393923095452e-06, "loss": 0.6641, "step": 3589 }, { "epoch": 0.38, "grad_norm": 2.0278487954932647, "learning_rate": 7.263821450548878e-06, "loss": 0.6168, "step": 3590 }, { "epoch": 0.38, "grad_norm": 1.7685049975871985, "learning_rate": 7.262248696308997e-06, "loss": 0.5977, "step": 3591 }, { "epoch": 0.38, "grad_norm": 1.6590709645422412, "learning_rate": 7.2606756605715075e-06, "loss": 0.5549, "step": 3592 }, { "epoch": 0.38, "grad_norm": 1.4211343721542882, "learning_rate": 7.259102343532152e-06, "loss": 0.5382, "step": 3593 }, { "epoch": 0.38, "grad_norm": 1.17690882550942, "learning_rate": 7.257528745386696e-06, "loss": 0.5211, "step": 3594 }, { "epoch": 0.38, "grad_norm": 1.70561855959845, "learning_rate": 7.255954866330952e-06, "loss": 0.6531, "step": 3595 }, { "epoch": 0.38, "grad_norm": 1.5187292443378546, "learning_rate": 7.25438070656076e-06, "loss": 0.5768, "step": 3596 }, { "epoch": 0.38, "grad_norm": 1.795013572963037, "learning_rate": 7.252806266271997e-06, "loss": 0.6769, "step": 3597 }, { "epoch": 0.38, "grad_norm": 1.8127182065884504, "learning_rate": 7.251231545660573e-06, "loss": 0.6961, "step": 3598 }, { "epoch": 0.38, "grad_norm": 1.610273075398246, "learning_rate": 7.249656544922438e-06, "loss": 0.6133, "step": 3599 }, { "epoch": 0.38, "grad_norm": 1.7899861788180194, "learning_rate": 7.24808126425357e-06, "loss": 0.6369, "step": 3600 }, { "epoch": 0.38, "grad_norm": 2.153893595781626, "learning_rate": 7.246505703849988e-06, "loss": 0.5764, "step": 3601 }, { "epoch": 0.38, "grad_norm": 1.745376178101503, "learning_rate": 7.244929863907742e-06, "loss": 0.5911, "step": 3602 }, { "epoch": 0.38, "grad_norm": 1.7030460766211932, "learning_rate": 7.243353744622917e-06, "loss": 0.67, "step": 3603 }, { "epoch": 0.38, "grad_norm": 1.9346442426314865, "learning_rate": 7.241777346191637e-06, "loss": 0.6917, "step": 3604 }, { "epoch": 0.38, "grad_norm": 1.7672874536079226, "learning_rate": 7.240200668810055e-06, "loss": 0.6973, "step": 3605 }, { "epoch": 0.38, "grad_norm": 1.5765799988963107, "learning_rate": 7.2386237126743594e-06, "loss": 0.6391, "step": 3606 }, { "epoch": 0.38, "grad_norm": 1.7295578837419479, "learning_rate": 7.237046477980779e-06, "loss": 0.6272, "step": 3607 }, { "epoch": 0.38, "grad_norm": 1.7989127705567212, "learning_rate": 7.235468964925571e-06, "loss": 0.663, "step": 3608 }, { "epoch": 0.38, "grad_norm": 1.396400620130302, "learning_rate": 7.233891173705029e-06, "loss": 0.5357, "step": 3609 }, { "epoch": 0.39, "grad_norm": 1.955441606597195, "learning_rate": 7.232313104515483e-06, "loss": 0.6783, "step": 3610 }, { "epoch": 0.39, "grad_norm": 1.8361011360745512, "learning_rate": 7.230734757553296e-06, "loss": 0.6949, "step": 3611 }, { "epoch": 0.39, "grad_norm": 2.0498672861726814, "learning_rate": 7.229156133014867e-06, "loss": 0.6631, "step": 3612 }, { "epoch": 0.39, "grad_norm": 1.7775182534888498, "learning_rate": 7.227577231096626e-06, "loss": 0.64, "step": 3613 }, { "epoch": 0.39, "grad_norm": 2.01217919703619, "learning_rate": 7.225998051995042e-06, "loss": 0.675, "step": 3614 }, { "epoch": 0.39, "grad_norm": 1.834754526588364, "learning_rate": 7.2244185959066145e-06, "loss": 0.6968, "step": 3615 }, { "epoch": 0.39, "grad_norm": 1.1435340556569396, "learning_rate": 7.222838863027882e-06, "loss": 0.5302, "step": 3616 }, { "epoch": 0.39, "grad_norm": 1.0815929477368444, "learning_rate": 7.221258853555414e-06, "loss": 0.5317, "step": 3617 }, { "epoch": 0.39, "grad_norm": 1.101349060348848, "learning_rate": 7.219678567685814e-06, "loss": 0.5188, "step": 3618 }, { "epoch": 0.39, "grad_norm": 2.146140165010886, "learning_rate": 7.218098005615722e-06, "loss": 0.5783, "step": 3619 }, { "epoch": 0.39, "grad_norm": 1.575707789119008, "learning_rate": 7.21651716754181e-06, "loss": 0.5442, "step": 3620 }, { "epoch": 0.39, "grad_norm": 1.6690815700448078, "learning_rate": 7.2149360536607894e-06, "loss": 0.5756, "step": 3621 }, { "epoch": 0.39, "grad_norm": 1.6110204206642484, "learning_rate": 7.213354664169398e-06, "loss": 0.7027, "step": 3622 }, { "epoch": 0.39, "grad_norm": 1.371421319098392, "learning_rate": 7.211772999264417e-06, "loss": 0.5207, "step": 3623 }, { "epoch": 0.39, "grad_norm": 1.8122099296015497, "learning_rate": 7.210191059142654e-06, "loss": 0.776, "step": 3624 }, { "epoch": 0.39, "grad_norm": 2.324207470145932, "learning_rate": 7.208608844000955e-06, "loss": 0.6994, "step": 3625 }, { "epoch": 0.39, "grad_norm": 1.3810781003635986, "learning_rate": 7.207026354036198e-06, "loss": 0.5577, "step": 3626 }, { "epoch": 0.39, "grad_norm": 1.8089020349092446, "learning_rate": 7.205443589445298e-06, "loss": 0.6208, "step": 3627 }, { "epoch": 0.39, "grad_norm": 1.7227365143560875, "learning_rate": 7.203860550425202e-06, "loss": 0.6294, "step": 3628 }, { "epoch": 0.39, "grad_norm": 1.6994135686927119, "learning_rate": 7.202277237172892e-06, "loss": 0.6239, "step": 3629 }, { "epoch": 0.39, "grad_norm": 1.196346061537972, "learning_rate": 7.200693649885384e-06, "loss": 0.5349, "step": 3630 }, { "epoch": 0.39, "grad_norm": 1.7108399598697934, "learning_rate": 7.199109788759727e-06, "loss": 0.7367, "step": 3631 }, { "epoch": 0.39, "grad_norm": 1.7113896936108222, "learning_rate": 7.1975256539930046e-06, "loss": 0.5712, "step": 3632 }, { "epoch": 0.39, "grad_norm": 1.9954647801491623, "learning_rate": 7.195941245782339e-06, "loss": 0.7542, "step": 3633 }, { "epoch": 0.39, "grad_norm": 1.8126035817702058, "learning_rate": 7.194356564324876e-06, "loss": 0.5897, "step": 3634 }, { "epoch": 0.39, "grad_norm": 2.0839564507748682, "learning_rate": 7.192771609817807e-06, "loss": 0.5908, "step": 3635 }, { "epoch": 0.39, "grad_norm": 1.7269687387909312, "learning_rate": 7.191186382458348e-06, "loss": 0.5907, "step": 3636 }, { "epoch": 0.39, "grad_norm": 1.7833790682591382, "learning_rate": 7.1896008824437565e-06, "loss": 0.5928, "step": 3637 }, { "epoch": 0.39, "grad_norm": 1.3032282057669322, "learning_rate": 7.18801510997132e-06, "loss": 0.5448, "step": 3638 }, { "epoch": 0.39, "grad_norm": 1.9535294973835182, "learning_rate": 7.186429065238359e-06, "loss": 0.6811, "step": 3639 }, { "epoch": 0.39, "grad_norm": 1.9752029310420232, "learning_rate": 7.184842748442229e-06, "loss": 0.7427, "step": 3640 }, { "epoch": 0.39, "grad_norm": 2.016767360833545, "learning_rate": 7.183256159780321e-06, "loss": 0.5509, "step": 3641 }, { "epoch": 0.39, "grad_norm": 1.7007406293081568, "learning_rate": 7.181669299450056e-06, "loss": 0.6568, "step": 3642 }, { "epoch": 0.39, "grad_norm": 2.0986712702363266, "learning_rate": 7.1800821676488945e-06, "loss": 0.6177, "step": 3643 }, { "epoch": 0.39, "grad_norm": 2.2404307032851967, "learning_rate": 7.178494764574326e-06, "loss": 0.7763, "step": 3644 }, { "epoch": 0.39, "grad_norm": 1.889128363036111, "learning_rate": 7.176907090423875e-06, "loss": 0.7198, "step": 3645 }, { "epoch": 0.39, "grad_norm": 1.7313895073135674, "learning_rate": 7.1753191453951e-06, "loss": 0.5562, "step": 3646 }, { "epoch": 0.39, "grad_norm": 1.8507180641111038, "learning_rate": 7.173730929685594e-06, "loss": 0.7017, "step": 3647 }, { "epoch": 0.39, "grad_norm": 1.9641925424141722, "learning_rate": 7.172142443492984e-06, "loss": 0.6906, "step": 3648 }, { "epoch": 0.39, "grad_norm": 1.5612431671684885, "learning_rate": 7.170553687014925e-06, "loss": 0.6545, "step": 3649 }, { "epoch": 0.39, "grad_norm": 1.928749505104148, "learning_rate": 7.168964660449113e-06, "loss": 0.609, "step": 3650 }, { "epoch": 0.39, "grad_norm": 1.7560626058907092, "learning_rate": 7.1673753639932764e-06, "loss": 0.5798, "step": 3651 }, { "epoch": 0.39, "grad_norm": 1.6843679986392046, "learning_rate": 7.165785797845174e-06, "loss": 0.6432, "step": 3652 }, { "epoch": 0.39, "grad_norm": 1.817556596888904, "learning_rate": 7.164195962202599e-06, "loss": 0.6234, "step": 3653 }, { "epoch": 0.39, "grad_norm": 1.5888666379550704, "learning_rate": 7.162605857263381e-06, "loss": 0.5572, "step": 3654 }, { "epoch": 0.39, "grad_norm": 2.3978532337456486, "learning_rate": 7.1610154832253794e-06, "loss": 0.7228, "step": 3655 }, { "epoch": 0.39, "grad_norm": 1.7264547998795607, "learning_rate": 7.1594248402864885e-06, "loss": 0.6946, "step": 3656 }, { "epoch": 0.39, "grad_norm": 2.008476419113563, "learning_rate": 7.1578339286446375e-06, "loss": 0.5668, "step": 3657 }, { "epoch": 0.39, "grad_norm": 1.7749086892671728, "learning_rate": 7.156242748497786e-06, "loss": 0.6485, "step": 3658 }, { "epoch": 0.39, "grad_norm": 1.207710659574942, "learning_rate": 7.154651300043929e-06, "loss": 0.5425, "step": 3659 }, { "epoch": 0.39, "grad_norm": 1.8941763293103093, "learning_rate": 7.153059583481097e-06, "loss": 0.7486, "step": 3660 }, { "epoch": 0.39, "grad_norm": 1.6614920521927887, "learning_rate": 7.151467599007348e-06, "loss": 0.6425, "step": 3661 }, { "epoch": 0.39, "grad_norm": 1.9171069506782676, "learning_rate": 7.14987534682078e-06, "loss": 0.6408, "step": 3662 }, { "epoch": 0.39, "grad_norm": 1.6754770401408339, "learning_rate": 7.14828282711952e-06, "loss": 0.63, "step": 3663 }, { "epoch": 0.39, "grad_norm": 1.695629907901448, "learning_rate": 7.1466900401017295e-06, "loss": 0.6026, "step": 3664 }, { "epoch": 0.39, "grad_norm": 1.764952964439814, "learning_rate": 7.1450969859656014e-06, "loss": 0.6219, "step": 3665 }, { "epoch": 0.39, "grad_norm": 2.01468927019586, "learning_rate": 7.143503664909367e-06, "loss": 0.702, "step": 3666 }, { "epoch": 0.39, "grad_norm": 1.6871487823273958, "learning_rate": 7.141910077131284e-06, "loss": 0.7117, "step": 3667 }, { "epoch": 0.39, "grad_norm": 1.6045235377643148, "learning_rate": 7.140316222829648e-06, "loss": 0.6974, "step": 3668 }, { "epoch": 0.39, "grad_norm": 1.4466151318456948, "learning_rate": 7.1387221022027906e-06, "loss": 0.5582, "step": 3669 }, { "epoch": 0.39, "grad_norm": 1.7062975482323788, "learning_rate": 7.137127715449067e-06, "loss": 0.6954, "step": 3670 }, { "epoch": 0.39, "grad_norm": 1.6489589118498693, "learning_rate": 7.135533062766872e-06, "loss": 0.5643, "step": 3671 }, { "epoch": 0.39, "grad_norm": 1.6995086962744355, "learning_rate": 7.133938144354634e-06, "loss": 0.5907, "step": 3672 }, { "epoch": 0.39, "grad_norm": 2.0547426305322305, "learning_rate": 7.132342960410812e-06, "loss": 0.5774, "step": 3673 }, { "epoch": 0.39, "grad_norm": 1.719534493832998, "learning_rate": 7.1307475111339e-06, "loss": 0.689, "step": 3674 }, { "epoch": 0.39, "grad_norm": 1.6881840560875132, "learning_rate": 7.129151796722422e-06, "loss": 0.6564, "step": 3675 }, { "epoch": 0.39, "grad_norm": 1.8658498291103174, "learning_rate": 7.127555817374939e-06, "loss": 0.5895, "step": 3676 }, { "epoch": 0.39, "grad_norm": 2.205079364769371, "learning_rate": 7.125959573290041e-06, "loss": 0.6689, "step": 3677 }, { "epoch": 0.39, "grad_norm": 1.8180231668121398, "learning_rate": 7.1243630646663555e-06, "loss": 0.6031, "step": 3678 }, { "epoch": 0.39, "grad_norm": 1.631439884381724, "learning_rate": 7.122766291702537e-06, "loss": 0.6317, "step": 3679 }, { "epoch": 0.39, "grad_norm": 1.810392389082644, "learning_rate": 7.1211692545972795e-06, "loss": 0.6312, "step": 3680 }, { "epoch": 0.39, "grad_norm": 1.8729650257425474, "learning_rate": 7.119571953549305e-06, "loss": 0.6123, "step": 3681 }, { "epoch": 0.39, "grad_norm": 1.7985549991597196, "learning_rate": 7.1179743887573696e-06, "loss": 0.6366, "step": 3682 }, { "epoch": 0.39, "grad_norm": 1.9652713026245001, "learning_rate": 7.116376560420262e-06, "loss": 0.6855, "step": 3683 }, { "epoch": 0.39, "grad_norm": 1.7676397696867097, "learning_rate": 7.114778468736806e-06, "loss": 0.6052, "step": 3684 }, { "epoch": 0.39, "grad_norm": 1.9858523312138772, "learning_rate": 7.113180113905855e-06, "loss": 0.6037, "step": 3685 }, { "epoch": 0.39, "grad_norm": 1.7300434006771146, "learning_rate": 7.111581496126297e-06, "loss": 0.6256, "step": 3686 }, { "epoch": 0.39, "grad_norm": 1.8381913026726735, "learning_rate": 7.109982615597053e-06, "loss": 0.7256, "step": 3687 }, { "epoch": 0.39, "grad_norm": 1.8841602474538848, "learning_rate": 7.108383472517075e-06, "loss": 0.7482, "step": 3688 }, { "epoch": 0.39, "grad_norm": 1.7076586091467767, "learning_rate": 7.106784067085348e-06, "loss": 0.6622, "step": 3689 }, { "epoch": 0.39, "grad_norm": 1.7892607641778346, "learning_rate": 7.105184399500893e-06, "loss": 0.7153, "step": 3690 }, { "epoch": 0.39, "grad_norm": 2.083334337939248, "learning_rate": 7.1035844699627575e-06, "loss": 0.5497, "step": 3691 }, { "epoch": 0.39, "grad_norm": 1.6546641416021628, "learning_rate": 7.101984278670027e-06, "loss": 0.6101, "step": 3692 }, { "epoch": 0.39, "grad_norm": 1.743303188157273, "learning_rate": 7.100383825821816e-06, "loss": 0.6678, "step": 3693 }, { "epoch": 0.39, "grad_norm": 2.117056226284754, "learning_rate": 7.098783111617275e-06, "loss": 0.7318, "step": 3694 }, { "epoch": 0.39, "grad_norm": 1.8568489467253435, "learning_rate": 7.0971821362555846e-06, "loss": 0.6718, "step": 3695 }, { "epoch": 0.39, "grad_norm": 1.583425525130183, "learning_rate": 7.095580899935957e-06, "loss": 0.5404, "step": 3696 }, { "epoch": 0.39, "grad_norm": 1.8721453869874447, "learning_rate": 7.09397940285764e-06, "loss": 0.7116, "step": 3697 }, { "epoch": 0.39, "grad_norm": 2.373528141437983, "learning_rate": 7.092377645219912e-06, "loss": 0.626, "step": 3698 }, { "epoch": 0.39, "grad_norm": 1.9460971460253367, "learning_rate": 7.090775627222082e-06, "loss": 0.7136, "step": 3699 }, { "epoch": 0.39, "grad_norm": 1.8665867086413162, "learning_rate": 7.089173349063495e-06, "loss": 0.648, "step": 3700 }, { "epoch": 0.39, "grad_norm": 1.895902207997151, "learning_rate": 7.087570810943527e-06, "loss": 0.5919, "step": 3701 }, { "epoch": 0.39, "grad_norm": 1.3362714884165448, "learning_rate": 7.085968013061585e-06, "loss": 0.5474, "step": 3702 }, { "epoch": 0.39, "grad_norm": 1.6164886439839428, "learning_rate": 7.0843649556171104e-06, "loss": 0.6545, "step": 3703 }, { "epoch": 0.4, "grad_norm": 1.6485264493582394, "learning_rate": 7.082761638809575e-06, "loss": 0.649, "step": 3704 }, { "epoch": 0.4, "grad_norm": 1.8656758804354998, "learning_rate": 7.0811580628384845e-06, "loss": 0.675, "step": 3705 }, { "epoch": 0.4, "grad_norm": 1.7622733119621579, "learning_rate": 7.079554227903376e-06, "loss": 0.6292, "step": 3706 }, { "epoch": 0.4, "grad_norm": 1.6367870161247762, "learning_rate": 7.077950134203817e-06, "loss": 0.6096, "step": 3707 }, { "epoch": 0.4, "grad_norm": 1.9605475807503296, "learning_rate": 7.076345781939412e-06, "loss": 0.7038, "step": 3708 }, { "epoch": 0.4, "grad_norm": 1.8366853972285386, "learning_rate": 7.0747411713097936e-06, "loss": 0.6221, "step": 3709 }, { "epoch": 0.4, "grad_norm": 1.6405415314175127, "learning_rate": 7.073136302514628e-06, "loss": 0.6822, "step": 3710 }, { "epoch": 0.4, "grad_norm": 1.9990068658963684, "learning_rate": 7.071531175753615e-06, "loss": 0.6797, "step": 3711 }, { "epoch": 0.4, "grad_norm": 1.2098149208605768, "learning_rate": 7.0699257912264804e-06, "loss": 0.5314, "step": 3712 }, { "epoch": 0.4, "grad_norm": 1.7284568022831424, "learning_rate": 7.06832014913299e-06, "loss": 0.5975, "step": 3713 }, { "epoch": 0.4, "grad_norm": 1.8989987352979412, "learning_rate": 7.066714249672937e-06, "loss": 0.677, "step": 3714 }, { "epoch": 0.4, "grad_norm": 1.626603671211532, "learning_rate": 7.065108093046147e-06, "loss": 0.5659, "step": 3715 }, { "epoch": 0.4, "grad_norm": 1.689942969430711, "learning_rate": 7.063501679452478e-06, "loss": 0.6187, "step": 3716 }, { "epoch": 0.4, "grad_norm": 1.8012718922801796, "learning_rate": 7.061895009091823e-06, "loss": 0.6698, "step": 3717 }, { "epoch": 0.4, "grad_norm": 1.842507375992341, "learning_rate": 7.060288082164101e-06, "loss": 0.5807, "step": 3718 }, { "epoch": 0.4, "grad_norm": 2.016981744073097, "learning_rate": 7.058680898869272e-06, "loss": 0.5926, "step": 3719 }, { "epoch": 0.4, "grad_norm": 2.2419205016447297, "learning_rate": 7.0570734594073146e-06, "loss": 0.6953, "step": 3720 }, { "epoch": 0.4, "grad_norm": 1.7795487321855454, "learning_rate": 7.055465763978251e-06, "loss": 0.6625, "step": 3721 }, { "epoch": 0.4, "grad_norm": 1.7317363386959093, "learning_rate": 7.05385781278213e-06, "loss": 0.5432, "step": 3722 }, { "epoch": 0.4, "grad_norm": 1.819005313615607, "learning_rate": 7.0522496060190326e-06, "loss": 0.5943, "step": 3723 }, { "epoch": 0.4, "grad_norm": 1.7035038196790697, "learning_rate": 7.050641143889072e-06, "loss": 0.6242, "step": 3724 }, { "epoch": 0.4, "grad_norm": 1.9357659818445503, "learning_rate": 7.049032426592396e-06, "loss": 0.6522, "step": 3725 }, { "epoch": 0.4, "grad_norm": 1.8951026723537003, "learning_rate": 7.04742345432918e-06, "loss": 0.6508, "step": 3726 }, { "epoch": 0.4, "grad_norm": 1.847301541502886, "learning_rate": 7.045814227299632e-06, "loss": 0.7724, "step": 3727 }, { "epoch": 0.4, "grad_norm": 1.8650739066101487, "learning_rate": 7.0442047457039945e-06, "loss": 0.657, "step": 3728 }, { "epoch": 0.4, "grad_norm": 1.7723830510486995, "learning_rate": 7.042595009742536e-06, "loss": 0.6985, "step": 3729 }, { "epoch": 0.4, "grad_norm": 1.3472745758364641, "learning_rate": 7.0409850196155625e-06, "loss": 0.5443, "step": 3730 }, { "epoch": 0.4, "grad_norm": 1.8611948644513414, "learning_rate": 7.039374775523411e-06, "loss": 0.6016, "step": 3731 }, { "epoch": 0.4, "grad_norm": 1.7505174024896206, "learning_rate": 7.037764277666444e-06, "loss": 0.6231, "step": 3732 }, { "epoch": 0.4, "grad_norm": 1.7791228912218486, "learning_rate": 7.036153526245063e-06, "loss": 0.6373, "step": 3733 }, { "epoch": 0.4, "grad_norm": 1.8315137978471139, "learning_rate": 7.0345425214597e-06, "loss": 0.6046, "step": 3734 }, { "epoch": 0.4, "grad_norm": 1.6214183653905898, "learning_rate": 7.032931263510811e-06, "loss": 0.525, "step": 3735 }, { "epoch": 0.4, "grad_norm": 2.0785890450428046, "learning_rate": 7.031319752598895e-06, "loss": 0.6285, "step": 3736 }, { "epoch": 0.4, "grad_norm": 1.6652829150386368, "learning_rate": 7.029707988924473e-06, "loss": 0.673, "step": 3737 }, { "epoch": 0.4, "grad_norm": 1.7773465902927108, "learning_rate": 7.028095972688102e-06, "loss": 0.6937, "step": 3738 }, { "epoch": 0.4, "grad_norm": 1.8157702888625264, "learning_rate": 7.026483704090369e-06, "loss": 0.6716, "step": 3739 }, { "epoch": 0.4, "grad_norm": 1.6775593272559446, "learning_rate": 7.024871183331894e-06, "loss": 0.5705, "step": 3740 }, { "epoch": 0.4, "grad_norm": 1.729678575059087, "learning_rate": 7.023258410613326e-06, "loss": 0.5953, "step": 3741 }, { "epoch": 0.4, "grad_norm": 1.9715038831533775, "learning_rate": 7.021645386135347e-06, "loss": 0.653, "step": 3742 }, { "epoch": 0.4, "grad_norm": 1.797182311575636, "learning_rate": 7.02003211009867e-06, "loss": 0.6616, "step": 3743 }, { "epoch": 0.4, "grad_norm": 1.8653470546745052, "learning_rate": 7.01841858270404e-06, "loss": 0.6814, "step": 3744 }, { "epoch": 0.4, "grad_norm": 1.3141982266439451, "learning_rate": 7.016804804152231e-06, "loss": 0.5474, "step": 3745 }, { "epoch": 0.4, "grad_norm": 1.663749947764099, "learning_rate": 7.01519077464405e-06, "loss": 0.6485, "step": 3746 }, { "epoch": 0.4, "grad_norm": 1.8067543516078888, "learning_rate": 7.013576494380336e-06, "loss": 0.6446, "step": 3747 }, { "epoch": 0.4, "grad_norm": 1.8358509780949157, "learning_rate": 7.011961963561957e-06, "loss": 0.6439, "step": 3748 }, { "epoch": 0.4, "grad_norm": 1.1677773428611378, "learning_rate": 7.0103471823898154e-06, "loss": 0.524, "step": 3749 }, { "epoch": 0.4, "grad_norm": 2.058708823165046, "learning_rate": 7.00873215106484e-06, "loss": 0.6609, "step": 3750 }, { "epoch": 0.4, "grad_norm": 1.776832967028492, "learning_rate": 7.007116869787994e-06, "loss": 0.5704, "step": 3751 }, { "epoch": 0.4, "grad_norm": 1.6682130642204, "learning_rate": 7.0055013387602736e-06, "loss": 0.6623, "step": 3752 }, { "epoch": 0.4, "grad_norm": 1.2175591797463994, "learning_rate": 7.003885558182701e-06, "loss": 0.5531, "step": 3753 }, { "epoch": 0.4, "grad_norm": 1.7960577574891674, "learning_rate": 7.002269528256334e-06, "loss": 0.7065, "step": 3754 }, { "epoch": 0.4, "grad_norm": 1.777182628488239, "learning_rate": 7.000653249182258e-06, "loss": 0.6166, "step": 3755 }, { "epoch": 0.4, "grad_norm": 1.916233982110023, "learning_rate": 6.999036721161589e-06, "loss": 0.6563, "step": 3756 }, { "epoch": 0.4, "grad_norm": 1.6227821695938351, "learning_rate": 6.99741994439548e-06, "loss": 0.6274, "step": 3757 }, { "epoch": 0.4, "grad_norm": 1.667782453558548, "learning_rate": 6.995802919085108e-06, "loss": 0.7278, "step": 3758 }, { "epoch": 0.4, "grad_norm": 1.8265488592841235, "learning_rate": 6.9941856454316855e-06, "loss": 0.6696, "step": 3759 }, { "epoch": 0.4, "grad_norm": 1.6867049932445706, "learning_rate": 6.992568123636454e-06, "loss": 0.5984, "step": 3760 }, { "epoch": 0.4, "grad_norm": 2.015433708367711, "learning_rate": 6.990950353900685e-06, "loss": 0.6136, "step": 3761 }, { "epoch": 0.4, "grad_norm": 1.893222698374432, "learning_rate": 6.989332336425683e-06, "loss": 0.5829, "step": 3762 }, { "epoch": 0.4, "grad_norm": 1.7581725705859652, "learning_rate": 6.987714071412781e-06, "loss": 0.5919, "step": 3763 }, { "epoch": 0.4, "grad_norm": 1.3757316425532666, "learning_rate": 6.986095559063342e-06, "loss": 0.5419, "step": 3764 }, { "epoch": 0.4, "grad_norm": 1.7966768896082257, "learning_rate": 6.9844767995787675e-06, "loss": 0.6243, "step": 3765 }, { "epoch": 0.4, "grad_norm": 1.6957384516788045, "learning_rate": 6.98285779316048e-06, "loss": 0.5489, "step": 3766 }, { "epoch": 0.4, "grad_norm": 1.708416273750627, "learning_rate": 6.981238540009937e-06, "loss": 0.6953, "step": 3767 }, { "epoch": 0.4, "grad_norm": 1.936875869561987, "learning_rate": 6.9796190403286264e-06, "loss": 0.7249, "step": 3768 }, { "epoch": 0.4, "grad_norm": 1.9905590174161416, "learning_rate": 6.9779992943180695e-06, "loss": 0.7205, "step": 3769 }, { "epoch": 0.4, "grad_norm": 1.9269200346451065, "learning_rate": 6.9763793021798136e-06, "loss": 0.571, "step": 3770 }, { "epoch": 0.4, "grad_norm": 1.8822000636817555, "learning_rate": 6.974759064115437e-06, "loss": 0.5676, "step": 3771 }, { "epoch": 0.4, "grad_norm": 1.7523604154652157, "learning_rate": 6.9731385803265515e-06, "loss": 0.6219, "step": 3772 }, { "epoch": 0.4, "grad_norm": 1.951289700370707, "learning_rate": 6.9715178510148e-06, "loss": 0.7479, "step": 3773 }, { "epoch": 0.4, "grad_norm": 1.7976573104262836, "learning_rate": 6.969896876381852e-06, "loss": 0.5569, "step": 3774 }, { "epoch": 0.4, "grad_norm": 1.8751149351405143, "learning_rate": 6.9682756566294095e-06, "loss": 0.7484, "step": 3775 }, { "epoch": 0.4, "grad_norm": 1.3427305647727825, "learning_rate": 6.9666541919592065e-06, "loss": 0.5445, "step": 3776 }, { "epoch": 0.4, "grad_norm": 1.278788459532315, "learning_rate": 6.965032482573008e-06, "loss": 0.5413, "step": 3777 }, { "epoch": 0.4, "grad_norm": 1.9346958546763116, "learning_rate": 6.963410528672603e-06, "loss": 0.5685, "step": 3778 }, { "epoch": 0.4, "grad_norm": 2.1168789655481275, "learning_rate": 6.9617883304598176e-06, "loss": 0.6065, "step": 3779 }, { "epoch": 0.4, "grad_norm": 1.7697218177882579, "learning_rate": 6.960165888136508e-06, "loss": 0.7056, "step": 3780 }, { "epoch": 0.4, "grad_norm": 1.6733297455661402, "learning_rate": 6.958543201904555e-06, "loss": 0.6435, "step": 3781 }, { "epoch": 0.4, "grad_norm": 1.7150393250971012, "learning_rate": 6.9569202719658765e-06, "loss": 0.5912, "step": 3782 }, { "epoch": 0.4, "grad_norm": 1.8343344150092278, "learning_rate": 6.955297098522419e-06, "loss": 0.6803, "step": 3783 }, { "epoch": 0.4, "grad_norm": 1.7203674527670694, "learning_rate": 6.953673681776157e-06, "loss": 0.6578, "step": 3784 }, { "epoch": 0.4, "grad_norm": 1.709143649684849, "learning_rate": 6.952050021929097e-06, "loss": 0.689, "step": 3785 }, { "epoch": 0.4, "grad_norm": 1.8682574415924387, "learning_rate": 6.950426119183273e-06, "loss": 0.6905, "step": 3786 }, { "epoch": 0.4, "grad_norm": 1.9094909465871244, "learning_rate": 6.948801973740754e-06, "loss": 0.6718, "step": 3787 }, { "epoch": 0.4, "grad_norm": 1.815476487474528, "learning_rate": 6.947177585803637e-06, "loss": 0.6189, "step": 3788 }, { "epoch": 0.4, "grad_norm": 1.730323654367311, "learning_rate": 6.945552955574045e-06, "loss": 0.6591, "step": 3789 }, { "epoch": 0.4, "grad_norm": 2.281453620905945, "learning_rate": 6.943928083254141e-06, "loss": 0.5414, "step": 3790 }, { "epoch": 0.4, "grad_norm": 1.7933836604142879, "learning_rate": 6.942302969046109e-06, "loss": 0.666, "step": 3791 }, { "epoch": 0.4, "grad_norm": 1.9138676660661968, "learning_rate": 6.9406776131521655e-06, "loss": 0.693, "step": 3792 }, { "epoch": 0.4, "grad_norm": 1.8345142949560684, "learning_rate": 6.9390520157745586e-06, "loss": 0.6965, "step": 3793 }, { "epoch": 0.4, "grad_norm": 1.6983174659828626, "learning_rate": 6.937426177115567e-06, "loss": 0.6813, "step": 3794 }, { "epoch": 0.4, "grad_norm": 1.8521086275974448, "learning_rate": 6.935800097377495e-06, "loss": 0.6774, "step": 3795 }, { "epoch": 0.4, "grad_norm": 1.792804868609787, "learning_rate": 6.934173776762684e-06, "loss": 0.6766, "step": 3796 }, { "epoch": 0.41, "grad_norm": 1.7835749101209462, "learning_rate": 6.932547215473498e-06, "loss": 0.6233, "step": 3797 }, { "epoch": 0.41, "grad_norm": 1.6314749710509389, "learning_rate": 6.930920413712336e-06, "loss": 0.6333, "step": 3798 }, { "epoch": 0.41, "grad_norm": 1.303672169110745, "learning_rate": 6.929293371681626e-06, "loss": 0.5441, "step": 3799 }, { "epoch": 0.41, "grad_norm": 1.6355605653518157, "learning_rate": 6.927666089583823e-06, "loss": 0.5484, "step": 3800 }, { "epoch": 0.41, "grad_norm": 1.639945534446232, "learning_rate": 6.926038567621416e-06, "loss": 0.6169, "step": 3801 }, { "epoch": 0.41, "grad_norm": 1.1408622643948985, "learning_rate": 6.924410805996919e-06, "loss": 0.5098, "step": 3802 }, { "epoch": 0.41, "grad_norm": 1.8243981682829773, "learning_rate": 6.922782804912883e-06, "loss": 0.6942, "step": 3803 }, { "epoch": 0.41, "grad_norm": 2.9597481719776235, "learning_rate": 6.921154564571882e-06, "loss": 0.6331, "step": 3804 }, { "epoch": 0.41, "grad_norm": 1.7910650223339162, "learning_rate": 6.919526085176521e-06, "loss": 0.5654, "step": 3805 }, { "epoch": 0.41, "grad_norm": 1.6166947137655097, "learning_rate": 6.917897366929439e-06, "loss": 0.6245, "step": 3806 }, { "epoch": 0.41, "grad_norm": 1.7483658826236572, "learning_rate": 6.9162684100333e-06, "loss": 0.6208, "step": 3807 }, { "epoch": 0.41, "grad_norm": 1.7251943382865687, "learning_rate": 6.914639214690799e-06, "loss": 0.6888, "step": 3808 }, { "epoch": 0.41, "grad_norm": 1.9804012427784146, "learning_rate": 6.913009781104662e-06, "loss": 0.6563, "step": 3809 }, { "epoch": 0.41, "grad_norm": 1.7956510286769485, "learning_rate": 6.911380109477644e-06, "loss": 0.6429, "step": 3810 }, { "epoch": 0.41, "grad_norm": 1.7405411350451434, "learning_rate": 6.9097502000125286e-06, "loss": 0.6174, "step": 3811 }, { "epoch": 0.41, "grad_norm": 1.7601174217763111, "learning_rate": 6.908120052912129e-06, "loss": 0.5795, "step": 3812 }, { "epoch": 0.41, "grad_norm": 1.7778632121106, "learning_rate": 6.906489668379291e-06, "loss": 0.6201, "step": 3813 }, { "epoch": 0.41, "grad_norm": 1.7635281098335798, "learning_rate": 6.904859046616886e-06, "loss": 0.6856, "step": 3814 }, { "epoch": 0.41, "grad_norm": 1.7386073958407304, "learning_rate": 6.903228187827817e-06, "loss": 0.6285, "step": 3815 }, { "epoch": 0.41, "grad_norm": 1.733462490136902, "learning_rate": 6.901597092215017e-06, "loss": 0.5611, "step": 3816 }, { "epoch": 0.41, "grad_norm": 1.9415745225295469, "learning_rate": 6.899965759981445e-06, "loss": 0.644, "step": 3817 }, { "epoch": 0.41, "grad_norm": 1.787510882348359, "learning_rate": 6.898334191330095e-06, "loss": 0.7434, "step": 3818 }, { "epoch": 0.41, "grad_norm": 1.8365872248118358, "learning_rate": 6.896702386463985e-06, "loss": 0.7209, "step": 3819 }, { "epoch": 0.41, "grad_norm": 1.441724643679466, "learning_rate": 6.895070345586168e-06, "loss": 0.5467, "step": 3820 }, { "epoch": 0.41, "grad_norm": 1.718273162785617, "learning_rate": 6.893438068899718e-06, "loss": 0.6397, "step": 3821 }, { "epoch": 0.41, "grad_norm": 1.9391089990734505, "learning_rate": 6.891805556607748e-06, "loss": 0.6168, "step": 3822 }, { "epoch": 0.41, "grad_norm": 1.6060394146126113, "learning_rate": 6.890172808913395e-06, "loss": 0.6257, "step": 3823 }, { "epoch": 0.41, "grad_norm": 1.8222225793843723, "learning_rate": 6.888539826019824e-06, "loss": 0.6138, "step": 3824 }, { "epoch": 0.41, "grad_norm": 1.8017578362407751, "learning_rate": 6.886906608130235e-06, "loss": 0.6015, "step": 3825 }, { "epoch": 0.41, "grad_norm": 1.7148414551843936, "learning_rate": 6.88527315544785e-06, "loss": 0.5983, "step": 3826 }, { "epoch": 0.41, "grad_norm": 1.6498729048861593, "learning_rate": 6.883639468175926e-06, "loss": 0.5495, "step": 3827 }, { "epoch": 0.41, "grad_norm": 1.5650588875956992, "learning_rate": 6.882005546517747e-06, "loss": 0.5919, "step": 3828 }, { "epoch": 0.41, "grad_norm": 1.5638951350754662, "learning_rate": 6.880371390676624e-06, "loss": 0.5799, "step": 3829 }, { "epoch": 0.41, "grad_norm": 1.6911795287351838, "learning_rate": 6.878737000855902e-06, "loss": 0.646, "step": 3830 }, { "epoch": 0.41, "grad_norm": 1.8496781519266563, "learning_rate": 6.877102377258952e-06, "loss": 0.601, "step": 3831 }, { "epoch": 0.41, "grad_norm": 1.8489788280781116, "learning_rate": 6.875467520089173e-06, "loss": 0.5303, "step": 3832 }, { "epoch": 0.41, "grad_norm": 1.7994879113324485, "learning_rate": 6.873832429549996e-06, "loss": 0.7019, "step": 3833 }, { "epoch": 0.41, "grad_norm": 1.5403457447885516, "learning_rate": 6.87219710584488e-06, "loss": 0.5918, "step": 3834 }, { "epoch": 0.41, "grad_norm": 1.8871221078961928, "learning_rate": 6.870561549177311e-06, "loss": 0.608, "step": 3835 }, { "epoch": 0.41, "grad_norm": 1.3809915622325843, "learning_rate": 6.868925759750808e-06, "loss": 0.5323, "step": 3836 }, { "epoch": 0.41, "grad_norm": 2.126666939228695, "learning_rate": 6.867289737768914e-06, "loss": 0.5062, "step": 3837 }, { "epoch": 0.41, "grad_norm": 1.6832531834283855, "learning_rate": 6.865653483435204e-06, "loss": 0.6301, "step": 3838 }, { "epoch": 0.41, "grad_norm": 1.7657212748918438, "learning_rate": 6.864016996953282e-06, "loss": 0.6197, "step": 3839 }, { "epoch": 0.41, "grad_norm": 1.7387904596877009, "learning_rate": 6.862380278526782e-06, "loss": 0.5622, "step": 3840 }, { "epoch": 0.41, "grad_norm": 1.747165647730229, "learning_rate": 6.860743328359364e-06, "loss": 0.6683, "step": 3841 }, { "epoch": 0.41, "grad_norm": 1.3617801994654117, "learning_rate": 6.8591061466547185e-06, "loss": 0.5813, "step": 3842 }, { "epoch": 0.41, "grad_norm": 1.679684009194608, "learning_rate": 6.857468733616562e-06, "loss": 0.617, "step": 3843 }, { "epoch": 0.41, "grad_norm": 1.7284412624204861, "learning_rate": 6.855831089448645e-06, "loss": 0.7295, "step": 3844 }, { "epoch": 0.41, "grad_norm": 1.6661177272454153, "learning_rate": 6.854193214354742e-06, "loss": 0.6346, "step": 3845 }, { "epoch": 0.41, "grad_norm": 1.213538778249861, "learning_rate": 6.8525551085386575e-06, "loss": 0.5505, "step": 3846 }, { "epoch": 0.41, "grad_norm": 1.5916569204570492, "learning_rate": 6.850916772204228e-06, "loss": 0.5994, "step": 3847 }, { "epoch": 0.41, "grad_norm": 1.7736175096332105, "learning_rate": 6.849278205555317e-06, "loss": 0.6148, "step": 3848 }, { "epoch": 0.41, "grad_norm": 1.8077780325589599, "learning_rate": 6.847639408795812e-06, "loss": 0.7392, "step": 3849 }, { "epoch": 0.41, "grad_norm": 1.6270634450892907, "learning_rate": 6.846000382129634e-06, "loss": 0.5801, "step": 3850 }, { "epoch": 0.41, "grad_norm": 1.3800883776709305, "learning_rate": 6.844361125760731e-06, "loss": 0.56, "step": 3851 }, { "epoch": 0.41, "grad_norm": 1.7918199181248382, "learning_rate": 6.842721639893082e-06, "loss": 0.708, "step": 3852 }, { "epoch": 0.41, "grad_norm": 1.834360805142702, "learning_rate": 6.84108192473069e-06, "loss": 0.7106, "step": 3853 }, { "epoch": 0.41, "grad_norm": 1.5787141894307843, "learning_rate": 6.839441980477591e-06, "loss": 0.5718, "step": 3854 }, { "epoch": 0.41, "grad_norm": 1.7157380255375807, "learning_rate": 6.837801807337847e-06, "loss": 0.61, "step": 3855 }, { "epoch": 0.41, "grad_norm": 2.186700736354224, "learning_rate": 6.836161405515551e-06, "loss": 0.6753, "step": 3856 }, { "epoch": 0.41, "grad_norm": 1.7531055871232442, "learning_rate": 6.83452077521482e-06, "loss": 0.6544, "step": 3857 }, { "epoch": 0.41, "grad_norm": 1.690317099457984, "learning_rate": 6.832879916639802e-06, "loss": 0.6172, "step": 3858 }, { "epoch": 0.41, "grad_norm": 1.73212291190504, "learning_rate": 6.831238829994674e-06, "loss": 0.5725, "step": 3859 }, { "epoch": 0.41, "grad_norm": 1.7079060867803442, "learning_rate": 6.829597515483642e-06, "loss": 0.6288, "step": 3860 }, { "epoch": 0.41, "grad_norm": 1.6681255272872406, "learning_rate": 6.827955973310936e-06, "loss": 0.5497, "step": 3861 }, { "epoch": 0.41, "grad_norm": 1.3204967996871138, "learning_rate": 6.826314203680821e-06, "loss": 0.5534, "step": 3862 }, { "epoch": 0.41, "grad_norm": 1.943055135281587, "learning_rate": 6.824672206797584e-06, "loss": 0.6091, "step": 3863 }, { "epoch": 0.41, "grad_norm": 1.614917798091622, "learning_rate": 6.823029982865545e-06, "loss": 0.5818, "step": 3864 }, { "epoch": 0.41, "grad_norm": 1.8328362777488474, "learning_rate": 6.821387532089049e-06, "loss": 0.6897, "step": 3865 }, { "epoch": 0.41, "grad_norm": 1.7961836846335406, "learning_rate": 6.819744854672471e-06, "loss": 0.6897, "step": 3866 }, { "epoch": 0.41, "grad_norm": 1.7331987771593629, "learning_rate": 6.818101950820214e-06, "loss": 0.5851, "step": 3867 }, { "epoch": 0.41, "grad_norm": 1.8049757218613027, "learning_rate": 6.816458820736709e-06, "loss": 0.6253, "step": 3868 }, { "epoch": 0.41, "grad_norm": 1.7903133562908118, "learning_rate": 6.8148154646264145e-06, "loss": 0.6319, "step": 3869 }, { "epoch": 0.41, "grad_norm": 1.6609670685865254, "learning_rate": 6.813171882693816e-06, "loss": 0.5832, "step": 3870 }, { "epoch": 0.41, "grad_norm": 1.8550523995358759, "learning_rate": 6.811528075143432e-06, "loss": 0.6325, "step": 3871 }, { "epoch": 0.41, "grad_norm": 1.9009205184607485, "learning_rate": 6.809884042179805e-06, "loss": 0.7127, "step": 3872 }, { "epoch": 0.41, "grad_norm": 1.6842978700117588, "learning_rate": 6.8082397840075045e-06, "loss": 0.6771, "step": 3873 }, { "epoch": 0.41, "grad_norm": 1.59978295671992, "learning_rate": 6.806595300831131e-06, "loss": 0.5865, "step": 3874 }, { "epoch": 0.41, "grad_norm": 1.5923726084550223, "learning_rate": 6.804950592855314e-06, "loss": 0.5421, "step": 3875 }, { "epoch": 0.41, "grad_norm": 1.7574956264110368, "learning_rate": 6.803305660284705e-06, "loss": 0.6338, "step": 3876 }, { "epoch": 0.41, "grad_norm": 1.766161788201345, "learning_rate": 6.80166050332399e-06, "loss": 0.6355, "step": 3877 }, { "epoch": 0.41, "grad_norm": 1.7897522238958412, "learning_rate": 6.80001512217788e-06, "loss": 0.6095, "step": 3878 }, { "epoch": 0.41, "grad_norm": 1.343396761976906, "learning_rate": 6.798369517051114e-06, "loss": 0.528, "step": 3879 }, { "epoch": 0.41, "grad_norm": 1.8875595758489752, "learning_rate": 6.796723688148457e-06, "loss": 0.5821, "step": 3880 }, { "epoch": 0.41, "grad_norm": 1.7590826791441088, "learning_rate": 6.795077635674705e-06, "loss": 0.7025, "step": 3881 }, { "epoch": 0.41, "grad_norm": 1.796286099394965, "learning_rate": 6.793431359834683e-06, "loss": 0.625, "step": 3882 }, { "epoch": 0.41, "grad_norm": 1.9556237426536456, "learning_rate": 6.7917848608332395e-06, "loss": 0.7063, "step": 3883 }, { "epoch": 0.41, "grad_norm": 1.6662074447878055, "learning_rate": 6.7901381388752515e-06, "loss": 0.6521, "step": 3884 }, { "epoch": 0.41, "grad_norm": 1.2896584301167362, "learning_rate": 6.788491194165629e-06, "loss": 0.5419, "step": 3885 }, { "epoch": 0.41, "grad_norm": 1.1911284542916658, "learning_rate": 6.7868440269093e-06, "loss": 0.5422, "step": 3886 }, { "epoch": 0.41, "grad_norm": 1.1206802475676327, "learning_rate": 6.785196637311231e-06, "loss": 0.535, "step": 3887 }, { "epoch": 0.41, "grad_norm": 1.633163188216706, "learning_rate": 6.783549025576408e-06, "loss": 0.6221, "step": 3888 }, { "epoch": 0.41, "grad_norm": 1.6759255371394999, "learning_rate": 6.781901191909849e-06, "loss": 0.6735, "step": 3889 }, { "epoch": 0.41, "grad_norm": 1.8926697376198065, "learning_rate": 6.780253136516598e-06, "loss": 0.6674, "step": 3890 }, { "epoch": 0.42, "grad_norm": 1.8912978719226898, "learning_rate": 6.778604859601728e-06, "loss": 0.6535, "step": 3891 }, { "epoch": 0.42, "grad_norm": 1.9533216150272572, "learning_rate": 6.776956361370337e-06, "loss": 0.6641, "step": 3892 }, { "epoch": 0.42, "grad_norm": 1.882444124094685, "learning_rate": 6.775307642027551e-06, "loss": 0.6991, "step": 3893 }, { "epoch": 0.42, "grad_norm": 1.7907469414481771, "learning_rate": 6.773658701778526e-06, "loss": 0.6459, "step": 3894 }, { "epoch": 0.42, "grad_norm": 1.9994911074500685, "learning_rate": 6.772009540828445e-06, "loss": 0.6144, "step": 3895 }, { "epoch": 0.42, "grad_norm": 1.7896001468555038, "learning_rate": 6.770360159382516e-06, "loss": 0.5762, "step": 3896 }, { "epoch": 0.42, "grad_norm": 1.6901235903151053, "learning_rate": 6.768710557645976e-06, "loss": 0.6498, "step": 3897 }, { "epoch": 0.42, "grad_norm": 1.9588705361458312, "learning_rate": 6.7670607358240914e-06, "loss": 0.62, "step": 3898 }, { "epoch": 0.42, "grad_norm": 2.2502797759367237, "learning_rate": 6.765410694122152e-06, "loss": 0.5587, "step": 3899 }, { "epoch": 0.42, "grad_norm": 1.8306152031685086, "learning_rate": 6.763760432745475e-06, "loss": 0.7281, "step": 3900 }, { "epoch": 0.42, "grad_norm": 1.939772336738966, "learning_rate": 6.7621099518994095e-06, "loss": 0.6773, "step": 3901 }, { "epoch": 0.42, "grad_norm": 1.656329441977379, "learning_rate": 6.760459251789328e-06, "loss": 0.792, "step": 3902 }, { "epoch": 0.42, "grad_norm": 1.7759840104488254, "learning_rate": 6.758808332620632e-06, "loss": 0.6269, "step": 3903 }, { "epoch": 0.42, "grad_norm": 1.7278329775322796, "learning_rate": 6.757157194598751e-06, "loss": 0.6579, "step": 3904 }, { "epoch": 0.42, "grad_norm": 1.6755739758682144, "learning_rate": 6.755505837929139e-06, "loss": 0.6569, "step": 3905 }, { "epoch": 0.42, "grad_norm": 1.714465641313564, "learning_rate": 6.75385426281728e-06, "loss": 0.7382, "step": 3906 }, { "epoch": 0.42, "grad_norm": 1.7151166377539515, "learning_rate": 6.752202469468682e-06, "loss": 0.6915, "step": 3907 }, { "epoch": 0.42, "grad_norm": 1.7426262598749918, "learning_rate": 6.7505504580888816e-06, "loss": 0.6482, "step": 3908 }, { "epoch": 0.42, "grad_norm": 1.7466726678121707, "learning_rate": 6.748898228883445e-06, "loss": 0.5794, "step": 3909 }, { "epoch": 0.42, "grad_norm": 1.6657461791154238, "learning_rate": 6.747245782057963e-06, "loss": 0.6476, "step": 3910 }, { "epoch": 0.42, "grad_norm": 1.8816668929184484, "learning_rate": 6.745593117818052e-06, "loss": 0.751, "step": 3911 }, { "epoch": 0.42, "grad_norm": 1.92368457578422, "learning_rate": 6.74394023636936e-06, "loss": 0.6796, "step": 3912 }, { "epoch": 0.42, "grad_norm": 1.6595794335052652, "learning_rate": 6.74228713791756e-06, "loss": 0.6524, "step": 3913 }, { "epoch": 0.42, "grad_norm": 1.2983757472337936, "learning_rate": 6.740633822668348e-06, "loss": 0.556, "step": 3914 }, { "epoch": 0.42, "grad_norm": 1.7636191919658677, "learning_rate": 6.7389802908274526e-06, "loss": 0.5819, "step": 3915 }, { "epoch": 0.42, "grad_norm": 1.6313209915990372, "learning_rate": 6.7373265426006275e-06, "loss": 0.5902, "step": 3916 }, { "epoch": 0.42, "grad_norm": 1.891647065307919, "learning_rate": 6.735672578193651e-06, "loss": 0.7032, "step": 3917 }, { "epoch": 0.42, "grad_norm": 1.704254068700177, "learning_rate": 6.734018397812333e-06, "loss": 0.6104, "step": 3918 }, { "epoch": 0.42, "grad_norm": 1.1991816233322352, "learning_rate": 6.732364001662505e-06, "loss": 0.5284, "step": 3919 }, { "epoch": 0.42, "grad_norm": 1.5919946644464167, "learning_rate": 6.7307093899500295e-06, "loss": 0.6426, "step": 3920 }, { "epoch": 0.42, "grad_norm": 1.7653176853475354, "learning_rate": 6.729054562880795e-06, "loss": 0.6509, "step": 3921 }, { "epoch": 0.42, "grad_norm": 1.765230194469242, "learning_rate": 6.727399520660714e-06, "loss": 0.6263, "step": 3922 }, { "epoch": 0.42, "grad_norm": 1.6239081745753539, "learning_rate": 6.725744263495727e-06, "loss": 0.5635, "step": 3923 }, { "epoch": 0.42, "grad_norm": 2.132887887844938, "learning_rate": 6.724088791591805e-06, "loss": 0.5848, "step": 3924 }, { "epoch": 0.42, "grad_norm": 1.6906896098148279, "learning_rate": 6.7224331051549405e-06, "loss": 0.626, "step": 3925 }, { "epoch": 0.42, "grad_norm": 1.6847353818504769, "learning_rate": 6.7207772043911556e-06, "loss": 0.5537, "step": 3926 }, { "epoch": 0.42, "grad_norm": 1.5534461996069227, "learning_rate": 6.7191210895065e-06, "loss": 0.5455, "step": 3927 }, { "epoch": 0.42, "grad_norm": 1.7237303570551834, "learning_rate": 6.717464760707046e-06, "loss": 0.5944, "step": 3928 }, { "epoch": 0.42, "grad_norm": 1.8119597991241152, "learning_rate": 6.715808218198897e-06, "loss": 0.6884, "step": 3929 }, { "epoch": 0.42, "grad_norm": 2.0094560890885163, "learning_rate": 6.71415146218818e-06, "loss": 0.6771, "step": 3930 }, { "epoch": 0.42, "grad_norm": 1.365915153595503, "learning_rate": 6.7124944928810486e-06, "loss": 0.5478, "step": 3931 }, { "epoch": 0.42, "grad_norm": 1.793096317666742, "learning_rate": 6.710837310483686e-06, "loss": 0.5995, "step": 3932 }, { "epoch": 0.42, "grad_norm": 2.2744646878252546, "learning_rate": 6.709179915202297e-06, "loss": 0.6724, "step": 3933 }, { "epoch": 0.42, "grad_norm": 1.8030834475302084, "learning_rate": 6.707522307243119e-06, "loss": 0.6445, "step": 3934 }, { "epoch": 0.42, "grad_norm": 1.7111670537900951, "learning_rate": 6.705864486812411e-06, "loss": 0.5622, "step": 3935 }, { "epoch": 0.42, "grad_norm": 2.6272838451390297, "learning_rate": 6.7042064541164594e-06, "loss": 0.6704, "step": 3936 }, { "epoch": 0.42, "grad_norm": 1.6044221730882235, "learning_rate": 6.702548209361579e-06, "loss": 0.6481, "step": 3937 }, { "epoch": 0.42, "grad_norm": 1.3614022801792616, "learning_rate": 6.700889752754107e-06, "loss": 0.5515, "step": 3938 }, { "epoch": 0.42, "grad_norm": 1.6896213215608955, "learning_rate": 6.6992310845004125e-06, "loss": 0.5805, "step": 3939 }, { "epoch": 0.42, "grad_norm": 1.8478674976380272, "learning_rate": 6.6975722048068866e-06, "loss": 0.5684, "step": 3940 }, { "epoch": 0.42, "grad_norm": 1.6445440251719365, "learning_rate": 6.695913113879948e-06, "loss": 0.5295, "step": 3941 }, { "epoch": 0.42, "grad_norm": 1.877698397338097, "learning_rate": 6.694253811926044e-06, "loss": 0.6348, "step": 3942 }, { "epoch": 0.42, "grad_norm": 1.8474020264986395, "learning_rate": 6.692594299151642e-06, "loss": 0.6974, "step": 3943 }, { "epoch": 0.42, "grad_norm": 1.2667473360684316, "learning_rate": 6.690934575763241e-06, "loss": 0.5279, "step": 3944 }, { "epoch": 0.42, "grad_norm": 1.659637901851141, "learning_rate": 6.689274641967366e-06, "loss": 0.5615, "step": 3945 }, { "epoch": 0.42, "grad_norm": 1.7248708026785895, "learning_rate": 6.687614497970567e-06, "loss": 0.6618, "step": 3946 }, { "epoch": 0.42, "grad_norm": 1.1315279262893339, "learning_rate": 6.6859541439794185e-06, "loss": 0.523, "step": 3947 }, { "epoch": 0.42, "grad_norm": 2.046000264759542, "learning_rate": 6.6842935802005246e-06, "loss": 0.6651, "step": 3948 }, { "epoch": 0.42, "grad_norm": 2.336605651362325, "learning_rate": 6.682632806840514e-06, "loss": 0.6613, "step": 3949 }, { "epoch": 0.42, "grad_norm": 1.729851307921121, "learning_rate": 6.680971824106039e-06, "loss": 0.5548, "step": 3950 }, { "epoch": 0.42, "grad_norm": 1.8030985906904065, "learning_rate": 6.679310632203779e-06, "loss": 0.7244, "step": 3951 }, { "epoch": 0.42, "grad_norm": 1.8549476858589082, "learning_rate": 6.677649231340444e-06, "loss": 0.5354, "step": 3952 }, { "epoch": 0.42, "grad_norm": 1.681879284207055, "learning_rate": 6.675987621722765e-06, "loss": 0.5479, "step": 3953 }, { "epoch": 0.42, "grad_norm": 1.6456183908655801, "learning_rate": 6.6743258035575e-06, "loss": 0.6792, "step": 3954 }, { "epoch": 0.42, "grad_norm": 1.8129618390581814, "learning_rate": 6.672663777051434e-06, "loss": 0.6151, "step": 3955 }, { "epoch": 0.42, "grad_norm": 1.6507789335670817, "learning_rate": 6.671001542411379e-06, "loss": 0.6833, "step": 3956 }, { "epoch": 0.42, "grad_norm": 1.7563753705975123, "learning_rate": 6.669339099844169e-06, "loss": 0.6048, "step": 3957 }, { "epoch": 0.42, "grad_norm": 1.716928572959947, "learning_rate": 6.667676449556666e-06, "loss": 0.648, "step": 3958 }, { "epoch": 0.42, "grad_norm": 1.5713497890883403, "learning_rate": 6.666013591755758e-06, "loss": 0.6209, "step": 3959 }, { "epoch": 0.42, "grad_norm": 1.5711889633034521, "learning_rate": 6.66435052664836e-06, "loss": 0.5318, "step": 3960 }, { "epoch": 0.42, "grad_norm": 1.57840969483253, "learning_rate": 6.662687254441411e-06, "loss": 0.696, "step": 3961 }, { "epoch": 0.42, "grad_norm": 1.7584044222160538, "learning_rate": 6.6610237753418775e-06, "loss": 0.5322, "step": 3962 }, { "epoch": 0.42, "grad_norm": 1.1600600310975528, "learning_rate": 6.659360089556748e-06, "loss": 0.5203, "step": 3963 }, { "epoch": 0.42, "grad_norm": 1.5776485797513402, "learning_rate": 6.657696197293043e-06, "loss": 0.4839, "step": 3964 }, { "epoch": 0.42, "grad_norm": 1.2016631736655694, "learning_rate": 6.656032098757802e-06, "loss": 0.5527, "step": 3965 }, { "epoch": 0.42, "grad_norm": 1.79401747006018, "learning_rate": 6.6543677941580945e-06, "loss": 0.6541, "step": 3966 }, { "epoch": 0.42, "grad_norm": 1.6239159669861203, "learning_rate": 6.652703283701014e-06, "loss": 0.6412, "step": 3967 }, { "epoch": 0.42, "grad_norm": 1.6910043012872755, "learning_rate": 6.651038567593679e-06, "loss": 0.5916, "step": 3968 }, { "epoch": 0.42, "grad_norm": 1.6214867742932577, "learning_rate": 6.649373646043236e-06, "loss": 0.5993, "step": 3969 }, { "epoch": 0.42, "grad_norm": 1.9324021312449262, "learning_rate": 6.647708519256854e-06, "loss": 0.6739, "step": 3970 }, { "epoch": 0.42, "grad_norm": 1.808935916217069, "learning_rate": 6.646043187441733e-06, "loss": 0.5352, "step": 3971 }, { "epoch": 0.42, "grad_norm": 1.9789964765871575, "learning_rate": 6.644377650805091e-06, "loss": 0.7165, "step": 3972 }, { "epoch": 0.42, "grad_norm": 1.2726675285725433, "learning_rate": 6.6427119095541745e-06, "loss": 0.5078, "step": 3973 }, { "epoch": 0.42, "grad_norm": 1.7416193757910512, "learning_rate": 6.641045963896259e-06, "loss": 0.5976, "step": 3974 }, { "epoch": 0.42, "grad_norm": 1.8218772284754474, "learning_rate": 6.63937981403864e-06, "loss": 0.626, "step": 3975 }, { "epoch": 0.42, "grad_norm": 1.9538151680528975, "learning_rate": 6.637713460188643e-06, "loss": 0.6939, "step": 3976 }, { "epoch": 0.42, "grad_norm": 2.0150825121961313, "learning_rate": 6.636046902553615e-06, "loss": 0.7087, "step": 3977 }, { "epoch": 0.42, "grad_norm": 1.6520284995824583, "learning_rate": 6.6343801413409335e-06, "loss": 0.6329, "step": 3978 }, { "epoch": 0.42, "grad_norm": 1.6866494183565583, "learning_rate": 6.632713176757994e-06, "loss": 0.661, "step": 3979 }, { "epoch": 0.42, "grad_norm": 1.6649796434979607, "learning_rate": 6.631046009012223e-06, "loss": 0.634, "step": 3980 }, { "epoch": 0.42, "grad_norm": 1.3176592516132586, "learning_rate": 6.6293786383110705e-06, "loss": 0.5392, "step": 3981 }, { "epoch": 0.42, "grad_norm": 1.8179868673129669, "learning_rate": 6.627711064862012e-06, "loss": 0.6845, "step": 3982 }, { "epoch": 0.42, "grad_norm": 1.1699555482163555, "learning_rate": 6.626043288872549e-06, "loss": 0.5442, "step": 3983 }, { "epoch": 0.42, "grad_norm": 1.6410783083790814, "learning_rate": 6.624375310550205e-06, "loss": 0.5935, "step": 3984 }, { "epoch": 0.43, "grad_norm": 1.7080423252408763, "learning_rate": 6.622707130102534e-06, "loss": 0.6446, "step": 3985 }, { "epoch": 0.43, "grad_norm": 1.797496610853984, "learning_rate": 6.621038747737108e-06, "loss": 0.6342, "step": 3986 }, { "epoch": 0.43, "grad_norm": 1.7467260518798597, "learning_rate": 6.619370163661533e-06, "loss": 0.6824, "step": 3987 }, { "epoch": 0.43, "grad_norm": 1.9473095871218054, "learning_rate": 6.617701378083432e-06, "loss": 0.6513, "step": 3988 }, { "epoch": 0.43, "grad_norm": 1.7629111940751994, "learning_rate": 6.6160323912104565e-06, "loss": 0.6943, "step": 3989 }, { "epoch": 0.43, "grad_norm": 1.7543124701117159, "learning_rate": 6.614363203250285e-06, "loss": 0.5894, "step": 3990 }, { "epoch": 0.43, "grad_norm": 1.6436521847826187, "learning_rate": 6.612693814410618e-06, "loss": 0.6183, "step": 3991 }, { "epoch": 0.43, "grad_norm": 1.735944452161843, "learning_rate": 6.611024224899181e-06, "loss": 0.6711, "step": 3992 }, { "epoch": 0.43, "grad_norm": 1.6983622596057748, "learning_rate": 6.609354434923727e-06, "loss": 0.6568, "step": 3993 }, { "epoch": 0.43, "grad_norm": 1.5653832902250981, "learning_rate": 6.607684444692032e-06, "loss": 0.5449, "step": 3994 }, { "epoch": 0.43, "grad_norm": 1.667627101632958, "learning_rate": 6.606014254411896e-06, "loss": 0.6963, "step": 3995 }, { "epoch": 0.43, "grad_norm": 1.8019620705543293, "learning_rate": 6.6043438642911476e-06, "loss": 0.6268, "step": 3996 }, { "epoch": 0.43, "grad_norm": 1.6557219009712374, "learning_rate": 6.602673274537634e-06, "loss": 0.6835, "step": 3997 }, { "epoch": 0.43, "grad_norm": 1.228156273947941, "learning_rate": 6.6010024853592356e-06, "loss": 0.5224, "step": 3998 }, { "epoch": 0.43, "grad_norm": 2.0388300641904915, "learning_rate": 6.599331496963851e-06, "loss": 0.6878, "step": 3999 }, { "epoch": 0.43, "grad_norm": 1.7131458861634488, "learning_rate": 6.597660309559406e-06, "loss": 0.6214, "step": 4000 }, { "epoch": 0.43, "grad_norm": 1.8323408896952464, "learning_rate": 6.595988923353851e-06, "loss": 0.5564, "step": 4001 }, { "epoch": 0.43, "grad_norm": 1.6303220909988805, "learning_rate": 6.5943173385551595e-06, "loss": 0.6919, "step": 4002 }, { "epoch": 0.43, "grad_norm": 1.6177890977838705, "learning_rate": 6.592645555371333e-06, "loss": 0.5347, "step": 4003 }, { "epoch": 0.43, "grad_norm": 1.7863917332611943, "learning_rate": 6.590973574010395e-06, "loss": 0.6756, "step": 4004 }, { "epoch": 0.43, "grad_norm": 1.804544634713818, "learning_rate": 6.5893013946803965e-06, "loss": 0.6314, "step": 4005 }, { "epoch": 0.43, "grad_norm": 1.7763472754553782, "learning_rate": 6.587629017589409e-06, "loss": 0.7145, "step": 4006 }, { "epoch": 0.43, "grad_norm": 1.5511879190331934, "learning_rate": 6.585956442945531e-06, "loss": 0.6435, "step": 4007 }, { "epoch": 0.43, "grad_norm": 1.6636001857884952, "learning_rate": 6.5842836709568856e-06, "loss": 0.5768, "step": 4008 }, { "epoch": 0.43, "grad_norm": 1.8280832026465184, "learning_rate": 6.582610701831621e-06, "loss": 0.6702, "step": 4009 }, { "epoch": 0.43, "grad_norm": 1.725136206638528, "learning_rate": 6.580937535777909e-06, "loss": 0.7812, "step": 4010 }, { "epoch": 0.43, "grad_norm": 1.717513178031278, "learning_rate": 6.579264173003945e-06, "loss": 0.5892, "step": 4011 }, { "epoch": 0.43, "grad_norm": 1.5773132898440039, "learning_rate": 6.577590613717952e-06, "loss": 0.5401, "step": 4012 }, { "epoch": 0.43, "grad_norm": 1.8957410240937331, "learning_rate": 6.575916858128174e-06, "loss": 0.6199, "step": 4013 }, { "epoch": 0.43, "grad_norm": 1.6079384195487352, "learning_rate": 6.5742429064428814e-06, "loss": 0.5762, "step": 4014 }, { "epoch": 0.43, "grad_norm": 1.2158091284794363, "learning_rate": 6.572568758870368e-06, "loss": 0.5499, "step": 4015 }, { "epoch": 0.43, "grad_norm": 1.6831396516654162, "learning_rate": 6.570894415618951e-06, "loss": 0.6639, "step": 4016 }, { "epoch": 0.43, "grad_norm": 1.9688109034032646, "learning_rate": 6.569219876896976e-06, "loss": 0.6719, "step": 4017 }, { "epoch": 0.43, "grad_norm": 1.6762471691960812, "learning_rate": 6.567545142912809e-06, "loss": 0.5905, "step": 4018 }, { "epoch": 0.43, "grad_norm": 1.7190497369403088, "learning_rate": 6.565870213874842e-06, "loss": 0.643, "step": 4019 }, { "epoch": 0.43, "grad_norm": 1.331101391443959, "learning_rate": 6.564195089991491e-06, "loss": 0.5259, "step": 4020 }, { "epoch": 0.43, "grad_norm": 1.8479870486643257, "learning_rate": 6.562519771471196e-06, "loss": 0.7367, "step": 4021 }, { "epoch": 0.43, "grad_norm": 1.7746614308593052, "learning_rate": 6.5608442585224205e-06, "loss": 0.6322, "step": 4022 }, { "epoch": 0.43, "grad_norm": 1.2632351827709103, "learning_rate": 6.559168551353654e-06, "loss": 0.5435, "step": 4023 }, { "epoch": 0.43, "grad_norm": 1.5286293341404462, "learning_rate": 6.557492650173409e-06, "loss": 0.5926, "step": 4024 }, { "epoch": 0.43, "grad_norm": 1.0976944766213297, "learning_rate": 6.55581655519022e-06, "loss": 0.5188, "step": 4025 }, { "epoch": 0.43, "grad_norm": 1.1723127982377317, "learning_rate": 6.554140266612652e-06, "loss": 0.5435, "step": 4026 }, { "epoch": 0.43, "grad_norm": 1.980200376244349, "learning_rate": 6.5524637846492875e-06, "loss": 0.6718, "step": 4027 }, { "epoch": 0.43, "grad_norm": 1.786903455472858, "learning_rate": 6.550787109508738e-06, "loss": 0.7082, "step": 4028 }, { "epoch": 0.43, "grad_norm": 1.697049545894788, "learning_rate": 6.549110241399633e-06, "loss": 0.5307, "step": 4029 }, { "epoch": 0.43, "grad_norm": 1.7760271809235124, "learning_rate": 6.547433180530632e-06, "loss": 0.6711, "step": 4030 }, { "epoch": 0.43, "grad_norm": 1.7535901129145703, "learning_rate": 6.545755927110416e-06, "loss": 0.5541, "step": 4031 }, { "epoch": 0.43, "grad_norm": 1.2536753945535797, "learning_rate": 6.544078481347689e-06, "loss": 0.5239, "step": 4032 }, { "epoch": 0.43, "grad_norm": 1.6212292298272908, "learning_rate": 6.54240084345118e-06, "loss": 0.5855, "step": 4033 }, { "epoch": 0.43, "grad_norm": 1.6162137888562733, "learning_rate": 6.540723013629644e-06, "loss": 0.5163, "step": 4034 }, { "epoch": 0.43, "grad_norm": 1.827235867550793, "learning_rate": 6.5390449920918575e-06, "loss": 0.7068, "step": 4035 }, { "epoch": 0.43, "grad_norm": 1.5457773941911375, "learning_rate": 6.537366779046619e-06, "loss": 0.5234, "step": 4036 }, { "epoch": 0.43, "grad_norm": 1.7785766893830917, "learning_rate": 6.535688374702755e-06, "loss": 0.6418, "step": 4037 }, { "epoch": 0.43, "grad_norm": 1.7601216675548106, "learning_rate": 6.534009779269111e-06, "loss": 0.6345, "step": 4038 }, { "epoch": 0.43, "grad_norm": 1.3257455586366913, "learning_rate": 6.532330992954562e-06, "loss": 0.5689, "step": 4039 }, { "epoch": 0.43, "grad_norm": 1.606144115638746, "learning_rate": 6.530652015968003e-06, "loss": 0.5617, "step": 4040 }, { "epoch": 0.43, "grad_norm": 1.7888065064582699, "learning_rate": 6.528972848518353e-06, "loss": 0.6551, "step": 4041 }, { "epoch": 0.43, "grad_norm": 1.7336114852987312, "learning_rate": 6.527293490814556e-06, "loss": 0.6539, "step": 4042 }, { "epoch": 0.43, "grad_norm": 1.7984734510009721, "learning_rate": 6.52561394306558e-06, "loss": 0.6183, "step": 4043 }, { "epoch": 0.43, "grad_norm": 1.7622912530291892, "learning_rate": 6.523934205480413e-06, "loss": 0.6914, "step": 4044 }, { "epoch": 0.43, "grad_norm": 2.3392987046269225, "learning_rate": 6.522254278268071e-06, "loss": 0.6952, "step": 4045 }, { "epoch": 0.43, "grad_norm": 1.2671580264548568, "learning_rate": 6.520574161637591e-06, "loss": 0.5317, "step": 4046 }, { "epoch": 0.43, "grad_norm": 1.68108574596802, "learning_rate": 6.5188938557980344e-06, "loss": 0.5819, "step": 4047 }, { "epoch": 0.43, "grad_norm": 1.8659265965198346, "learning_rate": 6.517213360958485e-06, "loss": 0.648, "step": 4048 }, { "epoch": 0.43, "grad_norm": 1.9792014598717738, "learning_rate": 6.5155326773280546e-06, "loss": 0.5928, "step": 4049 }, { "epoch": 0.43, "grad_norm": 1.6517415008067466, "learning_rate": 6.5138518051158716e-06, "loss": 0.593, "step": 4050 }, { "epoch": 0.43, "grad_norm": 1.6796483244156015, "learning_rate": 6.512170744531095e-06, "loss": 0.6362, "step": 4051 }, { "epoch": 0.43, "grad_norm": 1.8408284944633775, "learning_rate": 6.510489495782899e-06, "loss": 0.5567, "step": 4052 }, { "epoch": 0.43, "grad_norm": 1.8795700086090437, "learning_rate": 6.508808059080489e-06, "loss": 0.5888, "step": 4053 }, { "epoch": 0.43, "grad_norm": 1.7906194554185046, "learning_rate": 6.507126434633091e-06, "loss": 0.6331, "step": 4054 }, { "epoch": 0.43, "grad_norm": 1.9373802834053262, "learning_rate": 6.505444622649952e-06, "loss": 0.7401, "step": 4055 }, { "epoch": 0.43, "grad_norm": 1.7715744522441519, "learning_rate": 6.503762623340346e-06, "loss": 0.7142, "step": 4056 }, { "epoch": 0.43, "grad_norm": 1.8456645935579024, "learning_rate": 6.502080436913567e-06, "loss": 0.6496, "step": 4057 }, { "epoch": 0.43, "grad_norm": 1.3922526987354127, "learning_rate": 6.500398063578935e-06, "loss": 0.448, "step": 4058 }, { "epoch": 0.43, "grad_norm": 1.898314272608631, "learning_rate": 6.498715503545793e-06, "loss": 0.6964, "step": 4059 }, { "epoch": 0.43, "grad_norm": 1.8005446669233929, "learning_rate": 6.497032757023505e-06, "loss": 0.5456, "step": 4060 }, { "epoch": 0.43, "grad_norm": 1.8372481006875752, "learning_rate": 6.49534982422146e-06, "loss": 0.7452, "step": 4061 }, { "epoch": 0.43, "grad_norm": 1.6831710143682654, "learning_rate": 6.493666705349069e-06, "loss": 0.6565, "step": 4062 }, { "epoch": 0.43, "grad_norm": 1.9621705328289294, "learning_rate": 6.491983400615768e-06, "loss": 0.5607, "step": 4063 }, { "epoch": 0.43, "grad_norm": 1.8241838911739787, "learning_rate": 6.490299910231015e-06, "loss": 0.7068, "step": 4064 }, { "epoch": 0.43, "grad_norm": 1.5461729235047388, "learning_rate": 6.488616234404292e-06, "loss": 0.5587, "step": 4065 }, { "epoch": 0.43, "grad_norm": 1.8178798623549837, "learning_rate": 6.486932373345101e-06, "loss": 0.6292, "step": 4066 }, { "epoch": 0.43, "grad_norm": 1.3361472182932248, "learning_rate": 6.4852483272629705e-06, "loss": 0.5247, "step": 4067 }, { "epoch": 0.43, "grad_norm": 1.7004568871893977, "learning_rate": 6.483564096367452e-06, "loss": 0.6038, "step": 4068 }, { "epoch": 0.43, "grad_norm": 1.6663032955028128, "learning_rate": 6.481879680868117e-06, "loss": 0.5449, "step": 4069 }, { "epoch": 0.43, "grad_norm": 1.6789652829505735, "learning_rate": 6.480195080974563e-06, "loss": 0.6416, "step": 4070 }, { "epoch": 0.43, "grad_norm": 1.1088510271940786, "learning_rate": 6.478510296896409e-06, "loss": 0.5155, "step": 4071 }, { "epoch": 0.43, "grad_norm": 1.6969487674903514, "learning_rate": 6.476825328843296e-06, "loss": 0.6644, "step": 4072 }, { "epoch": 0.43, "grad_norm": 1.846175165266544, "learning_rate": 6.475140177024889e-06, "loss": 0.6535, "step": 4073 }, { "epoch": 0.43, "grad_norm": 1.7024452590628734, "learning_rate": 6.473454841650878e-06, "loss": 0.6248, "step": 4074 }, { "epoch": 0.43, "grad_norm": 1.727208887413399, "learning_rate": 6.471769322930972e-06, "loss": 0.643, "step": 4075 }, { "epoch": 0.43, "grad_norm": 1.663274287866434, "learning_rate": 6.4700836210749055e-06, "loss": 0.5981, "step": 4076 }, { "epoch": 0.43, "grad_norm": 1.746604887168638, "learning_rate": 6.468397736292436e-06, "loss": 0.6766, "step": 4077 }, { "epoch": 0.43, "grad_norm": 1.7059609133270042, "learning_rate": 6.4667116687933385e-06, "loss": 0.5533, "step": 4078 }, { "epoch": 0.44, "grad_norm": 1.1832824753375506, "learning_rate": 6.465025418787419e-06, "loss": 0.5195, "step": 4079 }, { "epoch": 0.44, "grad_norm": 1.6066986171479363, "learning_rate": 6.4633389864845005e-06, "loss": 0.6821, "step": 4080 }, { "epoch": 0.44, "grad_norm": 2.0708166466483564, "learning_rate": 6.461652372094429e-06, "loss": 0.6596, "step": 4081 }, { "epoch": 0.44, "grad_norm": 1.1390283833360713, "learning_rate": 6.459965575827077e-06, "loss": 0.5312, "step": 4082 }, { "epoch": 0.44, "grad_norm": 1.7837558356146888, "learning_rate": 6.4582785978923355e-06, "loss": 0.739, "step": 4083 }, { "epoch": 0.44, "grad_norm": 1.6209667881813343, "learning_rate": 6.456591438500119e-06, "loss": 0.5305, "step": 4084 }, { "epoch": 0.44, "grad_norm": 1.5847252797146405, "learning_rate": 6.4549040978603675e-06, "loss": 0.5358, "step": 4085 }, { "epoch": 0.44, "grad_norm": 1.9251871492910733, "learning_rate": 6.45321657618304e-06, "loss": 0.7058, "step": 4086 }, { "epoch": 0.44, "grad_norm": 1.7196391531343973, "learning_rate": 6.451528873678118e-06, "loss": 0.6351, "step": 4087 }, { "epoch": 0.44, "grad_norm": 1.9962627110286102, "learning_rate": 6.449840990555608e-06, "loss": 0.7243, "step": 4088 }, { "epoch": 0.44, "grad_norm": 1.692489590336689, "learning_rate": 6.4481529270255384e-06, "loss": 0.633, "step": 4089 }, { "epoch": 0.44, "grad_norm": 1.9331515709881384, "learning_rate": 6.446464683297957e-06, "loss": 0.641, "step": 4090 }, { "epoch": 0.44, "grad_norm": 1.6711754909618235, "learning_rate": 6.444776259582939e-06, "loss": 0.6476, "step": 4091 }, { "epoch": 0.44, "grad_norm": 1.7974719850230576, "learning_rate": 6.4430876560905795e-06, "loss": 0.671, "step": 4092 }, { "epoch": 0.44, "grad_norm": 1.706512235376963, "learning_rate": 6.441398873030995e-06, "loss": 0.7021, "step": 4093 }, { "epoch": 0.44, "grad_norm": 1.4003071980185908, "learning_rate": 6.439709910614324e-06, "loss": 0.5409, "step": 4094 }, { "epoch": 0.44, "grad_norm": 1.8146214805275447, "learning_rate": 6.4380207690507325e-06, "loss": 0.6479, "step": 4095 }, { "epoch": 0.44, "grad_norm": 1.7609252768014232, "learning_rate": 6.436331448550399e-06, "loss": 0.6671, "step": 4096 }, { "epoch": 0.44, "grad_norm": 1.694313793724333, "learning_rate": 6.434641949323536e-06, "loss": 0.5648, "step": 4097 }, { "epoch": 0.44, "grad_norm": 1.7606637778272878, "learning_rate": 6.432952271580367e-06, "loss": 0.6308, "step": 4098 }, { "epoch": 0.44, "grad_norm": 1.646965811921561, "learning_rate": 6.4312624155311476e-06, "loss": 0.6049, "step": 4099 }, { "epoch": 0.44, "grad_norm": 1.7238512293911201, "learning_rate": 6.4295723813861495e-06, "loss": 0.681, "step": 4100 }, { "epoch": 0.44, "grad_norm": 1.2058425906176098, "learning_rate": 6.427882169355667e-06, "loss": 0.5363, "step": 4101 }, { "epoch": 0.44, "grad_norm": 1.6084977954709747, "learning_rate": 6.426191779650019e-06, "loss": 0.6427, "step": 4102 }, { "epoch": 0.44, "grad_norm": 1.9866543658904103, "learning_rate": 6.424501212479545e-06, "loss": 0.6575, "step": 4103 }, { "epoch": 0.44, "grad_norm": 1.7968879750524969, "learning_rate": 6.422810468054606e-06, "loss": 0.5826, "step": 4104 }, { "epoch": 0.44, "grad_norm": 1.1756831510207812, "learning_rate": 6.421119546585587e-06, "loss": 0.5586, "step": 4105 }, { "epoch": 0.44, "grad_norm": 1.637004328259263, "learning_rate": 6.419428448282893e-06, "loss": 0.5602, "step": 4106 }, { "epoch": 0.44, "grad_norm": 1.74395760189556, "learning_rate": 6.41773717335695e-06, "loss": 0.6594, "step": 4107 }, { "epoch": 0.44, "grad_norm": 1.1853184327809767, "learning_rate": 6.416045722018213e-06, "loss": 0.536, "step": 4108 }, { "epoch": 0.44, "grad_norm": 1.6007282344478404, "learning_rate": 6.4143540944771486e-06, "loss": 0.6519, "step": 4109 }, { "epoch": 0.44, "grad_norm": 1.7513819075906516, "learning_rate": 6.4126622909442535e-06, "loss": 0.6374, "step": 4110 }, { "epoch": 0.44, "grad_norm": 1.8134977746111636, "learning_rate": 6.410970311630041e-06, "loss": 0.595, "step": 4111 }, { "epoch": 0.44, "grad_norm": 1.6589635091686261, "learning_rate": 6.409278156745052e-06, "loss": 0.6576, "step": 4112 }, { "epoch": 0.44, "grad_norm": 1.201647924897854, "learning_rate": 6.407585826499842e-06, "loss": 0.5246, "step": 4113 }, { "epoch": 0.44, "grad_norm": 1.6456757247644582, "learning_rate": 6.4058933211049946e-06, "loss": 0.6402, "step": 4114 }, { "epoch": 0.44, "grad_norm": 1.7476052214069928, "learning_rate": 6.404200640771112e-06, "loss": 0.6254, "step": 4115 }, { "epoch": 0.44, "grad_norm": 1.8706141555005162, "learning_rate": 6.402507785708818e-06, "loss": 0.6364, "step": 4116 }, { "epoch": 0.44, "grad_norm": 1.9409912284319233, "learning_rate": 6.40081475612876e-06, "loss": 0.603, "step": 4117 }, { "epoch": 0.44, "grad_norm": 1.8713861174470618, "learning_rate": 6.399121552241607e-06, "loss": 0.6145, "step": 4118 }, { "epoch": 0.44, "grad_norm": 1.9367643245578832, "learning_rate": 6.397428174258048e-06, "loss": 0.655, "step": 4119 }, { "epoch": 0.44, "grad_norm": 1.7260230935728849, "learning_rate": 6.395734622388794e-06, "loss": 0.6562, "step": 4120 }, { "epoch": 0.44, "grad_norm": 1.7197249823114351, "learning_rate": 6.3940408968445785e-06, "loss": 0.6387, "step": 4121 }, { "epoch": 0.44, "grad_norm": 1.7937984178517756, "learning_rate": 6.392346997836158e-06, "loss": 0.6897, "step": 4122 }, { "epoch": 0.44, "grad_norm": 1.614904518566584, "learning_rate": 6.390652925574305e-06, "loss": 0.6324, "step": 4123 }, { "epoch": 0.44, "grad_norm": 1.7561446685308615, "learning_rate": 6.38895868026982e-06, "loss": 0.5725, "step": 4124 }, { "epoch": 0.44, "grad_norm": 1.9129054972293391, "learning_rate": 6.387264262133524e-06, "loss": 0.5432, "step": 4125 }, { "epoch": 0.44, "grad_norm": 1.6830134366372864, "learning_rate": 6.385569671376254e-06, "loss": 0.5739, "step": 4126 }, { "epoch": 0.44, "grad_norm": 1.8136659084047637, "learning_rate": 6.383874908208875e-06, "loss": 0.6083, "step": 4127 }, { "epoch": 0.44, "grad_norm": 1.824093587113685, "learning_rate": 6.3821799728422695e-06, "loss": 0.5363, "step": 4128 }, { "epoch": 0.44, "grad_norm": 1.7624092146757744, "learning_rate": 6.380484865487346e-06, "loss": 0.6504, "step": 4129 }, { "epoch": 0.44, "grad_norm": 1.8464863378428236, "learning_rate": 6.378789586355026e-06, "loss": 0.6685, "step": 4130 }, { "epoch": 0.44, "grad_norm": 1.849330450207544, "learning_rate": 6.377094135656262e-06, "loss": 0.6234, "step": 4131 }, { "epoch": 0.44, "grad_norm": 1.7129798269849115, "learning_rate": 6.375398513602021e-06, "loss": 0.6756, "step": 4132 }, { "epoch": 0.44, "grad_norm": 1.7696710866759127, "learning_rate": 6.3737027204032954e-06, "loss": 0.5746, "step": 4133 }, { "epoch": 0.44, "grad_norm": 1.5447682095113227, "learning_rate": 6.372006756271096e-06, "loss": 0.6166, "step": 4134 }, { "epoch": 0.44, "grad_norm": 2.099358628037538, "learning_rate": 6.3703106214164566e-06, "loss": 0.6829, "step": 4135 }, { "epoch": 0.44, "grad_norm": 1.6012090715590601, "learning_rate": 6.368614316050433e-06, "loss": 0.5156, "step": 4136 }, { "epoch": 0.44, "grad_norm": 1.812253474376114, "learning_rate": 6.366917840384098e-06, "loss": 0.6418, "step": 4137 }, { "epoch": 0.44, "grad_norm": 1.7160180423180875, "learning_rate": 6.36522119462855e-06, "loss": 0.6171, "step": 4138 }, { "epoch": 0.44, "grad_norm": 1.8109771586771075, "learning_rate": 6.363524378994907e-06, "loss": 0.7324, "step": 4139 }, { "epoch": 0.44, "grad_norm": 1.7020444147358975, "learning_rate": 6.361827393694311e-06, "loss": 0.5962, "step": 4140 }, { "epoch": 0.44, "grad_norm": 1.841272729551553, "learning_rate": 6.360130238937918e-06, "loss": 0.6536, "step": 4141 }, { "epoch": 0.44, "grad_norm": 1.3153466953913904, "learning_rate": 6.358432914936913e-06, "loss": 0.5286, "step": 4142 }, { "epoch": 0.44, "grad_norm": 2.0019574378175977, "learning_rate": 6.356735421902497e-06, "loss": 0.6535, "step": 4143 }, { "epoch": 0.44, "grad_norm": 1.750339095904173, "learning_rate": 6.355037760045892e-06, "loss": 0.6258, "step": 4144 }, { "epoch": 0.44, "grad_norm": 1.779906338278329, "learning_rate": 6.353339929578346e-06, "loss": 0.5856, "step": 4145 }, { "epoch": 0.44, "grad_norm": 1.7238015605790016, "learning_rate": 6.351641930711121e-06, "loss": 0.6392, "step": 4146 }, { "epoch": 0.44, "grad_norm": 1.1558945929423883, "learning_rate": 6.349943763655505e-06, "loss": 0.5431, "step": 4147 }, { "epoch": 0.44, "grad_norm": 1.7129676851009958, "learning_rate": 6.348245428622807e-06, "loss": 0.6792, "step": 4148 }, { "epoch": 0.44, "grad_norm": 1.8057122757955806, "learning_rate": 6.346546925824353e-06, "loss": 0.6293, "step": 4149 }, { "epoch": 0.44, "grad_norm": 1.9153616929692276, "learning_rate": 6.344848255471495e-06, "loss": 0.5603, "step": 4150 }, { "epoch": 0.44, "grad_norm": 1.6487688028054477, "learning_rate": 6.3431494177756004e-06, "loss": 0.6279, "step": 4151 }, { "epoch": 0.44, "grad_norm": 1.7416989727388428, "learning_rate": 6.3414504129480606e-06, "loss": 0.6282, "step": 4152 }, { "epoch": 0.44, "grad_norm": 1.930205543612649, "learning_rate": 6.339751241200287e-06, "loss": 0.6468, "step": 4153 }, { "epoch": 0.44, "grad_norm": 1.191764340789826, "learning_rate": 6.3380519027437124e-06, "loss": 0.5403, "step": 4154 }, { "epoch": 0.44, "grad_norm": 1.7070306313197587, "learning_rate": 6.33635239778979e-06, "loss": 0.6697, "step": 4155 }, { "epoch": 0.44, "grad_norm": 1.8548897075693178, "learning_rate": 6.334652726549995e-06, "loss": 0.6329, "step": 4156 }, { "epoch": 0.44, "grad_norm": 1.1518717639751628, "learning_rate": 6.3329528892358215e-06, "loss": 0.5337, "step": 4157 }, { "epoch": 0.44, "grad_norm": 1.7142665852521792, "learning_rate": 6.331252886058784e-06, "loss": 0.6478, "step": 4158 }, { "epoch": 0.44, "grad_norm": 1.8568390966175372, "learning_rate": 6.329552717230418e-06, "loss": 0.7221, "step": 4159 }, { "epoch": 0.44, "grad_norm": 1.9832145140235775, "learning_rate": 6.3278523829622805e-06, "loss": 0.6079, "step": 4160 }, { "epoch": 0.44, "grad_norm": 1.825044436499352, "learning_rate": 6.32615188346595e-06, "loss": 0.632, "step": 4161 }, { "epoch": 0.44, "grad_norm": 1.7976297259874345, "learning_rate": 6.324451218953021e-06, "loss": 0.6547, "step": 4162 }, { "epoch": 0.44, "grad_norm": 1.8686133103399734, "learning_rate": 6.322750389635114e-06, "loss": 0.6165, "step": 4163 }, { "epoch": 0.44, "grad_norm": 1.8881777098927384, "learning_rate": 6.321049395723867e-06, "loss": 0.6372, "step": 4164 }, { "epoch": 0.44, "grad_norm": 1.636324038344042, "learning_rate": 6.319348237430943e-06, "loss": 0.6252, "step": 4165 }, { "epoch": 0.44, "grad_norm": 1.7939236988747722, "learning_rate": 6.317646914968014e-06, "loss": 0.6366, "step": 4166 }, { "epoch": 0.44, "grad_norm": 1.9494995000916315, "learning_rate": 6.315945428546786e-06, "loss": 0.6487, "step": 4167 }, { "epoch": 0.44, "grad_norm": 1.8007210397860889, "learning_rate": 6.314243778378977e-06, "loss": 0.6521, "step": 4168 }, { "epoch": 0.44, "grad_norm": 1.2727532766516259, "learning_rate": 6.312541964676329e-06, "loss": 0.5154, "step": 4169 }, { "epoch": 0.44, "grad_norm": 1.654838279520154, "learning_rate": 6.310839987650602e-06, "loss": 0.5915, "step": 4170 }, { "epoch": 0.44, "grad_norm": 1.7384879013072492, "learning_rate": 6.309137847513579e-06, "loss": 0.6027, "step": 4171 }, { "epoch": 0.45, "grad_norm": 1.5885606875857479, "learning_rate": 6.307435544477061e-06, "loss": 0.5391, "step": 4172 }, { "epoch": 0.45, "grad_norm": 1.719901094783486, "learning_rate": 6.3057330787528695e-06, "loss": 0.6465, "step": 4173 }, { "epoch": 0.45, "grad_norm": 1.739530784589461, "learning_rate": 6.304030450552847e-06, "loss": 0.604, "step": 4174 }, { "epoch": 0.45, "grad_norm": 1.7333108264837687, "learning_rate": 6.302327660088859e-06, "loss": 0.6308, "step": 4175 }, { "epoch": 0.45, "grad_norm": 2.0390882267927837, "learning_rate": 6.3006247075727825e-06, "loss": 0.6923, "step": 4176 }, { "epoch": 0.45, "grad_norm": 1.8345029898280025, "learning_rate": 6.298921593216525e-06, "loss": 0.6225, "step": 4177 }, { "epoch": 0.45, "grad_norm": 1.7996642218427252, "learning_rate": 6.297218317232008e-06, "loss": 0.755, "step": 4178 }, { "epoch": 0.45, "grad_norm": 1.6041150177185013, "learning_rate": 6.295514879831175e-06, "loss": 0.5704, "step": 4179 }, { "epoch": 0.45, "grad_norm": 1.8583681911761896, "learning_rate": 6.29381128122599e-06, "loss": 0.629, "step": 4180 }, { "epoch": 0.45, "grad_norm": 1.882372764317337, "learning_rate": 6.292107521628434e-06, "loss": 0.6841, "step": 4181 }, { "epoch": 0.45, "grad_norm": 1.8769816019442003, "learning_rate": 6.290403601250512e-06, "loss": 0.6502, "step": 4182 }, { "epoch": 0.45, "grad_norm": 1.6715829874134043, "learning_rate": 6.288699520304247e-06, "loss": 0.642, "step": 4183 }, { "epoch": 0.45, "grad_norm": 1.9680766470061795, "learning_rate": 6.286995279001682e-06, "loss": 0.6891, "step": 4184 }, { "epoch": 0.45, "grad_norm": 1.7464690576582114, "learning_rate": 6.285290877554881e-06, "loss": 0.585, "step": 4185 }, { "epoch": 0.45, "grad_norm": 1.7647788765571808, "learning_rate": 6.2835863161759256e-06, "loss": 0.6439, "step": 4186 }, { "epoch": 0.45, "grad_norm": 1.789472704663578, "learning_rate": 6.281881595076922e-06, "loss": 0.5714, "step": 4187 }, { "epoch": 0.45, "grad_norm": 1.8369361250657659, "learning_rate": 6.28017671446999e-06, "loss": 0.6135, "step": 4188 }, { "epoch": 0.45, "grad_norm": 1.6299487926069551, "learning_rate": 6.278471674567274e-06, "loss": 0.695, "step": 4189 }, { "epoch": 0.45, "grad_norm": 2.0142011666413437, "learning_rate": 6.276766475580935e-06, "loss": 0.6684, "step": 4190 }, { "epoch": 0.45, "grad_norm": 1.4799006188533956, "learning_rate": 6.275061117723158e-06, "loss": 0.5326, "step": 4191 }, { "epoch": 0.45, "grad_norm": 1.8431016350004994, "learning_rate": 6.273355601206143e-06, "loss": 0.6224, "step": 4192 }, { "epoch": 0.45, "grad_norm": 1.7547920028932087, "learning_rate": 6.2716499262421145e-06, "loss": 0.6342, "step": 4193 }, { "epoch": 0.45, "grad_norm": 1.8393354676471152, "learning_rate": 6.269944093043313e-06, "loss": 0.6518, "step": 4194 }, { "epoch": 0.45, "grad_norm": 1.7663266893457306, "learning_rate": 6.268238101821998e-06, "loss": 0.6544, "step": 4195 }, { "epoch": 0.45, "grad_norm": 1.8611865612061418, "learning_rate": 6.266531952790451e-06, "loss": 0.6101, "step": 4196 }, { "epoch": 0.45, "grad_norm": 1.7903657427963529, "learning_rate": 6.264825646160973e-06, "loss": 0.6226, "step": 4197 }, { "epoch": 0.45, "grad_norm": 1.8846552327333161, "learning_rate": 6.263119182145887e-06, "loss": 0.6673, "step": 4198 }, { "epoch": 0.45, "grad_norm": 1.984022542997776, "learning_rate": 6.261412560957529e-06, "loss": 0.717, "step": 4199 }, { "epoch": 0.45, "grad_norm": 1.3378434459067607, "learning_rate": 6.259705782808262e-06, "loss": 0.541, "step": 4200 }, { "epoch": 0.45, "grad_norm": 1.726847809557121, "learning_rate": 6.257998847910463e-06, "loss": 0.6469, "step": 4201 }, { "epoch": 0.45, "grad_norm": 1.7162644105715235, "learning_rate": 6.256291756476529e-06, "loss": 0.6441, "step": 4202 }, { "epoch": 0.45, "grad_norm": 1.687408321782581, "learning_rate": 6.254584508718877e-06, "loss": 0.5749, "step": 4203 }, { "epoch": 0.45, "grad_norm": 1.8161267685817757, "learning_rate": 6.25287710484995e-06, "loss": 0.5918, "step": 4204 }, { "epoch": 0.45, "grad_norm": 1.7642208528580294, "learning_rate": 6.251169545082201e-06, "loss": 0.6188, "step": 4205 }, { "epoch": 0.45, "grad_norm": 1.8148484785605752, "learning_rate": 6.249461829628105e-06, "loss": 0.6528, "step": 4206 }, { "epoch": 0.45, "grad_norm": 1.7161288382866757, "learning_rate": 6.2477539587001614e-06, "loss": 0.6389, "step": 4207 }, { "epoch": 0.45, "grad_norm": 2.128779727932897, "learning_rate": 6.246045932510882e-06, "loss": 0.6925, "step": 4208 }, { "epoch": 0.45, "grad_norm": 1.6066735456420467, "learning_rate": 6.244337751272803e-06, "loss": 0.5876, "step": 4209 }, { "epoch": 0.45, "grad_norm": 1.777009946828742, "learning_rate": 6.242629415198476e-06, "loss": 0.6481, "step": 4210 }, { "epoch": 0.45, "grad_norm": 1.7635042072375309, "learning_rate": 6.2409209245004736e-06, "loss": 0.5667, "step": 4211 }, { "epoch": 0.45, "grad_norm": 1.4944640957673574, "learning_rate": 6.239212279391389e-06, "loss": 0.5232, "step": 4212 }, { "epoch": 0.45, "grad_norm": 1.2586875984768797, "learning_rate": 6.237503480083834e-06, "loss": 0.5468, "step": 4213 }, { "epoch": 0.45, "grad_norm": 1.6628388671037304, "learning_rate": 6.235794526790439e-06, "loss": 0.5743, "step": 4214 }, { "epoch": 0.45, "grad_norm": 1.9894792119220595, "learning_rate": 6.234085419723853e-06, "loss": 0.694, "step": 4215 }, { "epoch": 0.45, "grad_norm": 1.6398599621189844, "learning_rate": 6.232376159096745e-06, "loss": 0.5886, "step": 4216 }, { "epoch": 0.45, "grad_norm": 1.7592313369891053, "learning_rate": 6.230666745121802e-06, "loss": 0.6123, "step": 4217 }, { "epoch": 0.45, "grad_norm": 1.6986367102922983, "learning_rate": 6.228957178011731e-06, "loss": 0.5751, "step": 4218 }, { "epoch": 0.45, "grad_norm": 1.9268200948965413, "learning_rate": 6.227247457979259e-06, "loss": 0.5315, "step": 4219 }, { "epoch": 0.45, "grad_norm": 1.6771975279463065, "learning_rate": 6.225537585237128e-06, "loss": 0.6578, "step": 4220 }, { "epoch": 0.45, "grad_norm": 1.7103246391973006, "learning_rate": 6.2238275599981065e-06, "loss": 0.6145, "step": 4221 }, { "epoch": 0.45, "grad_norm": 1.8072123302734575, "learning_rate": 6.222117382474977e-06, "loss": 0.665, "step": 4222 }, { "epoch": 0.45, "grad_norm": 1.6901310256463657, "learning_rate": 6.2204070528805385e-06, "loss": 0.6095, "step": 4223 }, { "epoch": 0.45, "grad_norm": 1.780426644605024, "learning_rate": 6.2186965714276125e-06, "loss": 0.6914, "step": 4224 }, { "epoch": 0.45, "grad_norm": 1.9657763736384761, "learning_rate": 6.21698593832904e-06, "loss": 0.6687, "step": 4225 }, { "epoch": 0.45, "grad_norm": 1.8971220979505465, "learning_rate": 6.2152751537976784e-06, "loss": 0.7174, "step": 4226 }, { "epoch": 0.45, "grad_norm": 1.6757761219731258, "learning_rate": 6.213564218046406e-06, "loss": 0.6411, "step": 4227 }, { "epoch": 0.45, "grad_norm": 2.131798414934923, "learning_rate": 6.211853131288118e-06, "loss": 0.6873, "step": 4228 }, { "epoch": 0.45, "grad_norm": 1.8686239731780114, "learning_rate": 6.210141893735733e-06, "loss": 0.6859, "step": 4229 }, { "epoch": 0.45, "grad_norm": 1.7874546274315486, "learning_rate": 6.20843050560218e-06, "loss": 0.6533, "step": 4230 }, { "epoch": 0.45, "grad_norm": 1.774028245845445, "learning_rate": 6.206718967100413e-06, "loss": 0.5985, "step": 4231 }, { "epoch": 0.45, "grad_norm": 1.8865544096200877, "learning_rate": 6.205007278443406e-06, "loss": 0.6008, "step": 4232 }, { "epoch": 0.45, "grad_norm": 2.295854627568315, "learning_rate": 6.203295439844146e-06, "loss": 0.5766, "step": 4233 }, { "epoch": 0.45, "grad_norm": 1.6981766633304656, "learning_rate": 6.201583451515643e-06, "loss": 0.6121, "step": 4234 }, { "epoch": 0.45, "grad_norm": 1.7455400726407435, "learning_rate": 6.199871313670923e-06, "loss": 0.5876, "step": 4235 }, { "epoch": 0.45, "grad_norm": 1.7633199695843262, "learning_rate": 6.198159026523034e-06, "loss": 0.6096, "step": 4236 }, { "epoch": 0.45, "grad_norm": 1.819193722052934, "learning_rate": 6.196446590285038e-06, "loss": 0.6699, "step": 4237 }, { "epoch": 0.45, "grad_norm": 2.4222798262437366, "learning_rate": 6.194734005170019e-06, "loss": 0.5759, "step": 4238 }, { "epoch": 0.45, "grad_norm": 2.1172735475359676, "learning_rate": 6.19302127139108e-06, "loss": 0.5554, "step": 4239 }, { "epoch": 0.45, "grad_norm": 1.499725104887345, "learning_rate": 6.191308389161338e-06, "loss": 0.534, "step": 4240 }, { "epoch": 0.45, "grad_norm": 1.183341590826746, "learning_rate": 6.189595358693934e-06, "loss": 0.5236, "step": 4241 }, { "epoch": 0.45, "grad_norm": 1.7924470967676465, "learning_rate": 6.187882180202023e-06, "loss": 0.5959, "step": 4242 }, { "epoch": 0.45, "grad_norm": 1.930367127761807, "learning_rate": 6.186168853898782e-06, "loss": 0.5434, "step": 4243 }, { "epoch": 0.45, "grad_norm": 2.065000312909294, "learning_rate": 6.184455379997404e-06, "loss": 0.7024, "step": 4244 }, { "epoch": 0.45, "grad_norm": 1.8572278330390022, "learning_rate": 6.182741758711101e-06, "loss": 0.58, "step": 4245 }, { "epoch": 0.45, "grad_norm": 1.7109300877516838, "learning_rate": 6.181027990253104e-06, "loss": 0.6094, "step": 4246 }, { "epoch": 0.45, "grad_norm": 1.9913811980698164, "learning_rate": 6.179314074836662e-06, "loss": 0.6398, "step": 4247 }, { "epoch": 0.45, "grad_norm": 2.6972861125507115, "learning_rate": 6.17760001267504e-06, "loss": 0.5974, "step": 4248 }, { "epoch": 0.45, "grad_norm": 1.8035338348102665, "learning_rate": 6.175885803981526e-06, "loss": 0.6928, "step": 4249 }, { "epoch": 0.45, "grad_norm": 1.695230323712258, "learning_rate": 6.1741714489694215e-06, "loss": 0.6173, "step": 4250 }, { "epoch": 0.45, "grad_norm": 1.8668505166370741, "learning_rate": 6.1724569478520495e-06, "loss": 0.6736, "step": 4251 }, { "epoch": 0.45, "grad_norm": 1.7659266021828688, "learning_rate": 6.170742300842749e-06, "loss": 0.5253, "step": 4252 }, { "epoch": 0.45, "grad_norm": 1.790244542955612, "learning_rate": 6.169027508154879e-06, "loss": 0.6568, "step": 4253 }, { "epoch": 0.45, "grad_norm": 1.7840743874008775, "learning_rate": 6.167312570001814e-06, "loss": 0.6315, "step": 4254 }, { "epoch": 0.45, "grad_norm": 1.7321588910394963, "learning_rate": 6.16559748659695e-06, "loss": 0.6189, "step": 4255 }, { "epoch": 0.45, "grad_norm": 1.2950653876331655, "learning_rate": 6.1638822581537e-06, "loss": 0.5357, "step": 4256 }, { "epoch": 0.45, "grad_norm": 1.770337495488442, "learning_rate": 6.162166884885492e-06, "loss": 0.6096, "step": 4257 }, { "epoch": 0.45, "grad_norm": 1.94738830310699, "learning_rate": 6.160451367005778e-06, "loss": 0.6368, "step": 4258 }, { "epoch": 0.45, "grad_norm": 1.8434371351184549, "learning_rate": 6.1587357047280205e-06, "loss": 0.7304, "step": 4259 }, { "epoch": 0.45, "grad_norm": 1.6756447384716187, "learning_rate": 6.1570198982657046e-06, "loss": 0.6982, "step": 4260 }, { "epoch": 0.45, "grad_norm": 1.9406002689791986, "learning_rate": 6.1553039478323354e-06, "loss": 0.5636, "step": 4261 }, { "epoch": 0.45, "grad_norm": 1.849186217451762, "learning_rate": 6.15358785364143e-06, "loss": 0.6191, "step": 4262 }, { "epoch": 0.45, "grad_norm": 1.7183888937429104, "learning_rate": 6.151871615906528e-06, "loss": 0.5607, "step": 4263 }, { "epoch": 0.45, "grad_norm": 1.6529243464431502, "learning_rate": 6.150155234841187e-06, "loss": 0.5963, "step": 4264 }, { "epoch": 0.45, "grad_norm": 1.339934042105181, "learning_rate": 6.148438710658979e-06, "loss": 0.5187, "step": 4265 }, { "epoch": 0.46, "grad_norm": 1.8878457371133632, "learning_rate": 6.146722043573495e-06, "loss": 0.6543, "step": 4266 }, { "epoch": 0.46, "grad_norm": 1.227841628472483, "learning_rate": 6.145005233798346e-06, "loss": 0.527, "step": 4267 }, { "epoch": 0.46, "grad_norm": 1.237308734764134, "learning_rate": 6.143288281547157e-06, "loss": 0.5327, "step": 4268 }, { "epoch": 0.46, "grad_norm": 1.7439124113166682, "learning_rate": 6.141571187033575e-06, "loss": 0.6509, "step": 4269 }, { "epoch": 0.46, "grad_norm": 1.8608777554794784, "learning_rate": 6.139853950471263e-06, "loss": 0.5943, "step": 4270 }, { "epoch": 0.46, "grad_norm": 1.9209383951487637, "learning_rate": 6.138136572073899e-06, "loss": 0.618, "step": 4271 }, { "epoch": 0.46, "grad_norm": 1.9674886779283645, "learning_rate": 6.1364190520551845e-06, "loss": 0.6288, "step": 4272 }, { "epoch": 0.46, "grad_norm": 1.5951115527595592, "learning_rate": 6.134701390628832e-06, "loss": 0.5303, "step": 4273 }, { "epoch": 0.46, "grad_norm": 1.800230531607058, "learning_rate": 6.132983588008575e-06, "loss": 0.6592, "step": 4274 }, { "epoch": 0.46, "grad_norm": 1.8028649593145731, "learning_rate": 6.131265644408165e-06, "loss": 0.7212, "step": 4275 }, { "epoch": 0.46, "grad_norm": 1.7063448722514392, "learning_rate": 6.129547560041371e-06, "loss": 0.6664, "step": 4276 }, { "epoch": 0.46, "grad_norm": 1.9030955916030585, "learning_rate": 6.1278293351219755e-06, "loss": 0.6706, "step": 4277 }, { "epoch": 0.46, "grad_norm": 1.8442141442855107, "learning_rate": 6.1261109698637855e-06, "loss": 0.6834, "step": 4278 }, { "epoch": 0.46, "grad_norm": 1.7424017903387738, "learning_rate": 6.124392464480621e-06, "loss": 0.65, "step": 4279 }, { "epoch": 0.46, "grad_norm": 1.7795110314849705, "learning_rate": 6.12267381918632e-06, "loss": 0.6675, "step": 4280 }, { "epoch": 0.46, "grad_norm": 1.578256919580035, "learning_rate": 6.120955034194737e-06, "loss": 0.6546, "step": 4281 }, { "epoch": 0.46, "grad_norm": 1.8483058223902327, "learning_rate": 6.119236109719745e-06, "loss": 0.568, "step": 4282 }, { "epoch": 0.46, "grad_norm": 1.770494643662924, "learning_rate": 6.117517045975237e-06, "loss": 0.6553, "step": 4283 }, { "epoch": 0.46, "grad_norm": 1.6067791649330745, "learning_rate": 6.115797843175116e-06, "loss": 0.5879, "step": 4284 }, { "epoch": 0.46, "grad_norm": 1.7604141515129534, "learning_rate": 6.114078501533309e-06, "loss": 0.7132, "step": 4285 }, { "epoch": 0.46, "grad_norm": 1.9270825261141835, "learning_rate": 6.1123590212637614e-06, "loss": 0.5387, "step": 4286 }, { "epoch": 0.46, "grad_norm": 1.693107203979896, "learning_rate": 6.11063940258043e-06, "loss": 0.6928, "step": 4287 }, { "epoch": 0.46, "grad_norm": 2.3353374248418435, "learning_rate": 6.108919645697291e-06, "loss": 0.6803, "step": 4288 }, { "epoch": 0.46, "grad_norm": 2.4375332517413364, "learning_rate": 6.107199750828339e-06, "loss": 0.6348, "step": 4289 }, { "epoch": 0.46, "grad_norm": 1.581420436778529, "learning_rate": 6.105479718187586e-06, "loss": 0.5791, "step": 4290 }, { "epoch": 0.46, "grad_norm": 1.6501533045403098, "learning_rate": 6.103759547989057e-06, "loss": 0.6396, "step": 4291 }, { "epoch": 0.46, "grad_norm": 1.7448828645816619, "learning_rate": 6.1020392404468025e-06, "loss": 0.6618, "step": 4292 }, { "epoch": 0.46, "grad_norm": 1.7815776731546036, "learning_rate": 6.100318795774879e-06, "loss": 0.6325, "step": 4293 }, { "epoch": 0.46, "grad_norm": 1.6603789357026169, "learning_rate": 6.098598214187371e-06, "loss": 0.5776, "step": 4294 }, { "epoch": 0.46, "grad_norm": 1.8597530433586573, "learning_rate": 6.096877495898373e-06, "loss": 0.6869, "step": 4295 }, { "epoch": 0.46, "grad_norm": 1.4392242252859444, "learning_rate": 6.0951566411219976e-06, "loss": 0.5113, "step": 4296 }, { "epoch": 0.46, "grad_norm": 1.7406488454416136, "learning_rate": 6.093435650072377e-06, "loss": 0.6521, "step": 4297 }, { "epoch": 0.46, "grad_norm": 1.6172957902458855, "learning_rate": 6.091714522963659e-06, "loss": 0.5904, "step": 4298 }, { "epoch": 0.46, "grad_norm": 1.8823623999748227, "learning_rate": 6.089993260010005e-06, "loss": 0.7208, "step": 4299 }, { "epoch": 0.46, "grad_norm": 1.8666620444922115, "learning_rate": 6.0882718614255994e-06, "loss": 0.7328, "step": 4300 }, { "epoch": 0.46, "grad_norm": 1.8545240876057942, "learning_rate": 6.08655032742464e-06, "loss": 0.6828, "step": 4301 }, { "epoch": 0.46, "grad_norm": 1.8190808575356918, "learning_rate": 6.08482865822134e-06, "loss": 0.782, "step": 4302 }, { "epoch": 0.46, "grad_norm": 1.797262983836167, "learning_rate": 6.083106854029933e-06, "loss": 0.6704, "step": 4303 }, { "epoch": 0.46, "grad_norm": 2.282489421985665, "learning_rate": 6.081384915064668e-06, "loss": 0.6386, "step": 4304 }, { "epoch": 0.46, "grad_norm": 1.6669422454519596, "learning_rate": 6.079662841539807e-06, "loss": 0.7368, "step": 4305 }, { "epoch": 0.46, "grad_norm": 1.8039202931568636, "learning_rate": 6.0779406336696365e-06, "loss": 0.6645, "step": 4306 }, { "epoch": 0.46, "grad_norm": 1.725647016587622, "learning_rate": 6.076218291668455e-06, "loss": 0.5862, "step": 4307 }, { "epoch": 0.46, "grad_norm": 1.6956814164233567, "learning_rate": 6.074495815750574e-06, "loss": 0.5679, "step": 4308 }, { "epoch": 0.46, "grad_norm": 1.2431254050039486, "learning_rate": 6.072773206130329e-06, "loss": 0.5297, "step": 4309 }, { "epoch": 0.46, "grad_norm": 1.6946005816917173, "learning_rate": 6.071050463022069e-06, "loss": 0.618, "step": 4310 }, { "epoch": 0.46, "grad_norm": 1.6906275099778107, "learning_rate": 6.069327586640159e-06, "loss": 0.6021, "step": 4311 }, { "epoch": 0.46, "grad_norm": 1.664833186114009, "learning_rate": 6.067604577198981e-06, "loss": 0.682, "step": 4312 }, { "epoch": 0.46, "grad_norm": 1.819287232355401, "learning_rate": 6.065881434912932e-06, "loss": 0.5591, "step": 4313 }, { "epoch": 0.46, "grad_norm": 1.6633599291622827, "learning_rate": 6.064158159996431e-06, "loss": 0.5838, "step": 4314 }, { "epoch": 0.46, "grad_norm": 1.6746992613279046, "learning_rate": 6.062434752663906e-06, "loss": 0.6248, "step": 4315 }, { "epoch": 0.46, "grad_norm": 1.6257029663367195, "learning_rate": 6.060711213129807e-06, "loss": 0.6162, "step": 4316 }, { "epoch": 0.46, "grad_norm": 2.8648567885028937, "learning_rate": 6.058987541608598e-06, "loss": 0.6346, "step": 4317 }, { "epoch": 0.46, "grad_norm": 1.211831099762258, "learning_rate": 6.05726373831476e-06, "loss": 0.5192, "step": 4318 }, { "epoch": 0.46, "grad_norm": 1.6471288662627586, "learning_rate": 6.055539803462791e-06, "loss": 0.6187, "step": 4319 }, { "epoch": 0.46, "grad_norm": 1.5755702866889754, "learning_rate": 6.0538157372672035e-06, "loss": 0.5856, "step": 4320 }, { "epoch": 0.46, "grad_norm": 1.8932391917258802, "learning_rate": 6.052091539942528e-06, "loss": 0.608, "step": 4321 }, { "epoch": 0.46, "grad_norm": 1.8229143492440014, "learning_rate": 6.050367211703313e-06, "loss": 0.6596, "step": 4322 }, { "epoch": 0.46, "grad_norm": 1.6923276105621408, "learning_rate": 6.04864275276412e-06, "loss": 0.6063, "step": 4323 }, { "epoch": 0.46, "grad_norm": 1.9236038925215082, "learning_rate": 6.046918163339527e-06, "loss": 0.6333, "step": 4324 }, { "epoch": 0.46, "grad_norm": 1.6608955988924465, "learning_rate": 6.045193443644128e-06, "loss": 0.5614, "step": 4325 }, { "epoch": 0.46, "grad_norm": 1.683021874893821, "learning_rate": 6.043468593892538e-06, "loss": 0.63, "step": 4326 }, { "epoch": 0.46, "grad_norm": 1.3032993927679053, "learning_rate": 6.041743614299382e-06, "loss": 0.5371, "step": 4327 }, { "epoch": 0.46, "grad_norm": 1.153482604776913, "learning_rate": 6.0400185050793055e-06, "loss": 0.5401, "step": 4328 }, { "epoch": 0.46, "grad_norm": 1.7734500016103298, "learning_rate": 6.0382932664469685e-06, "loss": 0.7425, "step": 4329 }, { "epoch": 0.46, "grad_norm": 1.8083793394472731, "learning_rate": 6.036567898617046e-06, "loss": 0.6538, "step": 4330 }, { "epoch": 0.46, "grad_norm": 1.7114333794010272, "learning_rate": 6.034842401804229e-06, "loss": 0.6252, "step": 4331 }, { "epoch": 0.46, "grad_norm": 1.7499814160189056, "learning_rate": 6.033116776223229e-06, "loss": 0.6716, "step": 4332 }, { "epoch": 0.46, "grad_norm": 1.6687076775791003, "learning_rate": 6.031391022088765e-06, "loss": 0.6066, "step": 4333 }, { "epoch": 0.46, "grad_norm": 1.795054796101648, "learning_rate": 6.029665139615583e-06, "loss": 0.6826, "step": 4334 }, { "epoch": 0.46, "grad_norm": 1.738411729413347, "learning_rate": 6.027939129018434e-06, "loss": 0.6149, "step": 4335 }, { "epoch": 0.46, "grad_norm": 1.8153946905069656, "learning_rate": 6.026212990512095e-06, "loss": 0.6248, "step": 4336 }, { "epoch": 0.46, "grad_norm": 1.5691622665569744, "learning_rate": 6.02448672431135e-06, "loss": 0.5405, "step": 4337 }, { "epoch": 0.46, "grad_norm": 1.631342616647183, "learning_rate": 6.022760330631006e-06, "loss": 0.6522, "step": 4338 }, { "epoch": 0.46, "grad_norm": 1.8936798869412599, "learning_rate": 6.021033809685879e-06, "loss": 0.6385, "step": 4339 }, { "epoch": 0.46, "grad_norm": 1.6961027023021833, "learning_rate": 6.019307161690807e-06, "loss": 0.626, "step": 4340 }, { "epoch": 0.46, "grad_norm": 1.6333337953514684, "learning_rate": 6.017580386860642e-06, "loss": 0.5345, "step": 4341 }, { "epoch": 0.46, "grad_norm": 1.7646134061827745, "learning_rate": 6.015853485410248e-06, "loss": 0.7079, "step": 4342 }, { "epoch": 0.46, "grad_norm": 1.6404782873699275, "learning_rate": 6.014126457554512e-06, "loss": 0.6376, "step": 4343 }, { "epoch": 0.46, "grad_norm": 2.0515714701520738, "learning_rate": 6.0123993035083315e-06, "loss": 0.612, "step": 4344 }, { "epoch": 0.46, "grad_norm": 2.0406050440508445, "learning_rate": 6.01067202348662e-06, "loss": 0.5965, "step": 4345 }, { "epoch": 0.46, "grad_norm": 1.7886207876826934, "learning_rate": 6.008944617704308e-06, "loss": 0.5512, "step": 4346 }, { "epoch": 0.46, "grad_norm": 1.7811076668236416, "learning_rate": 6.007217086376339e-06, "loss": 0.6316, "step": 4347 }, { "epoch": 0.46, "grad_norm": 1.878335032623541, "learning_rate": 6.005489429717677e-06, "loss": 0.6682, "step": 4348 }, { "epoch": 0.46, "grad_norm": 1.530119717976989, "learning_rate": 6.003761647943298e-06, "loss": 0.5404, "step": 4349 }, { "epoch": 0.46, "grad_norm": 1.7148276113027734, "learning_rate": 6.002033741268194e-06, "loss": 0.7284, "step": 4350 }, { "epoch": 0.46, "grad_norm": 1.950779315796316, "learning_rate": 6.000305709907375e-06, "loss": 0.6804, "step": 4351 }, { "epoch": 0.46, "grad_norm": 1.7003966625533866, "learning_rate": 5.998577554075864e-06, "loss": 0.6339, "step": 4352 }, { "epoch": 0.46, "grad_norm": 1.279169981281365, "learning_rate": 5.996849273988698e-06, "loss": 0.5495, "step": 4353 }, { "epoch": 0.46, "grad_norm": 1.8331137624990446, "learning_rate": 5.995120869860932e-06, "loss": 0.6496, "step": 4354 }, { "epoch": 0.46, "grad_norm": 1.7409980697794083, "learning_rate": 5.993392341907637e-06, "loss": 0.5846, "step": 4355 }, { "epoch": 0.46, "grad_norm": 1.6416709954964772, "learning_rate": 5.991663690343897e-06, "loss": 0.6757, "step": 4356 }, { "epoch": 0.46, "grad_norm": 1.6088721619716866, "learning_rate": 5.9899349153848144e-06, "loss": 0.5853, "step": 4357 }, { "epoch": 0.46, "grad_norm": 1.9511627131841993, "learning_rate": 5.988206017245504e-06, "loss": 0.6193, "step": 4358 }, { "epoch": 0.46, "grad_norm": 1.880671460218812, "learning_rate": 5.986476996141097e-06, "loss": 0.5821, "step": 4359 }, { "epoch": 0.47, "grad_norm": 1.2542475612213086, "learning_rate": 5.984747852286742e-06, "loss": 0.5262, "step": 4360 }, { "epoch": 0.47, "grad_norm": 1.5088823350572371, "learning_rate": 5.983018585897598e-06, "loss": 0.5428, "step": 4361 }, { "epoch": 0.47, "grad_norm": 1.7936362842764046, "learning_rate": 5.9812891971888455e-06, "loss": 0.6126, "step": 4362 }, { "epoch": 0.47, "grad_norm": 1.8173125974168927, "learning_rate": 5.979559686375674e-06, "loss": 0.6582, "step": 4363 }, { "epoch": 0.47, "grad_norm": 1.9328322038225427, "learning_rate": 5.977830053673294e-06, "loss": 0.7222, "step": 4364 }, { "epoch": 0.47, "grad_norm": 2.043529016098139, "learning_rate": 5.9761002992969255e-06, "loss": 0.6259, "step": 4365 }, { "epoch": 0.47, "grad_norm": 1.8054701654149488, "learning_rate": 5.974370423461807e-06, "loss": 0.6411, "step": 4366 }, { "epoch": 0.47, "grad_norm": 1.777257065552328, "learning_rate": 5.972640426383193e-06, "loss": 0.6578, "step": 4367 }, { "epoch": 0.47, "grad_norm": 1.9625210217377491, "learning_rate": 5.97091030827635e-06, "loss": 0.6832, "step": 4368 }, { "epoch": 0.47, "grad_norm": 2.3593609232925274, "learning_rate": 5.969180069356563e-06, "loss": 0.676, "step": 4369 }, { "epoch": 0.47, "grad_norm": 1.615559500809526, "learning_rate": 5.967449709839128e-06, "loss": 0.5404, "step": 4370 }, { "epoch": 0.47, "grad_norm": 1.2167208567710455, "learning_rate": 5.965719229939359e-06, "loss": 0.5137, "step": 4371 }, { "epoch": 0.47, "grad_norm": 1.9203668082804972, "learning_rate": 5.9639886298725855e-06, "loss": 0.7313, "step": 4372 }, { "epoch": 0.47, "grad_norm": 2.180654830116161, "learning_rate": 5.96225790985415e-06, "loss": 0.639, "step": 4373 }, { "epoch": 0.47, "grad_norm": 1.5928453803345755, "learning_rate": 5.96052707009941e-06, "loss": 0.5838, "step": 4374 }, { "epoch": 0.47, "grad_norm": 2.0088039730630967, "learning_rate": 5.958796110823739e-06, "loss": 0.7036, "step": 4375 }, { "epoch": 0.47, "grad_norm": 1.8896849257227957, "learning_rate": 5.957065032242524e-06, "loss": 0.6948, "step": 4376 }, { "epoch": 0.47, "grad_norm": 1.7404595953319757, "learning_rate": 5.955333834571169e-06, "loss": 0.5805, "step": 4377 }, { "epoch": 0.47, "grad_norm": 1.8980679815351231, "learning_rate": 5.95360251802509e-06, "loss": 0.5851, "step": 4378 }, { "epoch": 0.47, "grad_norm": 1.6074682540141259, "learning_rate": 5.951871082819721e-06, "loss": 0.5424, "step": 4379 }, { "epoch": 0.47, "grad_norm": 1.2131239838921497, "learning_rate": 5.950139529170509e-06, "loss": 0.5231, "step": 4380 }, { "epoch": 0.47, "grad_norm": 1.863771295940012, "learning_rate": 5.948407857292914e-06, "loss": 0.5535, "step": 4381 }, { "epoch": 0.47, "grad_norm": 1.1702429024597023, "learning_rate": 5.9466760674024125e-06, "loss": 0.5196, "step": 4382 }, { "epoch": 0.47, "grad_norm": 1.7757614719113752, "learning_rate": 5.944944159714497e-06, "loss": 0.6292, "step": 4383 }, { "epoch": 0.47, "grad_norm": 1.6779090058946846, "learning_rate": 5.943212134444675e-06, "loss": 0.6725, "step": 4384 }, { "epoch": 0.47, "grad_norm": 1.6687618513030444, "learning_rate": 5.941479991808463e-06, "loss": 0.6968, "step": 4385 }, { "epoch": 0.47, "grad_norm": 1.8666344654152711, "learning_rate": 5.9397477320214e-06, "loss": 0.6912, "step": 4386 }, { "epoch": 0.47, "grad_norm": 1.7493532649254506, "learning_rate": 5.938015355299034e-06, "loss": 0.6609, "step": 4387 }, { "epoch": 0.47, "grad_norm": 1.6431102279909506, "learning_rate": 5.936282861856929e-06, "loss": 0.6563, "step": 4388 }, { "epoch": 0.47, "grad_norm": 1.8701946598033645, "learning_rate": 5.934550251910664e-06, "loss": 0.6968, "step": 4389 }, { "epoch": 0.47, "grad_norm": 1.6772000418612505, "learning_rate": 5.932817525675829e-06, "loss": 0.6144, "step": 4390 }, { "epoch": 0.47, "grad_norm": 1.8306584503714844, "learning_rate": 5.931084683368038e-06, "loss": 0.7046, "step": 4391 }, { "epoch": 0.47, "grad_norm": 1.5815952082500144, "learning_rate": 5.92935172520291e-06, "loss": 0.5426, "step": 4392 }, { "epoch": 0.47, "grad_norm": 1.6053197299732216, "learning_rate": 5.927618651396081e-06, "loss": 0.6781, "step": 4393 }, { "epoch": 0.47, "grad_norm": 1.7657584020082613, "learning_rate": 5.925885462163202e-06, "loss": 0.5479, "step": 4394 }, { "epoch": 0.47, "grad_norm": 1.9954101382383271, "learning_rate": 5.9241521577199415e-06, "loss": 0.6009, "step": 4395 }, { "epoch": 0.47, "grad_norm": 2.0561776244167933, "learning_rate": 5.922418738281974e-06, "loss": 0.6897, "step": 4396 }, { "epoch": 0.47, "grad_norm": 2.1643653260738436, "learning_rate": 5.920685204064998e-06, "loss": 0.5881, "step": 4397 }, { "epoch": 0.47, "grad_norm": 1.1359531338904114, "learning_rate": 5.9189515552847175e-06, "loss": 0.5223, "step": 4398 }, { "epoch": 0.47, "grad_norm": 1.1015186732353845, "learning_rate": 5.917217792156859e-06, "loss": 0.5154, "step": 4399 }, { "epoch": 0.47, "grad_norm": 1.0914744850877878, "learning_rate": 5.915483914897156e-06, "loss": 0.5106, "step": 4400 }, { "epoch": 0.47, "grad_norm": 2.018467413076426, "learning_rate": 5.913749923721363e-06, "loss": 0.6206, "step": 4401 }, { "epoch": 0.47, "grad_norm": 1.822503143525217, "learning_rate": 5.912015818845244e-06, "loss": 0.5631, "step": 4402 }, { "epoch": 0.47, "grad_norm": 1.7631323227722135, "learning_rate": 5.910281600484575e-06, "loss": 0.6348, "step": 4403 }, { "epoch": 0.47, "grad_norm": 1.3317690688677561, "learning_rate": 5.908547268855154e-06, "loss": 0.5253, "step": 4404 }, { "epoch": 0.47, "grad_norm": 1.6922142954468298, "learning_rate": 5.906812824172784e-06, "loss": 0.6441, "step": 4405 }, { "epoch": 0.47, "grad_norm": 1.8639912549738433, "learning_rate": 5.90507826665329e-06, "loss": 0.5807, "step": 4406 }, { "epoch": 0.47, "grad_norm": 1.7347267989741049, "learning_rate": 5.903343596512505e-06, "loss": 0.6432, "step": 4407 }, { "epoch": 0.47, "grad_norm": 1.8652230962132692, "learning_rate": 5.901608813966282e-06, "loss": 0.634, "step": 4408 }, { "epoch": 0.47, "grad_norm": 1.6303810842682946, "learning_rate": 5.899873919230484e-06, "loss": 0.5922, "step": 4409 }, { "epoch": 0.47, "grad_norm": 1.6747958600715427, "learning_rate": 5.8981389125209854e-06, "loss": 0.5455, "step": 4410 }, { "epoch": 0.47, "grad_norm": 2.0085874638399526, "learning_rate": 5.896403794053679e-06, "loss": 0.5705, "step": 4411 }, { "epoch": 0.47, "grad_norm": 1.80933328424126, "learning_rate": 5.894668564044472e-06, "loss": 0.6298, "step": 4412 }, { "epoch": 0.47, "grad_norm": 1.6161200959320237, "learning_rate": 5.892933222709282e-06, "loss": 0.6117, "step": 4413 }, { "epoch": 0.47, "grad_norm": 1.518335118082172, "learning_rate": 5.891197770264041e-06, "loss": 0.566, "step": 4414 }, { "epoch": 0.47, "grad_norm": 1.6487835847858556, "learning_rate": 5.889462206924699e-06, "loss": 0.5335, "step": 4415 }, { "epoch": 0.47, "grad_norm": 1.2867686464013837, "learning_rate": 5.887726532907215e-06, "loss": 0.513, "step": 4416 }, { "epoch": 0.47, "grad_norm": 1.5026452102997478, "learning_rate": 5.8859907484275636e-06, "loss": 0.5362, "step": 4417 }, { "epoch": 0.47, "grad_norm": 1.2192697050878678, "learning_rate": 5.884254853701733e-06, "loss": 0.5228, "step": 4418 }, { "epoch": 0.47, "grad_norm": 1.796211728519057, "learning_rate": 5.8825188489457265e-06, "loss": 0.5496, "step": 4419 }, { "epoch": 0.47, "grad_norm": 1.9417405394410383, "learning_rate": 5.880782734375557e-06, "loss": 0.6717, "step": 4420 }, { "epoch": 0.47, "grad_norm": 1.7294182221299372, "learning_rate": 5.879046510207257e-06, "loss": 0.6292, "step": 4421 }, { "epoch": 0.47, "grad_norm": 1.6406908561706963, "learning_rate": 5.8773101766568665e-06, "loss": 0.5847, "step": 4422 }, { "epoch": 0.47, "grad_norm": 1.7440356596696027, "learning_rate": 5.875573733940445e-06, "loss": 0.6011, "step": 4423 }, { "epoch": 0.47, "grad_norm": 1.9257764201904057, "learning_rate": 5.87383718227406e-06, "loss": 0.7161, "step": 4424 }, { "epoch": 0.47, "grad_norm": 1.9108101770474089, "learning_rate": 5.872100521873797e-06, "loss": 0.5855, "step": 4425 }, { "epoch": 0.47, "grad_norm": 1.684355252517829, "learning_rate": 5.870363752955753e-06, "loss": 0.6697, "step": 4426 }, { "epoch": 0.47, "grad_norm": 1.6736562760550746, "learning_rate": 5.868626875736039e-06, "loss": 0.6086, "step": 4427 }, { "epoch": 0.47, "grad_norm": 1.7255763857980517, "learning_rate": 5.866889890430777e-06, "loss": 0.5943, "step": 4428 }, { "epoch": 0.47, "grad_norm": 1.251979968515072, "learning_rate": 5.865152797256107e-06, "loss": 0.5392, "step": 4429 }, { "epoch": 0.47, "grad_norm": 1.6935189612880375, "learning_rate": 5.86341559642818e-06, "loss": 0.5877, "step": 4430 }, { "epoch": 0.47, "grad_norm": 1.7026984231239024, "learning_rate": 5.861678288163161e-06, "loss": 0.6394, "step": 4431 }, { "epoch": 0.47, "grad_norm": 1.8094181189903122, "learning_rate": 5.859940872677225e-06, "loss": 0.5569, "step": 4432 }, { "epoch": 0.47, "grad_norm": 1.1851157237799381, "learning_rate": 5.858203350186566e-06, "loss": 0.5349, "step": 4433 }, { "epoch": 0.47, "grad_norm": 1.7888733716109155, "learning_rate": 5.856465720907388e-06, "loss": 0.5688, "step": 4434 }, { "epoch": 0.47, "grad_norm": 1.7191194266838865, "learning_rate": 5.8547279850559095e-06, "loss": 0.5903, "step": 4435 }, { "epoch": 0.47, "grad_norm": 1.901169153567196, "learning_rate": 5.852990142848358e-06, "loss": 0.5823, "step": 4436 }, { "epoch": 0.47, "grad_norm": 1.7286803650471758, "learning_rate": 5.851252194500983e-06, "loss": 0.6781, "step": 4437 }, { "epoch": 0.47, "grad_norm": 1.6443996732230393, "learning_rate": 5.849514140230037e-06, "loss": 0.5981, "step": 4438 }, { "epoch": 0.47, "grad_norm": 1.789357288538061, "learning_rate": 5.847775980251796e-06, "loss": 0.5834, "step": 4439 }, { "epoch": 0.47, "grad_norm": 1.8941208075658358, "learning_rate": 5.846037714782538e-06, "loss": 0.7298, "step": 4440 }, { "epoch": 0.47, "grad_norm": 1.7795659471833631, "learning_rate": 5.844299344038564e-06, "loss": 0.5469, "step": 4441 }, { "epoch": 0.47, "grad_norm": 1.9528722648384191, "learning_rate": 5.842560868236184e-06, "loss": 0.5863, "step": 4442 }, { "epoch": 0.47, "grad_norm": 1.7194927615183653, "learning_rate": 5.840822287591719e-06, "loss": 0.519, "step": 4443 }, { "epoch": 0.47, "grad_norm": 1.7373191500845628, "learning_rate": 5.839083602321507e-06, "loss": 0.5328, "step": 4444 }, { "epoch": 0.47, "grad_norm": 1.4543225480549884, "learning_rate": 5.837344812641897e-06, "loss": 0.5308, "step": 4445 }, { "epoch": 0.47, "grad_norm": 2.029710566006837, "learning_rate": 5.83560591876925e-06, "loss": 0.7872, "step": 4446 }, { "epoch": 0.47, "grad_norm": 1.8752533568953489, "learning_rate": 5.833866920919942e-06, "loss": 0.6542, "step": 4447 }, { "epoch": 0.47, "grad_norm": 1.7244112186499347, "learning_rate": 5.832127819310361e-06, "loss": 0.658, "step": 4448 }, { "epoch": 0.47, "grad_norm": 1.9942038337308674, "learning_rate": 5.830388614156909e-06, "loss": 0.648, "step": 4449 }, { "epoch": 0.47, "grad_norm": 1.6639472001543474, "learning_rate": 5.828649305675998e-06, "loss": 0.55, "step": 4450 }, { "epoch": 0.47, "grad_norm": 2.059328839422966, "learning_rate": 5.826909894084056e-06, "loss": 0.5325, "step": 4451 }, { "epoch": 0.47, "grad_norm": 1.5594172974653433, "learning_rate": 5.825170379597524e-06, "loss": 0.602, "step": 4452 }, { "epoch": 0.47, "grad_norm": 1.713515611033138, "learning_rate": 5.823430762432852e-06, "loss": 0.5511, "step": 4453 }, { "epoch": 0.48, "grad_norm": 1.6600621016206751, "learning_rate": 5.821691042806506e-06, "loss": 0.616, "step": 4454 }, { "epoch": 0.48, "grad_norm": 1.741540945953554, "learning_rate": 5.819951220934962e-06, "loss": 0.6828, "step": 4455 }, { "epoch": 0.48, "grad_norm": 1.63504538787582, "learning_rate": 5.818211297034715e-06, "loss": 0.6605, "step": 4456 }, { "epoch": 0.48, "grad_norm": 1.6696054192249001, "learning_rate": 5.8164712713222645e-06, "loss": 0.5518, "step": 4457 }, { "epoch": 0.48, "grad_norm": 1.236516782213967, "learning_rate": 5.814731144014128e-06, "loss": 0.5368, "step": 4458 }, { "epoch": 0.48, "grad_norm": 1.8372550641376155, "learning_rate": 5.812990915326836e-06, "loss": 0.6993, "step": 4459 }, { "epoch": 0.48, "grad_norm": 1.7035537778206924, "learning_rate": 5.8112505854769265e-06, "loss": 0.6016, "step": 4460 }, { "epoch": 0.48, "grad_norm": 1.727268652306219, "learning_rate": 5.8095101546809554e-06, "loss": 0.6117, "step": 4461 }, { "epoch": 0.48, "grad_norm": 1.2255278979024342, "learning_rate": 5.807769623155488e-06, "loss": 0.537, "step": 4462 }, { "epoch": 0.48, "grad_norm": 1.6198305459585067, "learning_rate": 5.806028991117103e-06, "loss": 0.5003, "step": 4463 }, { "epoch": 0.48, "grad_norm": 1.6763493052939706, "learning_rate": 5.8042882587823925e-06, "loss": 0.5991, "step": 4464 }, { "epoch": 0.48, "grad_norm": 1.7840456540456628, "learning_rate": 5.80254742636796e-06, "loss": 0.5504, "step": 4465 }, { "epoch": 0.48, "grad_norm": 1.8909664635453014, "learning_rate": 5.8008064940904255e-06, "loss": 0.6319, "step": 4466 }, { "epoch": 0.48, "grad_norm": 1.3392613925137562, "learning_rate": 5.799065462166412e-06, "loss": 0.5274, "step": 4467 }, { "epoch": 0.48, "grad_norm": 1.3273034160019064, "learning_rate": 5.797324330812565e-06, "loss": 0.5082, "step": 4468 }, { "epoch": 0.48, "grad_norm": 1.7282627665931058, "learning_rate": 5.7955831002455355e-06, "loss": 0.597, "step": 4469 }, { "epoch": 0.48, "grad_norm": 1.7121428803945642, "learning_rate": 5.79384177068199e-06, "loss": 0.6063, "step": 4470 }, { "epoch": 0.48, "grad_norm": 1.577638370541367, "learning_rate": 5.792100342338609e-06, "loss": 0.5053, "step": 4471 }, { "epoch": 0.48, "grad_norm": 1.8127380602962115, "learning_rate": 5.790358815432079e-06, "loss": 0.7054, "step": 4472 }, { "epoch": 0.48, "grad_norm": 1.7191624816348436, "learning_rate": 5.788617190179106e-06, "loss": 0.6724, "step": 4473 }, { "epoch": 0.48, "grad_norm": 1.8216618310744537, "learning_rate": 5.786875466796406e-06, "loss": 0.7153, "step": 4474 }, { "epoch": 0.48, "grad_norm": 1.6912601363948405, "learning_rate": 5.785133645500702e-06, "loss": 0.5523, "step": 4475 }, { "epoch": 0.48, "grad_norm": 1.5015944726396955, "learning_rate": 5.783391726508737e-06, "loss": 0.5436, "step": 4476 }, { "epoch": 0.48, "grad_norm": 2.028702442237526, "learning_rate": 5.781649710037262e-06, "loss": 0.6252, "step": 4477 }, { "epoch": 0.48, "grad_norm": 1.540649273076683, "learning_rate": 5.779907596303038e-06, "loss": 0.5854, "step": 4478 }, { "epoch": 0.48, "grad_norm": 1.2025521992683184, "learning_rate": 5.778165385522845e-06, "loss": 0.5099, "step": 4479 }, { "epoch": 0.48, "grad_norm": 1.748939907144007, "learning_rate": 5.776423077913467e-06, "loss": 0.6541, "step": 4480 }, { "epoch": 0.48, "grad_norm": 1.1426069960154983, "learning_rate": 5.774680673691708e-06, "loss": 0.5346, "step": 4481 }, { "epoch": 0.48, "grad_norm": 2.110852361225869, "learning_rate": 5.772938173074377e-06, "loss": 0.7358, "step": 4482 }, { "epoch": 0.48, "grad_norm": 1.6735357581732997, "learning_rate": 5.7711955762782986e-06, "loss": 0.6282, "step": 4483 }, { "epoch": 0.48, "grad_norm": 1.8833042972650373, "learning_rate": 5.76945288352031e-06, "loss": 0.6881, "step": 4484 }, { "epoch": 0.48, "grad_norm": 1.7501189172502754, "learning_rate": 5.767710095017257e-06, "loss": 0.5482, "step": 4485 }, { "epoch": 0.48, "grad_norm": 1.678845976525615, "learning_rate": 5.765967210986001e-06, "loss": 0.5182, "step": 4486 }, { "epoch": 0.48, "grad_norm": 1.8112823299129286, "learning_rate": 5.764224231643412e-06, "loss": 0.6521, "step": 4487 }, { "epoch": 0.48, "grad_norm": 1.829333831439334, "learning_rate": 5.762481157206377e-06, "loss": 0.5171, "step": 4488 }, { "epoch": 0.48, "grad_norm": 1.6649901865607784, "learning_rate": 5.760737987891787e-06, "loss": 0.5207, "step": 4489 }, { "epoch": 0.48, "grad_norm": 1.7263088717588728, "learning_rate": 5.758994723916552e-06, "loss": 0.6222, "step": 4490 }, { "epoch": 0.48, "grad_norm": 1.9230124604915408, "learning_rate": 5.757251365497589e-06, "loss": 0.6819, "step": 4491 }, { "epoch": 0.48, "grad_norm": 1.6728119441954368, "learning_rate": 5.755507912851832e-06, "loss": 0.6259, "step": 4492 }, { "epoch": 0.48, "grad_norm": 1.1488496634245926, "learning_rate": 5.753764366196221e-06, "loss": 0.5073, "step": 4493 }, { "epoch": 0.48, "grad_norm": 1.7075694178842387, "learning_rate": 5.752020725747709e-06, "loss": 0.6834, "step": 4494 }, { "epoch": 0.48, "grad_norm": 1.812340185407782, "learning_rate": 5.7502769917232635e-06, "loss": 0.5998, "step": 4495 }, { "epoch": 0.48, "grad_norm": 1.7278109230518797, "learning_rate": 5.748533164339862e-06, "loss": 0.6457, "step": 4496 }, { "epoch": 0.48, "grad_norm": 1.6138630690736702, "learning_rate": 5.746789243814495e-06, "loss": 0.6438, "step": 4497 }, { "epoch": 0.48, "grad_norm": 1.9101500611265447, "learning_rate": 5.745045230364159e-06, "loss": 0.6368, "step": 4498 }, { "epoch": 0.48, "grad_norm": 1.6038594718800476, "learning_rate": 5.74330112420587e-06, "loss": 0.5713, "step": 4499 }, { "epoch": 0.48, "grad_norm": 1.3843276928546058, "learning_rate": 5.74155692555665e-06, "loss": 0.5302, "step": 4500 }, { "epoch": 0.48, "grad_norm": 1.8283839936465593, "learning_rate": 5.739812634633536e-06, "loss": 0.6393, "step": 4501 }, { "epoch": 0.48, "grad_norm": 1.1174980473847422, "learning_rate": 5.738068251653573e-06, "loss": 0.5132, "step": 4502 }, { "epoch": 0.48, "grad_norm": 1.8126213256714108, "learning_rate": 5.736323776833818e-06, "loss": 0.6507, "step": 4503 }, { "epoch": 0.48, "grad_norm": 3.223021812365079, "learning_rate": 5.7345792103913435e-06, "loss": 0.682, "step": 4504 }, { "epoch": 0.48, "grad_norm": 1.899145975631978, "learning_rate": 5.73283455254323e-06, "loss": 0.7518, "step": 4505 }, { "epoch": 0.48, "grad_norm": 1.5681742780241064, "learning_rate": 5.731089803506568e-06, "loss": 0.6904, "step": 4506 }, { "epoch": 0.48, "grad_norm": 1.640046128062556, "learning_rate": 5.729344963498462e-06, "loss": 0.6262, "step": 4507 }, { "epoch": 0.48, "grad_norm": 1.8246977588254811, "learning_rate": 5.7276000327360286e-06, "loss": 0.6227, "step": 4508 }, { "epoch": 0.48, "grad_norm": 1.6072013074099833, "learning_rate": 5.725855011436393e-06, "loss": 0.5825, "step": 4509 }, { "epoch": 0.48, "grad_norm": 1.6598579746144169, "learning_rate": 5.7241098998166935e-06, "loss": 0.5094, "step": 4510 }, { "epoch": 0.48, "grad_norm": 1.378612196130456, "learning_rate": 5.7223646980940775e-06, "loss": 0.5248, "step": 4511 }, { "epoch": 0.48, "grad_norm": 1.6080249874089028, "learning_rate": 5.720619406485706e-06, "loss": 0.6396, "step": 4512 }, { "epoch": 0.48, "grad_norm": 1.1344487037051283, "learning_rate": 5.718874025208749e-06, "loss": 0.5321, "step": 4513 }, { "epoch": 0.48, "grad_norm": 1.870188256713151, "learning_rate": 5.717128554480392e-06, "loss": 0.6014, "step": 4514 }, { "epoch": 0.48, "grad_norm": 1.4566983252405115, "learning_rate": 5.715382994517826e-06, "loss": 0.5372, "step": 4515 }, { "epoch": 0.48, "grad_norm": 1.833542316022226, "learning_rate": 5.7136373455382565e-06, "loss": 0.5892, "step": 4516 }, { "epoch": 0.48, "grad_norm": 1.7401618363039244, "learning_rate": 5.711891607758902e-06, "loss": 0.6095, "step": 4517 }, { "epoch": 0.48, "grad_norm": 1.842470006639533, "learning_rate": 5.710145781396983e-06, "loss": 0.5819, "step": 4518 }, { "epoch": 0.48, "grad_norm": 1.7280750301464118, "learning_rate": 5.70839986666974e-06, "loss": 0.6046, "step": 4519 }, { "epoch": 0.48, "grad_norm": 1.715258501197368, "learning_rate": 5.706653863794424e-06, "loss": 0.6751, "step": 4520 }, { "epoch": 0.48, "grad_norm": 1.7339206362394617, "learning_rate": 5.704907772988293e-06, "loss": 0.6835, "step": 4521 }, { "epoch": 0.48, "grad_norm": 2.0508881751810977, "learning_rate": 5.703161594468618e-06, "loss": 0.6031, "step": 4522 }, { "epoch": 0.48, "grad_norm": 1.827978214588679, "learning_rate": 5.7014153284526805e-06, "loss": 0.6975, "step": 4523 }, { "epoch": 0.48, "grad_norm": 1.6116765847688688, "learning_rate": 5.699668975157774e-06, "loss": 0.5814, "step": 4524 }, { "epoch": 0.48, "grad_norm": 1.743929024683473, "learning_rate": 5.6979225348012e-06, "loss": 0.5783, "step": 4525 }, { "epoch": 0.48, "grad_norm": 1.8657688123441332, "learning_rate": 5.696176007600275e-06, "loss": 0.647, "step": 4526 }, { "epoch": 0.48, "grad_norm": 1.7869671558105669, "learning_rate": 5.6944293937723215e-06, "loss": 0.7139, "step": 4527 }, { "epoch": 0.48, "grad_norm": 1.7087854627464014, "learning_rate": 5.692682693534677e-06, "loss": 0.6193, "step": 4528 }, { "epoch": 0.48, "grad_norm": 1.7286589415345581, "learning_rate": 5.6909359071046856e-06, "loss": 0.5987, "step": 4529 }, { "epoch": 0.48, "grad_norm": 1.7784096378508947, "learning_rate": 5.689189034699708e-06, "loss": 0.6008, "step": 4530 }, { "epoch": 0.48, "grad_norm": 1.4053250082706499, "learning_rate": 5.687442076537112e-06, "loss": 0.5128, "step": 4531 }, { "epoch": 0.48, "grad_norm": 1.3190807779201192, "learning_rate": 5.685695032834273e-06, "loss": 0.526, "step": 4532 }, { "epoch": 0.48, "grad_norm": 1.1817562909108268, "learning_rate": 5.683947903808583e-06, "loss": 0.5225, "step": 4533 }, { "epoch": 0.48, "grad_norm": 1.7129903109189248, "learning_rate": 5.6822006896774415e-06, "loss": 0.5745, "step": 4534 }, { "epoch": 0.48, "grad_norm": 1.6726148527408937, "learning_rate": 5.680453390658258e-06, "loss": 0.5416, "step": 4535 }, { "epoch": 0.48, "grad_norm": 1.6095811655659513, "learning_rate": 5.678706006968454e-06, "loss": 0.5637, "step": 4536 }, { "epoch": 0.48, "grad_norm": 2.1100307987288085, "learning_rate": 5.67695853882546e-06, "loss": 0.5771, "step": 4537 }, { "epoch": 0.48, "grad_norm": 1.8795010324981454, "learning_rate": 5.675210986446722e-06, "loss": 0.6098, "step": 4538 }, { "epoch": 0.48, "grad_norm": 1.9234900706524871, "learning_rate": 5.673463350049688e-06, "loss": 0.5299, "step": 4539 }, { "epoch": 0.48, "grad_norm": 1.8650111525912625, "learning_rate": 5.671715629851824e-06, "loss": 0.6701, "step": 4540 }, { "epoch": 0.48, "grad_norm": 1.6950033353326643, "learning_rate": 5.669967826070603e-06, "loss": 0.6639, "step": 4541 }, { "epoch": 0.48, "grad_norm": 1.6798968621875243, "learning_rate": 5.668219938923506e-06, "loss": 0.5345, "step": 4542 }, { "epoch": 0.48, "grad_norm": 1.3740938420933912, "learning_rate": 5.6664719686280316e-06, "loss": 0.5224, "step": 4543 }, { "epoch": 0.48, "grad_norm": 1.883699330104927, "learning_rate": 5.664723915401681e-06, "loss": 0.6459, "step": 4544 }, { "epoch": 0.48, "grad_norm": 1.1196142782992509, "learning_rate": 5.66297577946197e-06, "loss": 0.5317, "step": 4545 }, { "epoch": 0.48, "grad_norm": 1.687683978097032, "learning_rate": 5.661227561026425e-06, "loss": 0.6601, "step": 4546 }, { "epoch": 0.49, "grad_norm": 1.9804906854447413, "learning_rate": 5.659479260312582e-06, "loss": 0.6233, "step": 4547 }, { "epoch": 0.49, "grad_norm": 1.811075779017099, "learning_rate": 5.657730877537982e-06, "loss": 0.6217, "step": 4548 }, { "epoch": 0.49, "grad_norm": 1.573018318740006, "learning_rate": 5.655982412920187e-06, "loss": 0.5864, "step": 4549 }, { "epoch": 0.49, "grad_norm": 1.7306512065040072, "learning_rate": 5.65423386667676e-06, "loss": 0.6664, "step": 4550 }, { "epoch": 0.49, "grad_norm": 2.193798706149106, "learning_rate": 5.652485239025276e-06, "loss": 0.6053, "step": 4551 }, { "epoch": 0.49, "grad_norm": 1.8086128594523148, "learning_rate": 5.650736530183324e-06, "loss": 0.5442, "step": 4552 }, { "epoch": 0.49, "grad_norm": 1.6862797072658806, "learning_rate": 5.648987740368499e-06, "loss": 0.5373, "step": 4553 }, { "epoch": 0.49, "grad_norm": 1.8231539954285183, "learning_rate": 5.647238869798408e-06, "loss": 0.6698, "step": 4554 }, { "epoch": 0.49, "grad_norm": 1.6864114937008536, "learning_rate": 5.645489918690668e-06, "loss": 0.5937, "step": 4555 }, { "epoch": 0.49, "grad_norm": 1.9884165192332, "learning_rate": 5.643740887262905e-06, "loss": 0.57, "step": 4556 }, { "epoch": 0.49, "grad_norm": 1.9545906970877356, "learning_rate": 5.641991775732756e-06, "loss": 0.6849, "step": 4557 }, { "epoch": 0.49, "grad_norm": 1.7326076102146624, "learning_rate": 5.640242584317866e-06, "loss": 0.5466, "step": 4558 }, { "epoch": 0.49, "grad_norm": 1.780287760883509, "learning_rate": 5.6384933132358945e-06, "loss": 0.6227, "step": 4559 }, { "epoch": 0.49, "grad_norm": 1.9262496995918428, "learning_rate": 5.636743962704506e-06, "loss": 0.6812, "step": 4560 }, { "epoch": 0.49, "grad_norm": 1.7685823797950289, "learning_rate": 5.634994532941377e-06, "loss": 0.6316, "step": 4561 }, { "epoch": 0.49, "grad_norm": 1.7273058937344754, "learning_rate": 5.633245024164194e-06, "loss": 0.6089, "step": 4562 }, { "epoch": 0.49, "grad_norm": 1.5608178066362168, "learning_rate": 5.631495436590652e-06, "loss": 0.5706, "step": 4563 }, { "epoch": 0.49, "grad_norm": 1.5787274508987013, "learning_rate": 5.629745770438459e-06, "loss": 0.6039, "step": 4564 }, { "epoch": 0.49, "grad_norm": 1.8284262132978657, "learning_rate": 5.62799602592533e-06, "loss": 0.6697, "step": 4565 }, { "epoch": 0.49, "grad_norm": 1.7836783044210305, "learning_rate": 5.626246203268988e-06, "loss": 0.6386, "step": 4566 }, { "epoch": 0.49, "grad_norm": 1.565117758762295, "learning_rate": 5.624496302687172e-06, "loss": 0.5889, "step": 4567 }, { "epoch": 0.49, "grad_norm": 1.3726042884953513, "learning_rate": 5.622746324397623e-06, "loss": 0.517, "step": 4568 }, { "epoch": 0.49, "grad_norm": 1.8389666562380533, "learning_rate": 5.620996268618095e-06, "loss": 0.6864, "step": 4569 }, { "epoch": 0.49, "grad_norm": 1.5792196168179484, "learning_rate": 5.619246135566356e-06, "loss": 0.5619, "step": 4570 }, { "epoch": 0.49, "grad_norm": 1.8226732500111378, "learning_rate": 5.617495925460178e-06, "loss": 0.6948, "step": 4571 }, { "epoch": 0.49, "grad_norm": 1.8540781907986967, "learning_rate": 5.615745638517344e-06, "loss": 0.6825, "step": 4572 }, { "epoch": 0.49, "grad_norm": 1.226704324519824, "learning_rate": 5.613995274955647e-06, "loss": 0.5301, "step": 4573 }, { "epoch": 0.49, "grad_norm": 1.665296765107287, "learning_rate": 5.6122448349928895e-06, "loss": 0.5891, "step": 4574 }, { "epoch": 0.49, "grad_norm": 1.1270136412515748, "learning_rate": 5.610494318846883e-06, "loss": 0.5125, "step": 4575 }, { "epoch": 0.49, "grad_norm": 1.8200385919514297, "learning_rate": 5.608743726735448e-06, "loss": 0.5485, "step": 4576 }, { "epoch": 0.49, "grad_norm": 1.9202961682020567, "learning_rate": 5.6069930588764155e-06, "loss": 0.6729, "step": 4577 }, { "epoch": 0.49, "grad_norm": 1.7344415366083203, "learning_rate": 5.605242315487628e-06, "loss": 0.6575, "step": 4578 }, { "epoch": 0.49, "grad_norm": 1.8601545721778514, "learning_rate": 5.603491496786934e-06, "loss": 0.6656, "step": 4579 }, { "epoch": 0.49, "grad_norm": 1.7745629447066207, "learning_rate": 5.601740602992191e-06, "loss": 0.5436, "step": 4580 }, { "epoch": 0.49, "grad_norm": 1.2024239849875744, "learning_rate": 5.599989634321271e-06, "loss": 0.4856, "step": 4581 }, { "epoch": 0.49, "grad_norm": 1.7482920228991854, "learning_rate": 5.598238590992048e-06, "loss": 0.6136, "step": 4582 }, { "epoch": 0.49, "grad_norm": 1.893101489018904, "learning_rate": 5.596487473222411e-06, "loss": 0.6653, "step": 4583 }, { "epoch": 0.49, "grad_norm": 1.8322010112426983, "learning_rate": 5.594736281230253e-06, "loss": 0.7157, "step": 4584 }, { "epoch": 0.49, "grad_norm": 1.9426569044024444, "learning_rate": 5.592985015233483e-06, "loss": 0.7147, "step": 4585 }, { "epoch": 0.49, "grad_norm": 1.621721549258636, "learning_rate": 5.591233675450015e-06, "loss": 0.6427, "step": 4586 }, { "epoch": 0.49, "grad_norm": 1.7669630155718998, "learning_rate": 5.589482262097772e-06, "loss": 0.566, "step": 4587 }, { "epoch": 0.49, "grad_norm": 1.656570106949786, "learning_rate": 5.587730775394689e-06, "loss": 0.6335, "step": 4588 }, { "epoch": 0.49, "grad_norm": 2.02378765473109, "learning_rate": 5.5859792155587055e-06, "loss": 0.6395, "step": 4589 }, { "epoch": 0.49, "grad_norm": 1.5636973493315438, "learning_rate": 5.584227582807774e-06, "loss": 0.6668, "step": 4590 }, { "epoch": 0.49, "grad_norm": 1.8546398768418197, "learning_rate": 5.582475877359854e-06, "loss": 0.6074, "step": 4591 }, { "epoch": 0.49, "grad_norm": 1.859957479229653, "learning_rate": 5.580724099432917e-06, "loss": 0.5857, "step": 4592 }, { "epoch": 0.49, "grad_norm": 1.7610745865966246, "learning_rate": 5.578972249244938e-06, "loss": 0.6454, "step": 4593 }, { "epoch": 0.49, "grad_norm": 2.1434345959561667, "learning_rate": 5.577220327013907e-06, "loss": 0.5601, "step": 4594 }, { "epoch": 0.49, "grad_norm": 1.7123118538167645, "learning_rate": 5.57546833295782e-06, "loss": 0.7626, "step": 4595 }, { "epoch": 0.49, "grad_norm": 1.7349629070026322, "learning_rate": 5.573716267294682e-06, "loss": 0.6139, "step": 4596 }, { "epoch": 0.49, "grad_norm": 1.751888152904789, "learning_rate": 5.571964130242507e-06, "loss": 0.546, "step": 4597 }, { "epoch": 0.49, "grad_norm": 1.6460533561602624, "learning_rate": 5.570211922019318e-06, "loss": 0.5727, "step": 4598 }, { "epoch": 0.49, "grad_norm": 1.7990849940952163, "learning_rate": 5.568459642843146e-06, "loss": 0.6456, "step": 4599 }, { "epoch": 0.49, "grad_norm": 1.6257163694510026, "learning_rate": 5.566707292932035e-06, "loss": 0.6133, "step": 4600 }, { "epoch": 0.49, "grad_norm": 2.08073345017853, "learning_rate": 5.564954872504029e-06, "loss": 0.6174, "step": 4601 }, { "epoch": 0.49, "grad_norm": 1.6500936405467177, "learning_rate": 5.5632023817771904e-06, "loss": 0.5628, "step": 4602 }, { "epoch": 0.49, "grad_norm": 1.8066517785672278, "learning_rate": 5.561449820969588e-06, "loss": 0.6028, "step": 4603 }, { "epoch": 0.49, "grad_norm": 1.878808431937161, "learning_rate": 5.559697190299294e-06, "loss": 0.6949, "step": 4604 }, { "epoch": 0.49, "grad_norm": 1.7035568412748072, "learning_rate": 5.5579444899843926e-06, "loss": 0.6144, "step": 4605 }, { "epoch": 0.49, "grad_norm": 1.8063550885496475, "learning_rate": 5.55619172024298e-06, "loss": 0.6473, "step": 4606 }, { "epoch": 0.49, "grad_norm": 1.9613745612760947, "learning_rate": 5.554438881293155e-06, "loss": 0.7235, "step": 4607 }, { "epoch": 0.49, "grad_norm": 2.0929249353488504, "learning_rate": 5.552685973353031e-06, "loss": 0.6529, "step": 4608 }, { "epoch": 0.49, "grad_norm": 1.8381787679337342, "learning_rate": 5.550932996640723e-06, "loss": 0.572, "step": 4609 }, { "epoch": 0.49, "grad_norm": 1.7775147263987658, "learning_rate": 5.549179951374361e-06, "loss": 0.5989, "step": 4610 }, { "epoch": 0.49, "grad_norm": 1.833774348773011, "learning_rate": 5.547426837772083e-06, "loss": 0.665, "step": 4611 }, { "epoch": 0.49, "grad_norm": 1.9108871232701252, "learning_rate": 5.545673656052031e-06, "loss": 0.6564, "step": 4612 }, { "epoch": 0.49, "grad_norm": 1.816061922757503, "learning_rate": 5.543920406432357e-06, "loss": 0.6829, "step": 4613 }, { "epoch": 0.49, "grad_norm": 1.8451750992297677, "learning_rate": 5.542167089131225e-06, "loss": 0.6548, "step": 4614 }, { "epoch": 0.49, "grad_norm": 1.7189316835776973, "learning_rate": 5.5404137043668035e-06, "loss": 0.65, "step": 4615 }, { "epoch": 0.49, "grad_norm": 1.7752953793324522, "learning_rate": 5.538660252357272e-06, "loss": 0.6847, "step": 4616 }, { "epoch": 0.49, "grad_norm": 1.6864448869511686, "learning_rate": 5.536906733320816e-06, "loss": 0.6407, "step": 4617 }, { "epoch": 0.49, "grad_norm": 1.6969297136208903, "learning_rate": 5.5351531474756305e-06, "loss": 0.6387, "step": 4618 }, { "epoch": 0.49, "grad_norm": 1.7943408326928787, "learning_rate": 5.5333994950399205e-06, "loss": 0.642, "step": 4619 }, { "epoch": 0.49, "grad_norm": 1.6934614372461294, "learning_rate": 5.531645776231895e-06, "loss": 0.6739, "step": 4620 }, { "epoch": 0.49, "grad_norm": 1.2647569372996699, "learning_rate": 5.529891991269777e-06, "loss": 0.5185, "step": 4621 }, { "epoch": 0.49, "grad_norm": 1.8286051421968739, "learning_rate": 5.528138140371792e-06, "loss": 0.6407, "step": 4622 }, { "epoch": 0.49, "grad_norm": 1.9507810964894867, "learning_rate": 5.526384223756177e-06, "loss": 0.6404, "step": 4623 }, { "epoch": 0.49, "grad_norm": 1.8732490467987089, "learning_rate": 5.524630241641178e-06, "loss": 0.6098, "step": 4624 }, { "epoch": 0.49, "grad_norm": 1.1553592553511036, "learning_rate": 5.522876194245045e-06, "loss": 0.5231, "step": 4625 }, { "epoch": 0.49, "grad_norm": 1.8227495033391947, "learning_rate": 5.52112208178604e-06, "loss": 0.6076, "step": 4626 }, { "epoch": 0.49, "grad_norm": 1.9535775434014773, "learning_rate": 5.519367904482432e-06, "loss": 0.6607, "step": 4627 }, { "epoch": 0.49, "grad_norm": 1.713321690377923, "learning_rate": 5.517613662552499e-06, "loss": 0.6429, "step": 4628 }, { "epoch": 0.49, "grad_norm": 1.6250236724023304, "learning_rate": 5.515859356214525e-06, "loss": 0.5692, "step": 4629 }, { "epoch": 0.49, "grad_norm": 1.8231815887394143, "learning_rate": 5.514104985686802e-06, "loss": 0.5688, "step": 4630 }, { "epoch": 0.49, "grad_norm": 1.9612213052416394, "learning_rate": 5.512350551187633e-06, "loss": 0.7041, "step": 4631 }, { "epoch": 0.49, "grad_norm": 1.2208578408942068, "learning_rate": 5.510596052935327e-06, "loss": 0.5239, "step": 4632 }, { "epoch": 0.49, "grad_norm": 1.7864958555343204, "learning_rate": 5.508841491148197e-06, "loss": 0.633, "step": 4633 }, { "epoch": 0.49, "grad_norm": 2.076779977431086, "learning_rate": 5.507086866044571e-06, "loss": 0.7039, "step": 4634 }, { "epoch": 0.49, "grad_norm": 1.896380435368194, "learning_rate": 5.5053321778427815e-06, "loss": 0.6206, "step": 4635 }, { "epoch": 0.49, "grad_norm": 2.1129121913603104, "learning_rate": 5.5035774267611686e-06, "loss": 0.6413, "step": 4636 }, { "epoch": 0.49, "grad_norm": 1.8585418268779295, "learning_rate": 5.501822613018081e-06, "loss": 0.591, "step": 4637 }, { "epoch": 0.49, "grad_norm": 1.6865104270888984, "learning_rate": 5.500067736831874e-06, "loss": 0.6496, "step": 4638 }, { "epoch": 0.49, "grad_norm": 1.703027830996452, "learning_rate": 5.498312798420914e-06, "loss": 0.6141, "step": 4639 }, { "epoch": 0.49, "grad_norm": 1.1749373955857558, "learning_rate": 5.49655779800357e-06, "loss": 0.5364, "step": 4640 }, { "epoch": 0.5, "grad_norm": 1.8153235899303306, "learning_rate": 5.494802735798221e-06, "loss": 0.5916, "step": 4641 }, { "epoch": 0.5, "grad_norm": 1.6410590580240387, "learning_rate": 5.4930476120232545e-06, "loss": 0.5338, "step": 4642 }, { "epoch": 0.5, "grad_norm": 1.7635424438154061, "learning_rate": 5.491292426897066e-06, "loss": 0.6233, "step": 4643 }, { "epoch": 0.5, "grad_norm": 1.7564513276152836, "learning_rate": 5.489537180638058e-06, "loss": 0.6419, "step": 4644 }, { "epoch": 0.5, "grad_norm": 1.8996201081875719, "learning_rate": 5.487781873464641e-06, "loss": 0.5839, "step": 4645 }, { "epoch": 0.5, "grad_norm": 1.7963669256818675, "learning_rate": 5.4860265055952325e-06, "loss": 0.585, "step": 4646 }, { "epoch": 0.5, "grad_norm": 1.9282769842108938, "learning_rate": 5.4842710772482545e-06, "loss": 0.7826, "step": 4647 }, { "epoch": 0.5, "grad_norm": 1.6635082573850828, "learning_rate": 5.482515588642142e-06, "loss": 0.6105, "step": 4648 }, { "epoch": 0.5, "grad_norm": 1.2122742577159782, "learning_rate": 5.480760039995335e-06, "loss": 0.5219, "step": 4649 }, { "epoch": 0.5, "grad_norm": 1.9910554430154237, "learning_rate": 5.479004431526281e-06, "loss": 0.6222, "step": 4650 }, { "epoch": 0.5, "grad_norm": 1.6569858179203902, "learning_rate": 5.477248763453434e-06, "loss": 0.6013, "step": 4651 }, { "epoch": 0.5, "grad_norm": 1.7006469706187097, "learning_rate": 5.475493035995259e-06, "loss": 0.6313, "step": 4652 }, { "epoch": 0.5, "grad_norm": 1.838849008529108, "learning_rate": 5.473737249370226e-06, "loss": 0.6571, "step": 4653 }, { "epoch": 0.5, "grad_norm": 1.7321738936754068, "learning_rate": 5.471981403796808e-06, "loss": 0.625, "step": 4654 }, { "epoch": 0.5, "grad_norm": 1.5459978857352241, "learning_rate": 5.470225499493491e-06, "loss": 0.5864, "step": 4655 }, { "epoch": 0.5, "grad_norm": 2.3720064081572554, "learning_rate": 5.46846953667877e-06, "loss": 0.6001, "step": 4656 }, { "epoch": 0.5, "grad_norm": 1.7351632265398138, "learning_rate": 5.4667135155711405e-06, "loss": 0.5572, "step": 4657 }, { "epoch": 0.5, "grad_norm": 1.710871105985002, "learning_rate": 5.464957436389111e-06, "loss": 0.6099, "step": 4658 }, { "epoch": 0.5, "grad_norm": 1.684046866679504, "learning_rate": 5.463201299351193e-06, "loss": 0.5931, "step": 4659 }, { "epoch": 0.5, "grad_norm": 1.7339709902486793, "learning_rate": 5.4614451046759095e-06, "loss": 0.6042, "step": 4660 }, { "epoch": 0.5, "grad_norm": 1.6914993238243239, "learning_rate": 5.459688852581788e-06, "loss": 0.6324, "step": 4661 }, { "epoch": 0.5, "grad_norm": 1.8411491643375422, "learning_rate": 5.457932543287363e-06, "loss": 0.6665, "step": 4662 }, { "epoch": 0.5, "grad_norm": 1.6750255575767579, "learning_rate": 5.456176177011177e-06, "loss": 0.6238, "step": 4663 }, { "epoch": 0.5, "grad_norm": 1.972289362022053, "learning_rate": 5.4544197539717795e-06, "loss": 0.6919, "step": 4664 }, { "epoch": 0.5, "grad_norm": 1.7634010683198054, "learning_rate": 5.452663274387726e-06, "loss": 0.6687, "step": 4665 }, { "epoch": 0.5, "grad_norm": 1.774463549538347, "learning_rate": 5.45090673847758e-06, "loss": 0.6485, "step": 4666 }, { "epoch": 0.5, "grad_norm": 1.7821407386019816, "learning_rate": 5.449150146459913e-06, "loss": 0.597, "step": 4667 }, { "epoch": 0.5, "grad_norm": 1.234763967792748, "learning_rate": 5.447393498553303e-06, "loss": 0.5326, "step": 4668 }, { "epoch": 0.5, "grad_norm": 1.887275219208052, "learning_rate": 5.445636794976332e-06, "loss": 0.6468, "step": 4669 }, { "epoch": 0.5, "grad_norm": 1.6671611805686615, "learning_rate": 5.443880035947593e-06, "loss": 0.5966, "step": 4670 }, { "epoch": 0.5, "grad_norm": 1.9605524000099261, "learning_rate": 5.4421232216856836e-06, "loss": 0.6103, "step": 4671 }, { "epoch": 0.5, "grad_norm": 1.8874931277237874, "learning_rate": 5.440366352409209e-06, "loss": 0.628, "step": 4672 }, { "epoch": 0.5, "grad_norm": 1.1497137705085017, "learning_rate": 5.438609428336781e-06, "loss": 0.5132, "step": 4673 }, { "epoch": 0.5, "grad_norm": 1.7433749338729425, "learning_rate": 5.436852449687018e-06, "loss": 0.5114, "step": 4674 }, { "epoch": 0.5, "grad_norm": 1.7761056465361897, "learning_rate": 5.435095416678546e-06, "loss": 0.5794, "step": 4675 }, { "epoch": 0.5, "grad_norm": 1.9198445826821438, "learning_rate": 5.4333383295299976e-06, "loss": 0.6587, "step": 4676 }, { "epoch": 0.5, "grad_norm": 1.821109542257753, "learning_rate": 5.4315811884600115e-06, "loss": 0.6249, "step": 4677 }, { "epoch": 0.5, "grad_norm": 1.8546651190820753, "learning_rate": 5.429823993687234e-06, "loss": 0.6368, "step": 4678 }, { "epoch": 0.5, "grad_norm": 1.6625478629947974, "learning_rate": 5.4280667454303145e-06, "loss": 0.5717, "step": 4679 }, { "epoch": 0.5, "grad_norm": 1.6656086356675093, "learning_rate": 5.426309443907916e-06, "loss": 0.626, "step": 4680 }, { "epoch": 0.5, "grad_norm": 1.7676893647351677, "learning_rate": 5.424552089338703e-06, "loss": 0.6581, "step": 4681 }, { "epoch": 0.5, "grad_norm": 1.8815022360935965, "learning_rate": 5.4227946819413455e-06, "loss": 0.6426, "step": 4682 }, { "epoch": 0.5, "grad_norm": 1.9058953873397062, "learning_rate": 5.421037221934525e-06, "loss": 0.6007, "step": 4683 }, { "epoch": 0.5, "grad_norm": 1.5035643515620922, "learning_rate": 5.419279709536927e-06, "loss": 0.5402, "step": 4684 }, { "epoch": 0.5, "grad_norm": 1.582463905023252, "learning_rate": 5.417522144967242e-06, "loss": 0.5879, "step": 4685 }, { "epoch": 0.5, "grad_norm": 1.1822478077687997, "learning_rate": 5.41576452844417e-06, "loss": 0.5214, "step": 4686 }, { "epoch": 0.5, "grad_norm": 1.1804710847592643, "learning_rate": 5.4140068601864125e-06, "loss": 0.5236, "step": 4687 }, { "epoch": 0.5, "grad_norm": 1.9187740853079263, "learning_rate": 5.412249140412685e-06, "loss": 0.549, "step": 4688 }, { "epoch": 0.5, "grad_norm": 1.5812949523134683, "learning_rate": 5.410491369341704e-06, "loss": 0.5767, "step": 4689 }, { "epoch": 0.5, "grad_norm": 1.7845900347246824, "learning_rate": 5.408733547192191e-06, "loss": 0.5847, "step": 4690 }, { "epoch": 0.5, "grad_norm": 1.3025323680586693, "learning_rate": 5.406975674182879e-06, "loss": 0.5186, "step": 4691 }, { "epoch": 0.5, "grad_norm": 1.8034026384527193, "learning_rate": 5.405217750532506e-06, "loss": 0.6664, "step": 4692 }, { "epoch": 0.5, "grad_norm": 1.9067023097473006, "learning_rate": 5.403459776459812e-06, "loss": 0.6187, "step": 4693 }, { "epoch": 0.5, "grad_norm": 1.7243090061302462, "learning_rate": 5.401701752183548e-06, "loss": 0.65, "step": 4694 }, { "epoch": 0.5, "grad_norm": 1.5806556802220908, "learning_rate": 5.39994367792247e-06, "loss": 0.5799, "step": 4695 }, { "epoch": 0.5, "grad_norm": 1.6333476263687021, "learning_rate": 5.39818555389534e-06, "loss": 0.5481, "step": 4696 }, { "epoch": 0.5, "grad_norm": 1.7310647394730438, "learning_rate": 5.396427380320923e-06, "loss": 0.601, "step": 4697 }, { "epoch": 0.5, "grad_norm": 1.6850584943486455, "learning_rate": 5.394669157417998e-06, "loss": 0.5575, "step": 4698 }, { "epoch": 0.5, "grad_norm": 1.754032810562427, "learning_rate": 5.392910885405339e-06, "loss": 0.6327, "step": 4699 }, { "epoch": 0.5, "grad_norm": 1.9878472532116453, "learning_rate": 5.3911525645017396e-06, "loss": 0.6572, "step": 4700 }, { "epoch": 0.5, "grad_norm": 1.7473105675033735, "learning_rate": 5.389394194925988e-06, "loss": 0.4848, "step": 4701 }, { "epoch": 0.5, "grad_norm": 1.6698235266352337, "learning_rate": 5.3876357768968834e-06, "loss": 0.6267, "step": 4702 }, { "epoch": 0.5, "grad_norm": 1.8388967514586778, "learning_rate": 5.385877310633233e-06, "loss": 0.6066, "step": 4703 }, { "epoch": 0.5, "grad_norm": 1.905007624175121, "learning_rate": 5.384118796353844e-06, "loss": 0.5805, "step": 4704 }, { "epoch": 0.5, "grad_norm": 1.8511998819876978, "learning_rate": 5.382360234277534e-06, "loss": 0.7082, "step": 4705 }, { "epoch": 0.5, "grad_norm": 1.6509362617753065, "learning_rate": 5.3806016246231255e-06, "loss": 0.6411, "step": 4706 }, { "epoch": 0.5, "grad_norm": 1.8001526378372121, "learning_rate": 5.378842967609447e-06, "loss": 0.6431, "step": 4707 }, { "epoch": 0.5, "grad_norm": 1.1949336390185645, "learning_rate": 5.3770842634553335e-06, "loss": 0.5083, "step": 4708 }, { "epoch": 0.5, "grad_norm": 1.5371921666244255, "learning_rate": 5.375325512379624e-06, "loss": 0.6123, "step": 4709 }, { "epoch": 0.5, "grad_norm": 1.8289165677514343, "learning_rate": 5.373566714601167e-06, "loss": 0.661, "step": 4710 }, { "epoch": 0.5, "grad_norm": 1.4884697430017162, "learning_rate": 5.371807870338813e-06, "loss": 0.5658, "step": 4711 }, { "epoch": 0.5, "grad_norm": 1.1027009751498784, "learning_rate": 5.370048979811419e-06, "loss": 0.5119, "step": 4712 }, { "epoch": 0.5, "grad_norm": 1.7277239359864047, "learning_rate": 5.368290043237848e-06, "loss": 0.6705, "step": 4713 }, { "epoch": 0.5, "grad_norm": 1.6719826920848813, "learning_rate": 5.366531060836972e-06, "loss": 0.6089, "step": 4714 }, { "epoch": 0.5, "grad_norm": 1.7376299570083567, "learning_rate": 5.3647720328276605e-06, "loss": 0.5853, "step": 4715 }, { "epoch": 0.5, "grad_norm": 1.7574942007432277, "learning_rate": 5.3630129594288e-06, "loss": 0.6501, "step": 4716 }, { "epoch": 0.5, "grad_norm": 1.1992112216651085, "learning_rate": 5.361253840859272e-06, "loss": 0.5118, "step": 4717 }, { "epoch": 0.5, "grad_norm": 1.9579859101483412, "learning_rate": 5.359494677337973e-06, "loss": 0.5816, "step": 4718 }, { "epoch": 0.5, "grad_norm": 1.6333319238772428, "learning_rate": 5.3577354690837955e-06, "loss": 0.5994, "step": 4719 }, { "epoch": 0.5, "grad_norm": 1.6178277250459507, "learning_rate": 5.355976216315646e-06, "loss": 0.5726, "step": 4720 }, { "epoch": 0.5, "grad_norm": 1.7574847540624396, "learning_rate": 5.354216919252431e-06, "loss": 0.6531, "step": 4721 }, { "epoch": 0.5, "grad_norm": 1.6968810044230518, "learning_rate": 5.352457578113065e-06, "loss": 0.5661, "step": 4722 }, { "epoch": 0.5, "grad_norm": 1.8163782580657453, "learning_rate": 5.350698193116468e-06, "loss": 0.6064, "step": 4723 }, { "epoch": 0.5, "grad_norm": 1.7105778495299695, "learning_rate": 5.348938764481564e-06, "loss": 0.6262, "step": 4724 }, { "epoch": 0.5, "grad_norm": 1.755094563660687, "learning_rate": 5.347179292427285e-06, "loss": 0.64, "step": 4725 }, { "epoch": 0.5, "grad_norm": 1.207693534085368, "learning_rate": 5.345419777172565e-06, "loss": 0.5115, "step": 4726 }, { "epoch": 0.5, "grad_norm": 1.87011546945702, "learning_rate": 5.343660218936346e-06, "loss": 0.596, "step": 4727 }, { "epoch": 0.5, "grad_norm": 1.78987638233763, "learning_rate": 5.3419006179375745e-06, "loss": 0.5967, "step": 4728 }, { "epoch": 0.5, "grad_norm": 2.0085988791274043, "learning_rate": 5.3401409743952015e-06, "loss": 0.7108, "step": 4729 }, { "epoch": 0.5, "grad_norm": 2.0263856978989687, "learning_rate": 5.338381288528185e-06, "loss": 0.745, "step": 4730 }, { "epoch": 0.5, "grad_norm": 1.7388125922103157, "learning_rate": 5.336621560555489e-06, "loss": 0.6627, "step": 4731 }, { "epoch": 0.5, "grad_norm": 1.7057663930507574, "learning_rate": 5.3348617906960765e-06, "loss": 0.7034, "step": 4732 }, { "epoch": 0.5, "grad_norm": 1.9026570253622586, "learning_rate": 5.333101979168924e-06, "loss": 0.6643, "step": 4733 }, { "epoch": 0.5, "grad_norm": 1.7769567863774343, "learning_rate": 5.33134212619301e-06, "loss": 0.6603, "step": 4734 }, { "epoch": 0.51, "grad_norm": 1.9395338783837421, "learning_rate": 5.3295822319873145e-06, "loss": 0.6466, "step": 4735 }, { "epoch": 0.51, "grad_norm": 1.2088528534694227, "learning_rate": 5.327822296770829e-06, "loss": 0.5203, "step": 4736 }, { "epoch": 0.51, "grad_norm": 1.8546650997839496, "learning_rate": 5.326062320762545e-06, "loss": 0.6559, "step": 4737 }, { "epoch": 0.51, "grad_norm": 1.6222379696837603, "learning_rate": 5.3243023041814614e-06, "loss": 0.5352, "step": 4738 }, { "epoch": 0.51, "grad_norm": 1.7953720488057394, "learning_rate": 5.322542247246583e-06, "loss": 0.6396, "step": 4739 }, { "epoch": 0.51, "grad_norm": 1.6955207224629056, "learning_rate": 5.320782150176915e-06, "loss": 0.521, "step": 4740 }, { "epoch": 0.51, "grad_norm": 1.6136154969199399, "learning_rate": 5.319022013191475e-06, "loss": 0.6529, "step": 4741 }, { "epoch": 0.51, "grad_norm": 1.766746382152616, "learning_rate": 5.3172618365092785e-06, "loss": 0.5998, "step": 4742 }, { "epoch": 0.51, "grad_norm": 2.4151933164280837, "learning_rate": 5.31550162034935e-06, "loss": 0.6697, "step": 4743 }, { "epoch": 0.51, "grad_norm": 1.8069917500706518, "learning_rate": 5.313741364930719e-06, "loss": 0.6083, "step": 4744 }, { "epoch": 0.51, "grad_norm": 1.764751108940784, "learning_rate": 5.3119810704724185e-06, "loss": 0.6278, "step": 4745 }, { "epoch": 0.51, "grad_norm": 1.5472680111691328, "learning_rate": 5.310220737193484e-06, "loss": 0.6107, "step": 4746 }, { "epoch": 0.51, "grad_norm": 1.7364179852141997, "learning_rate": 5.308460365312961e-06, "loss": 0.6601, "step": 4747 }, { "epoch": 0.51, "grad_norm": 1.8235032169192356, "learning_rate": 5.3066999550498965e-06, "loss": 0.6521, "step": 4748 }, { "epoch": 0.51, "grad_norm": 1.7080568157873999, "learning_rate": 5.304939506623343e-06, "loss": 0.6723, "step": 4749 }, { "epoch": 0.51, "grad_norm": 1.7517299973067681, "learning_rate": 5.303179020252358e-06, "loss": 0.6109, "step": 4750 }, { "epoch": 0.51, "grad_norm": 1.6549887974057853, "learning_rate": 5.301418496156003e-06, "loss": 0.5368, "step": 4751 }, { "epoch": 0.51, "grad_norm": 1.7660454044993543, "learning_rate": 5.299657934553345e-06, "loss": 0.6124, "step": 4752 }, { "epoch": 0.51, "grad_norm": 1.7626165798877942, "learning_rate": 5.297897335663455e-06, "loss": 0.6306, "step": 4753 }, { "epoch": 0.51, "grad_norm": 1.892032744246035, "learning_rate": 5.296136699705412e-06, "loss": 0.6833, "step": 4754 }, { "epoch": 0.51, "grad_norm": 1.8854560859426845, "learning_rate": 5.29437602689829e-06, "loss": 0.6623, "step": 4755 }, { "epoch": 0.51, "grad_norm": 1.9105056090475523, "learning_rate": 5.292615317461179e-06, "loss": 0.5858, "step": 4756 }, { "epoch": 0.51, "grad_norm": 1.703471008031351, "learning_rate": 5.29085457161317e-06, "loss": 0.6508, "step": 4757 }, { "epoch": 0.51, "grad_norm": 1.9083825300852246, "learning_rate": 5.289093789573353e-06, "loss": 0.6752, "step": 4758 }, { "epoch": 0.51, "grad_norm": 1.6007928402203115, "learning_rate": 5.28733297156083e-06, "loss": 0.5879, "step": 4759 }, { "epoch": 0.51, "grad_norm": 1.7384814696436957, "learning_rate": 5.285572117794703e-06, "loss": 0.5249, "step": 4760 }, { "epoch": 0.51, "grad_norm": 2.1395072005251596, "learning_rate": 5.283811228494081e-06, "loss": 0.6015, "step": 4761 }, { "epoch": 0.51, "grad_norm": 1.2040072219845692, "learning_rate": 5.282050303878074e-06, "loss": 0.5318, "step": 4762 }, { "epoch": 0.51, "grad_norm": 1.7564298270428202, "learning_rate": 5.280289344165799e-06, "loss": 0.7377, "step": 4763 }, { "epoch": 0.51, "grad_norm": 1.7908964119712134, "learning_rate": 5.278528349576376e-06, "loss": 0.5609, "step": 4764 }, { "epoch": 0.51, "grad_norm": 1.6929215547964223, "learning_rate": 5.2767673203289335e-06, "loss": 0.5622, "step": 4765 }, { "epoch": 0.51, "grad_norm": 1.775921244200182, "learning_rate": 5.2750062566426e-06, "loss": 0.4883, "step": 4766 }, { "epoch": 0.51, "grad_norm": 1.1639431086301835, "learning_rate": 5.273245158736507e-06, "loss": 0.5342, "step": 4767 }, { "epoch": 0.51, "grad_norm": 1.8033551061177282, "learning_rate": 5.271484026829796e-06, "loss": 0.5838, "step": 4768 }, { "epoch": 0.51, "grad_norm": 1.8338618318896691, "learning_rate": 5.2697228611416065e-06, "loss": 0.6105, "step": 4769 }, { "epoch": 0.51, "grad_norm": 1.6690156578988502, "learning_rate": 5.2679616618910845e-06, "loss": 0.6266, "step": 4770 }, { "epoch": 0.51, "grad_norm": 1.6262114328272033, "learning_rate": 5.266200429297383e-06, "loss": 0.6816, "step": 4771 }, { "epoch": 0.51, "grad_norm": 2.0813115244178757, "learning_rate": 5.264439163579656e-06, "loss": 0.6168, "step": 4772 }, { "epoch": 0.51, "grad_norm": 1.7960834848891574, "learning_rate": 5.262677864957062e-06, "loss": 0.6071, "step": 4773 }, { "epoch": 0.51, "grad_norm": 2.0042475719694877, "learning_rate": 5.260916533648764e-06, "loss": 0.7131, "step": 4774 }, { "epoch": 0.51, "grad_norm": 1.1791024894749207, "learning_rate": 5.259155169873933e-06, "loss": 0.5203, "step": 4775 }, { "epoch": 0.51, "grad_norm": 1.1753404836054557, "learning_rate": 5.257393773851733e-06, "loss": 0.5356, "step": 4776 }, { "epoch": 0.51, "grad_norm": 1.73853208252105, "learning_rate": 5.255632345801345e-06, "loss": 0.7035, "step": 4777 }, { "epoch": 0.51, "grad_norm": 1.8227942144580203, "learning_rate": 5.2538708859419455e-06, "loss": 0.5627, "step": 4778 }, { "epoch": 0.51, "grad_norm": 1.793413805211116, "learning_rate": 5.252109394492718e-06, "loss": 0.6131, "step": 4779 }, { "epoch": 0.51, "grad_norm": 1.7200628925012102, "learning_rate": 5.250347871672851e-06, "loss": 0.6845, "step": 4780 }, { "epoch": 0.51, "grad_norm": 1.5413073435986442, "learning_rate": 5.248586317701532e-06, "loss": 0.5771, "step": 4781 }, { "epoch": 0.51, "grad_norm": 1.8121112599585807, "learning_rate": 5.246824732797959e-06, "loss": 0.7036, "step": 4782 }, { "epoch": 0.51, "grad_norm": 1.7695903425001451, "learning_rate": 5.24506311718133e-06, "loss": 0.6029, "step": 4783 }, { "epoch": 0.51, "grad_norm": 1.619372172176963, "learning_rate": 5.243301471070849e-06, "loss": 0.5461, "step": 4784 }, { "epoch": 0.51, "grad_norm": 1.7431434913674115, "learning_rate": 5.241539794685718e-06, "loss": 0.5881, "step": 4785 }, { "epoch": 0.51, "grad_norm": 1.8327287297886308, "learning_rate": 5.239778088245151e-06, "loss": 0.5889, "step": 4786 }, { "epoch": 0.51, "grad_norm": 1.7452640230791592, "learning_rate": 5.2380163519683605e-06, "loss": 0.5813, "step": 4787 }, { "epoch": 0.51, "grad_norm": 1.6990207608569594, "learning_rate": 5.236254586074563e-06, "loss": 0.661, "step": 4788 }, { "epoch": 0.51, "grad_norm": 1.5826759419954681, "learning_rate": 5.23449279078298e-06, "loss": 0.5306, "step": 4789 }, { "epoch": 0.51, "grad_norm": 1.7128476965881165, "learning_rate": 5.232730966312839e-06, "loss": 0.6496, "step": 4790 }, { "epoch": 0.51, "grad_norm": 1.4040380491283484, "learning_rate": 5.230969112883367e-06, "loss": 0.529, "step": 4791 }, { "epoch": 0.51, "grad_norm": 1.8211420945248729, "learning_rate": 5.229207230713793e-06, "loss": 0.5997, "step": 4792 }, { "epoch": 0.51, "grad_norm": 1.1779048861614367, "learning_rate": 5.227445320023356e-06, "loss": 0.4987, "step": 4793 }, { "epoch": 0.51, "grad_norm": 1.1597324326265885, "learning_rate": 5.225683381031296e-06, "loss": 0.5263, "step": 4794 }, { "epoch": 0.51, "grad_norm": 1.6779181580246247, "learning_rate": 5.223921413956853e-06, "loss": 0.6569, "step": 4795 }, { "epoch": 0.51, "grad_norm": 1.862885098773502, "learning_rate": 5.222159419019275e-06, "loss": 0.6922, "step": 4796 }, { "epoch": 0.51, "grad_norm": 1.5884922543385054, "learning_rate": 5.2203973964378105e-06, "loss": 0.5975, "step": 4797 }, { "epoch": 0.51, "grad_norm": 1.545461856292515, "learning_rate": 5.218635346431715e-06, "loss": 0.608, "step": 4798 }, { "epoch": 0.51, "grad_norm": 1.8808570568094567, "learning_rate": 5.216873269220242e-06, "loss": 0.6159, "step": 4799 }, { "epoch": 0.51, "grad_norm": 1.6646805083802623, "learning_rate": 5.215111165022653e-06, "loss": 0.593, "step": 4800 }, { "epoch": 0.51, "grad_norm": 1.762798472686056, "learning_rate": 5.213349034058212e-06, "loss": 0.5549, "step": 4801 }, { "epoch": 0.51, "grad_norm": 1.5981047763796448, "learning_rate": 5.211586876546183e-06, "loss": 0.5935, "step": 4802 }, { "epoch": 0.51, "grad_norm": 1.8490062630093922, "learning_rate": 5.209824692705839e-06, "loss": 0.6525, "step": 4803 }, { "epoch": 0.51, "grad_norm": 1.703555120406344, "learning_rate": 5.2080624827564515e-06, "loss": 0.6627, "step": 4804 }, { "epoch": 0.51, "grad_norm": 1.8043846865126985, "learning_rate": 5.206300246917298e-06, "loss": 0.5759, "step": 4805 }, { "epoch": 0.51, "grad_norm": 1.6925440967791638, "learning_rate": 5.204537985407657e-06, "loss": 0.5943, "step": 4806 }, { "epoch": 0.51, "grad_norm": 1.7418569216079605, "learning_rate": 5.202775698446812e-06, "loss": 0.659, "step": 4807 }, { "epoch": 0.51, "grad_norm": 1.8641460739981104, "learning_rate": 5.201013386254049e-06, "loss": 0.6083, "step": 4808 }, { "epoch": 0.51, "grad_norm": 1.6083895413372993, "learning_rate": 5.199251049048657e-06, "loss": 0.5371, "step": 4809 }, { "epoch": 0.51, "grad_norm": 1.743365164487549, "learning_rate": 5.197488687049928e-06, "loss": 0.6057, "step": 4810 }, { "epoch": 0.51, "grad_norm": 1.9653226397270323, "learning_rate": 5.195726300477159e-06, "loss": 0.5345, "step": 4811 }, { "epoch": 0.51, "grad_norm": 1.9633297507529994, "learning_rate": 5.193963889549646e-06, "loss": 0.646, "step": 4812 }, { "epoch": 0.51, "grad_norm": 1.8125118281097397, "learning_rate": 5.192201454486693e-06, "loss": 0.5932, "step": 4813 }, { "epoch": 0.51, "grad_norm": 1.754813726579974, "learning_rate": 5.190438995507602e-06, "loss": 0.6047, "step": 4814 }, { "epoch": 0.51, "grad_norm": 1.6156600565886157, "learning_rate": 5.188676512831682e-06, "loss": 0.5037, "step": 4815 }, { "epoch": 0.51, "grad_norm": 1.5637653214307936, "learning_rate": 5.1869140066782435e-06, "loss": 0.616, "step": 4816 }, { "epoch": 0.51, "grad_norm": 1.6488014041745436, "learning_rate": 5.185151477266599e-06, "loss": 0.5089, "step": 4817 }, { "epoch": 0.51, "grad_norm": 1.6746223643853793, "learning_rate": 5.183388924816065e-06, "loss": 0.6106, "step": 4818 }, { "epoch": 0.51, "grad_norm": 1.7228823473488715, "learning_rate": 5.181626349545962e-06, "loss": 0.6346, "step": 4819 }, { "epoch": 0.51, "grad_norm": 1.6999994633213065, "learning_rate": 5.179863751675609e-06, "loss": 0.6624, "step": 4820 }, { "epoch": 0.51, "grad_norm": 1.7374396090958462, "learning_rate": 5.178101131424331e-06, "loss": 0.6187, "step": 4821 }, { "epoch": 0.51, "grad_norm": 1.9389972387803154, "learning_rate": 5.176338489011458e-06, "loss": 0.644, "step": 4822 }, { "epoch": 0.51, "grad_norm": 1.704686150909849, "learning_rate": 5.174575824656319e-06, "loss": 0.559, "step": 4823 }, { "epoch": 0.51, "grad_norm": 1.2633130324610042, "learning_rate": 5.172813138578247e-06, "loss": 0.5149, "step": 4824 }, { "epoch": 0.51, "grad_norm": 1.6911634673061793, "learning_rate": 5.1710504309965795e-06, "loss": 0.6401, "step": 4825 }, { "epoch": 0.51, "grad_norm": 2.0070561446042894, "learning_rate": 5.169287702130652e-06, "loss": 0.6781, "step": 4826 }, { "epoch": 0.51, "grad_norm": 1.8492277864314601, "learning_rate": 5.167524952199807e-06, "loss": 0.6036, "step": 4827 }, { "epoch": 0.51, "grad_norm": 1.6656869076930971, "learning_rate": 5.165762181423387e-06, "loss": 0.6823, "step": 4828 }, { "epoch": 0.52, "grad_norm": 1.772300911763056, "learning_rate": 5.1639993900207385e-06, "loss": 0.6602, "step": 4829 }, { "epoch": 0.52, "grad_norm": 1.144356123954577, "learning_rate": 5.162236578211212e-06, "loss": 0.5193, "step": 4830 }, { "epoch": 0.52, "grad_norm": 2.1097179052691066, "learning_rate": 5.1604737462141564e-06, "loss": 0.6323, "step": 4831 }, { "epoch": 0.52, "grad_norm": 1.7955676603342663, "learning_rate": 5.158710894248929e-06, "loss": 0.6716, "step": 4832 }, { "epoch": 0.52, "grad_norm": 1.99770732577258, "learning_rate": 5.156948022534883e-06, "loss": 0.6228, "step": 4833 }, { "epoch": 0.52, "grad_norm": 2.5010812893033836, "learning_rate": 5.155185131291379e-06, "loss": 0.6404, "step": 4834 }, { "epoch": 0.52, "grad_norm": 1.8102504766904153, "learning_rate": 5.153422220737776e-06, "loss": 0.6605, "step": 4835 }, { "epoch": 0.52, "grad_norm": 1.6929802370889522, "learning_rate": 5.15165929109344e-06, "loss": 0.6353, "step": 4836 }, { "epoch": 0.52, "grad_norm": 1.7421711696372182, "learning_rate": 5.149896342577735e-06, "loss": 0.6596, "step": 4837 }, { "epoch": 0.52, "grad_norm": 1.8537427797769606, "learning_rate": 5.148133375410031e-06, "loss": 0.6728, "step": 4838 }, { "epoch": 0.52, "grad_norm": 1.7941151006247154, "learning_rate": 5.146370389809699e-06, "loss": 0.5829, "step": 4839 }, { "epoch": 0.52, "grad_norm": 1.6707974854622785, "learning_rate": 5.144607385996112e-06, "loss": 0.6842, "step": 4840 }, { "epoch": 0.52, "grad_norm": 1.7168833723223953, "learning_rate": 5.142844364188642e-06, "loss": 0.6529, "step": 4841 }, { "epoch": 0.52, "grad_norm": 1.1532267011055528, "learning_rate": 5.141081324606671e-06, "loss": 0.5137, "step": 4842 }, { "epoch": 0.52, "grad_norm": 1.7016973910631399, "learning_rate": 5.139318267469575e-06, "loss": 0.6149, "step": 4843 }, { "epoch": 0.52, "grad_norm": 1.8327337848205592, "learning_rate": 5.137555192996738e-06, "loss": 0.6856, "step": 4844 }, { "epoch": 0.52, "grad_norm": 1.8062589786855088, "learning_rate": 5.135792101407543e-06, "loss": 0.6362, "step": 4845 }, { "epoch": 0.52, "grad_norm": 1.8906221565767156, "learning_rate": 5.134028992921376e-06, "loss": 0.7185, "step": 4846 }, { "epoch": 0.52, "grad_norm": 1.696789291303736, "learning_rate": 5.132265867757628e-06, "loss": 0.5479, "step": 4847 }, { "epoch": 0.52, "grad_norm": 1.6701017689611881, "learning_rate": 5.130502726135686e-06, "loss": 0.6015, "step": 4848 }, { "epoch": 0.52, "grad_norm": 1.782079979930494, "learning_rate": 5.1287395682749444e-06, "loss": 0.651, "step": 4849 }, { "epoch": 0.52, "grad_norm": 1.6259434799675125, "learning_rate": 5.126976394394796e-06, "loss": 0.6031, "step": 4850 }, { "epoch": 0.52, "grad_norm": 1.6470784321079925, "learning_rate": 5.125213204714637e-06, "loss": 0.5321, "step": 4851 }, { "epoch": 0.52, "grad_norm": 1.8751961514078117, "learning_rate": 5.123449999453869e-06, "loss": 0.6138, "step": 4852 }, { "epoch": 0.52, "grad_norm": 1.6934786259670338, "learning_rate": 5.121686778831888e-06, "loss": 0.5407, "step": 4853 }, { "epoch": 0.52, "grad_norm": 1.8441862160581972, "learning_rate": 5.119923543068099e-06, "loss": 0.7413, "step": 4854 }, { "epoch": 0.52, "grad_norm": 2.0366766242233028, "learning_rate": 5.118160292381906e-06, "loss": 0.6125, "step": 4855 }, { "epoch": 0.52, "grad_norm": 1.7473528344274956, "learning_rate": 5.116397026992714e-06, "loss": 0.7257, "step": 4856 }, { "epoch": 0.52, "grad_norm": 1.6520835945129637, "learning_rate": 5.114633747119933e-06, "loss": 0.6032, "step": 4857 }, { "epoch": 0.52, "grad_norm": 1.9076811302886991, "learning_rate": 5.112870452982969e-06, "loss": 0.59, "step": 4858 }, { "epoch": 0.52, "grad_norm": 1.895783880988154, "learning_rate": 5.111107144801235e-06, "loss": 0.7141, "step": 4859 }, { "epoch": 0.52, "grad_norm": 1.7387872107815785, "learning_rate": 5.109343822794144e-06, "loss": 0.6562, "step": 4860 }, { "epoch": 0.52, "grad_norm": 1.6078891517141012, "learning_rate": 5.107580487181112e-06, "loss": 0.5118, "step": 4861 }, { "epoch": 0.52, "grad_norm": 1.7882172511188728, "learning_rate": 5.1058171381815535e-06, "loss": 0.6797, "step": 4862 }, { "epoch": 0.52, "grad_norm": 1.8010903136582344, "learning_rate": 5.104053776014889e-06, "loss": 0.5042, "step": 4863 }, { "epoch": 0.52, "grad_norm": 1.782270326193123, "learning_rate": 5.102290400900537e-06, "loss": 0.53, "step": 4864 }, { "epoch": 0.52, "grad_norm": 1.7565829131880366, "learning_rate": 5.100527013057919e-06, "loss": 0.6428, "step": 4865 }, { "epoch": 0.52, "grad_norm": 1.7185489156480382, "learning_rate": 5.098763612706457e-06, "loss": 0.543, "step": 4866 }, { "epoch": 0.52, "grad_norm": 1.6739570839445896, "learning_rate": 5.097000200065577e-06, "loss": 0.5774, "step": 4867 }, { "epoch": 0.52, "grad_norm": 1.7038323082922433, "learning_rate": 5.095236775354706e-06, "loss": 0.6059, "step": 4868 }, { "epoch": 0.52, "grad_norm": 1.920301528519015, "learning_rate": 5.09347333879327e-06, "loss": 0.6663, "step": 4869 }, { "epoch": 0.52, "grad_norm": 1.6673656399175825, "learning_rate": 5.091709890600698e-06, "loss": 0.6027, "step": 4870 }, { "epoch": 0.52, "grad_norm": 1.399044955891052, "learning_rate": 5.089946430996421e-06, "loss": 0.5458, "step": 4871 }, { "epoch": 0.52, "grad_norm": 1.6804437320348387, "learning_rate": 5.088182960199871e-06, "loss": 0.5958, "step": 4872 }, { "epoch": 0.52, "grad_norm": 1.6646459010627062, "learning_rate": 5.086419478430482e-06, "loss": 0.6404, "step": 4873 }, { "epoch": 0.52, "grad_norm": 1.7440599625375806, "learning_rate": 5.084655985907689e-06, "loss": 0.6333, "step": 4874 }, { "epoch": 0.52, "grad_norm": 1.639098934913478, "learning_rate": 5.082892482850925e-06, "loss": 0.6542, "step": 4875 }, { "epoch": 0.52, "grad_norm": 1.9778990471173632, "learning_rate": 5.0811289694796314e-06, "loss": 0.5698, "step": 4876 }, { "epoch": 0.52, "grad_norm": 1.9616571367693154, "learning_rate": 5.079365446013243e-06, "loss": 0.6594, "step": 4877 }, { "epoch": 0.52, "grad_norm": 1.7624226710848125, "learning_rate": 5.077601912671202e-06, "loss": 0.5553, "step": 4878 }, { "epoch": 0.52, "grad_norm": 1.9032407663154443, "learning_rate": 5.075838369672949e-06, "loss": 0.5708, "step": 4879 }, { "epoch": 0.52, "grad_norm": 1.6539571919991256, "learning_rate": 5.074074817237928e-06, "loss": 0.6285, "step": 4880 }, { "epoch": 0.52, "grad_norm": 1.5928792497681006, "learning_rate": 5.0723112555855805e-06, "loss": 0.4804, "step": 4881 }, { "epoch": 0.52, "grad_norm": 1.6298772495460887, "learning_rate": 5.070547684935351e-06, "loss": 0.6203, "step": 4882 }, { "epoch": 0.52, "grad_norm": 1.5510038727740134, "learning_rate": 5.0687841055066865e-06, "loss": 0.6485, "step": 4883 }, { "epoch": 0.52, "grad_norm": 1.6999661743786696, "learning_rate": 5.0670205175190336e-06, "loss": 0.6081, "step": 4884 }, { "epoch": 0.52, "grad_norm": 1.7110360861300544, "learning_rate": 5.065256921191839e-06, "loss": 0.59, "step": 4885 }, { "epoch": 0.52, "grad_norm": 1.6239762042270507, "learning_rate": 5.063493316744552e-06, "loss": 0.5779, "step": 4886 }, { "epoch": 0.52, "grad_norm": 1.7409425014924393, "learning_rate": 5.061729704396624e-06, "loss": 0.5453, "step": 4887 }, { "epoch": 0.52, "grad_norm": 2.040818053601734, "learning_rate": 5.059966084367506e-06, "loss": 0.7114, "step": 4888 }, { "epoch": 0.52, "grad_norm": 1.696307466406882, "learning_rate": 5.0582024568766475e-06, "loss": 0.578, "step": 4889 }, { "epoch": 0.52, "grad_norm": 1.785562745922725, "learning_rate": 5.056438822143504e-06, "loss": 0.6612, "step": 4890 }, { "epoch": 0.52, "grad_norm": 1.7343972774115786, "learning_rate": 5.05467518038753e-06, "loss": 0.6733, "step": 4891 }, { "epoch": 0.52, "grad_norm": 1.953056323915455, "learning_rate": 5.052911531828175e-06, "loss": 0.6495, "step": 4892 }, { "epoch": 0.52, "grad_norm": 1.7054374791839881, "learning_rate": 5.051147876684898e-06, "loss": 0.689, "step": 4893 }, { "epoch": 0.52, "grad_norm": 2.0440774496064895, "learning_rate": 5.0493842151771556e-06, "loss": 0.591, "step": 4894 }, { "epoch": 0.52, "grad_norm": 1.7950125216648334, "learning_rate": 5.047620547524405e-06, "loss": 0.5775, "step": 4895 }, { "epoch": 0.52, "grad_norm": 1.7432111451916386, "learning_rate": 5.045856873946102e-06, "loss": 0.622, "step": 4896 }, { "epoch": 0.52, "grad_norm": 1.9274608974426772, "learning_rate": 5.044093194661709e-06, "loss": 0.6369, "step": 4897 }, { "epoch": 0.52, "grad_norm": 1.7713904797946576, "learning_rate": 5.042329509890681e-06, "loss": 0.6146, "step": 4898 }, { "epoch": 0.52, "grad_norm": 1.5763550874609564, "learning_rate": 5.040565819852481e-06, "loss": 0.6018, "step": 4899 }, { "epoch": 0.52, "grad_norm": 1.8228408226184392, "learning_rate": 5.038802124766568e-06, "loss": 0.6856, "step": 4900 }, { "epoch": 0.52, "grad_norm": 1.7681477270175672, "learning_rate": 5.037038424852403e-06, "loss": 0.616, "step": 4901 }, { "epoch": 0.52, "grad_norm": 1.7655140256435349, "learning_rate": 5.035274720329448e-06, "loss": 0.5645, "step": 4902 }, { "epoch": 0.52, "grad_norm": 1.3495510122432832, "learning_rate": 5.033511011417166e-06, "loss": 0.5492, "step": 4903 }, { "epoch": 0.52, "grad_norm": 1.5955195700299516, "learning_rate": 5.03174729833502e-06, "loss": 0.6011, "step": 4904 }, { "epoch": 0.52, "grad_norm": 1.86227702400797, "learning_rate": 5.029983581302475e-06, "loss": 0.6367, "step": 4905 }, { "epoch": 0.52, "grad_norm": 1.8184203131790995, "learning_rate": 5.028219860538993e-06, "loss": 0.6268, "step": 4906 }, { "epoch": 0.52, "grad_norm": 1.637358853078508, "learning_rate": 5.02645613626404e-06, "loss": 0.6262, "step": 4907 }, { "epoch": 0.52, "grad_norm": 1.9457791871274033, "learning_rate": 5.0246924086970775e-06, "loss": 0.6053, "step": 4908 }, { "epoch": 0.52, "grad_norm": 1.7726256668613618, "learning_rate": 5.0229286780575735e-06, "loss": 0.6008, "step": 4909 }, { "epoch": 0.52, "grad_norm": 2.0555857730626967, "learning_rate": 5.021164944564993e-06, "loss": 0.5894, "step": 4910 }, { "epoch": 0.52, "grad_norm": 1.9101527502444853, "learning_rate": 5.019401208438804e-06, "loss": 0.6996, "step": 4911 }, { "epoch": 0.52, "grad_norm": 2.045555592456782, "learning_rate": 5.01763746989847e-06, "loss": 0.6995, "step": 4912 }, { "epoch": 0.52, "grad_norm": 1.8738738037053075, "learning_rate": 5.015873729163458e-06, "loss": 0.5477, "step": 4913 }, { "epoch": 0.52, "grad_norm": 1.7945746711484858, "learning_rate": 5.0141099864532384e-06, "loss": 0.6167, "step": 4914 }, { "epoch": 0.52, "grad_norm": 1.5593359093748116, "learning_rate": 5.012346241987273e-06, "loss": 0.5474, "step": 4915 }, { "epoch": 0.52, "grad_norm": 1.7533140196041246, "learning_rate": 5.010582495985034e-06, "loss": 0.6348, "step": 4916 }, { "epoch": 0.52, "grad_norm": 1.7026158496135197, "learning_rate": 5.008818748665986e-06, "loss": 0.6487, "step": 4917 }, { "epoch": 0.52, "grad_norm": 1.8598235097559697, "learning_rate": 5.007055000249598e-06, "loss": 0.6246, "step": 4918 }, { "epoch": 0.52, "grad_norm": 1.5734772527963397, "learning_rate": 5.005291250955337e-06, "loss": 0.626, "step": 4919 }, { "epoch": 0.52, "grad_norm": 1.9537273661940064, "learning_rate": 5.003527501002672e-06, "loss": 0.6665, "step": 4920 }, { "epoch": 0.52, "grad_norm": 1.8903951469178109, "learning_rate": 5.001763750611071e-06, "loss": 0.6944, "step": 4921 }, { "epoch": 0.53, "grad_norm": 1.4757870359682566, "learning_rate": 5e-06, "loss": 0.5408, "step": 4922 }, { "epoch": 0.53, "grad_norm": 1.756317744914859, "learning_rate": 4.998236249388932e-06, "loss": 0.5805, "step": 4923 }, { "epoch": 0.53, "grad_norm": 1.185497414293789, "learning_rate": 4.99647249899733e-06, "loss": 0.5166, "step": 4924 }, { "epoch": 0.53, "grad_norm": 1.6595953208181735, "learning_rate": 4.994708749044665e-06, "loss": 0.5921, "step": 4925 }, { "epoch": 0.53, "grad_norm": 1.5215722308889148, "learning_rate": 4.992944999750405e-06, "loss": 0.6088, "step": 4926 }, { "epoch": 0.53, "grad_norm": 1.6607167456336593, "learning_rate": 4.991181251334016e-06, "loss": 0.6453, "step": 4927 }, { "epoch": 0.53, "grad_norm": 1.8586505250739895, "learning_rate": 4.989417504014967e-06, "loss": 0.5057, "step": 4928 }, { "epoch": 0.53, "grad_norm": 1.670655249230233, "learning_rate": 4.987653758012727e-06, "loss": 0.4972, "step": 4929 }, { "epoch": 0.53, "grad_norm": 1.6800756479979757, "learning_rate": 4.985890013546765e-06, "loss": 0.5423, "step": 4930 }, { "epoch": 0.53, "grad_norm": 1.8228010042290075, "learning_rate": 4.984126270836543e-06, "loss": 0.5948, "step": 4931 }, { "epoch": 0.53, "grad_norm": 2.080007145142493, "learning_rate": 4.982362530101532e-06, "loss": 0.7355, "step": 4932 }, { "epoch": 0.53, "grad_norm": 1.8818226510135945, "learning_rate": 4.980598791561199e-06, "loss": 0.6009, "step": 4933 }, { "epoch": 0.53, "grad_norm": 2.1057609202396237, "learning_rate": 4.978835055435008e-06, "loss": 0.623, "step": 4934 }, { "epoch": 0.53, "grad_norm": 1.7422722954793182, "learning_rate": 4.977071321942427e-06, "loss": 0.6845, "step": 4935 }, { "epoch": 0.53, "grad_norm": 1.730135947382159, "learning_rate": 4.975307591302924e-06, "loss": 0.7298, "step": 4936 }, { "epoch": 0.53, "grad_norm": 1.8446515381801305, "learning_rate": 4.973543863735963e-06, "loss": 0.5999, "step": 4937 }, { "epoch": 0.53, "grad_norm": 1.8373115732033907, "learning_rate": 4.9717801394610075e-06, "loss": 0.6007, "step": 4938 }, { "epoch": 0.53, "grad_norm": 1.829557460995631, "learning_rate": 4.970016418697526e-06, "loss": 0.5181, "step": 4939 }, { "epoch": 0.53, "grad_norm": 1.9458659097449156, "learning_rate": 4.96825270166498e-06, "loss": 0.5249, "step": 4940 }, { "epoch": 0.53, "grad_norm": 2.0601001809250454, "learning_rate": 4.966488988582834e-06, "loss": 0.6148, "step": 4941 }, { "epoch": 0.53, "grad_norm": 1.799155418921616, "learning_rate": 4.964725279670553e-06, "loss": 0.6987, "step": 4942 }, { "epoch": 0.53, "grad_norm": 1.7276379487466051, "learning_rate": 4.962961575147599e-06, "loss": 0.5611, "step": 4943 }, { "epoch": 0.53, "grad_norm": 1.8833459283320346, "learning_rate": 4.9611978752334356e-06, "loss": 0.659, "step": 4944 }, { "epoch": 0.53, "grad_norm": 1.687707966813552, "learning_rate": 4.959434180147522e-06, "loss": 0.5451, "step": 4945 }, { "epoch": 0.53, "grad_norm": 1.5891224002786948, "learning_rate": 4.957670490109321e-06, "loss": 0.5716, "step": 4946 }, { "epoch": 0.53, "grad_norm": 1.8444244723451688, "learning_rate": 4.955906805338293e-06, "loss": 0.6711, "step": 4947 }, { "epoch": 0.53, "grad_norm": 1.8201122908263936, "learning_rate": 4.954143126053899e-06, "loss": 0.6929, "step": 4948 }, { "epoch": 0.53, "grad_norm": 1.2330567869883235, "learning_rate": 4.952379452475597e-06, "loss": 0.5095, "step": 4949 }, { "epoch": 0.53, "grad_norm": 1.8830757411366033, "learning_rate": 4.950615784822846e-06, "loss": 0.6041, "step": 4950 }, { "epoch": 0.53, "grad_norm": 1.8449105986883207, "learning_rate": 4.9488521233151024e-06, "loss": 0.5441, "step": 4951 }, { "epoch": 0.53, "grad_norm": 1.2518977091007786, "learning_rate": 4.947088468171826e-06, "loss": 0.5398, "step": 4952 }, { "epoch": 0.53, "grad_norm": 1.6623236511902628, "learning_rate": 4.945324819612473e-06, "loss": 0.5555, "step": 4953 }, { "epoch": 0.53, "grad_norm": 1.7352170724656664, "learning_rate": 4.9435611778564965e-06, "loss": 0.6208, "step": 4954 }, { "epoch": 0.53, "grad_norm": 1.7787651983049662, "learning_rate": 4.9417975431233525e-06, "loss": 0.6943, "step": 4955 }, { "epoch": 0.53, "grad_norm": 2.056661230431452, "learning_rate": 4.940033915632495e-06, "loss": 0.683, "step": 4956 }, { "epoch": 0.53, "grad_norm": 1.7922178809908635, "learning_rate": 4.9382702956033764e-06, "loss": 0.5723, "step": 4957 }, { "epoch": 0.53, "grad_norm": 1.5651793397518603, "learning_rate": 4.936506683255449e-06, "loss": 0.6174, "step": 4958 }, { "epoch": 0.53, "grad_norm": 1.8229100549005854, "learning_rate": 4.934743078808164e-06, "loss": 0.6176, "step": 4959 }, { "epoch": 0.53, "grad_norm": 1.86091684193845, "learning_rate": 4.932979482480969e-06, "loss": 0.6401, "step": 4960 }, { "epoch": 0.53, "grad_norm": 2.085430354510989, "learning_rate": 4.931215894493315e-06, "loss": 0.6026, "step": 4961 }, { "epoch": 0.53, "grad_norm": 1.319782600407587, "learning_rate": 4.9294523150646505e-06, "loss": 0.503, "step": 4962 }, { "epoch": 0.53, "grad_norm": 1.921502267649505, "learning_rate": 4.927688744414422e-06, "loss": 0.6892, "step": 4963 }, { "epoch": 0.53, "grad_norm": 1.2410251901994138, "learning_rate": 4.925925182762074e-06, "loss": 0.5244, "step": 4964 }, { "epoch": 0.53, "grad_norm": 1.1501521965026846, "learning_rate": 4.9241616303270514e-06, "loss": 0.5412, "step": 4965 }, { "epoch": 0.53, "grad_norm": 1.686326054644049, "learning_rate": 4.922398087328799e-06, "loss": 0.6221, "step": 4966 }, { "epoch": 0.53, "grad_norm": 1.6897582212000708, "learning_rate": 4.920634553986759e-06, "loss": 0.5773, "step": 4967 }, { "epoch": 0.53, "grad_norm": 1.813768620827669, "learning_rate": 4.91887103052037e-06, "loss": 0.614, "step": 4968 }, { "epoch": 0.53, "grad_norm": 1.6245621480189676, "learning_rate": 4.917107517149076e-06, "loss": 0.6011, "step": 4969 }, { "epoch": 0.53, "grad_norm": 1.7636036014943586, "learning_rate": 4.915344014092312e-06, "loss": 0.6545, "step": 4970 }, { "epoch": 0.53, "grad_norm": 1.928236234432107, "learning_rate": 4.9135805215695185e-06, "loss": 0.6555, "step": 4971 }, { "epoch": 0.53, "grad_norm": 1.9731706445951174, "learning_rate": 4.911817039800128e-06, "loss": 0.7798, "step": 4972 }, { "epoch": 0.53, "grad_norm": 1.843047305267832, "learning_rate": 4.9100535690035815e-06, "loss": 0.5855, "step": 4973 }, { "epoch": 0.53, "grad_norm": 1.8253056366182931, "learning_rate": 4.908290109399304e-06, "loss": 0.526, "step": 4974 }, { "epoch": 0.53, "grad_norm": 1.6563384756975883, "learning_rate": 4.9065266612067315e-06, "loss": 0.5672, "step": 4975 }, { "epoch": 0.53, "grad_norm": 1.7605010647411607, "learning_rate": 4.9047632246452956e-06, "loss": 0.5943, "step": 4976 }, { "epoch": 0.53, "grad_norm": 1.9880170153242223, "learning_rate": 4.902999799934424e-06, "loss": 0.643, "step": 4977 }, { "epoch": 0.53, "grad_norm": 1.2983897868986833, "learning_rate": 4.901236387293544e-06, "loss": 0.5404, "step": 4978 }, { "epoch": 0.53, "grad_norm": 1.826497433664613, "learning_rate": 4.899472986942083e-06, "loss": 0.7917, "step": 4979 }, { "epoch": 0.53, "grad_norm": 1.939801924365392, "learning_rate": 4.897709599099465e-06, "loss": 0.5143, "step": 4980 }, { "epoch": 0.53, "grad_norm": 1.8543966946516515, "learning_rate": 4.8959462239851125e-06, "loss": 0.5641, "step": 4981 }, { "epoch": 0.53, "grad_norm": 1.844069669298942, "learning_rate": 4.894182861818447e-06, "loss": 0.7168, "step": 4982 }, { "epoch": 0.53, "grad_norm": 1.7216227341906174, "learning_rate": 4.89241951281889e-06, "loss": 0.6312, "step": 4983 }, { "epoch": 0.53, "grad_norm": 1.784707629218367, "learning_rate": 4.890656177205857e-06, "loss": 0.6299, "step": 4984 }, { "epoch": 0.53, "grad_norm": 1.3489521513257767, "learning_rate": 4.8888928551987655e-06, "loss": 0.5323, "step": 4985 }, { "epoch": 0.53, "grad_norm": 1.9240789146955959, "learning_rate": 4.887129547017032e-06, "loss": 0.6071, "step": 4986 }, { "epoch": 0.53, "grad_norm": 1.2341406195068836, "learning_rate": 4.8853662528800704e-06, "loss": 0.528, "step": 4987 }, { "epoch": 0.53, "grad_norm": 1.8092206071341257, "learning_rate": 4.883602973007288e-06, "loss": 0.5956, "step": 4988 }, { "epoch": 0.53, "grad_norm": 1.7607699185918453, "learning_rate": 4.881839707618096e-06, "loss": 0.6264, "step": 4989 }, { "epoch": 0.53, "grad_norm": 1.7108911247746603, "learning_rate": 4.8800764569319025e-06, "loss": 0.588, "step": 4990 }, { "epoch": 0.53, "grad_norm": 1.818558857808575, "learning_rate": 4.878313221168113e-06, "loss": 0.6478, "step": 4991 }, { "epoch": 0.53, "grad_norm": 1.8739659510339608, "learning_rate": 4.876550000546133e-06, "loss": 0.5889, "step": 4992 }, { "epoch": 0.53, "grad_norm": 1.5525453540687864, "learning_rate": 4.874786795285364e-06, "loss": 0.5124, "step": 4993 }, { "epoch": 0.53, "grad_norm": 1.669867545442321, "learning_rate": 4.873023605605206e-06, "loss": 0.6368, "step": 4994 }, { "epoch": 0.53, "grad_norm": 1.6885512239586775, "learning_rate": 4.871260431725058e-06, "loss": 0.6333, "step": 4995 }, { "epoch": 0.53, "grad_norm": 1.5709921025912157, "learning_rate": 4.869497273864315e-06, "loss": 0.5699, "step": 4996 }, { "epoch": 0.53, "grad_norm": 1.7204180217780007, "learning_rate": 4.867734132242373e-06, "loss": 0.6264, "step": 4997 }, { "epoch": 0.53, "grad_norm": 1.7121333401288188, "learning_rate": 4.865971007078624e-06, "loss": 0.5534, "step": 4998 }, { "epoch": 0.53, "grad_norm": 1.6311501271673154, "learning_rate": 4.8642078985924585e-06, "loss": 0.5227, "step": 4999 }, { "epoch": 0.53, "grad_norm": 1.5929759196173734, "learning_rate": 4.8624448070032634e-06, "loss": 0.6264, "step": 5000 }, { "epoch": 0.53, "grad_norm": 1.877226512873422, "learning_rate": 4.860681732530425e-06, "loss": 0.5692, "step": 5001 }, { "epoch": 0.53, "grad_norm": 1.9260475900192555, "learning_rate": 4.858918675393332e-06, "loss": 0.5492, "step": 5002 }, { "epoch": 0.53, "grad_norm": 1.6593665351596478, "learning_rate": 4.85715563581136e-06, "loss": 0.5495, "step": 5003 }, { "epoch": 0.53, "grad_norm": 1.9016328304189616, "learning_rate": 4.855392614003891e-06, "loss": 0.6822, "step": 5004 }, { "epoch": 0.53, "grad_norm": 1.625653764299911, "learning_rate": 4.853629610190302e-06, "loss": 0.5684, "step": 5005 }, { "epoch": 0.53, "grad_norm": 1.819485571051732, "learning_rate": 4.8518666245899704e-06, "loss": 0.6447, "step": 5006 }, { "epoch": 0.53, "grad_norm": 1.7885064822978551, "learning_rate": 4.850103657422267e-06, "loss": 0.6114, "step": 5007 }, { "epoch": 0.53, "grad_norm": 1.8128798724676598, "learning_rate": 4.848340708906561e-06, "loss": 0.5918, "step": 5008 }, { "epoch": 0.53, "grad_norm": 2.0765303965264468, "learning_rate": 4.846577779262225e-06, "loss": 0.6721, "step": 5009 }, { "epoch": 0.53, "grad_norm": 1.7095764286614288, "learning_rate": 4.844814868708623e-06, "loss": 0.545, "step": 5010 }, { "epoch": 0.53, "grad_norm": 1.5210168636367916, "learning_rate": 4.843051977465118e-06, "loss": 0.633, "step": 5011 }, { "epoch": 0.53, "grad_norm": 1.894587140953683, "learning_rate": 4.841289105751072e-06, "loss": 0.6698, "step": 5012 }, { "epoch": 0.53, "grad_norm": 2.3075047658146848, "learning_rate": 4.8395262537858435e-06, "loss": 0.6424, "step": 5013 }, { "epoch": 0.53, "grad_norm": 1.7501434166419398, "learning_rate": 4.837763421788788e-06, "loss": 0.6728, "step": 5014 }, { "epoch": 0.53, "grad_norm": 1.8809269177334753, "learning_rate": 4.836000609979262e-06, "loss": 0.6422, "step": 5015 }, { "epoch": 0.54, "grad_norm": 1.9942394858859493, "learning_rate": 4.834237818576615e-06, "loss": 0.7254, "step": 5016 }, { "epoch": 0.54, "grad_norm": 1.8923902119772482, "learning_rate": 4.832475047800196e-06, "loss": 0.64, "step": 5017 }, { "epoch": 0.54, "grad_norm": 1.7624979057904626, "learning_rate": 4.830712297869351e-06, "loss": 0.508, "step": 5018 }, { "epoch": 0.54, "grad_norm": 1.9580544971019491, "learning_rate": 4.828949569003423e-06, "loss": 0.6125, "step": 5019 }, { "epoch": 0.54, "grad_norm": 1.6289555981146508, "learning_rate": 4.827186861421754e-06, "loss": 0.6138, "step": 5020 }, { "epoch": 0.54, "grad_norm": 1.7259473978277828, "learning_rate": 4.825424175343682e-06, "loss": 0.5921, "step": 5021 }, { "epoch": 0.54, "grad_norm": 2.0077627747657814, "learning_rate": 4.823661510988543e-06, "loss": 0.6284, "step": 5022 }, { "epoch": 0.54, "grad_norm": 1.7244928899265008, "learning_rate": 4.821898868575671e-06, "loss": 0.6106, "step": 5023 }, { "epoch": 0.54, "grad_norm": 2.1118748064666675, "learning_rate": 4.820136248324393e-06, "loss": 0.5954, "step": 5024 }, { "epoch": 0.54, "grad_norm": 1.8076550268217773, "learning_rate": 4.81837365045404e-06, "loss": 0.5195, "step": 5025 }, { "epoch": 0.54, "grad_norm": 1.808519622932098, "learning_rate": 4.816611075183936e-06, "loss": 0.6723, "step": 5026 }, { "epoch": 0.54, "grad_norm": 1.575521293062853, "learning_rate": 4.8148485227334014e-06, "loss": 0.6114, "step": 5027 }, { "epoch": 0.54, "grad_norm": 1.8319223447313917, "learning_rate": 4.813085993321757e-06, "loss": 0.6105, "step": 5028 }, { "epoch": 0.54, "grad_norm": 2.011509194401781, "learning_rate": 4.811323487168318e-06, "loss": 0.6217, "step": 5029 }, { "epoch": 0.54, "grad_norm": 1.727162241291141, "learning_rate": 4.809561004492401e-06, "loss": 0.628, "step": 5030 }, { "epoch": 0.54, "grad_norm": 1.6758990138538739, "learning_rate": 4.807798545513309e-06, "loss": 0.5308, "step": 5031 }, { "epoch": 0.54, "grad_norm": 1.538817821875087, "learning_rate": 4.806036110450356e-06, "loss": 0.5329, "step": 5032 }, { "epoch": 0.54, "grad_norm": 1.7950048289685407, "learning_rate": 4.804273699522843e-06, "loss": 0.6224, "step": 5033 }, { "epoch": 0.54, "grad_norm": 1.5364862679597877, "learning_rate": 4.802511312950073e-06, "loss": 0.5334, "step": 5034 }, { "epoch": 0.54, "grad_norm": 1.7652187437570999, "learning_rate": 4.800748950951345e-06, "loss": 0.6111, "step": 5035 }, { "epoch": 0.54, "grad_norm": 1.7313360599849126, "learning_rate": 4.798986613745953e-06, "loss": 0.6779, "step": 5036 }, { "epoch": 0.54, "grad_norm": 1.9468007812859924, "learning_rate": 4.79722430155319e-06, "loss": 0.749, "step": 5037 }, { "epoch": 0.54, "grad_norm": 1.70442145934014, "learning_rate": 4.795462014592345e-06, "loss": 0.5362, "step": 5038 }, { "epoch": 0.54, "grad_norm": 1.8113159541697532, "learning_rate": 4.793699753082704e-06, "loss": 0.6423, "step": 5039 }, { "epoch": 0.54, "grad_norm": 1.9883421304451705, "learning_rate": 4.791937517243549e-06, "loss": 0.6653, "step": 5040 }, { "epoch": 0.54, "grad_norm": 1.6275828627808315, "learning_rate": 4.790175307294162e-06, "loss": 0.507, "step": 5041 }, { "epoch": 0.54, "grad_norm": 1.686220789989686, "learning_rate": 4.7884131234538166e-06, "loss": 0.6403, "step": 5042 }, { "epoch": 0.54, "grad_norm": 1.8484826921680366, "learning_rate": 4.786650965941789e-06, "loss": 0.6231, "step": 5043 }, { "epoch": 0.54, "grad_norm": 1.7506165808805152, "learning_rate": 4.784888834977347e-06, "loss": 0.554, "step": 5044 }, { "epoch": 0.54, "grad_norm": 1.8373604344307124, "learning_rate": 4.783126730779761e-06, "loss": 0.5035, "step": 5045 }, { "epoch": 0.54, "grad_norm": 1.6883407008905793, "learning_rate": 4.781364653568288e-06, "loss": 0.6569, "step": 5046 }, { "epoch": 0.54, "grad_norm": 2.2918902806787433, "learning_rate": 4.77960260356219e-06, "loss": 0.6472, "step": 5047 }, { "epoch": 0.54, "grad_norm": 1.1720443340513116, "learning_rate": 4.777840580980726e-06, "loss": 0.5331, "step": 5048 }, { "epoch": 0.54, "grad_norm": 1.7762089456036376, "learning_rate": 4.776078586043148e-06, "loss": 0.5921, "step": 5049 }, { "epoch": 0.54, "grad_norm": 1.6867057951224096, "learning_rate": 4.774316618968706e-06, "loss": 0.6426, "step": 5050 }, { "epoch": 0.54, "grad_norm": 1.727051143685436, "learning_rate": 4.7725546799766445e-06, "loss": 0.6571, "step": 5051 }, { "epoch": 0.54, "grad_norm": 1.7985740241326813, "learning_rate": 4.770792769286209e-06, "loss": 0.6634, "step": 5052 }, { "epoch": 0.54, "grad_norm": 1.972100865868365, "learning_rate": 4.769030887116636e-06, "loss": 0.6726, "step": 5053 }, { "epoch": 0.54, "grad_norm": 1.649820235643192, "learning_rate": 4.767269033687162e-06, "loss": 0.5394, "step": 5054 }, { "epoch": 0.54, "grad_norm": 1.1696458569076016, "learning_rate": 4.765507209217021e-06, "loss": 0.5119, "step": 5055 }, { "epoch": 0.54, "grad_norm": 1.6386297251122983, "learning_rate": 4.763745413925439e-06, "loss": 0.5821, "step": 5056 }, { "epoch": 0.54, "grad_norm": 1.732276741652737, "learning_rate": 4.761983648031642e-06, "loss": 0.5307, "step": 5057 }, { "epoch": 0.54, "grad_norm": 1.8315866362113982, "learning_rate": 4.76022191175485e-06, "loss": 0.7316, "step": 5058 }, { "epoch": 0.54, "grad_norm": 1.6646357610912492, "learning_rate": 4.758460205314285e-06, "loss": 0.5296, "step": 5059 }, { "epoch": 0.54, "grad_norm": 2.002993108895214, "learning_rate": 4.756698528929155e-06, "loss": 0.641, "step": 5060 }, { "epoch": 0.54, "grad_norm": 2.0383907830493904, "learning_rate": 4.7549368828186715e-06, "loss": 0.6932, "step": 5061 }, { "epoch": 0.54, "grad_norm": 1.9368805977876313, "learning_rate": 4.753175267202043e-06, "loss": 0.6128, "step": 5062 }, { "epoch": 0.54, "grad_norm": 1.701484952604979, "learning_rate": 4.75141368229847e-06, "loss": 0.5496, "step": 5063 }, { "epoch": 0.54, "grad_norm": 1.771043990364098, "learning_rate": 4.749652128327152e-06, "loss": 0.5954, "step": 5064 }, { "epoch": 0.54, "grad_norm": 1.5690871780809355, "learning_rate": 4.747890605507283e-06, "loss": 0.576, "step": 5065 }, { "epoch": 0.54, "grad_norm": 1.7609653549902708, "learning_rate": 4.746129114058055e-06, "loss": 0.5782, "step": 5066 }, { "epoch": 0.54, "grad_norm": 2.1132557414762947, "learning_rate": 4.744367654198656e-06, "loss": 0.6741, "step": 5067 }, { "epoch": 0.54, "grad_norm": 1.7249787003494017, "learning_rate": 4.742606226148268e-06, "loss": 0.5339, "step": 5068 }, { "epoch": 0.54, "grad_norm": 1.2003062991314184, "learning_rate": 4.740844830126069e-06, "loss": 0.5091, "step": 5069 }, { "epoch": 0.54, "grad_norm": 1.7636210372522487, "learning_rate": 4.739083466351235e-06, "loss": 0.6016, "step": 5070 }, { "epoch": 0.54, "grad_norm": 1.9043728155353807, "learning_rate": 4.7373221350429385e-06, "loss": 0.65, "step": 5071 }, { "epoch": 0.54, "grad_norm": 2.0064567464790044, "learning_rate": 4.735560836420345e-06, "loss": 0.6451, "step": 5072 }, { "epoch": 0.54, "grad_norm": 1.7159044164092334, "learning_rate": 4.733799570702619e-06, "loss": 0.5704, "step": 5073 }, { "epoch": 0.54, "grad_norm": 1.7555244061356479, "learning_rate": 4.732038338108917e-06, "loss": 0.5721, "step": 5074 }, { "epoch": 0.54, "grad_norm": 1.6238808626186374, "learning_rate": 4.730277138858397e-06, "loss": 0.5922, "step": 5075 }, { "epoch": 0.54, "grad_norm": 1.843219857212463, "learning_rate": 4.728515973170207e-06, "loss": 0.5824, "step": 5076 }, { "epoch": 0.54, "grad_norm": 1.746599074779039, "learning_rate": 4.726754841263494e-06, "loss": 0.6482, "step": 5077 }, { "epoch": 0.54, "grad_norm": 1.6784050341606225, "learning_rate": 4.724993743357402e-06, "loss": 0.6116, "step": 5078 }, { "epoch": 0.54, "grad_norm": 1.567548330227429, "learning_rate": 4.723232679671067e-06, "loss": 0.5878, "step": 5079 }, { "epoch": 0.54, "grad_norm": 1.2196200588644053, "learning_rate": 4.721471650423625e-06, "loss": 0.504, "step": 5080 }, { "epoch": 0.54, "grad_norm": 1.8026522129302796, "learning_rate": 4.719710655834203e-06, "loss": 0.5824, "step": 5081 }, { "epoch": 0.54, "grad_norm": 1.6752358669844671, "learning_rate": 4.717949696121928e-06, "loss": 0.51, "step": 5082 }, { "epoch": 0.54, "grad_norm": 2.3335083007979103, "learning_rate": 4.716188771505921e-06, "loss": 0.6396, "step": 5083 }, { "epoch": 0.54, "grad_norm": 1.7017823707612991, "learning_rate": 4.714427882205297e-06, "loss": 0.6153, "step": 5084 }, { "epoch": 0.54, "grad_norm": 1.0943163250490178, "learning_rate": 4.7126670284391705e-06, "loss": 0.5119, "step": 5085 }, { "epoch": 0.54, "grad_norm": 1.9388554882310325, "learning_rate": 4.710906210426647e-06, "loss": 0.6366, "step": 5086 }, { "epoch": 0.54, "grad_norm": 2.0153946354553245, "learning_rate": 4.709145428386831e-06, "loss": 0.6882, "step": 5087 }, { "epoch": 0.54, "grad_norm": 1.5878338035441604, "learning_rate": 4.7073846825388215e-06, "loss": 0.5699, "step": 5088 }, { "epoch": 0.54, "grad_norm": 1.8466253711158587, "learning_rate": 4.705623973101711e-06, "loss": 0.6293, "step": 5089 }, { "epoch": 0.54, "grad_norm": 1.7958375149023666, "learning_rate": 4.703863300294591e-06, "loss": 0.5665, "step": 5090 }, { "epoch": 0.54, "grad_norm": 1.1508410747540376, "learning_rate": 4.7021026643365454e-06, "loss": 0.5076, "step": 5091 }, { "epoch": 0.54, "grad_norm": 1.8118978197281823, "learning_rate": 4.700342065446657e-06, "loss": 0.5948, "step": 5092 }, { "epoch": 0.54, "grad_norm": 1.6800408092431596, "learning_rate": 4.698581503843999e-06, "loss": 0.6126, "step": 5093 }, { "epoch": 0.54, "grad_norm": 1.8544383810363247, "learning_rate": 4.696820979747643e-06, "loss": 0.6698, "step": 5094 }, { "epoch": 0.54, "grad_norm": 2.054564091020758, "learning_rate": 4.6950604933766585e-06, "loss": 0.7246, "step": 5095 }, { "epoch": 0.54, "grad_norm": 1.7420192827269678, "learning_rate": 4.693300044950105e-06, "loss": 0.5952, "step": 5096 }, { "epoch": 0.54, "grad_norm": 1.6734550261487033, "learning_rate": 4.691539634687041e-06, "loss": 0.5039, "step": 5097 }, { "epoch": 0.54, "grad_norm": 1.539278359098021, "learning_rate": 4.689779262806517e-06, "loss": 0.5621, "step": 5098 }, { "epoch": 0.54, "grad_norm": 1.6488685332754374, "learning_rate": 4.688018929527582e-06, "loss": 0.5846, "step": 5099 }, { "epoch": 0.54, "grad_norm": 1.8188509067890948, "learning_rate": 4.686258635069281e-06, "loss": 0.6549, "step": 5100 }, { "epoch": 0.54, "grad_norm": 1.8901517725138761, "learning_rate": 4.684498379650649e-06, "loss": 0.6165, "step": 5101 }, { "epoch": 0.54, "grad_norm": 1.9708043514078484, "learning_rate": 4.682738163490724e-06, "loss": 0.6191, "step": 5102 }, { "epoch": 0.54, "grad_norm": 1.814306095736227, "learning_rate": 4.680977986808528e-06, "loss": 0.561, "step": 5103 }, { "epoch": 0.54, "grad_norm": 1.669043396293792, "learning_rate": 4.679217849823086e-06, "loss": 0.667, "step": 5104 }, { "epoch": 0.54, "grad_norm": 1.7682029518929203, "learning_rate": 4.6774577527534195e-06, "loss": 0.5677, "step": 5105 }, { "epoch": 0.54, "grad_norm": 1.7304185848503921, "learning_rate": 4.675697695818539e-06, "loss": 0.6544, "step": 5106 }, { "epoch": 0.54, "grad_norm": 1.7931233725545275, "learning_rate": 4.673937679237457e-06, "loss": 0.6945, "step": 5107 }, { "epoch": 0.54, "grad_norm": 1.8902195518004035, "learning_rate": 4.6721777032291724e-06, "loss": 0.6413, "step": 5108 }, { "epoch": 0.54, "grad_norm": 1.6177452707183588, "learning_rate": 4.670417768012686e-06, "loss": 0.6099, "step": 5109 }, { "epoch": 0.55, "grad_norm": 1.7659521238761473, "learning_rate": 4.668657873806992e-06, "loss": 0.6047, "step": 5110 }, { "epoch": 0.55, "grad_norm": 1.194202616678273, "learning_rate": 4.666898020831077e-06, "loss": 0.5358, "step": 5111 }, { "epoch": 0.55, "grad_norm": 1.6470800973999231, "learning_rate": 4.665138209303925e-06, "loss": 0.627, "step": 5112 }, { "epoch": 0.55, "grad_norm": 1.748734634196825, "learning_rate": 4.663378439444514e-06, "loss": 0.5524, "step": 5113 }, { "epoch": 0.55, "grad_norm": 1.7630604338239821, "learning_rate": 4.661618711471816e-06, "loss": 0.5787, "step": 5114 }, { "epoch": 0.55, "grad_norm": 1.7049903153446344, "learning_rate": 4.6598590256047984e-06, "loss": 0.6437, "step": 5115 }, { "epoch": 0.55, "grad_norm": 1.8883988489186703, "learning_rate": 4.658099382062428e-06, "loss": 0.6536, "step": 5116 }, { "epoch": 0.55, "grad_norm": 1.2295036434884856, "learning_rate": 4.656339781063657e-06, "loss": 0.5199, "step": 5117 }, { "epoch": 0.55, "grad_norm": 1.660797135037364, "learning_rate": 4.654580222827438e-06, "loss": 0.5641, "step": 5118 }, { "epoch": 0.55, "grad_norm": 1.935114049389847, "learning_rate": 4.6528207075727175e-06, "loss": 0.6717, "step": 5119 }, { "epoch": 0.55, "grad_norm": 1.750623276673915, "learning_rate": 4.651061235518438e-06, "loss": 0.6851, "step": 5120 }, { "epoch": 0.55, "grad_norm": 1.9009624584504687, "learning_rate": 4.649301806883533e-06, "loss": 0.6239, "step": 5121 }, { "epoch": 0.55, "grad_norm": 1.9028694884230555, "learning_rate": 4.647542421886935e-06, "loss": 0.6277, "step": 5122 }, { "epoch": 0.55, "grad_norm": 1.6014149916845253, "learning_rate": 4.64578308074757e-06, "loss": 0.5323, "step": 5123 }, { "epoch": 0.55, "grad_norm": 1.784679879940864, "learning_rate": 4.6440237836843555e-06, "loss": 0.5231, "step": 5124 }, { "epoch": 0.55, "grad_norm": 2.463317025304249, "learning_rate": 4.642264530916205e-06, "loss": 0.591, "step": 5125 }, { "epoch": 0.55, "grad_norm": 1.2156355691056715, "learning_rate": 4.6405053226620286e-06, "loss": 0.5089, "step": 5126 }, { "epoch": 0.55, "grad_norm": 1.8444813700148277, "learning_rate": 4.638746159140728e-06, "loss": 0.6398, "step": 5127 }, { "epoch": 0.55, "grad_norm": 1.667031371045801, "learning_rate": 4.636987040571201e-06, "loss": 0.573, "step": 5128 }, { "epoch": 0.55, "grad_norm": 1.6122349087850534, "learning_rate": 4.63522796717234e-06, "loss": 0.6508, "step": 5129 }, { "epoch": 0.55, "grad_norm": 1.855395578714395, "learning_rate": 4.633468939163031e-06, "loss": 0.6014, "step": 5130 }, { "epoch": 0.55, "grad_norm": 1.830177652739701, "learning_rate": 4.631709956762154e-06, "loss": 0.5537, "step": 5131 }, { "epoch": 0.55, "grad_norm": 1.9071999514450557, "learning_rate": 4.629951020188583e-06, "loss": 0.7435, "step": 5132 }, { "epoch": 0.55, "grad_norm": 1.6588014099483992, "learning_rate": 4.628192129661189e-06, "loss": 0.5611, "step": 5133 }, { "epoch": 0.55, "grad_norm": 1.7056387613261867, "learning_rate": 4.626433285398835e-06, "loss": 0.5514, "step": 5134 }, { "epoch": 0.55, "grad_norm": 1.835015283074967, "learning_rate": 4.624674487620377e-06, "loss": 0.5509, "step": 5135 }, { "epoch": 0.55, "grad_norm": 1.2585382579205229, "learning_rate": 4.622915736544668e-06, "loss": 0.535, "step": 5136 }, { "epoch": 0.55, "grad_norm": 1.9346759281301622, "learning_rate": 4.621157032390555e-06, "loss": 0.63, "step": 5137 }, { "epoch": 0.55, "grad_norm": 1.8700327710807985, "learning_rate": 4.619398375376875e-06, "loss": 0.6773, "step": 5138 }, { "epoch": 0.55, "grad_norm": 1.8550024198761723, "learning_rate": 4.617639765722467e-06, "loss": 0.5755, "step": 5139 }, { "epoch": 0.55, "grad_norm": 1.6466794234160096, "learning_rate": 4.615881203646157e-06, "loss": 0.5463, "step": 5140 }, { "epoch": 0.55, "grad_norm": 1.8073009087916678, "learning_rate": 4.614122689366769e-06, "loss": 0.6295, "step": 5141 }, { "epoch": 0.55, "grad_norm": 2.021733485761932, "learning_rate": 4.6123642231031165e-06, "loss": 0.6943, "step": 5142 }, { "epoch": 0.55, "grad_norm": 1.7588181672214496, "learning_rate": 4.610605805074012e-06, "loss": 0.6458, "step": 5143 }, { "epoch": 0.55, "grad_norm": 1.203083549470203, "learning_rate": 4.608847435498261e-06, "loss": 0.5238, "step": 5144 }, { "epoch": 0.55, "grad_norm": 1.6191938159562427, "learning_rate": 4.607089114594662e-06, "loss": 0.5649, "step": 5145 }, { "epoch": 0.55, "grad_norm": 1.91033115169379, "learning_rate": 4.605330842582005e-06, "loss": 0.6206, "step": 5146 }, { "epoch": 0.55, "grad_norm": 1.7573556070882823, "learning_rate": 4.603572619679078e-06, "loss": 0.6478, "step": 5147 }, { "epoch": 0.55, "grad_norm": 1.700451182936161, "learning_rate": 4.6018144461046634e-06, "loss": 0.6326, "step": 5148 }, { "epoch": 0.55, "grad_norm": 1.8503047662603112, "learning_rate": 4.6000563220775314e-06, "loss": 0.6366, "step": 5149 }, { "epoch": 0.55, "grad_norm": 1.759577325625547, "learning_rate": 4.598298247816453e-06, "loss": 0.5665, "step": 5150 }, { "epoch": 0.55, "grad_norm": 1.8318129354412447, "learning_rate": 4.596540223540189e-06, "loss": 0.7277, "step": 5151 }, { "epoch": 0.55, "grad_norm": 1.1403405124955919, "learning_rate": 4.594782249467496e-06, "loss": 0.5075, "step": 5152 }, { "epoch": 0.55, "grad_norm": 1.9370456473006339, "learning_rate": 4.593024325817122e-06, "loss": 0.6748, "step": 5153 }, { "epoch": 0.55, "grad_norm": 1.4762342659746617, "learning_rate": 4.5912664528078105e-06, "loss": 0.579, "step": 5154 }, { "epoch": 0.55, "grad_norm": 1.7980864604123619, "learning_rate": 4.589508630658297e-06, "loss": 0.5499, "step": 5155 }, { "epoch": 0.55, "grad_norm": 2.237956750055332, "learning_rate": 4.587750859587316e-06, "loss": 0.6685, "step": 5156 }, { "epoch": 0.55, "grad_norm": 1.7131384492131552, "learning_rate": 4.5859931398135874e-06, "loss": 0.6193, "step": 5157 }, { "epoch": 0.55, "grad_norm": 1.4753577751056903, "learning_rate": 4.584235471555831e-06, "loss": 0.5635, "step": 5158 }, { "epoch": 0.55, "grad_norm": 1.7164951427360908, "learning_rate": 4.582477855032758e-06, "loss": 0.5717, "step": 5159 }, { "epoch": 0.55, "grad_norm": 1.910806357877841, "learning_rate": 4.580720290463075e-06, "loss": 0.6277, "step": 5160 }, { "epoch": 0.55, "grad_norm": 1.9098518751615587, "learning_rate": 4.578962778065476e-06, "loss": 0.602, "step": 5161 }, { "epoch": 0.55, "grad_norm": 1.7069850664157953, "learning_rate": 4.577205318058655e-06, "loss": 0.578, "step": 5162 }, { "epoch": 0.55, "grad_norm": 1.7137440208268238, "learning_rate": 4.5754479106613e-06, "loss": 0.5416, "step": 5163 }, { "epoch": 0.55, "grad_norm": 1.7558601501814735, "learning_rate": 4.573690556092085e-06, "loss": 0.6457, "step": 5164 }, { "epoch": 0.55, "grad_norm": 1.7620867525082613, "learning_rate": 4.571933254569686e-06, "loss": 0.6683, "step": 5165 }, { "epoch": 0.55, "grad_norm": 1.1441079647507348, "learning_rate": 4.570176006312769e-06, "loss": 0.5075, "step": 5166 }, { "epoch": 0.55, "grad_norm": 1.6139396761726459, "learning_rate": 4.568418811539989e-06, "loss": 0.6524, "step": 5167 }, { "epoch": 0.55, "grad_norm": 1.7824906357770456, "learning_rate": 4.566661670470003e-06, "loss": 0.6767, "step": 5168 }, { "epoch": 0.55, "grad_norm": 1.8115660524753996, "learning_rate": 4.564904583321455e-06, "loss": 0.5958, "step": 5169 }, { "epoch": 0.55, "grad_norm": 1.8666802284381363, "learning_rate": 4.563147550312983e-06, "loss": 0.6513, "step": 5170 }, { "epoch": 0.55, "grad_norm": 1.6730904311773165, "learning_rate": 4.56139057166322e-06, "loss": 0.6335, "step": 5171 }, { "epoch": 0.55, "grad_norm": 1.8532099746966, "learning_rate": 4.559633647590791e-06, "loss": 0.6124, "step": 5172 }, { "epoch": 0.55, "grad_norm": 1.9219910725907996, "learning_rate": 4.557876778314316e-06, "loss": 0.7402, "step": 5173 }, { "epoch": 0.55, "grad_norm": 1.7544323774563024, "learning_rate": 4.556119964052409e-06, "loss": 0.6545, "step": 5174 }, { "epoch": 0.55, "grad_norm": 1.3189728253550947, "learning_rate": 4.55436320502367e-06, "loss": 0.5286, "step": 5175 }, { "epoch": 0.55, "grad_norm": 1.9006798399498428, "learning_rate": 4.552606501446699e-06, "loss": 0.6252, "step": 5176 }, { "epoch": 0.55, "grad_norm": 1.8312452320003254, "learning_rate": 4.5508498535400885e-06, "loss": 0.5892, "step": 5177 }, { "epoch": 0.55, "grad_norm": 1.6177665942360093, "learning_rate": 4.5490932615224205e-06, "loss": 0.5305, "step": 5178 }, { "epoch": 0.55, "grad_norm": 1.7028294987073462, "learning_rate": 4.547336725612275e-06, "loss": 0.666, "step": 5179 }, { "epoch": 0.55, "grad_norm": 1.522669025701333, "learning_rate": 4.545580246028222e-06, "loss": 0.5252, "step": 5180 }, { "epoch": 0.55, "grad_norm": 1.7623451139659665, "learning_rate": 4.543823822988824e-06, "loss": 0.6017, "step": 5181 }, { "epoch": 0.55, "grad_norm": 1.8261850129751815, "learning_rate": 4.542067456712637e-06, "loss": 0.5891, "step": 5182 }, { "epoch": 0.55, "grad_norm": 1.7963350003184975, "learning_rate": 4.540311147418213e-06, "loss": 0.6104, "step": 5183 }, { "epoch": 0.55, "grad_norm": 1.779922230597691, "learning_rate": 4.538554895324091e-06, "loss": 0.6177, "step": 5184 }, { "epoch": 0.55, "grad_norm": 1.7195099939790384, "learning_rate": 4.536798700648808e-06, "loss": 0.638, "step": 5185 }, { "epoch": 0.55, "grad_norm": 1.7511206918674866, "learning_rate": 4.5350425636108904e-06, "loss": 0.5606, "step": 5186 }, { "epoch": 0.55, "grad_norm": 1.7238612920266703, "learning_rate": 4.533286484428861e-06, "loss": 0.6614, "step": 5187 }, { "epoch": 0.55, "grad_norm": 1.7583840214850952, "learning_rate": 4.531530463321233e-06, "loss": 0.6274, "step": 5188 }, { "epoch": 0.55, "grad_norm": 1.9992581145363983, "learning_rate": 4.529774500506511e-06, "loss": 0.6241, "step": 5189 }, { "epoch": 0.55, "grad_norm": 1.900085975499481, "learning_rate": 4.528018596203195e-06, "loss": 0.6154, "step": 5190 }, { "epoch": 0.55, "grad_norm": 1.8382889737681531, "learning_rate": 4.526262750629777e-06, "loss": 0.5537, "step": 5191 }, { "epoch": 0.55, "grad_norm": 1.5074935788283867, "learning_rate": 4.5245069640047424e-06, "loss": 0.5343, "step": 5192 }, { "epoch": 0.55, "grad_norm": 1.580177142574391, "learning_rate": 4.522751236546567e-06, "loss": 0.5565, "step": 5193 }, { "epoch": 0.55, "grad_norm": 1.8475860118459149, "learning_rate": 4.52099556847372e-06, "loss": 0.6762, "step": 5194 }, { "epoch": 0.55, "grad_norm": 1.701449976628265, "learning_rate": 4.519239960004666e-06, "loss": 0.5865, "step": 5195 }, { "epoch": 0.55, "grad_norm": 1.3087883395374635, "learning_rate": 4.517484411357859e-06, "loss": 0.5327, "step": 5196 }, { "epoch": 0.55, "grad_norm": 1.6338618959288689, "learning_rate": 4.515728922751747e-06, "loss": 0.5578, "step": 5197 }, { "epoch": 0.55, "grad_norm": 1.6958414748533406, "learning_rate": 4.51397349440477e-06, "loss": 0.6409, "step": 5198 }, { "epoch": 0.55, "grad_norm": 2.0109585634817666, "learning_rate": 4.51221812653536e-06, "loss": 0.6644, "step": 5199 }, { "epoch": 0.55, "grad_norm": 1.5481011594759067, "learning_rate": 4.5104628193619424e-06, "loss": 0.5416, "step": 5200 }, { "epoch": 0.55, "grad_norm": 1.1948624883990613, "learning_rate": 4.508707573102934e-06, "loss": 0.5071, "step": 5201 }, { "epoch": 0.55, "grad_norm": 1.7721743663589038, "learning_rate": 4.506952387976746e-06, "loss": 0.6213, "step": 5202 }, { "epoch": 0.55, "grad_norm": 1.6062439393066863, "learning_rate": 4.505197264201782e-06, "loss": 0.596, "step": 5203 }, { "epoch": 0.56, "grad_norm": 1.805338316294078, "learning_rate": 4.503442201996433e-06, "loss": 0.6705, "step": 5204 }, { "epoch": 0.56, "grad_norm": 1.889032478713325, "learning_rate": 4.501687201579088e-06, "loss": 0.6469, "step": 5205 }, { "epoch": 0.56, "grad_norm": 1.7948450769007989, "learning_rate": 4.499932263168127e-06, "loss": 0.5887, "step": 5206 }, { "epoch": 0.56, "grad_norm": 1.1583652388820251, "learning_rate": 4.49817738698192e-06, "loss": 0.5045, "step": 5207 }, { "epoch": 0.56, "grad_norm": 1.1190325197272453, "learning_rate": 4.496422573238833e-06, "loss": 0.51, "step": 5208 }, { "epoch": 0.56, "grad_norm": 1.7805960387224153, "learning_rate": 4.49466782215722e-06, "loss": 0.6162, "step": 5209 }, { "epoch": 0.56, "grad_norm": 2.0016964594574724, "learning_rate": 4.49291313395543e-06, "loss": 0.6756, "step": 5210 }, { "epoch": 0.56, "grad_norm": 1.6439526184438633, "learning_rate": 4.4911585088518036e-06, "loss": 0.5692, "step": 5211 }, { "epoch": 0.56, "grad_norm": 1.822188983797486, "learning_rate": 4.489403947064675e-06, "loss": 0.6499, "step": 5212 }, { "epoch": 0.56, "grad_norm": 1.9053896287824488, "learning_rate": 4.487649448812367e-06, "loss": 0.7028, "step": 5213 }, { "epoch": 0.56, "grad_norm": 1.8083717739882212, "learning_rate": 4.485895014313198e-06, "loss": 0.5878, "step": 5214 }, { "epoch": 0.56, "grad_norm": 1.8176953421118704, "learning_rate": 4.484140643785476e-06, "loss": 0.6194, "step": 5215 }, { "epoch": 0.56, "grad_norm": 1.231409374624764, "learning_rate": 4.482386337447501e-06, "loss": 0.496, "step": 5216 }, { "epoch": 0.56, "grad_norm": 1.6737298621496326, "learning_rate": 4.4806320955175685e-06, "loss": 0.6222, "step": 5217 }, { "epoch": 0.56, "grad_norm": 1.7204256869891172, "learning_rate": 4.4788779182139615e-06, "loss": 0.6686, "step": 5218 }, { "epoch": 0.56, "grad_norm": 2.1245447674682465, "learning_rate": 4.477123805754957e-06, "loss": 0.6295, "step": 5219 }, { "epoch": 0.56, "grad_norm": 1.86611001104797, "learning_rate": 4.4753697583588245e-06, "loss": 0.6916, "step": 5220 }, { "epoch": 0.56, "grad_norm": 1.1543005767693022, "learning_rate": 4.473615776243825e-06, "loss": 0.4948, "step": 5221 }, { "epoch": 0.56, "grad_norm": 1.8255237580208856, "learning_rate": 4.47186185962821e-06, "loss": 0.5739, "step": 5222 }, { "epoch": 0.56, "grad_norm": 1.7359544326306788, "learning_rate": 4.4701080087302254e-06, "loss": 0.6033, "step": 5223 }, { "epoch": 0.56, "grad_norm": 1.82744040814105, "learning_rate": 4.468354223768106e-06, "loss": 0.6912, "step": 5224 }, { "epoch": 0.56, "grad_norm": 1.739250465387327, "learning_rate": 4.466600504960081e-06, "loss": 0.6272, "step": 5225 }, { "epoch": 0.56, "grad_norm": 1.789842619724573, "learning_rate": 4.46484685252437e-06, "loss": 0.6392, "step": 5226 }, { "epoch": 0.56, "grad_norm": 1.8248809432329498, "learning_rate": 4.463093266679185e-06, "loss": 0.6719, "step": 5227 }, { "epoch": 0.56, "grad_norm": 1.703708809425363, "learning_rate": 4.461339747642728e-06, "loss": 0.6764, "step": 5228 }, { "epoch": 0.56, "grad_norm": 1.5917884198434737, "learning_rate": 4.4595862956331965e-06, "loss": 0.5019, "step": 5229 }, { "epoch": 0.56, "grad_norm": 1.7653533528176857, "learning_rate": 4.4578329108687755e-06, "loss": 0.658, "step": 5230 }, { "epoch": 0.56, "grad_norm": 1.6359681649425357, "learning_rate": 4.4560795935676455e-06, "loss": 0.5292, "step": 5231 }, { "epoch": 0.56, "grad_norm": 2.336669146591641, "learning_rate": 4.454326343947972e-06, "loss": 0.5987, "step": 5232 }, { "epoch": 0.56, "grad_norm": 1.621248678685234, "learning_rate": 4.45257316222792e-06, "loss": 0.5531, "step": 5233 }, { "epoch": 0.56, "grad_norm": 1.802588032482001, "learning_rate": 4.450820048625639e-06, "loss": 0.6733, "step": 5234 }, { "epoch": 0.56, "grad_norm": 1.7761068752055607, "learning_rate": 4.4490670033592785e-06, "loss": 0.662, "step": 5235 }, { "epoch": 0.56, "grad_norm": 1.6012021163029768, "learning_rate": 4.447314026646972e-06, "loss": 0.5492, "step": 5236 }, { "epoch": 0.56, "grad_norm": 1.760110652035534, "learning_rate": 4.445561118706846e-06, "loss": 0.6359, "step": 5237 }, { "epoch": 0.56, "grad_norm": 1.7133333245968871, "learning_rate": 4.443808279757021e-06, "loss": 0.6277, "step": 5238 }, { "epoch": 0.56, "grad_norm": 1.8349235561747248, "learning_rate": 4.442055510015608e-06, "loss": 0.5207, "step": 5239 }, { "epoch": 0.56, "grad_norm": 1.980813862073341, "learning_rate": 4.440302809700708e-06, "loss": 0.6547, "step": 5240 }, { "epoch": 0.56, "grad_norm": 1.7141830197968306, "learning_rate": 4.4385501790304135e-06, "loss": 0.6926, "step": 5241 }, { "epoch": 0.56, "grad_norm": 2.531856854716856, "learning_rate": 4.4367976182228095e-06, "loss": 0.5905, "step": 5242 }, { "epoch": 0.56, "grad_norm": 1.160344835226549, "learning_rate": 4.4350451274959715e-06, "loss": 0.5041, "step": 5243 }, { "epoch": 0.56, "grad_norm": 1.6484942418509296, "learning_rate": 4.433292707067968e-06, "loss": 0.6098, "step": 5244 }, { "epoch": 0.56, "grad_norm": 1.9434341675548206, "learning_rate": 4.431540357156854e-06, "loss": 0.6303, "step": 5245 }, { "epoch": 0.56, "grad_norm": 1.7392936257959684, "learning_rate": 4.429788077980685e-06, "loss": 0.5607, "step": 5246 }, { "epoch": 0.56, "grad_norm": 1.5402800665484673, "learning_rate": 4.428035869757496e-06, "loss": 0.5538, "step": 5247 }, { "epoch": 0.56, "grad_norm": 1.5573648429629787, "learning_rate": 4.42628373270532e-06, "loss": 0.562, "step": 5248 }, { "epoch": 0.56, "grad_norm": 1.5525614786071127, "learning_rate": 4.424531667042182e-06, "loss": 0.5961, "step": 5249 }, { "epoch": 0.56, "grad_norm": 1.7082677772938795, "learning_rate": 4.422779672986095e-06, "loss": 0.6498, "step": 5250 }, { "epoch": 0.56, "grad_norm": 1.9594097188948074, "learning_rate": 4.421027750755063e-06, "loss": 0.6407, "step": 5251 }, { "epoch": 0.56, "grad_norm": 1.7771092160215711, "learning_rate": 4.419275900567085e-06, "loss": 0.5936, "step": 5252 }, { "epoch": 0.56, "grad_norm": 1.595470171316169, "learning_rate": 4.4175241226401465e-06, "loss": 0.629, "step": 5253 }, { "epoch": 0.56, "grad_norm": 1.2301517954630348, "learning_rate": 4.415772417192227e-06, "loss": 0.5442, "step": 5254 }, { "epoch": 0.56, "grad_norm": 1.8808389061917665, "learning_rate": 4.414020784441296e-06, "loss": 0.6897, "step": 5255 }, { "epoch": 0.56, "grad_norm": 1.8795280484476167, "learning_rate": 4.412269224605312e-06, "loss": 0.5808, "step": 5256 }, { "epoch": 0.56, "grad_norm": 1.1468763718784403, "learning_rate": 4.410517737902228e-06, "loss": 0.5117, "step": 5257 }, { "epoch": 0.56, "grad_norm": 1.9303449841263205, "learning_rate": 4.4087663245499855e-06, "loss": 0.5662, "step": 5258 }, { "epoch": 0.56, "grad_norm": 1.851924163879989, "learning_rate": 4.407014984766518e-06, "loss": 0.62, "step": 5259 }, { "epoch": 0.56, "grad_norm": 1.6180491479405221, "learning_rate": 4.405263718769749e-06, "loss": 0.5374, "step": 5260 }, { "epoch": 0.56, "grad_norm": 1.7023347831928728, "learning_rate": 4.4035125267775925e-06, "loss": 0.5054, "step": 5261 }, { "epoch": 0.56, "grad_norm": 1.8820814892599174, "learning_rate": 4.4017614090079545e-06, "loss": 0.7578, "step": 5262 }, { "epoch": 0.56, "grad_norm": 1.119405528621713, "learning_rate": 4.400010365678731e-06, "loss": 0.4987, "step": 5263 }, { "epoch": 0.56, "grad_norm": 1.1528641067544003, "learning_rate": 4.3982593970078095e-06, "loss": 0.5284, "step": 5264 }, { "epoch": 0.56, "grad_norm": 1.8092669297699198, "learning_rate": 4.396508503213068e-06, "loss": 0.6636, "step": 5265 }, { "epoch": 0.56, "grad_norm": 1.0969225626582315, "learning_rate": 4.394757684512373e-06, "loss": 0.5284, "step": 5266 }, { "epoch": 0.56, "grad_norm": 2.29561484844112, "learning_rate": 4.393006941123585e-06, "loss": 0.6055, "step": 5267 }, { "epoch": 0.56, "grad_norm": 1.585640688683346, "learning_rate": 4.391256273264554e-06, "loss": 0.5104, "step": 5268 }, { "epoch": 0.56, "grad_norm": 1.6982854871246367, "learning_rate": 4.389505681153119e-06, "loss": 0.6215, "step": 5269 }, { "epoch": 0.56, "grad_norm": 1.7018242898031068, "learning_rate": 4.387755165007112e-06, "loss": 0.564, "step": 5270 }, { "epoch": 0.56, "grad_norm": 1.902991043481273, "learning_rate": 4.386004725044354e-06, "loss": 0.6592, "step": 5271 }, { "epoch": 0.56, "grad_norm": 1.1627699372582068, "learning_rate": 4.384254361482656e-06, "loss": 0.538, "step": 5272 }, { "epoch": 0.56, "grad_norm": 1.7095913648006829, "learning_rate": 4.382504074539822e-06, "loss": 0.6003, "step": 5273 }, { "epoch": 0.56, "grad_norm": 1.7106097731289132, "learning_rate": 4.380753864433645e-06, "loss": 0.573, "step": 5274 }, { "epoch": 0.56, "grad_norm": 1.7798265795985333, "learning_rate": 4.379003731381906e-06, "loss": 0.6142, "step": 5275 }, { "epoch": 0.56, "grad_norm": 1.971875038784025, "learning_rate": 4.3772536756023795e-06, "loss": 0.6543, "step": 5276 }, { "epoch": 0.56, "grad_norm": 1.7925744002201562, "learning_rate": 4.3755036973128305e-06, "loss": 0.6435, "step": 5277 }, { "epoch": 0.56, "grad_norm": 1.6261461257897474, "learning_rate": 4.373753796731013e-06, "loss": 0.5947, "step": 5278 }, { "epoch": 0.56, "grad_norm": 1.7511106719760283, "learning_rate": 4.372003974074672e-06, "loss": 0.6933, "step": 5279 }, { "epoch": 0.56, "grad_norm": 1.7078114206440884, "learning_rate": 4.3702542295615415e-06, "loss": 0.7271, "step": 5280 }, { "epoch": 0.56, "grad_norm": 1.093974033526707, "learning_rate": 4.3685045634093485e-06, "loss": 0.5031, "step": 5281 }, { "epoch": 0.56, "grad_norm": 1.8108873028606165, "learning_rate": 4.366754975835807e-06, "loss": 0.6008, "step": 5282 }, { "epoch": 0.56, "grad_norm": 1.7307493500907807, "learning_rate": 4.365005467058624e-06, "loss": 0.6337, "step": 5283 }, { "epoch": 0.56, "grad_norm": 1.8813813400035109, "learning_rate": 4.363256037295496e-06, "loss": 0.6958, "step": 5284 }, { "epoch": 0.56, "grad_norm": 1.879897580831018, "learning_rate": 4.361506686764106e-06, "loss": 0.714, "step": 5285 }, { "epoch": 0.56, "grad_norm": 1.1340687672021146, "learning_rate": 4.359757415682134e-06, "loss": 0.4891, "step": 5286 }, { "epoch": 0.56, "grad_norm": 1.1334083106112405, "learning_rate": 4.358008224267245e-06, "loss": 0.5001, "step": 5287 }, { "epoch": 0.56, "grad_norm": 1.748085128292021, "learning_rate": 4.356259112737096e-06, "loss": 0.6565, "step": 5288 }, { "epoch": 0.56, "grad_norm": 1.9189219465561906, "learning_rate": 4.354510081309335e-06, "loss": 0.5757, "step": 5289 }, { "epoch": 0.56, "grad_norm": 1.6752968305745832, "learning_rate": 4.352761130201595e-06, "loss": 0.6003, "step": 5290 }, { "epoch": 0.56, "grad_norm": 1.8130154328484394, "learning_rate": 4.351012259631503e-06, "loss": 0.7134, "step": 5291 }, { "epoch": 0.56, "grad_norm": 1.1715772017001453, "learning_rate": 4.349263469816678e-06, "loss": 0.5068, "step": 5292 }, { "epoch": 0.56, "grad_norm": 1.9199721093365063, "learning_rate": 4.347514760974726e-06, "loss": 0.6212, "step": 5293 }, { "epoch": 0.56, "grad_norm": 2.0372867127886356, "learning_rate": 4.345766133323243e-06, "loss": 0.6543, "step": 5294 }, { "epoch": 0.56, "grad_norm": 1.893670488442213, "learning_rate": 4.344017587079815e-06, "loss": 0.6593, "step": 5295 }, { "epoch": 0.56, "grad_norm": 1.106347709653867, "learning_rate": 4.342269122462019e-06, "loss": 0.5069, "step": 5296 }, { "epoch": 0.57, "grad_norm": 1.8402962249984414, "learning_rate": 4.340520739687421e-06, "loss": 0.6757, "step": 5297 }, { "epoch": 0.57, "grad_norm": 1.7033244918729693, "learning_rate": 4.338772438973576e-06, "loss": 0.5842, "step": 5298 }, { "epoch": 0.57, "grad_norm": 1.8673976913272632, "learning_rate": 4.337024220538031e-06, "loss": 0.5483, "step": 5299 }, { "epoch": 0.57, "grad_norm": 1.6429384451212428, "learning_rate": 4.33527608459832e-06, "loss": 0.5529, "step": 5300 }, { "epoch": 0.57, "grad_norm": 2.1350950600326892, "learning_rate": 4.33352803137197e-06, "loss": 0.6417, "step": 5301 }, { "epoch": 0.57, "grad_norm": 1.8705098173099193, "learning_rate": 4.331780061076494e-06, "loss": 0.6761, "step": 5302 }, { "epoch": 0.57, "grad_norm": 1.8127287482089725, "learning_rate": 4.330032173929401e-06, "loss": 0.6329, "step": 5303 }, { "epoch": 0.57, "grad_norm": 1.6499620394493193, "learning_rate": 4.328284370148178e-06, "loss": 0.6135, "step": 5304 }, { "epoch": 0.57, "grad_norm": 1.6582556574697827, "learning_rate": 4.326536649950314e-06, "loss": 0.529, "step": 5305 }, { "epoch": 0.57, "grad_norm": 1.741870531870902, "learning_rate": 4.324789013553281e-06, "loss": 0.6261, "step": 5306 }, { "epoch": 0.57, "grad_norm": 1.7181810419784627, "learning_rate": 4.323041461174541e-06, "loss": 0.6206, "step": 5307 }, { "epoch": 0.57, "grad_norm": 1.7559493811425408, "learning_rate": 4.321293993031547e-06, "loss": 0.6278, "step": 5308 }, { "epoch": 0.57, "grad_norm": 1.7500695805959963, "learning_rate": 4.319546609341744e-06, "loss": 0.5625, "step": 5309 }, { "epoch": 0.57, "grad_norm": 1.172165353973359, "learning_rate": 4.31779931032256e-06, "loss": 0.5009, "step": 5310 }, { "epoch": 0.57, "grad_norm": 2.003263900345563, "learning_rate": 4.316052096191418e-06, "loss": 0.713, "step": 5311 }, { "epoch": 0.57, "grad_norm": 1.9424009148346104, "learning_rate": 4.314304967165728e-06, "loss": 0.7246, "step": 5312 }, { "epoch": 0.57, "grad_norm": 1.765020914902679, "learning_rate": 4.31255792346289e-06, "loss": 0.593, "step": 5313 }, { "epoch": 0.57, "grad_norm": 1.7749156784338669, "learning_rate": 4.310810965300293e-06, "loss": 0.6177, "step": 5314 }, { "epoch": 0.57, "grad_norm": 1.879750333290521, "learning_rate": 4.309064092895314e-06, "loss": 0.7145, "step": 5315 }, { "epoch": 0.57, "grad_norm": 1.7593568637325232, "learning_rate": 4.307317306465325e-06, "loss": 0.6262, "step": 5316 }, { "epoch": 0.57, "grad_norm": 1.7120561477888936, "learning_rate": 4.30557060622768e-06, "loss": 0.7194, "step": 5317 }, { "epoch": 0.57, "grad_norm": 1.6955567153328592, "learning_rate": 4.303823992399728e-06, "loss": 0.6325, "step": 5318 }, { "epoch": 0.57, "grad_norm": 1.7474387925165435, "learning_rate": 4.302077465198802e-06, "loss": 0.5921, "step": 5319 }, { "epoch": 0.57, "grad_norm": 1.7380962229112906, "learning_rate": 4.300331024842228e-06, "loss": 0.5961, "step": 5320 }, { "epoch": 0.57, "grad_norm": 1.9509413606407096, "learning_rate": 4.298584671547321e-06, "loss": 0.6396, "step": 5321 }, { "epoch": 0.57, "grad_norm": 1.5453896185103433, "learning_rate": 4.296838405531384e-06, "loss": 0.5733, "step": 5322 }, { "epoch": 0.57, "grad_norm": 1.7107941376395372, "learning_rate": 4.295092227011709e-06, "loss": 0.5712, "step": 5323 }, { "epoch": 0.57, "grad_norm": 1.7779476148677376, "learning_rate": 4.293346136205578e-06, "loss": 0.6509, "step": 5324 }, { "epoch": 0.57, "grad_norm": 1.8699205190633927, "learning_rate": 4.291600133330261e-06, "loss": 0.5844, "step": 5325 }, { "epoch": 0.57, "grad_norm": 1.6281155827804912, "learning_rate": 4.289854218603019e-06, "loss": 0.567, "step": 5326 }, { "epoch": 0.57, "grad_norm": 1.6785017784488274, "learning_rate": 4.288108392241101e-06, "loss": 0.5228, "step": 5327 }, { "epoch": 0.57, "grad_norm": 1.7739275702367132, "learning_rate": 4.2863626544617435e-06, "loss": 0.6342, "step": 5328 }, { "epoch": 0.57, "grad_norm": 1.8185650883165712, "learning_rate": 4.284617005482174e-06, "loss": 0.5434, "step": 5329 }, { "epoch": 0.57, "grad_norm": 1.8919995413043154, "learning_rate": 4.282871445519608e-06, "loss": 0.6154, "step": 5330 }, { "epoch": 0.57, "grad_norm": 1.8668323291392361, "learning_rate": 4.281125974791251e-06, "loss": 0.5467, "step": 5331 }, { "epoch": 0.57, "grad_norm": 1.8581411798102208, "learning_rate": 4.279380593514297e-06, "loss": 0.6522, "step": 5332 }, { "epoch": 0.57, "grad_norm": 1.7523507279863793, "learning_rate": 4.277635301905925e-06, "loss": 0.5892, "step": 5333 }, { "epoch": 0.57, "grad_norm": 1.561821859404349, "learning_rate": 4.275890100183309e-06, "loss": 0.513, "step": 5334 }, { "epoch": 0.57, "grad_norm": 1.868177241319242, "learning_rate": 4.2741449885636085e-06, "loss": 0.6087, "step": 5335 }, { "epoch": 0.57, "grad_norm": 1.573753174603133, "learning_rate": 4.272399967263973e-06, "loss": 0.5485, "step": 5336 }, { "epoch": 0.57, "grad_norm": 1.3095052016657076, "learning_rate": 4.27065503650154e-06, "loss": 0.5134, "step": 5337 }, { "epoch": 0.57, "grad_norm": 1.9658741425862991, "learning_rate": 4.268910196493434e-06, "loss": 0.6182, "step": 5338 }, { "epoch": 0.57, "grad_norm": 1.1631302788931928, "learning_rate": 4.267165447456773e-06, "loss": 0.5143, "step": 5339 }, { "epoch": 0.57, "grad_norm": 1.8006578663216506, "learning_rate": 4.265420789608658e-06, "loss": 0.6117, "step": 5340 }, { "epoch": 0.57, "grad_norm": 1.8155374603362173, "learning_rate": 4.263676223166183e-06, "loss": 0.6265, "step": 5341 }, { "epoch": 0.57, "grad_norm": 1.799905442253417, "learning_rate": 4.261931748346428e-06, "loss": 0.7022, "step": 5342 }, { "epoch": 0.57, "grad_norm": 1.7375509952536579, "learning_rate": 4.260187365366465e-06, "loss": 0.6515, "step": 5343 }, { "epoch": 0.57, "grad_norm": 1.750804452450481, "learning_rate": 4.25844307444335e-06, "loss": 0.6058, "step": 5344 }, { "epoch": 0.57, "grad_norm": 2.1152263017844453, "learning_rate": 4.2566988757941295e-06, "loss": 0.6412, "step": 5345 }, { "epoch": 0.57, "grad_norm": 1.7918993750966605, "learning_rate": 4.254954769635843e-06, "loss": 0.6955, "step": 5346 }, { "epoch": 0.57, "grad_norm": 1.8511276099138871, "learning_rate": 4.253210756185508e-06, "loss": 0.6588, "step": 5347 }, { "epoch": 0.57, "grad_norm": 1.6640412440723862, "learning_rate": 4.2514668356601385e-06, "loss": 0.548, "step": 5348 }, { "epoch": 0.57, "grad_norm": 2.4401821497511524, "learning_rate": 4.249723008276737e-06, "loss": 0.565, "step": 5349 }, { "epoch": 0.57, "grad_norm": 1.8534458690805313, "learning_rate": 4.247979274252293e-06, "loss": 0.6398, "step": 5350 }, { "epoch": 0.57, "grad_norm": 1.7815468506611472, "learning_rate": 4.246235633803781e-06, "loss": 0.6395, "step": 5351 }, { "epoch": 0.57, "grad_norm": 1.7934338425015157, "learning_rate": 4.24449208714817e-06, "loss": 0.6296, "step": 5352 }, { "epoch": 0.57, "grad_norm": 1.676743829967106, "learning_rate": 4.242748634502412e-06, "loss": 0.6393, "step": 5353 }, { "epoch": 0.57, "grad_norm": 2.3265987321355133, "learning_rate": 4.2410052760834495e-06, "loss": 0.6184, "step": 5354 }, { "epoch": 0.57, "grad_norm": 1.644014218203235, "learning_rate": 4.2392620121082145e-06, "loss": 0.6462, "step": 5355 }, { "epoch": 0.57, "grad_norm": 1.6198620013061318, "learning_rate": 4.237518842793625e-06, "loss": 0.5769, "step": 5356 }, { "epoch": 0.57, "grad_norm": 1.739299984278177, "learning_rate": 4.2357757683565885e-06, "loss": 0.6843, "step": 5357 }, { "epoch": 0.57, "grad_norm": 1.7491612878245408, "learning_rate": 4.234032789014e-06, "loss": 0.611, "step": 5358 }, { "epoch": 0.57, "grad_norm": 1.7806924013387506, "learning_rate": 4.232289904982743e-06, "loss": 0.5564, "step": 5359 }, { "epoch": 0.57, "grad_norm": 1.865478736181334, "learning_rate": 4.230547116479691e-06, "loss": 0.586, "step": 5360 }, { "epoch": 0.57, "grad_norm": 1.594286525513177, "learning_rate": 4.228804423721703e-06, "loss": 0.6609, "step": 5361 }, { "epoch": 0.57, "grad_norm": 1.62588883916204, "learning_rate": 4.227061826925626e-06, "loss": 0.5187, "step": 5362 }, { "epoch": 0.57, "grad_norm": 1.5416326237403968, "learning_rate": 4.225319326308295e-06, "loss": 0.5313, "step": 5363 }, { "epoch": 0.57, "grad_norm": 1.4825150898949193, "learning_rate": 4.223576922086534e-06, "loss": 0.5324, "step": 5364 }, { "epoch": 0.57, "grad_norm": 1.7177470983017609, "learning_rate": 4.221834614477157e-06, "loss": 0.5789, "step": 5365 }, { "epoch": 0.57, "grad_norm": 1.9027945556534354, "learning_rate": 4.2200924036969626e-06, "loss": 0.7056, "step": 5366 }, { "epoch": 0.57, "grad_norm": 2.1732055397806573, "learning_rate": 4.218350289962741e-06, "loss": 0.5824, "step": 5367 }, { "epoch": 0.57, "grad_norm": 1.7698357027544358, "learning_rate": 4.216608273491264e-06, "loss": 0.6136, "step": 5368 }, { "epoch": 0.57, "grad_norm": 2.02736286952283, "learning_rate": 4.2148663544992995e-06, "loss": 0.6368, "step": 5369 }, { "epoch": 0.57, "grad_norm": 1.5354031143424076, "learning_rate": 4.213124533203596e-06, "loss": 0.5871, "step": 5370 }, { "epoch": 0.57, "grad_norm": 1.8667804088773639, "learning_rate": 4.211382809820894e-06, "loss": 0.6129, "step": 5371 }, { "epoch": 0.57, "grad_norm": 2.110393298910872, "learning_rate": 4.209641184567922e-06, "loss": 0.5959, "step": 5372 }, { "epoch": 0.57, "grad_norm": 1.4722051431921845, "learning_rate": 4.207899657661393e-06, "loss": 0.5077, "step": 5373 }, { "epoch": 0.57, "grad_norm": 1.4814929602074394, "learning_rate": 4.2061582293180105e-06, "loss": 0.5104, "step": 5374 }, { "epoch": 0.57, "grad_norm": 1.8162517637219482, "learning_rate": 4.204416899754467e-06, "loss": 0.6233, "step": 5375 }, { "epoch": 0.57, "grad_norm": 1.707669149775352, "learning_rate": 4.202675669187438e-06, "loss": 0.7069, "step": 5376 }, { "epoch": 0.57, "grad_norm": 1.8218509318140412, "learning_rate": 4.20093453783359e-06, "loss": 0.7119, "step": 5377 }, { "epoch": 0.57, "grad_norm": 1.8140268241919555, "learning_rate": 4.199193505909577e-06, "loss": 0.6269, "step": 5378 }, { "epoch": 0.57, "grad_norm": 1.9711154656563488, "learning_rate": 4.197452573632041e-06, "loss": 0.6406, "step": 5379 }, { "epoch": 0.57, "grad_norm": 1.7373439086523155, "learning_rate": 4.19571174121761e-06, "loss": 0.564, "step": 5380 }, { "epoch": 0.57, "grad_norm": 1.844242952907457, "learning_rate": 4.193971008882899e-06, "loss": 0.5703, "step": 5381 }, { "epoch": 0.57, "grad_norm": 1.6588663115710969, "learning_rate": 4.192230376844514e-06, "loss": 0.5795, "step": 5382 }, { "epoch": 0.57, "grad_norm": 1.7908356758719344, "learning_rate": 4.190489845319046e-06, "loss": 0.6333, "step": 5383 }, { "epoch": 0.57, "grad_norm": 1.8112151298505288, "learning_rate": 4.188749414523074e-06, "loss": 0.6114, "step": 5384 }, { "epoch": 0.57, "grad_norm": 1.574948117715139, "learning_rate": 4.187009084673164e-06, "loss": 0.4594, "step": 5385 }, { "epoch": 0.57, "grad_norm": 1.8939081013329273, "learning_rate": 4.1852688559858716e-06, "loss": 0.5683, "step": 5386 }, { "epoch": 0.57, "grad_norm": 1.7564788411730101, "learning_rate": 4.183528728677736e-06, "loss": 0.5381, "step": 5387 }, { "epoch": 0.57, "grad_norm": 1.8610145021371538, "learning_rate": 4.181788702965286e-06, "loss": 0.5706, "step": 5388 }, { "epoch": 0.57, "grad_norm": 1.6809143019845578, "learning_rate": 4.180048779065039e-06, "loss": 0.5482, "step": 5389 }, { "epoch": 0.57, "grad_norm": 1.8098622424867825, "learning_rate": 4.178308957193497e-06, "loss": 0.6515, "step": 5390 }, { "epoch": 0.58, "grad_norm": 1.6817344701648849, "learning_rate": 4.1765692375671505e-06, "loss": 0.6264, "step": 5391 }, { "epoch": 0.58, "grad_norm": 1.73996622369352, "learning_rate": 4.1748296204024776e-06, "loss": 0.6577, "step": 5392 }, { "epoch": 0.58, "grad_norm": 1.8304677643029479, "learning_rate": 4.173090105915945e-06, "loss": 0.5906, "step": 5393 }, { "epoch": 0.58, "grad_norm": 1.5378741608834552, "learning_rate": 4.171350694324003e-06, "loss": 0.5074, "step": 5394 }, { "epoch": 0.58, "grad_norm": 1.7352963075238694, "learning_rate": 4.169611385843093e-06, "loss": 0.6431, "step": 5395 }, { "epoch": 0.58, "grad_norm": 1.7728652971627121, "learning_rate": 4.1678721806896405e-06, "loss": 0.6116, "step": 5396 }, { "epoch": 0.58, "grad_norm": 1.7008060740220234, "learning_rate": 4.16613307908006e-06, "loss": 0.5333, "step": 5397 }, { "epoch": 0.58, "grad_norm": 1.7344126225672807, "learning_rate": 4.164394081230751e-06, "loss": 0.6713, "step": 5398 }, { "epoch": 0.58, "grad_norm": 1.7438063080118953, "learning_rate": 4.1626551873581045e-06, "loss": 0.5616, "step": 5399 }, { "epoch": 0.58, "grad_norm": 1.8250507130077032, "learning_rate": 4.160916397678494e-06, "loss": 0.6534, "step": 5400 }, { "epoch": 0.58, "grad_norm": 1.6869488115229834, "learning_rate": 4.159177712408281e-06, "loss": 0.7192, "step": 5401 }, { "epoch": 0.58, "grad_norm": 1.8298897666691412, "learning_rate": 4.157439131763817e-06, "loss": 0.5944, "step": 5402 }, { "epoch": 0.58, "grad_norm": 1.6611651649251522, "learning_rate": 4.155700655961436e-06, "loss": 0.5528, "step": 5403 }, { "epoch": 0.58, "grad_norm": 1.7337972158617083, "learning_rate": 4.153962285217463e-06, "loss": 0.5621, "step": 5404 }, { "epoch": 0.58, "grad_norm": 1.58900653534156, "learning_rate": 4.152224019748207e-06, "loss": 0.6158, "step": 5405 }, { "epoch": 0.58, "grad_norm": 1.7008110912076753, "learning_rate": 4.150485859769964e-06, "loss": 0.5334, "step": 5406 }, { "epoch": 0.58, "grad_norm": 1.7598428411253613, "learning_rate": 4.1487478054990196e-06, "loss": 0.5195, "step": 5407 }, { "epoch": 0.58, "grad_norm": 1.9182013951497805, "learning_rate": 4.1470098571516425e-06, "loss": 0.6207, "step": 5408 }, { "epoch": 0.58, "grad_norm": 1.7111998529909238, "learning_rate": 4.145272014944093e-06, "loss": 0.6224, "step": 5409 }, { "epoch": 0.58, "grad_norm": 1.7610474061762396, "learning_rate": 4.143534279092613e-06, "loss": 0.652, "step": 5410 }, { "epoch": 0.58, "grad_norm": 1.7318114314296118, "learning_rate": 4.1417966498134344e-06, "loss": 0.7119, "step": 5411 }, { "epoch": 0.58, "grad_norm": 2.1298476176735313, "learning_rate": 4.1400591273227755e-06, "loss": 0.7082, "step": 5412 }, { "epoch": 0.58, "grad_norm": 1.6813198725717589, "learning_rate": 4.138321711836841e-06, "loss": 0.4901, "step": 5413 }, { "epoch": 0.58, "grad_norm": 1.5738976427269735, "learning_rate": 4.136584403571821e-06, "loss": 0.6684, "step": 5414 }, { "epoch": 0.58, "grad_norm": 1.7986896391061489, "learning_rate": 4.134847202743892e-06, "loss": 0.6321, "step": 5415 }, { "epoch": 0.58, "grad_norm": 1.2606979966301286, "learning_rate": 4.133110109569223e-06, "loss": 0.4945, "step": 5416 }, { "epoch": 0.58, "grad_norm": 1.8528955201032182, "learning_rate": 4.131373124263962e-06, "loss": 0.689, "step": 5417 }, { "epoch": 0.58, "grad_norm": 1.5803596307760515, "learning_rate": 4.12963624704425e-06, "loss": 0.5609, "step": 5418 }, { "epoch": 0.58, "grad_norm": 1.820702976823076, "learning_rate": 4.127899478126205e-06, "loss": 0.6617, "step": 5419 }, { "epoch": 0.58, "grad_norm": 1.7181799776719744, "learning_rate": 4.1261628177259425e-06, "loss": 0.6567, "step": 5420 }, { "epoch": 0.58, "grad_norm": 1.782689002158745, "learning_rate": 4.124426266059557e-06, "loss": 0.6228, "step": 5421 }, { "epoch": 0.58, "grad_norm": 1.7393545246121347, "learning_rate": 4.122689823343134e-06, "loss": 0.6507, "step": 5422 }, { "epoch": 0.58, "grad_norm": 1.8574360551648992, "learning_rate": 4.120953489792745e-06, "loss": 0.5805, "step": 5423 }, { "epoch": 0.58, "grad_norm": 1.5811807019227244, "learning_rate": 4.119217265624444e-06, "loss": 0.5184, "step": 5424 }, { "epoch": 0.58, "grad_norm": 2.0663303901543806, "learning_rate": 4.117481151054275e-06, "loss": 0.5899, "step": 5425 }, { "epoch": 0.58, "grad_norm": 1.8071578029139355, "learning_rate": 4.115745146298268e-06, "loss": 0.5986, "step": 5426 }, { "epoch": 0.58, "grad_norm": 1.9481823201879456, "learning_rate": 4.114009251572438e-06, "loss": 0.6611, "step": 5427 }, { "epoch": 0.58, "grad_norm": 1.5607076351912519, "learning_rate": 4.112273467092786e-06, "loss": 0.5732, "step": 5428 }, { "epoch": 0.58, "grad_norm": 1.8756200744060942, "learning_rate": 4.110537793075302e-06, "loss": 0.7305, "step": 5429 }, { "epoch": 0.58, "grad_norm": 1.6632030543403773, "learning_rate": 4.10880222973596e-06, "loss": 0.5423, "step": 5430 }, { "epoch": 0.58, "grad_norm": 1.5447391287730254, "learning_rate": 4.1070667772907205e-06, "loss": 0.6557, "step": 5431 }, { "epoch": 0.58, "grad_norm": 2.000267795364779, "learning_rate": 4.10533143595553e-06, "loss": 0.6847, "step": 5432 }, { "epoch": 0.58, "grad_norm": 1.7894908114587638, "learning_rate": 4.103596205946323e-06, "loss": 0.6693, "step": 5433 }, { "epoch": 0.58, "grad_norm": 1.665971178019394, "learning_rate": 4.101861087479018e-06, "loss": 0.5496, "step": 5434 }, { "epoch": 0.58, "grad_norm": 1.7670003153979255, "learning_rate": 4.100126080769519e-06, "loss": 0.5445, "step": 5435 }, { "epoch": 0.58, "grad_norm": 2.100806654936719, "learning_rate": 4.098391186033719e-06, "loss": 0.6881, "step": 5436 }, { "epoch": 0.58, "grad_norm": 1.9820637595272896, "learning_rate": 4.096656403487496e-06, "loss": 0.5468, "step": 5437 }, { "epoch": 0.58, "grad_norm": 1.7161544954736918, "learning_rate": 4.094921733346711e-06, "loss": 0.5784, "step": 5438 }, { "epoch": 0.58, "grad_norm": 1.76188606046145, "learning_rate": 4.093187175827216e-06, "loss": 0.5472, "step": 5439 }, { "epoch": 0.58, "grad_norm": 1.7776842858656967, "learning_rate": 4.091452731144848e-06, "loss": 0.5964, "step": 5440 }, { "epoch": 0.58, "grad_norm": 1.7103948203091563, "learning_rate": 4.089718399515426e-06, "loss": 0.6791, "step": 5441 }, { "epoch": 0.58, "grad_norm": 1.7026866147643391, "learning_rate": 4.087984181154759e-06, "loss": 0.623, "step": 5442 }, { "epoch": 0.58, "grad_norm": 1.7662052768627436, "learning_rate": 4.086250076278637e-06, "loss": 0.6777, "step": 5443 }, { "epoch": 0.58, "grad_norm": 1.7993412950443524, "learning_rate": 4.084516085102844e-06, "loss": 0.6779, "step": 5444 }, { "epoch": 0.58, "grad_norm": 1.165793638559579, "learning_rate": 4.082782207843142e-06, "loss": 0.5232, "step": 5445 }, { "epoch": 0.58, "grad_norm": 1.6253939498948808, "learning_rate": 4.081048444715283e-06, "loss": 0.5257, "step": 5446 }, { "epoch": 0.58, "grad_norm": 1.8180143896297496, "learning_rate": 4.079314795935005e-06, "loss": 0.5716, "step": 5447 }, { "epoch": 0.58, "grad_norm": 1.919798636674785, "learning_rate": 4.0775812617180275e-06, "loss": 0.5234, "step": 5448 }, { "epoch": 0.58, "grad_norm": 1.8469241946365738, "learning_rate": 4.075847842280061e-06, "loss": 0.7005, "step": 5449 }, { "epoch": 0.58, "grad_norm": 1.9187250758702326, "learning_rate": 4.074114537836799e-06, "loss": 0.6214, "step": 5450 }, { "epoch": 0.58, "grad_norm": 1.6537460152453385, "learning_rate": 4.072381348603921e-06, "loss": 0.5798, "step": 5451 }, { "epoch": 0.58, "grad_norm": 1.1160110916079014, "learning_rate": 4.070648274797092e-06, "loss": 0.5115, "step": 5452 }, { "epoch": 0.58, "grad_norm": 1.9257989757483955, "learning_rate": 4.068915316631963e-06, "loss": 0.6882, "step": 5453 }, { "epoch": 0.58, "grad_norm": 1.8585037088930476, "learning_rate": 4.067182474324172e-06, "loss": 0.753, "step": 5454 }, { "epoch": 0.58, "grad_norm": 1.816996966895756, "learning_rate": 4.065449748089339e-06, "loss": 0.6298, "step": 5455 }, { "epoch": 0.58, "grad_norm": 1.6583428957929724, "learning_rate": 4.063717138143073e-06, "loss": 0.5918, "step": 5456 }, { "epoch": 0.58, "grad_norm": 1.9566666406794475, "learning_rate": 4.061984644700967e-06, "loss": 0.5461, "step": 5457 }, { "epoch": 0.58, "grad_norm": 1.8151864252384433, "learning_rate": 4.0602522679786005e-06, "loss": 0.5275, "step": 5458 }, { "epoch": 0.58, "grad_norm": 1.7431280305073678, "learning_rate": 4.058520008191537e-06, "loss": 0.573, "step": 5459 }, { "epoch": 0.58, "grad_norm": 1.7620420419338982, "learning_rate": 4.056787865555326e-06, "loss": 0.604, "step": 5460 }, { "epoch": 0.58, "grad_norm": 1.7064343859117506, "learning_rate": 4.055055840285504e-06, "loss": 0.6621, "step": 5461 }, { "epoch": 0.58, "grad_norm": 1.9174796254583848, "learning_rate": 4.053323932597589e-06, "loss": 0.6222, "step": 5462 }, { "epoch": 0.58, "grad_norm": 1.6486580627829137, "learning_rate": 4.051592142707089e-06, "loss": 0.5672, "step": 5463 }, { "epoch": 0.58, "grad_norm": 1.9757001547962194, "learning_rate": 4.049860470829493e-06, "loss": 0.6656, "step": 5464 }, { "epoch": 0.58, "grad_norm": 1.696199102476167, "learning_rate": 4.048128917180281e-06, "loss": 0.5868, "step": 5465 }, { "epoch": 0.58, "grad_norm": 1.9558173882938943, "learning_rate": 4.046397481974912e-06, "loss": 0.5913, "step": 5466 }, { "epoch": 0.58, "grad_norm": 1.7226795525638292, "learning_rate": 4.044666165428833e-06, "loss": 0.6474, "step": 5467 }, { "epoch": 0.58, "grad_norm": 1.2280967151056796, "learning_rate": 4.042934967757477e-06, "loss": 0.5289, "step": 5468 }, { "epoch": 0.58, "grad_norm": 1.8109106101375936, "learning_rate": 4.041203889176263e-06, "loss": 0.6102, "step": 5469 }, { "epoch": 0.58, "grad_norm": 1.8310377230334633, "learning_rate": 4.039472929900591e-06, "loss": 0.5601, "step": 5470 }, { "epoch": 0.58, "grad_norm": 1.7611880289213098, "learning_rate": 4.037742090145851e-06, "loss": 0.6467, "step": 5471 }, { "epoch": 0.58, "grad_norm": 1.7569778905546063, "learning_rate": 4.036011370127414e-06, "loss": 0.5686, "step": 5472 }, { "epoch": 0.58, "grad_norm": 1.8242512385732192, "learning_rate": 4.03428077006064e-06, "loss": 0.6167, "step": 5473 }, { "epoch": 0.58, "grad_norm": 1.8149611602328688, "learning_rate": 4.032550290160872e-06, "loss": 0.6197, "step": 5474 }, { "epoch": 0.58, "grad_norm": 1.7737819313178398, "learning_rate": 4.030819930643437e-06, "loss": 0.6279, "step": 5475 }, { "epoch": 0.58, "grad_norm": 1.6626605624768183, "learning_rate": 4.0290896917236525e-06, "loss": 0.5808, "step": 5476 }, { "epoch": 0.58, "grad_norm": 2.0607235940123987, "learning_rate": 4.02735957361681e-06, "loss": 0.6062, "step": 5477 }, { "epoch": 0.58, "grad_norm": 1.8559617084584128, "learning_rate": 4.025629576538194e-06, "loss": 0.689, "step": 5478 }, { "epoch": 0.58, "grad_norm": 1.852017996865545, "learning_rate": 4.023899700703076e-06, "loss": 0.5979, "step": 5479 }, { "epoch": 0.58, "grad_norm": 2.04072505926048, "learning_rate": 4.022169946326708e-06, "loss": 0.6533, "step": 5480 }, { "epoch": 0.58, "grad_norm": 1.6812117788222, "learning_rate": 4.0204403136243265e-06, "loss": 0.6159, "step": 5481 }, { "epoch": 0.58, "grad_norm": 1.7140490770528736, "learning_rate": 4.018710802811155e-06, "loss": 0.634, "step": 5482 }, { "epoch": 0.58, "grad_norm": 1.7940258837933205, "learning_rate": 4.0169814141024024e-06, "loss": 0.6132, "step": 5483 }, { "epoch": 0.58, "grad_norm": 1.7376640323268557, "learning_rate": 4.01525214771326e-06, "loss": 0.679, "step": 5484 }, { "epoch": 0.59, "grad_norm": 1.8312940304137346, "learning_rate": 4.013523003858904e-06, "loss": 0.6297, "step": 5485 }, { "epoch": 0.59, "grad_norm": 1.8250846218131969, "learning_rate": 4.011793982754497e-06, "loss": 0.6059, "step": 5486 }, { "epoch": 0.59, "grad_norm": 1.7684570346632609, "learning_rate": 4.010065084615187e-06, "loss": 0.5749, "step": 5487 }, { "epoch": 0.59, "grad_norm": 1.7101120089483803, "learning_rate": 4.008336309656105e-06, "loss": 0.6164, "step": 5488 }, { "epoch": 0.59, "grad_norm": 1.7722205190607794, "learning_rate": 4.006607658092364e-06, "loss": 0.6735, "step": 5489 }, { "epoch": 0.59, "grad_norm": 1.0961321532001047, "learning_rate": 4.004879130139071e-06, "loss": 0.4993, "step": 5490 }, { "epoch": 0.59, "grad_norm": 2.089859969993391, "learning_rate": 4.003150726011306e-06, "loss": 0.6373, "step": 5491 }, { "epoch": 0.59, "grad_norm": 1.764568371524639, "learning_rate": 4.001422445924139e-06, "loss": 0.5703, "step": 5492 }, { "epoch": 0.59, "grad_norm": 1.825594531840023, "learning_rate": 3.999694290092627e-06, "loss": 0.6256, "step": 5493 }, { "epoch": 0.59, "grad_norm": 1.683025225366522, "learning_rate": 3.997966258731807e-06, "loss": 0.6466, "step": 5494 }, { "epoch": 0.59, "grad_norm": 1.6583593384593394, "learning_rate": 3.996238352056703e-06, "loss": 0.6935, "step": 5495 }, { "epoch": 0.59, "grad_norm": 2.013981387737608, "learning_rate": 3.994510570282324e-06, "loss": 0.5967, "step": 5496 }, { "epoch": 0.59, "grad_norm": 1.8143470847555763, "learning_rate": 3.992782913623662e-06, "loss": 0.5794, "step": 5497 }, { "epoch": 0.59, "grad_norm": 1.8662739065012133, "learning_rate": 3.991055382295694e-06, "loss": 0.5187, "step": 5498 }, { "epoch": 0.59, "grad_norm": 1.0776322213840384, "learning_rate": 3.9893279765133815e-06, "loss": 0.5178, "step": 5499 }, { "epoch": 0.59, "grad_norm": 1.76909879281427, "learning_rate": 3.987600696491669e-06, "loss": 0.701, "step": 5500 }, { "epoch": 0.59, "grad_norm": 1.0750029193563015, "learning_rate": 3.985873542445488e-06, "loss": 0.5021, "step": 5501 }, { "epoch": 0.59, "grad_norm": 1.6984332042447656, "learning_rate": 3.984146514589751e-06, "loss": 0.5611, "step": 5502 }, { "epoch": 0.59, "grad_norm": 1.684643290635515, "learning_rate": 3.982419613139359e-06, "loss": 0.615, "step": 5503 }, { "epoch": 0.59, "grad_norm": 1.8410200902637668, "learning_rate": 3.9806928383091945e-06, "loss": 0.7064, "step": 5504 }, { "epoch": 0.59, "grad_norm": 1.7600541379409587, "learning_rate": 3.978966190314124e-06, "loss": 0.6644, "step": 5505 }, { "epoch": 0.59, "grad_norm": 1.0620208929326491, "learning_rate": 3.977239669368998e-06, "loss": 0.495, "step": 5506 }, { "epoch": 0.59, "grad_norm": 1.5793900656772741, "learning_rate": 3.975513275688652e-06, "loss": 0.579, "step": 5507 }, { "epoch": 0.59, "grad_norm": 1.837852439126374, "learning_rate": 3.973787009487907e-06, "loss": 0.675, "step": 5508 }, { "epoch": 0.59, "grad_norm": 1.5538714056541505, "learning_rate": 3.9720608709815665e-06, "loss": 0.5162, "step": 5509 }, { "epoch": 0.59, "grad_norm": 1.8782568426132835, "learning_rate": 3.97033486038442e-06, "loss": 0.5855, "step": 5510 }, { "epoch": 0.59, "grad_norm": 1.7565238480490257, "learning_rate": 3.968608977911237e-06, "loss": 0.6319, "step": 5511 }, { "epoch": 0.59, "grad_norm": 1.1284764192303172, "learning_rate": 3.966883223776773e-06, "loss": 0.5036, "step": 5512 }, { "epoch": 0.59, "grad_norm": 1.9074576279127338, "learning_rate": 3.965157598195772e-06, "loss": 0.6833, "step": 5513 }, { "epoch": 0.59, "grad_norm": 1.7272215306447174, "learning_rate": 3.963432101382955e-06, "loss": 0.5997, "step": 5514 }, { "epoch": 0.59, "grad_norm": 1.7649383255964453, "learning_rate": 3.9617067335530315e-06, "loss": 0.6928, "step": 5515 }, { "epoch": 0.59, "grad_norm": 1.7332046747908956, "learning_rate": 3.9599814949206945e-06, "loss": 0.6036, "step": 5516 }, { "epoch": 0.59, "grad_norm": 1.9661787908851411, "learning_rate": 3.958256385700618e-06, "loss": 0.4902, "step": 5517 }, { "epoch": 0.59, "grad_norm": 1.6608782636465287, "learning_rate": 3.956531406107462e-06, "loss": 0.5869, "step": 5518 }, { "epoch": 0.59, "grad_norm": 1.8419831435284373, "learning_rate": 3.9548065563558736e-06, "loss": 0.5218, "step": 5519 }, { "epoch": 0.59, "grad_norm": 1.8753786194458657, "learning_rate": 3.9530818366604766e-06, "loss": 0.699, "step": 5520 }, { "epoch": 0.59, "grad_norm": 1.6630661562846574, "learning_rate": 3.951357247235882e-06, "loss": 0.6927, "step": 5521 }, { "epoch": 0.59, "grad_norm": 1.6576952408662746, "learning_rate": 3.949632788296689e-06, "loss": 0.5922, "step": 5522 }, { "epoch": 0.59, "grad_norm": 2.5139168262597527, "learning_rate": 3.947908460057473e-06, "loss": 0.6481, "step": 5523 }, { "epoch": 0.59, "grad_norm": 1.1810007581524549, "learning_rate": 3.946184262732798e-06, "loss": 0.5097, "step": 5524 }, { "epoch": 0.59, "grad_norm": 1.69194256322021, "learning_rate": 3.944460196537211e-06, "loss": 0.5802, "step": 5525 }, { "epoch": 0.59, "grad_norm": 1.9219763141330763, "learning_rate": 3.942736261685242e-06, "loss": 0.6019, "step": 5526 }, { "epoch": 0.59, "grad_norm": 1.6874431905072544, "learning_rate": 3.941012458391403e-06, "loss": 0.6487, "step": 5527 }, { "epoch": 0.59, "grad_norm": 1.663222419006807, "learning_rate": 3.9392887868701945e-06, "loss": 0.5722, "step": 5528 }, { "epoch": 0.59, "grad_norm": 1.704935869071527, "learning_rate": 3.937565247336094e-06, "loss": 0.7049, "step": 5529 }, { "epoch": 0.59, "grad_norm": 1.7579126485326662, "learning_rate": 3.935841840003569e-06, "loss": 0.6099, "step": 5530 }, { "epoch": 0.59, "grad_norm": 2.0340705597473057, "learning_rate": 3.934118565087067e-06, "loss": 0.5953, "step": 5531 }, { "epoch": 0.59, "grad_norm": 1.670274744646472, "learning_rate": 3.93239542280102e-06, "loss": 0.6061, "step": 5532 }, { "epoch": 0.59, "grad_norm": 1.9068548001510157, "learning_rate": 3.930672413359843e-06, "loss": 0.6769, "step": 5533 }, { "epoch": 0.59, "grad_norm": 1.1555358095078099, "learning_rate": 3.928949536977933e-06, "loss": 0.5104, "step": 5534 }, { "epoch": 0.59, "grad_norm": 1.9500145901801331, "learning_rate": 3.927226793869672e-06, "loss": 0.5708, "step": 5535 }, { "epoch": 0.59, "grad_norm": 2.0166664739074824, "learning_rate": 3.925504184249427e-06, "loss": 0.6342, "step": 5536 }, { "epoch": 0.59, "grad_norm": 1.5565041167720297, "learning_rate": 3.923781708331548e-06, "loss": 0.5249, "step": 5537 }, { "epoch": 0.59, "grad_norm": 1.5744675052086485, "learning_rate": 3.922059366330364e-06, "loss": 0.5545, "step": 5538 }, { "epoch": 0.59, "grad_norm": 1.8003850201502918, "learning_rate": 3.920337158460194e-06, "loss": 0.5792, "step": 5539 }, { "epoch": 0.59, "grad_norm": 1.5968349413075553, "learning_rate": 3.918615084935335e-06, "loss": 0.594, "step": 5540 }, { "epoch": 0.59, "grad_norm": 1.1187569897473655, "learning_rate": 3.9168931459700684e-06, "loss": 0.4969, "step": 5541 }, { "epoch": 0.59, "grad_norm": 2.0696796080860294, "learning_rate": 3.915171341778662e-06, "loss": 0.6511, "step": 5542 }, { "epoch": 0.59, "grad_norm": 1.0816963730331997, "learning_rate": 3.913449672575362e-06, "loss": 0.5291, "step": 5543 }, { "epoch": 0.59, "grad_norm": 1.8879947865382518, "learning_rate": 3.911728138574401e-06, "loss": 0.5949, "step": 5544 }, { "epoch": 0.59, "grad_norm": 1.761925442079345, "learning_rate": 3.910006739989995e-06, "loss": 0.6248, "step": 5545 }, { "epoch": 0.59, "grad_norm": 1.7403433277355451, "learning_rate": 3.908285477036342e-06, "loss": 0.6192, "step": 5546 }, { "epoch": 0.59, "grad_norm": 1.7312753199006063, "learning_rate": 3.906564349927625e-06, "loss": 0.6336, "step": 5547 }, { "epoch": 0.59, "grad_norm": 1.676855284702898, "learning_rate": 3.904843358878004e-06, "loss": 0.6272, "step": 5548 }, { "epoch": 0.59, "grad_norm": 1.6970217525007483, "learning_rate": 3.90312250410163e-06, "loss": 0.6403, "step": 5549 }, { "epoch": 0.59, "grad_norm": 1.6814662127027655, "learning_rate": 3.901401785812631e-06, "loss": 0.5634, "step": 5550 }, { "epoch": 0.59, "grad_norm": 1.9049003201169892, "learning_rate": 3.899681204225123e-06, "loss": 0.575, "step": 5551 }, { "epoch": 0.59, "grad_norm": 1.9200135218659051, "learning_rate": 3.8979607595532e-06, "loss": 0.6698, "step": 5552 }, { "epoch": 0.59, "grad_norm": 1.708673093877784, "learning_rate": 3.8962404520109435e-06, "loss": 0.5822, "step": 5553 }, { "epoch": 0.59, "grad_norm": 1.6150691712999476, "learning_rate": 3.894520281812416e-06, "loss": 0.585, "step": 5554 }, { "epoch": 0.59, "grad_norm": 1.7640177847492193, "learning_rate": 3.892800249171662e-06, "loss": 0.6258, "step": 5555 }, { "epoch": 0.59, "grad_norm": 1.5556071881567153, "learning_rate": 3.89108035430271e-06, "loss": 0.5624, "step": 5556 }, { "epoch": 0.59, "grad_norm": 1.6622345027290517, "learning_rate": 3.8893605974195715e-06, "loss": 0.6185, "step": 5557 }, { "epoch": 0.59, "grad_norm": 1.5447525677374774, "learning_rate": 3.887640978736239e-06, "loss": 0.5741, "step": 5558 }, { "epoch": 0.59, "grad_norm": 1.8988614102025467, "learning_rate": 3.885921498466691e-06, "loss": 0.5809, "step": 5559 }, { "epoch": 0.59, "grad_norm": 1.713284063788211, "learning_rate": 3.884202156824885e-06, "loss": 0.5859, "step": 5560 }, { "epoch": 0.59, "grad_norm": 1.984670862190618, "learning_rate": 3.882482954024766e-06, "loss": 0.5942, "step": 5561 }, { "epoch": 0.59, "grad_norm": 1.6815532572886382, "learning_rate": 3.880763890280257e-06, "loss": 0.5838, "step": 5562 }, { "epoch": 0.59, "grad_norm": 1.6848477293204442, "learning_rate": 3.879044965805266e-06, "loss": 0.6115, "step": 5563 }, { "epoch": 0.59, "grad_norm": 1.7938611309250512, "learning_rate": 3.8773261808136825e-06, "loss": 0.6243, "step": 5564 }, { "epoch": 0.59, "grad_norm": 1.9538952756905037, "learning_rate": 3.875607535519381e-06, "loss": 0.5832, "step": 5565 }, { "epoch": 0.59, "grad_norm": 1.2576421128555972, "learning_rate": 3.873889030136215e-06, "loss": 0.535, "step": 5566 }, { "epoch": 0.59, "grad_norm": 2.099142287079167, "learning_rate": 3.872170664878025e-06, "loss": 0.6422, "step": 5567 }, { "epoch": 0.59, "grad_norm": 1.7679934785113283, "learning_rate": 3.870452439958631e-06, "loss": 0.5555, "step": 5568 }, { "epoch": 0.59, "grad_norm": 2.013494486923105, "learning_rate": 3.868734355591836e-06, "loss": 0.6802, "step": 5569 }, { "epoch": 0.59, "grad_norm": 1.6199932601863365, "learning_rate": 3.867016411991426e-06, "loss": 0.5288, "step": 5570 }, { "epoch": 0.59, "grad_norm": 1.9194786668339472, "learning_rate": 3.8652986093711686e-06, "loss": 0.631, "step": 5571 }, { "epoch": 0.59, "grad_norm": 1.7098438069044577, "learning_rate": 3.863580947944816e-06, "loss": 0.6394, "step": 5572 }, { "epoch": 0.59, "grad_norm": 1.169899287452866, "learning_rate": 3.8618634279261e-06, "loss": 0.4919, "step": 5573 }, { "epoch": 0.59, "grad_norm": 2.1706359172095357, "learning_rate": 3.860146049528738e-06, "loss": 0.6574, "step": 5574 }, { "epoch": 0.59, "grad_norm": 1.9127214901884402, "learning_rate": 3.858428812966425e-06, "loss": 0.5791, "step": 5575 }, { "epoch": 0.59, "grad_norm": 1.80494198732691, "learning_rate": 3.856711718452844e-06, "loss": 0.6612, "step": 5576 }, { "epoch": 0.59, "grad_norm": 1.9720762532823246, "learning_rate": 3.854994766201656e-06, "loss": 0.5988, "step": 5577 }, { "epoch": 0.59, "grad_norm": 1.7140583894180184, "learning_rate": 3.8532779564265074e-06, "loss": 0.6089, "step": 5578 }, { "epoch": 0.6, "grad_norm": 1.0912741972670763, "learning_rate": 3.851561289341023e-06, "loss": 0.5051, "step": 5579 }, { "epoch": 0.6, "grad_norm": 1.5339344139007707, "learning_rate": 3.849844765158814e-06, "loss": 0.6233, "step": 5580 }, { "epoch": 0.6, "grad_norm": 1.9439821415097036, "learning_rate": 3.848128384093473e-06, "loss": 0.5971, "step": 5581 }, { "epoch": 0.6, "grad_norm": 1.8248111672540712, "learning_rate": 3.846412146358571e-06, "loss": 0.6098, "step": 5582 }, { "epoch": 0.6, "grad_norm": 1.6983957678428594, "learning_rate": 3.844696052167667e-06, "loss": 0.6, "step": 5583 }, { "epoch": 0.6, "grad_norm": 1.6918877031972541, "learning_rate": 3.842980101734296e-06, "loss": 0.5322, "step": 5584 }, { "epoch": 0.6, "grad_norm": 1.8027632817980304, "learning_rate": 3.841264295271981e-06, "loss": 0.6223, "step": 5585 }, { "epoch": 0.6, "grad_norm": 1.8801424581525175, "learning_rate": 3.839548632994223e-06, "loss": 0.6782, "step": 5586 }, { "epoch": 0.6, "grad_norm": 1.5174827843077743, "learning_rate": 3.837833115114508e-06, "loss": 0.5439, "step": 5587 }, { "epoch": 0.6, "grad_norm": 1.8555854835001995, "learning_rate": 3.8361177418463006e-06, "loss": 0.6614, "step": 5588 }, { "epoch": 0.6, "grad_norm": 1.8171387408330641, "learning_rate": 3.834402513403049e-06, "loss": 0.5966, "step": 5589 }, { "epoch": 0.6, "grad_norm": 1.751503019451252, "learning_rate": 3.832687429998188e-06, "loss": 0.597, "step": 5590 }, { "epoch": 0.6, "grad_norm": 1.797639107783123, "learning_rate": 3.830972491845123e-06, "loss": 0.5645, "step": 5591 }, { "epoch": 0.6, "grad_norm": 1.8059181425484716, "learning_rate": 3.829257699157252e-06, "loss": 0.7363, "step": 5592 }, { "epoch": 0.6, "grad_norm": 1.9078263894538958, "learning_rate": 3.827543052147952e-06, "loss": 0.6207, "step": 5593 }, { "epoch": 0.6, "grad_norm": 1.7017014336124958, "learning_rate": 3.82582855103058e-06, "loss": 0.6648, "step": 5594 }, { "epoch": 0.6, "grad_norm": 1.6807322793521142, "learning_rate": 3.824114196018476e-06, "loss": 0.6377, "step": 5595 }, { "epoch": 0.6, "grad_norm": 1.6664215057420815, "learning_rate": 3.822399987324961e-06, "loss": 0.5501, "step": 5596 }, { "epoch": 0.6, "grad_norm": 1.9275576341692089, "learning_rate": 3.82068592516334e-06, "loss": 0.5938, "step": 5597 }, { "epoch": 0.6, "grad_norm": 1.6062862913429192, "learning_rate": 3.8189720097468965e-06, "loss": 0.5479, "step": 5598 }, { "epoch": 0.6, "grad_norm": 1.9023939666818075, "learning_rate": 3.8172582412888995e-06, "loss": 0.6353, "step": 5599 }, { "epoch": 0.6, "grad_norm": 1.7642934243746782, "learning_rate": 3.815544620002597e-06, "loss": 0.5856, "step": 5600 }, { "epoch": 0.6, "grad_norm": 1.8560396923752418, "learning_rate": 3.813831146101219e-06, "loss": 0.594, "step": 5601 }, { "epoch": 0.6, "grad_norm": 1.8008315705124385, "learning_rate": 3.812117819797977e-06, "loss": 0.603, "step": 5602 }, { "epoch": 0.6, "grad_norm": 2.0021432909997485, "learning_rate": 3.810404641306067e-06, "loss": 0.6514, "step": 5603 }, { "epoch": 0.6, "grad_norm": 1.2050493247545715, "learning_rate": 3.8086916108386625e-06, "loss": 0.5051, "step": 5604 }, { "epoch": 0.6, "grad_norm": 1.8755545484129224, "learning_rate": 3.806978728608923e-06, "loss": 0.7268, "step": 5605 }, { "epoch": 0.6, "grad_norm": 1.8598660756182421, "learning_rate": 3.805265994829983e-06, "loss": 0.6339, "step": 5606 }, { "epoch": 0.6, "grad_norm": 1.7702589200083378, "learning_rate": 3.8035534097149646e-06, "loss": 0.5907, "step": 5607 }, { "epoch": 0.6, "grad_norm": 1.728811123178243, "learning_rate": 3.801840973476968e-06, "loss": 0.565, "step": 5608 }, { "epoch": 0.6, "grad_norm": 1.66299059115918, "learning_rate": 3.800128686329078e-06, "loss": 0.5919, "step": 5609 }, { "epoch": 0.6, "grad_norm": 1.628968403804861, "learning_rate": 3.7984165484843582e-06, "loss": 0.5512, "step": 5610 }, { "epoch": 0.6, "grad_norm": 1.1187055335688598, "learning_rate": 3.796704560155855e-06, "loss": 0.5109, "step": 5611 }, { "epoch": 0.6, "grad_norm": 1.5899602739542469, "learning_rate": 3.7949927215565953e-06, "loss": 0.5446, "step": 5612 }, { "epoch": 0.6, "grad_norm": 2.315411458976112, "learning_rate": 3.793281032899587e-06, "loss": 0.6362, "step": 5613 }, { "epoch": 0.6, "grad_norm": 1.9994823545976166, "learning_rate": 3.7915694943978217e-06, "loss": 0.6271, "step": 5614 }, { "epoch": 0.6, "grad_norm": 1.8095720255023233, "learning_rate": 3.7898581062642694e-06, "loss": 0.6084, "step": 5615 }, { "epoch": 0.6, "grad_norm": 1.5996531003631147, "learning_rate": 3.788146868711883e-06, "loss": 0.5355, "step": 5616 }, { "epoch": 0.6, "grad_norm": 2.0718122227152014, "learning_rate": 3.7864357819535956e-06, "loss": 0.6646, "step": 5617 }, { "epoch": 0.6, "grad_norm": 1.6891948097621892, "learning_rate": 3.7847248462023232e-06, "loss": 0.5305, "step": 5618 }, { "epoch": 0.6, "grad_norm": 1.807940975497641, "learning_rate": 3.7830140616709633e-06, "loss": 0.6315, "step": 5619 }, { "epoch": 0.6, "grad_norm": 1.9795920548535388, "learning_rate": 3.7813034285723904e-06, "loss": 0.6248, "step": 5620 }, { "epoch": 0.6, "grad_norm": 1.87020102990014, "learning_rate": 3.779592947119465e-06, "loss": 0.6509, "step": 5621 }, { "epoch": 0.6, "grad_norm": 1.7647080397307657, "learning_rate": 3.777882617525026e-06, "loss": 0.6846, "step": 5622 }, { "epoch": 0.6, "grad_norm": 1.6614555543869913, "learning_rate": 3.7761724400018947e-06, "loss": 0.5966, "step": 5623 }, { "epoch": 0.6, "grad_norm": 1.5977982607329269, "learning_rate": 3.774462414762873e-06, "loss": 0.5553, "step": 5624 }, { "epoch": 0.6, "grad_norm": 1.9505240857477528, "learning_rate": 3.7727525420207435e-06, "loss": 0.633, "step": 5625 }, { "epoch": 0.6, "grad_norm": 1.4440395175408987, "learning_rate": 3.7710428219882704e-06, "loss": 0.5184, "step": 5626 }, { "epoch": 0.6, "grad_norm": 1.8842083400600609, "learning_rate": 3.7693332548782e-06, "loss": 0.6723, "step": 5627 }, { "epoch": 0.6, "grad_norm": 1.2046257951727628, "learning_rate": 3.7676238409032567e-06, "loss": 0.514, "step": 5628 }, { "epoch": 0.6, "grad_norm": 1.5080244039132231, "learning_rate": 3.7659145802761477e-06, "loss": 0.4823, "step": 5629 }, { "epoch": 0.6, "grad_norm": 1.915405093790596, "learning_rate": 3.764205473209561e-06, "loss": 0.6536, "step": 5630 }, { "epoch": 0.6, "grad_norm": 1.6503121659171376, "learning_rate": 3.7624965199161662e-06, "loss": 0.701, "step": 5631 }, { "epoch": 0.6, "grad_norm": 1.2262823274528767, "learning_rate": 3.7607877206086114e-06, "loss": 0.5177, "step": 5632 }, { "epoch": 0.6, "grad_norm": 1.1705730530639735, "learning_rate": 3.7590790754995277e-06, "loss": 0.5019, "step": 5633 }, { "epoch": 0.6, "grad_norm": 1.6855771400584165, "learning_rate": 3.7573705848015268e-06, "loss": 0.5885, "step": 5634 }, { "epoch": 0.6, "grad_norm": 1.7410432476790774, "learning_rate": 3.7556622487272e-06, "loss": 0.6443, "step": 5635 }, { "epoch": 0.6, "grad_norm": 2.4319065448156465, "learning_rate": 3.7539540674891195e-06, "loss": 0.6121, "step": 5636 }, { "epoch": 0.6, "grad_norm": 1.697523396636111, "learning_rate": 3.7522460412998402e-06, "loss": 0.5604, "step": 5637 }, { "epoch": 0.6, "grad_norm": 1.1285068011634063, "learning_rate": 3.750538170371896e-06, "loss": 0.4917, "step": 5638 }, { "epoch": 0.6, "grad_norm": 1.1337757211870585, "learning_rate": 3.748830454917801e-06, "loss": 0.4852, "step": 5639 }, { "epoch": 0.6, "grad_norm": 1.8803817075634544, "learning_rate": 3.7471228951500517e-06, "loss": 0.6094, "step": 5640 }, { "epoch": 0.6, "grad_norm": 1.6818975490179187, "learning_rate": 3.7454154912811236e-06, "loss": 0.6044, "step": 5641 }, { "epoch": 0.6, "grad_norm": 1.8200870113304615, "learning_rate": 3.743708243523473e-06, "loss": 0.6141, "step": 5642 }, { "epoch": 0.6, "grad_norm": 1.5441245344797547, "learning_rate": 3.7420011520895393e-06, "loss": 0.5662, "step": 5643 }, { "epoch": 0.6, "grad_norm": 1.6450614250071285, "learning_rate": 3.7402942171917387e-06, "loss": 0.6109, "step": 5644 }, { "epoch": 0.6, "grad_norm": 1.7718805845667436, "learning_rate": 3.73858743904247e-06, "loss": 0.6343, "step": 5645 }, { "epoch": 0.6, "grad_norm": 1.8302513227368034, "learning_rate": 3.7368808178541132e-06, "loss": 0.7115, "step": 5646 }, { "epoch": 0.6, "grad_norm": 1.7462838958305422, "learning_rate": 3.7351743538390262e-06, "loss": 0.5895, "step": 5647 }, { "epoch": 0.6, "grad_norm": 1.2445117625156568, "learning_rate": 3.7334680472095506e-06, "loss": 0.5143, "step": 5648 }, { "epoch": 0.6, "grad_norm": 1.8344760445345998, "learning_rate": 3.7317618981780045e-06, "loss": 0.5425, "step": 5649 }, { "epoch": 0.6, "grad_norm": 1.6675265749253878, "learning_rate": 3.7300559069566894e-06, "loss": 0.6864, "step": 5650 }, { "epoch": 0.6, "grad_norm": 1.6835560031460526, "learning_rate": 3.7283500737578863e-06, "loss": 0.56, "step": 5651 }, { "epoch": 0.6, "grad_norm": 1.8653138759857533, "learning_rate": 3.726644398793857e-06, "loss": 0.5883, "step": 5652 }, { "epoch": 0.6, "grad_norm": 1.1444758403824211, "learning_rate": 3.724938882276843e-06, "loss": 0.5273, "step": 5653 }, { "epoch": 0.6, "grad_norm": 1.16739135938343, "learning_rate": 3.7232335244190656e-06, "loss": 0.4952, "step": 5654 }, { "epoch": 0.6, "grad_norm": 1.7016168151698672, "learning_rate": 3.721528325432728e-06, "loss": 0.6464, "step": 5655 }, { "epoch": 0.6, "grad_norm": 2.1366074870029967, "learning_rate": 3.7198232855300114e-06, "loss": 0.5312, "step": 5656 }, { "epoch": 0.6, "grad_norm": 1.8921333699289629, "learning_rate": 3.71811840492308e-06, "loss": 0.6338, "step": 5657 }, { "epoch": 0.6, "grad_norm": 1.760064484785517, "learning_rate": 3.716413683824075e-06, "loss": 0.6498, "step": 5658 }, { "epoch": 0.6, "grad_norm": 1.782795843644426, "learning_rate": 3.7147091224451192e-06, "loss": 0.61, "step": 5659 }, { "epoch": 0.6, "grad_norm": 1.6038136685153142, "learning_rate": 3.713004720998318e-06, "loss": 0.5897, "step": 5660 }, { "epoch": 0.6, "grad_norm": 1.6868062235934835, "learning_rate": 3.7113004796957534e-06, "loss": 0.6231, "step": 5661 }, { "epoch": 0.6, "grad_norm": 1.159225180122524, "learning_rate": 3.709596398749491e-06, "loss": 0.5241, "step": 5662 }, { "epoch": 0.6, "grad_norm": 1.591124717342877, "learning_rate": 3.7078924783715685e-06, "loss": 0.5667, "step": 5663 }, { "epoch": 0.6, "grad_norm": 1.760264194738503, "learning_rate": 3.706188718774013e-06, "loss": 0.5615, "step": 5664 }, { "epoch": 0.6, "grad_norm": 1.9470190972749657, "learning_rate": 3.704485120168826e-06, "loss": 0.6839, "step": 5665 }, { "epoch": 0.6, "grad_norm": 1.7611090794467925, "learning_rate": 3.702781682767993e-06, "loss": 0.592, "step": 5666 }, { "epoch": 0.6, "grad_norm": 1.6110269173209113, "learning_rate": 3.701078406783476e-06, "loss": 0.6574, "step": 5667 }, { "epoch": 0.6, "grad_norm": 1.8317008744162855, "learning_rate": 3.6993752924272187e-06, "loss": 0.5303, "step": 5668 }, { "epoch": 0.6, "grad_norm": 1.7727738192920346, "learning_rate": 3.6976723399111435e-06, "loss": 0.6148, "step": 5669 }, { "epoch": 0.6, "grad_norm": 1.8714063390987419, "learning_rate": 3.6959695494471536e-06, "loss": 0.6136, "step": 5670 }, { "epoch": 0.6, "grad_norm": 1.8936882498458576, "learning_rate": 3.6942669212471317e-06, "loss": 0.6526, "step": 5671 }, { "epoch": 0.61, "grad_norm": 1.6569190092922712, "learning_rate": 3.6925644555229407e-06, "loss": 0.6171, "step": 5672 }, { "epoch": 0.61, "grad_norm": 1.6742773681097125, "learning_rate": 3.6908621524864223e-06, "loss": 0.5658, "step": 5673 }, { "epoch": 0.61, "grad_norm": 1.6467113516183036, "learning_rate": 3.6891600123493985e-06, "loss": 0.5818, "step": 5674 }, { "epoch": 0.61, "grad_norm": 1.7891738768175358, "learning_rate": 3.6874580353236722e-06, "loss": 0.6951, "step": 5675 }, { "epoch": 0.61, "grad_norm": 1.9216716595501184, "learning_rate": 3.685756221621023e-06, "loss": 0.6635, "step": 5676 }, { "epoch": 0.61, "grad_norm": 1.112621156143359, "learning_rate": 3.6840545714532165e-06, "loss": 0.5122, "step": 5677 }, { "epoch": 0.61, "grad_norm": 1.6535306046224574, "learning_rate": 3.682353085031988e-06, "loss": 0.6138, "step": 5678 }, { "epoch": 0.61, "grad_norm": 1.6782364949937008, "learning_rate": 3.6806517625690608e-06, "loss": 0.5388, "step": 5679 }, { "epoch": 0.61, "grad_norm": 1.621554798599727, "learning_rate": 3.6789506042761334e-06, "loss": 0.6351, "step": 5680 }, { "epoch": 0.61, "grad_norm": 1.8487470474352548, "learning_rate": 3.6772496103648873e-06, "loss": 0.6637, "step": 5681 }, { "epoch": 0.61, "grad_norm": 2.0518872133440262, "learning_rate": 3.6755487810469803e-06, "loss": 0.7014, "step": 5682 }, { "epoch": 0.61, "grad_norm": 1.9274576402972328, "learning_rate": 3.673848116534052e-06, "loss": 0.5284, "step": 5683 }, { "epoch": 0.61, "grad_norm": 1.8485496199753275, "learning_rate": 3.6721476170377203e-06, "loss": 0.5864, "step": 5684 }, { "epoch": 0.61, "grad_norm": 1.7597360175790704, "learning_rate": 3.670447282769583e-06, "loss": 0.6197, "step": 5685 }, { "epoch": 0.61, "grad_norm": 1.917964873104025, "learning_rate": 3.6687471139412173e-06, "loss": 0.7132, "step": 5686 }, { "epoch": 0.61, "grad_norm": 1.6405709924026293, "learning_rate": 3.6670471107641794e-06, "loss": 0.5186, "step": 5687 }, { "epoch": 0.61, "grad_norm": 2.0148267899341024, "learning_rate": 3.665347273450005e-06, "loss": 0.7483, "step": 5688 }, { "epoch": 0.61, "grad_norm": 1.1082141277122854, "learning_rate": 3.6636476022102095e-06, "loss": 0.5161, "step": 5689 }, { "epoch": 0.61, "grad_norm": 1.7560286609400795, "learning_rate": 3.6619480972562884e-06, "loss": 0.6811, "step": 5690 }, { "epoch": 0.61, "grad_norm": 2.2085162769185698, "learning_rate": 3.660248758799716e-06, "loss": 0.5495, "step": 5691 }, { "epoch": 0.61, "grad_norm": 2.880373000324758, "learning_rate": 3.658549587051943e-06, "loss": 0.5928, "step": 5692 }, { "epoch": 0.61, "grad_norm": 2.1847158052648292, "learning_rate": 3.6568505822244025e-06, "loss": 0.5599, "step": 5693 }, { "epoch": 0.61, "grad_norm": 1.7748073647002878, "learning_rate": 3.655151744528507e-06, "loss": 0.6073, "step": 5694 }, { "epoch": 0.61, "grad_norm": 1.7307996480094578, "learning_rate": 3.653453074175648e-06, "loss": 0.5774, "step": 5695 }, { "epoch": 0.61, "grad_norm": 1.5442858723567452, "learning_rate": 3.6517545713771948e-06, "loss": 0.5429, "step": 5696 }, { "epoch": 0.61, "grad_norm": 1.8333497808094894, "learning_rate": 3.650056236344496e-06, "loss": 0.6503, "step": 5697 }, { "epoch": 0.61, "grad_norm": 1.1318695406826147, "learning_rate": 3.648358069288881e-06, "loss": 0.4958, "step": 5698 }, { "epoch": 0.61, "grad_norm": 1.8172848412057854, "learning_rate": 3.646660070421656e-06, "loss": 0.5966, "step": 5699 }, { "epoch": 0.61, "grad_norm": 1.7153695157911184, "learning_rate": 3.6449622399541087e-06, "loss": 0.5347, "step": 5700 }, { "epoch": 0.61, "grad_norm": 1.8093332177788264, "learning_rate": 3.6432645780975045e-06, "loss": 0.6429, "step": 5701 }, { "epoch": 0.61, "grad_norm": 1.765606235834543, "learning_rate": 3.6415670850630878e-06, "loss": 0.5475, "step": 5702 }, { "epoch": 0.61, "grad_norm": 1.607561722233015, "learning_rate": 3.639869761062082e-06, "loss": 0.567, "step": 5703 }, { "epoch": 0.61, "grad_norm": 1.7167849276929261, "learning_rate": 3.63817260630569e-06, "loss": 0.6054, "step": 5704 }, { "epoch": 0.61, "grad_norm": 1.6665502156410041, "learning_rate": 3.6364756210050934e-06, "loss": 0.4862, "step": 5705 }, { "epoch": 0.61, "grad_norm": 1.729196467180867, "learning_rate": 3.6347788053714515e-06, "loss": 0.6471, "step": 5706 }, { "epoch": 0.61, "grad_norm": 1.7423180585632838, "learning_rate": 3.6330821596159037e-06, "loss": 0.6488, "step": 5707 }, { "epoch": 0.61, "grad_norm": 1.7518030647783498, "learning_rate": 3.63138568394957e-06, "loss": 0.5843, "step": 5708 }, { "epoch": 0.61, "grad_norm": 1.7101965183107815, "learning_rate": 3.6296893785835447e-06, "loss": 0.5302, "step": 5709 }, { "epoch": 0.61, "grad_norm": 1.1830698436081792, "learning_rate": 3.6279932437289057e-06, "loss": 0.5157, "step": 5710 }, { "epoch": 0.61, "grad_norm": 1.6752912069726005, "learning_rate": 3.6262972795967062e-06, "loss": 0.5513, "step": 5711 }, { "epoch": 0.61, "grad_norm": 1.7959741880179454, "learning_rate": 3.6246014863979806e-06, "loss": 0.5989, "step": 5712 }, { "epoch": 0.61, "grad_norm": 1.6650635134651564, "learning_rate": 3.6229058643437397e-06, "loss": 0.602, "step": 5713 }, { "epoch": 0.61, "grad_norm": 1.91377277145134, "learning_rate": 3.6212104136449755e-06, "loss": 0.7523, "step": 5714 }, { "epoch": 0.61, "grad_norm": 1.9306774102521977, "learning_rate": 3.6195151345126556e-06, "loss": 0.5468, "step": 5715 }, { "epoch": 0.61, "grad_norm": 1.7026909537069403, "learning_rate": 3.61782002715773e-06, "loss": 0.5986, "step": 5716 }, { "epoch": 0.61, "grad_norm": 1.8084035362616178, "learning_rate": 3.6161250917911263e-06, "loss": 0.5549, "step": 5717 }, { "epoch": 0.61, "grad_norm": 1.7896323518104114, "learning_rate": 3.614430328623747e-06, "loss": 0.4876, "step": 5718 }, { "epoch": 0.61, "grad_norm": 1.8447691748171782, "learning_rate": 3.6127357378664775e-06, "loss": 0.5321, "step": 5719 }, { "epoch": 0.61, "grad_norm": 1.2657822516039032, "learning_rate": 3.611041319730182e-06, "loss": 0.5034, "step": 5720 }, { "epoch": 0.61, "grad_norm": 1.697066474758531, "learning_rate": 3.6093470744256974e-06, "loss": 0.6196, "step": 5721 }, { "epoch": 0.61, "grad_norm": 1.8709625190788493, "learning_rate": 3.6076530021638443e-06, "loss": 0.6706, "step": 5722 }, { "epoch": 0.61, "grad_norm": 1.9598372046783192, "learning_rate": 3.6059591031554223e-06, "loss": 0.6792, "step": 5723 }, { "epoch": 0.61, "grad_norm": 1.8161720839620785, "learning_rate": 3.6042653776112067e-06, "loss": 0.5495, "step": 5724 }, { "epoch": 0.61, "grad_norm": 1.0937892680476091, "learning_rate": 3.6025718257419532e-06, "loss": 0.5397, "step": 5725 }, { "epoch": 0.61, "grad_norm": 1.9247531218602363, "learning_rate": 3.6008784477583943e-06, "loss": 0.649, "step": 5726 }, { "epoch": 0.61, "grad_norm": 1.9033277351678204, "learning_rate": 3.5991852438712404e-06, "loss": 0.665, "step": 5727 }, { "epoch": 0.61, "grad_norm": 1.0995819371823654, "learning_rate": 3.5974922142911835e-06, "loss": 0.5166, "step": 5728 }, { "epoch": 0.61, "grad_norm": 1.8369902496812833, "learning_rate": 3.5957993592288897e-06, "loss": 0.6242, "step": 5729 }, { "epoch": 0.61, "grad_norm": 1.788577981761431, "learning_rate": 3.5941066788950075e-06, "loss": 0.6805, "step": 5730 }, { "epoch": 0.61, "grad_norm": 1.6240465902513306, "learning_rate": 3.5924141735001596e-06, "loss": 0.589, "step": 5731 }, { "epoch": 0.61, "grad_norm": 1.2289861227612333, "learning_rate": 3.5907218432549488e-06, "loss": 0.5561, "step": 5732 }, { "epoch": 0.61, "grad_norm": 1.086796581387615, "learning_rate": 3.589029688369959e-06, "loss": 0.5, "step": 5733 }, { "epoch": 0.61, "grad_norm": 2.0499639467826514, "learning_rate": 3.5873377090557494e-06, "loss": 0.6366, "step": 5734 }, { "epoch": 0.61, "grad_norm": 1.0588158477253704, "learning_rate": 3.5856459055228535e-06, "loss": 0.4835, "step": 5735 }, { "epoch": 0.61, "grad_norm": 1.8197940826105645, "learning_rate": 3.58395427798179e-06, "loss": 0.7291, "step": 5736 }, { "epoch": 0.61, "grad_norm": 1.1350045958833475, "learning_rate": 3.5822628266430514e-06, "loss": 0.5191, "step": 5737 }, { "epoch": 0.61, "grad_norm": 1.7381999058813118, "learning_rate": 3.580571551717109e-06, "loss": 0.5443, "step": 5738 }, { "epoch": 0.61, "grad_norm": 1.8388872859126737, "learning_rate": 3.578880453414415e-06, "loss": 0.5597, "step": 5739 }, { "epoch": 0.61, "grad_norm": 1.9503851267620158, "learning_rate": 3.577189531945395e-06, "loss": 0.6405, "step": 5740 }, { "epoch": 0.61, "grad_norm": 1.7132951588419398, "learning_rate": 3.5754987875204563e-06, "loss": 0.6006, "step": 5741 }, { "epoch": 0.61, "grad_norm": 1.7146270700727615, "learning_rate": 3.5738082203499823e-06, "loss": 0.5969, "step": 5742 }, { "epoch": 0.61, "grad_norm": 1.6332989864286704, "learning_rate": 3.572117830644334e-06, "loss": 0.5853, "step": 5743 }, { "epoch": 0.61, "grad_norm": 1.8611571688345747, "learning_rate": 3.570427618613852e-06, "loss": 0.5638, "step": 5744 }, { "epoch": 0.61, "grad_norm": 1.1178553947966414, "learning_rate": 3.5687375844688533e-06, "loss": 0.5084, "step": 5745 }, { "epoch": 0.61, "grad_norm": 1.653871442376093, "learning_rate": 3.5670477284196335e-06, "loss": 0.6297, "step": 5746 }, { "epoch": 0.61, "grad_norm": 1.1546862862940905, "learning_rate": 3.565358050676466e-06, "loss": 0.5211, "step": 5747 }, { "epoch": 0.61, "grad_norm": 1.5803360406383724, "learning_rate": 3.5636685514496028e-06, "loss": 0.6165, "step": 5748 }, { "epoch": 0.61, "grad_norm": 1.7965793134027528, "learning_rate": 3.561979230949271e-06, "loss": 0.5723, "step": 5749 }, { "epoch": 0.61, "grad_norm": 1.8206865356917987, "learning_rate": 3.5602900893856774e-06, "loss": 0.5921, "step": 5750 }, { "epoch": 0.61, "grad_norm": 2.1493385070128954, "learning_rate": 3.5586011269690067e-06, "loss": 0.622, "step": 5751 }, { "epoch": 0.61, "grad_norm": 1.7299010023751173, "learning_rate": 3.556912343909422e-06, "loss": 0.6085, "step": 5752 }, { "epoch": 0.61, "grad_norm": 2.0006496621490317, "learning_rate": 3.555223740417062e-06, "loss": 0.6419, "step": 5753 }, { "epoch": 0.61, "grad_norm": 1.7843967752014749, "learning_rate": 3.5535353167020448e-06, "loss": 0.6303, "step": 5754 }, { "epoch": 0.61, "grad_norm": 1.9269456254097048, "learning_rate": 3.5518470729744632e-06, "loss": 0.6722, "step": 5755 }, { "epoch": 0.61, "grad_norm": 1.8809572063345998, "learning_rate": 3.5501590094443926e-06, "loss": 0.6459, "step": 5756 }, { "epoch": 0.61, "grad_norm": 1.2210358896773341, "learning_rate": 3.5484711263218828e-06, "loss": 0.5193, "step": 5757 }, { "epoch": 0.61, "grad_norm": 1.8323733282525583, "learning_rate": 3.5467834238169613e-06, "loss": 0.5115, "step": 5758 }, { "epoch": 0.61, "grad_norm": 1.7482389528206965, "learning_rate": 3.545095902139633e-06, "loss": 0.6422, "step": 5759 }, { "epoch": 0.61, "grad_norm": 1.7795110858575593, "learning_rate": 3.54340856149988e-06, "loss": 0.5324, "step": 5760 }, { "epoch": 0.61, "grad_norm": 1.1579142520577541, "learning_rate": 3.5417214021076653e-06, "loss": 0.5186, "step": 5761 }, { "epoch": 0.61, "grad_norm": 1.6991087169269332, "learning_rate": 3.5400344241729234e-06, "loss": 0.4992, "step": 5762 }, { "epoch": 0.61, "grad_norm": 1.0660444315399502, "learning_rate": 3.5383476279055715e-06, "loss": 0.4938, "step": 5763 }, { "epoch": 0.61, "grad_norm": 1.919759363584671, "learning_rate": 3.536661013515501e-06, "loss": 0.8033, "step": 5764 }, { "epoch": 0.61, "grad_norm": 1.1530554054759294, "learning_rate": 3.5349745812125824e-06, "loss": 0.5021, "step": 5765 }, { "epoch": 0.62, "grad_norm": 1.0745761510694967, "learning_rate": 3.5332883312066627e-06, "loss": 0.4989, "step": 5766 }, { "epoch": 0.62, "grad_norm": 1.7962641287995456, "learning_rate": 3.5316022637075665e-06, "loss": 0.5957, "step": 5767 }, { "epoch": 0.62, "grad_norm": 1.7996722515124177, "learning_rate": 3.5299163789250957e-06, "loss": 0.6403, "step": 5768 }, { "epoch": 0.62, "grad_norm": 1.8967996993842822, "learning_rate": 3.5282306770690287e-06, "loss": 0.6477, "step": 5769 }, { "epoch": 0.62, "grad_norm": 2.0568002210669505, "learning_rate": 3.526545158349124e-06, "loss": 0.5502, "step": 5770 }, { "epoch": 0.62, "grad_norm": 1.7888138716184394, "learning_rate": 3.5248598229751124e-06, "loss": 0.6168, "step": 5771 }, { "epoch": 0.62, "grad_norm": 1.8664382595288176, "learning_rate": 3.5231746711567055e-06, "loss": 0.6808, "step": 5772 }, { "epoch": 0.62, "grad_norm": 1.7984043747637632, "learning_rate": 3.5214897031035927e-06, "loss": 0.6351, "step": 5773 }, { "epoch": 0.62, "grad_norm": 1.7937635975656652, "learning_rate": 3.519804919025438e-06, "loss": 0.5239, "step": 5774 }, { "epoch": 0.62, "grad_norm": 1.833153333979668, "learning_rate": 3.5181203191318834e-06, "loss": 0.5899, "step": 5775 }, { "epoch": 0.62, "grad_norm": 1.531243372707301, "learning_rate": 3.5164359036325483e-06, "loss": 0.6747, "step": 5776 }, { "epoch": 0.62, "grad_norm": 1.834826319232476, "learning_rate": 3.514751672737031e-06, "loss": 0.6627, "step": 5777 }, { "epoch": 0.62, "grad_norm": 1.5231300237923575, "learning_rate": 3.5130676266549e-06, "loss": 0.5235, "step": 5778 }, { "epoch": 0.62, "grad_norm": 1.7348383399362617, "learning_rate": 3.5113837655957096e-06, "loss": 0.6248, "step": 5779 }, { "epoch": 0.62, "grad_norm": 1.9272047204683047, "learning_rate": 3.509700089768986e-06, "loss": 0.5341, "step": 5780 }, { "epoch": 0.62, "grad_norm": 1.6522750458390734, "learning_rate": 3.508016599384233e-06, "loss": 0.5426, "step": 5781 }, { "epoch": 0.62, "grad_norm": 1.75696102622929, "learning_rate": 3.506333294650932e-06, "loss": 0.6609, "step": 5782 }, { "epoch": 0.62, "grad_norm": 1.7135962857218972, "learning_rate": 3.504650175778542e-06, "loss": 0.5923, "step": 5783 }, { "epoch": 0.62, "grad_norm": 1.1044638153611253, "learning_rate": 3.502967242976497e-06, "loss": 0.4793, "step": 5784 }, { "epoch": 0.62, "grad_norm": 1.6565720269707531, "learning_rate": 3.501284496454208e-06, "loss": 0.6142, "step": 5785 }, { "epoch": 0.62, "grad_norm": 1.7194089414547828, "learning_rate": 3.4996019364210653e-06, "loss": 0.5792, "step": 5786 }, { "epoch": 0.62, "grad_norm": 1.5264941425978569, "learning_rate": 3.4979195630864337e-06, "loss": 0.5956, "step": 5787 }, { "epoch": 0.62, "grad_norm": 1.9876082120834264, "learning_rate": 3.496237376659656e-06, "loss": 0.6894, "step": 5788 }, { "epoch": 0.62, "grad_norm": 2.0566287049508003, "learning_rate": 3.494555377350048e-06, "loss": 0.6015, "step": 5789 }, { "epoch": 0.62, "grad_norm": 1.9677209082301774, "learning_rate": 3.4928735653669095e-06, "loss": 0.5729, "step": 5790 }, { "epoch": 0.62, "grad_norm": 1.6463929395663903, "learning_rate": 3.4911919409195107e-06, "loss": 0.5716, "step": 5791 }, { "epoch": 0.62, "grad_norm": 1.6309340225819666, "learning_rate": 3.489510504217103e-06, "loss": 0.6768, "step": 5792 }, { "epoch": 0.62, "grad_norm": 1.7760760311092088, "learning_rate": 3.4878292554689088e-06, "loss": 0.5298, "step": 5793 }, { "epoch": 0.62, "grad_norm": 1.8291805958561764, "learning_rate": 3.48614819488413e-06, "loss": 0.6381, "step": 5794 }, { "epoch": 0.62, "grad_norm": 1.8250716046605857, "learning_rate": 3.484467322671947e-06, "loss": 0.6288, "step": 5795 }, { "epoch": 0.62, "grad_norm": 1.706558620627835, "learning_rate": 3.4827866390415155e-06, "loss": 0.6702, "step": 5796 }, { "epoch": 0.62, "grad_norm": 1.8260607004851008, "learning_rate": 3.4811061442019677e-06, "loss": 0.6176, "step": 5797 }, { "epoch": 0.62, "grad_norm": 1.5081666985607975, "learning_rate": 3.4794258383624115e-06, "loss": 0.5144, "step": 5798 }, { "epoch": 0.62, "grad_norm": 1.8131532580623475, "learning_rate": 3.4777457217319312e-06, "loss": 0.645, "step": 5799 }, { "epoch": 0.62, "grad_norm": 1.945736073968096, "learning_rate": 3.4760657945195885e-06, "loss": 0.6611, "step": 5800 }, { "epoch": 0.62, "grad_norm": 1.7619206999535684, "learning_rate": 3.4743860569344213e-06, "loss": 0.6898, "step": 5801 }, { "epoch": 0.62, "grad_norm": 1.740373147476426, "learning_rate": 3.472706509185444e-06, "loss": 0.6467, "step": 5802 }, { "epoch": 0.62, "grad_norm": 1.7783911643131056, "learning_rate": 3.4710271514816473e-06, "loss": 0.6492, "step": 5803 }, { "epoch": 0.62, "grad_norm": 1.5911116608315918, "learning_rate": 3.4693479840319975e-06, "loss": 0.613, "step": 5804 }, { "epoch": 0.62, "grad_norm": 1.2332249307440815, "learning_rate": 3.467669007045439e-06, "loss": 0.4985, "step": 5805 }, { "epoch": 0.62, "grad_norm": 1.2494587749427524, "learning_rate": 3.4659902207308916e-06, "loss": 0.501, "step": 5806 }, { "epoch": 0.62, "grad_norm": 1.7293321153218972, "learning_rate": 3.4643116252972486e-06, "loss": 0.613, "step": 5807 }, { "epoch": 0.62, "grad_norm": 1.7229058543495632, "learning_rate": 3.4626332209533838e-06, "loss": 0.5931, "step": 5808 }, { "epoch": 0.62, "grad_norm": 1.7806557902247597, "learning_rate": 3.460955007908145e-06, "loss": 0.5613, "step": 5809 }, { "epoch": 0.62, "grad_norm": 1.75324121818837, "learning_rate": 3.4592769863703573e-06, "loss": 0.6095, "step": 5810 }, { "epoch": 0.62, "grad_norm": 1.6080255214680221, "learning_rate": 3.457599156548821e-06, "loss": 0.5671, "step": 5811 }, { "epoch": 0.62, "grad_norm": 1.774511893239756, "learning_rate": 3.455921518652312e-06, "loss": 0.5842, "step": 5812 }, { "epoch": 0.62, "grad_norm": 1.8500510908560195, "learning_rate": 3.4542440728895854e-06, "loss": 0.6091, "step": 5813 }, { "epoch": 0.62, "grad_norm": 1.9210199763995426, "learning_rate": 3.452566819469369e-06, "loss": 0.688, "step": 5814 }, { "epoch": 0.62, "grad_norm": 1.7391585158342928, "learning_rate": 3.4508897586003674e-06, "loss": 0.5661, "step": 5815 }, { "epoch": 0.62, "grad_norm": 1.5595862001034562, "learning_rate": 3.4492128904912638e-06, "loss": 0.5678, "step": 5816 }, { "epoch": 0.62, "grad_norm": 1.7208741895566824, "learning_rate": 3.4475362153507125e-06, "loss": 0.5593, "step": 5817 }, { "epoch": 0.62, "grad_norm": 1.8120593106336114, "learning_rate": 3.4458597333873484e-06, "loss": 0.5482, "step": 5818 }, { "epoch": 0.62, "grad_norm": 1.4504514130774426, "learning_rate": 3.44418344480978e-06, "loss": 0.526, "step": 5819 }, { "epoch": 0.62, "grad_norm": 1.7334907433513267, "learning_rate": 3.4425073498265936e-06, "loss": 0.6113, "step": 5820 }, { "epoch": 0.62, "grad_norm": 1.765066219731546, "learning_rate": 3.4408314486463483e-06, "loss": 0.6441, "step": 5821 }, { "epoch": 0.62, "grad_norm": 1.7240203008656287, "learning_rate": 3.4391557414775812e-06, "loss": 0.6467, "step": 5822 }, { "epoch": 0.62, "grad_norm": 1.721170556961151, "learning_rate": 3.437480228528806e-06, "loss": 0.6241, "step": 5823 }, { "epoch": 0.62, "grad_norm": 1.6433858704016746, "learning_rate": 3.4358049100085106e-06, "loss": 0.5874, "step": 5824 }, { "epoch": 0.62, "grad_norm": 1.9959070391501648, "learning_rate": 3.4341297861251594e-06, "loss": 0.5631, "step": 5825 }, { "epoch": 0.62, "grad_norm": 1.990180957137083, "learning_rate": 3.4324548570871917e-06, "loss": 0.5577, "step": 5826 }, { "epoch": 0.62, "grad_norm": 1.6661937058694196, "learning_rate": 3.4307801231030257e-06, "loss": 0.6382, "step": 5827 }, { "epoch": 0.62, "grad_norm": 1.1842149183052662, "learning_rate": 3.4291055843810507e-06, "loss": 0.4993, "step": 5828 }, { "epoch": 0.62, "grad_norm": 1.606895639830562, "learning_rate": 3.4274312411296335e-06, "loss": 0.603, "step": 5829 }, { "epoch": 0.62, "grad_norm": 1.565788836899267, "learning_rate": 3.4257570935571194e-06, "loss": 0.5364, "step": 5830 }, { "epoch": 0.62, "grad_norm": 1.8674102941778388, "learning_rate": 3.424083141871827e-06, "loss": 0.61, "step": 5831 }, { "epoch": 0.62, "grad_norm": 1.6166076905897713, "learning_rate": 3.4224093862820484e-06, "loss": 0.5877, "step": 5832 }, { "epoch": 0.62, "grad_norm": 1.7671558557247962, "learning_rate": 3.4207358269960543e-06, "loss": 0.7249, "step": 5833 }, { "epoch": 0.62, "grad_norm": 1.7721162578212317, "learning_rate": 3.4190624642220914e-06, "loss": 0.679, "step": 5834 }, { "epoch": 0.62, "grad_norm": 1.753093806254221, "learning_rate": 3.41738929816838e-06, "loss": 0.6242, "step": 5835 }, { "epoch": 0.62, "grad_norm": 1.6872359150983407, "learning_rate": 3.4157163290431157e-06, "loss": 0.6613, "step": 5836 }, { "epoch": 0.62, "grad_norm": 1.8175654978359161, "learning_rate": 3.4140435570544708e-06, "loss": 0.5277, "step": 5837 }, { "epoch": 0.62, "grad_norm": 1.6166930873573329, "learning_rate": 3.4123709824105934e-06, "loss": 0.5609, "step": 5838 }, { "epoch": 0.62, "grad_norm": 1.114120886212725, "learning_rate": 3.410698605319605e-06, "loss": 0.5142, "step": 5839 }, { "epoch": 0.62, "grad_norm": 1.683846876731599, "learning_rate": 3.4090264259896055e-06, "loss": 0.5707, "step": 5840 }, { "epoch": 0.62, "grad_norm": 2.0411288607868037, "learning_rate": 3.4073544446286684e-06, "loss": 0.5808, "step": 5841 }, { "epoch": 0.62, "grad_norm": 1.6251377192753047, "learning_rate": 3.4056826614448418e-06, "loss": 0.6105, "step": 5842 }, { "epoch": 0.62, "grad_norm": 1.852567953725364, "learning_rate": 3.404011076646151e-06, "loss": 0.5727, "step": 5843 }, { "epoch": 0.62, "grad_norm": 1.105247810321439, "learning_rate": 3.4023396904405954e-06, "loss": 0.5064, "step": 5844 }, { "epoch": 0.62, "grad_norm": 1.1269113275171265, "learning_rate": 3.4006685030361498e-06, "loss": 0.5278, "step": 5845 }, { "epoch": 0.62, "grad_norm": 1.8764549148155867, "learning_rate": 3.398997514640764e-06, "loss": 0.4977, "step": 5846 }, { "epoch": 0.62, "grad_norm": 1.8122616826330769, "learning_rate": 3.3973267254623656e-06, "loss": 0.519, "step": 5847 }, { "epoch": 0.62, "grad_norm": 1.6857147660411453, "learning_rate": 3.3956561357088537e-06, "loss": 0.6336, "step": 5848 }, { "epoch": 0.62, "grad_norm": 1.0724729642937703, "learning_rate": 3.3939857455881063e-06, "loss": 0.5005, "step": 5849 }, { "epoch": 0.62, "grad_norm": 1.7047653376333713, "learning_rate": 3.3923155553079713e-06, "loss": 0.5923, "step": 5850 }, { "epoch": 0.62, "grad_norm": 1.7053423870687117, "learning_rate": 3.3906455650762755e-06, "loss": 0.6177, "step": 5851 }, { "epoch": 0.62, "grad_norm": 1.634934463264665, "learning_rate": 3.38897577510082e-06, "loss": 0.5815, "step": 5852 }, { "epoch": 0.62, "grad_norm": 2.1984863406295667, "learning_rate": 3.3873061855893834e-06, "loss": 0.7153, "step": 5853 }, { "epoch": 0.62, "grad_norm": 1.8333447832112322, "learning_rate": 3.385636796749716e-06, "loss": 0.6031, "step": 5854 }, { "epoch": 0.62, "grad_norm": 1.7021674162106015, "learning_rate": 3.3839676087895448e-06, "loss": 0.6244, "step": 5855 }, { "epoch": 0.62, "grad_norm": 1.7080369815426162, "learning_rate": 3.38229862191657e-06, "loss": 0.5434, "step": 5856 }, { "epoch": 0.62, "grad_norm": 1.6491659734879858, "learning_rate": 3.3806298363384694e-06, "loss": 0.5775, "step": 5857 }, { "epoch": 0.62, "grad_norm": 1.6154422362670111, "learning_rate": 3.378961252262893e-06, "loss": 0.5815, "step": 5858 }, { "epoch": 0.62, "grad_norm": 1.6427668693678716, "learning_rate": 3.3772928698974684e-06, "loss": 0.5596, "step": 5859 }, { "epoch": 0.63, "grad_norm": 1.6922499191433862, "learning_rate": 3.3756246894497957e-06, "loss": 0.5048, "step": 5860 }, { "epoch": 0.63, "grad_norm": 1.8111672548884827, "learning_rate": 3.373956711127453e-06, "loss": 0.6515, "step": 5861 }, { "epoch": 0.63, "grad_norm": 1.7845103188540883, "learning_rate": 3.372288935137989e-06, "loss": 0.6327, "step": 5862 }, { "epoch": 0.63, "grad_norm": 1.840351030071609, "learning_rate": 3.3706213616889316e-06, "loss": 0.6446, "step": 5863 }, { "epoch": 0.63, "grad_norm": 1.2062858055211207, "learning_rate": 3.3689539909877794e-06, "loss": 0.4871, "step": 5864 }, { "epoch": 0.63, "grad_norm": 1.7040913312534376, "learning_rate": 3.3672868232420084e-06, "loss": 0.6735, "step": 5865 }, { "epoch": 0.63, "grad_norm": 1.1286082454586002, "learning_rate": 3.3656198586590694e-06, "loss": 0.5174, "step": 5866 }, { "epoch": 0.63, "grad_norm": 1.616598891090698, "learning_rate": 3.3639530974463863e-06, "loss": 0.565, "step": 5867 }, { "epoch": 0.63, "grad_norm": 1.8124176492381303, "learning_rate": 3.3622865398113593e-06, "loss": 0.5999, "step": 5868 }, { "epoch": 0.63, "grad_norm": 1.7512954248957768, "learning_rate": 3.3606201859613607e-06, "loss": 0.5221, "step": 5869 }, { "epoch": 0.63, "grad_norm": 1.8308116033058452, "learning_rate": 3.3589540361037426e-06, "loss": 0.6359, "step": 5870 }, { "epoch": 0.63, "grad_norm": 1.7329119124652963, "learning_rate": 3.3572880904458267e-06, "loss": 0.6703, "step": 5871 }, { "epoch": 0.63, "grad_norm": 1.7700673486621212, "learning_rate": 3.3556223491949114e-06, "loss": 0.5897, "step": 5872 }, { "epoch": 0.63, "grad_norm": 1.8587005899512625, "learning_rate": 3.3539568125582684e-06, "loss": 0.6703, "step": 5873 }, { "epoch": 0.63, "grad_norm": 1.1205807141471087, "learning_rate": 3.352291480743146e-06, "loss": 0.4857, "step": 5874 }, { "epoch": 0.63, "grad_norm": 1.7843565127671717, "learning_rate": 3.350626353956765e-06, "loss": 0.6087, "step": 5875 }, { "epoch": 0.63, "grad_norm": 1.797265799498617, "learning_rate": 3.3489614324063224e-06, "loss": 0.6345, "step": 5876 }, { "epoch": 0.63, "grad_norm": 1.7166105552734394, "learning_rate": 3.347296716298988e-06, "loss": 0.5258, "step": 5877 }, { "epoch": 0.63, "grad_norm": 1.725986535732892, "learning_rate": 3.3456322058419076e-06, "loss": 0.5951, "step": 5878 }, { "epoch": 0.63, "grad_norm": 1.1659733686753124, "learning_rate": 3.3439679012421998e-06, "loss": 0.4989, "step": 5879 }, { "epoch": 0.63, "grad_norm": 1.7328398846162056, "learning_rate": 3.342303802706959e-06, "loss": 0.6468, "step": 5880 }, { "epoch": 0.63, "grad_norm": 1.7093575083657782, "learning_rate": 3.340639910443253e-06, "loss": 0.558, "step": 5881 }, { "epoch": 0.63, "grad_norm": 1.8948232844947457, "learning_rate": 3.3389762246581246e-06, "loss": 0.5885, "step": 5882 }, { "epoch": 0.63, "grad_norm": 1.753746855717961, "learning_rate": 3.337312745558591e-06, "loss": 0.6127, "step": 5883 }, { "epoch": 0.63, "grad_norm": 1.8800331928852894, "learning_rate": 3.335649473351642e-06, "loss": 0.5852, "step": 5884 }, { "epoch": 0.63, "grad_norm": 1.7931016657664054, "learning_rate": 3.333986408244244e-06, "loss": 0.6357, "step": 5885 }, { "epoch": 0.63, "grad_norm": 1.719047726469175, "learning_rate": 3.332323550443336e-06, "loss": 0.5912, "step": 5886 }, { "epoch": 0.63, "grad_norm": 1.1204838025207224, "learning_rate": 3.330660900155833e-06, "loss": 0.5147, "step": 5887 }, { "epoch": 0.63, "grad_norm": 1.6944522271004123, "learning_rate": 3.3289984575886227e-06, "loss": 0.6357, "step": 5888 }, { "epoch": 0.63, "grad_norm": 1.6307150374253256, "learning_rate": 3.327336222948566e-06, "loss": 0.5949, "step": 5889 }, { "epoch": 0.63, "grad_norm": 1.8364878388214687, "learning_rate": 3.3256741964425004e-06, "loss": 0.5495, "step": 5890 }, { "epoch": 0.63, "grad_norm": 1.8322016810021207, "learning_rate": 3.324012378277236e-06, "loss": 0.5623, "step": 5891 }, { "epoch": 0.63, "grad_norm": 1.752434161981998, "learning_rate": 3.322350768659558e-06, "loss": 0.6417, "step": 5892 }, { "epoch": 0.63, "grad_norm": 1.8737168927210242, "learning_rate": 3.3206893677962227e-06, "loss": 0.5749, "step": 5893 }, { "epoch": 0.63, "grad_norm": 1.069968613143318, "learning_rate": 3.3190281758939642e-06, "loss": 0.4869, "step": 5894 }, { "epoch": 0.63, "grad_norm": 1.788998127450454, "learning_rate": 3.3173671931594888e-06, "loss": 0.6247, "step": 5895 }, { "epoch": 0.63, "grad_norm": 1.6343161303428424, "learning_rate": 3.3157064197994767e-06, "loss": 0.5624, "step": 5896 }, { "epoch": 0.63, "grad_norm": 1.8412946720002352, "learning_rate": 3.3140458560205823e-06, "loss": 0.593, "step": 5897 }, { "epoch": 0.63, "grad_norm": 1.1102751305898662, "learning_rate": 3.3123855020294344e-06, "loss": 0.4896, "step": 5898 }, { "epoch": 0.63, "grad_norm": 1.6307522990449197, "learning_rate": 3.3107253580326347e-06, "loss": 0.5641, "step": 5899 }, { "epoch": 0.63, "grad_norm": 1.6503757455709096, "learning_rate": 3.3090654242367603e-06, "loss": 0.5956, "step": 5900 }, { "epoch": 0.63, "grad_norm": 1.8880499594894058, "learning_rate": 3.3074057008483605e-06, "loss": 0.5724, "step": 5901 }, { "epoch": 0.63, "grad_norm": 1.5575950848160165, "learning_rate": 3.3057461880739576e-06, "loss": 0.4848, "step": 5902 }, { "epoch": 0.63, "grad_norm": 1.7848215809383376, "learning_rate": 3.304086886120052e-06, "loss": 0.6792, "step": 5903 }, { "epoch": 0.63, "grad_norm": 1.6822380528506613, "learning_rate": 3.302427795193114e-06, "loss": 0.5725, "step": 5904 }, { "epoch": 0.63, "grad_norm": 1.120823757747453, "learning_rate": 3.3007689154995875e-06, "loss": 0.5066, "step": 5905 }, { "epoch": 0.63, "grad_norm": 1.7857672760870822, "learning_rate": 3.299110247245895e-06, "loss": 0.6045, "step": 5906 }, { "epoch": 0.63, "grad_norm": 2.039267747733788, "learning_rate": 3.297451790638424e-06, "loss": 0.6825, "step": 5907 }, { "epoch": 0.63, "grad_norm": 1.67953709167731, "learning_rate": 3.295793545883543e-06, "loss": 0.5591, "step": 5908 }, { "epoch": 0.63, "grad_norm": 2.0447759866071946, "learning_rate": 3.2941355131875906e-06, "loss": 0.7, "step": 5909 }, { "epoch": 0.63, "grad_norm": 1.5813093045953213, "learning_rate": 3.2924776927568817e-06, "loss": 0.5252, "step": 5910 }, { "epoch": 0.63, "grad_norm": 1.7092740806409579, "learning_rate": 3.2908200847977034e-06, "loss": 0.6438, "step": 5911 }, { "epoch": 0.63, "grad_norm": 1.9273882382551373, "learning_rate": 3.289162689516315e-06, "loss": 0.5794, "step": 5912 }, { "epoch": 0.63, "grad_norm": 1.8818083502535, "learning_rate": 3.2875055071189527e-06, "loss": 0.6296, "step": 5913 }, { "epoch": 0.63, "grad_norm": 2.087830661327212, "learning_rate": 3.285848537811822e-06, "loss": 0.6161, "step": 5914 }, { "epoch": 0.63, "grad_norm": 1.5519937195461193, "learning_rate": 3.2841917818011043e-06, "loss": 0.568, "step": 5915 }, { "epoch": 0.63, "grad_norm": 1.6896760566128164, "learning_rate": 3.2825352392929548e-06, "loss": 0.5147, "step": 5916 }, { "epoch": 0.63, "grad_norm": 1.992499543703392, "learning_rate": 3.2808789104935015e-06, "loss": 0.6174, "step": 5917 }, { "epoch": 0.63, "grad_norm": 1.7943725399295767, "learning_rate": 3.2792227956088452e-06, "loss": 0.667, "step": 5918 }, { "epoch": 0.63, "grad_norm": 1.8067061970434772, "learning_rate": 3.27756689484506e-06, "loss": 0.6535, "step": 5919 }, { "epoch": 0.63, "grad_norm": 1.1605903452053645, "learning_rate": 3.2759112084081956e-06, "loss": 0.5025, "step": 5920 }, { "epoch": 0.63, "grad_norm": 1.146722064681704, "learning_rate": 3.2742557365042752e-06, "loss": 0.5177, "step": 5921 }, { "epoch": 0.63, "grad_norm": 1.7232234125195265, "learning_rate": 3.2726004793392896e-06, "loss": 0.6056, "step": 5922 }, { "epoch": 0.63, "grad_norm": 1.845067262721394, "learning_rate": 3.2709454371192083e-06, "loss": 0.578, "step": 5923 }, { "epoch": 0.63, "grad_norm": 1.7809238149644773, "learning_rate": 3.269290610049972e-06, "loss": 0.6284, "step": 5924 }, { "epoch": 0.63, "grad_norm": 1.7992594472849717, "learning_rate": 3.2676359983374954e-06, "loss": 0.5868, "step": 5925 }, { "epoch": 0.63, "grad_norm": 2.0223619949156104, "learning_rate": 3.265981602187668e-06, "loss": 0.5509, "step": 5926 }, { "epoch": 0.63, "grad_norm": 1.6175740570823882, "learning_rate": 3.264327421806349e-06, "loss": 0.5985, "step": 5927 }, { "epoch": 0.63, "grad_norm": 2.0179670670186103, "learning_rate": 3.2626734573993734e-06, "loss": 0.6978, "step": 5928 }, { "epoch": 0.63, "grad_norm": 1.9474059764665903, "learning_rate": 3.2610197091725483e-06, "loss": 0.6055, "step": 5929 }, { "epoch": 0.63, "grad_norm": 1.810720656237393, "learning_rate": 3.2593661773316533e-06, "loss": 0.7098, "step": 5930 }, { "epoch": 0.63, "grad_norm": 1.8620600117777784, "learning_rate": 3.2577128620824417e-06, "loss": 0.6964, "step": 5931 }, { "epoch": 0.63, "grad_norm": 1.439135747148008, "learning_rate": 3.25605976363064e-06, "loss": 0.4972, "step": 5932 }, { "epoch": 0.63, "grad_norm": 1.7051897740648563, "learning_rate": 3.2544068821819485e-06, "loss": 0.654, "step": 5933 }, { "epoch": 0.63, "grad_norm": 1.6544966806335741, "learning_rate": 3.2527542179420385e-06, "loss": 0.5421, "step": 5934 }, { "epoch": 0.63, "grad_norm": 2.030797048625804, "learning_rate": 3.251101771116557e-06, "loss": 0.6318, "step": 5935 }, { "epoch": 0.63, "grad_norm": 1.7871788259466779, "learning_rate": 3.2494495419111205e-06, "loss": 0.5642, "step": 5936 }, { "epoch": 0.63, "grad_norm": 1.893918611955803, "learning_rate": 3.2477975305313215e-06, "loss": 0.6654, "step": 5937 }, { "epoch": 0.63, "grad_norm": 1.7619666981365754, "learning_rate": 3.2461457371827227e-06, "loss": 0.5814, "step": 5938 }, { "epoch": 0.63, "grad_norm": 1.5269838683107813, "learning_rate": 3.2444941620708626e-06, "loss": 0.5916, "step": 5939 }, { "epoch": 0.63, "grad_norm": 1.20781645810754, "learning_rate": 3.24284280540125e-06, "loss": 0.4731, "step": 5940 }, { "epoch": 0.63, "grad_norm": 1.7790587575955508, "learning_rate": 3.2411916673793686e-06, "loss": 0.6413, "step": 5941 }, { "epoch": 0.63, "grad_norm": 1.6538519524805135, "learning_rate": 3.2395407482106724e-06, "loss": 0.5113, "step": 5942 }, { "epoch": 0.63, "grad_norm": 1.8069928537863493, "learning_rate": 3.2378900481005914e-06, "loss": 0.5825, "step": 5943 }, { "epoch": 0.63, "grad_norm": 1.8446435780325863, "learning_rate": 3.236239567254526e-06, "loss": 0.6095, "step": 5944 }, { "epoch": 0.63, "grad_norm": 1.8640365034686586, "learning_rate": 3.23458930587785e-06, "loss": 0.592, "step": 5945 }, { "epoch": 0.63, "grad_norm": 1.8844849547495985, "learning_rate": 3.2329392641759094e-06, "loss": 0.6424, "step": 5946 }, { "epoch": 0.63, "grad_norm": 1.8110655807186182, "learning_rate": 3.2312894423540237e-06, "loss": 0.6762, "step": 5947 }, { "epoch": 0.63, "grad_norm": 1.9490057790986066, "learning_rate": 3.2296398406174845e-06, "loss": 0.6095, "step": 5948 }, { "epoch": 0.63, "grad_norm": 2.013433656277672, "learning_rate": 3.227990459171555e-06, "loss": 0.6357, "step": 5949 }, { "epoch": 0.63, "grad_norm": 1.8526784955510232, "learning_rate": 3.226341298221475e-06, "loss": 0.5853, "step": 5950 }, { "epoch": 0.63, "grad_norm": 2.0173044157385647, "learning_rate": 3.224692357972451e-06, "loss": 0.6539, "step": 5951 }, { "epoch": 0.63, "grad_norm": 1.6892936609834823, "learning_rate": 3.223043638629666e-06, "loss": 0.652, "step": 5952 }, { "epoch": 0.63, "grad_norm": 1.7267272322656988, "learning_rate": 3.221395140398274e-06, "loss": 0.5011, "step": 5953 }, { "epoch": 0.64, "grad_norm": 1.6705442677944218, "learning_rate": 3.2197468634834035e-06, "loss": 0.5618, "step": 5954 }, { "epoch": 0.64, "grad_norm": 2.670891804903846, "learning_rate": 3.218098808090152e-06, "loss": 0.4916, "step": 5955 }, { "epoch": 0.64, "grad_norm": 1.8790609504673015, "learning_rate": 3.2164509744235934e-06, "loss": 0.6044, "step": 5956 }, { "epoch": 0.64, "grad_norm": 1.8669778585544006, "learning_rate": 3.214803362688771e-06, "loss": 0.5848, "step": 5957 }, { "epoch": 0.64, "grad_norm": 1.9028302971499977, "learning_rate": 3.2131559730907014e-06, "loss": 0.5911, "step": 5958 }, { "epoch": 0.64, "grad_norm": 2.0854892422116205, "learning_rate": 3.2115088058343725e-06, "loss": 0.604, "step": 5959 }, { "epoch": 0.64, "grad_norm": 1.6888836968887775, "learning_rate": 3.2098618611247485e-06, "loss": 0.5918, "step": 5960 }, { "epoch": 0.64, "grad_norm": 1.7752569915468417, "learning_rate": 3.2082151391667613e-06, "loss": 0.7051, "step": 5961 }, { "epoch": 0.64, "grad_norm": 1.6915409290913033, "learning_rate": 3.206568640165317e-06, "loss": 0.6007, "step": 5962 }, { "epoch": 0.64, "grad_norm": 2.031513580899858, "learning_rate": 3.2049223643252948e-06, "loss": 0.6997, "step": 5963 }, { "epoch": 0.64, "grad_norm": 1.632660796827639, "learning_rate": 3.203276311851546e-06, "loss": 0.5356, "step": 5964 }, { "epoch": 0.64, "grad_norm": 1.8586278700742211, "learning_rate": 3.201630482948889e-06, "loss": 0.5718, "step": 5965 }, { "epoch": 0.64, "grad_norm": 1.7157692152869448, "learning_rate": 3.1999848778221217e-06, "loss": 0.5104, "step": 5966 }, { "epoch": 0.64, "grad_norm": 1.6059270602846434, "learning_rate": 3.198339496676011e-06, "loss": 0.5966, "step": 5967 }, { "epoch": 0.64, "grad_norm": 1.8053023439006985, "learning_rate": 3.196694339715296e-06, "loss": 0.5856, "step": 5968 }, { "epoch": 0.64, "grad_norm": 1.7226502918414168, "learning_rate": 3.195049407144688e-06, "loss": 0.5455, "step": 5969 }, { "epoch": 0.64, "grad_norm": 1.208816458032675, "learning_rate": 3.1934046991688696e-06, "loss": 0.5156, "step": 5970 }, { "epoch": 0.64, "grad_norm": 1.8672533577175818, "learning_rate": 3.1917602159924963e-06, "loss": 0.6015, "step": 5971 }, { "epoch": 0.64, "grad_norm": 1.9662018183962855, "learning_rate": 3.1901159578201968e-06, "loss": 0.6636, "step": 5972 }, { "epoch": 0.64, "grad_norm": 1.6663048612103692, "learning_rate": 3.1884719248565687e-06, "loss": 0.5959, "step": 5973 }, { "epoch": 0.64, "grad_norm": 1.6274022344367969, "learning_rate": 3.186828117306184e-06, "loss": 0.6284, "step": 5974 }, { "epoch": 0.64, "grad_norm": 1.7487744198845594, "learning_rate": 3.1851845353735876e-06, "loss": 0.6575, "step": 5975 }, { "epoch": 0.64, "grad_norm": 1.986515919076358, "learning_rate": 3.183541179263292e-06, "loss": 0.5712, "step": 5976 }, { "epoch": 0.64, "grad_norm": 1.7531274328995134, "learning_rate": 3.1818980491797857e-06, "loss": 0.6683, "step": 5977 }, { "epoch": 0.64, "grad_norm": 1.7928187555208184, "learning_rate": 3.180255145327531e-06, "loss": 0.643, "step": 5978 }, { "epoch": 0.64, "grad_norm": 1.6825647516652147, "learning_rate": 3.178612467910953e-06, "loss": 0.5303, "step": 5979 }, { "epoch": 0.64, "grad_norm": 1.8779705737895385, "learning_rate": 3.1769700171344574e-06, "loss": 0.6322, "step": 5980 }, { "epoch": 0.64, "grad_norm": 1.6684607283773694, "learning_rate": 3.1753277932024187e-06, "loss": 0.5948, "step": 5981 }, { "epoch": 0.64, "grad_norm": 1.6596274291335384, "learning_rate": 3.173685796319181e-06, "loss": 0.5088, "step": 5982 }, { "epoch": 0.64, "grad_norm": 1.920005573969755, "learning_rate": 3.1720440266890652e-06, "loss": 0.606, "step": 5983 }, { "epoch": 0.64, "grad_norm": 1.1971729063451089, "learning_rate": 3.1704024845163605e-06, "loss": 0.5161, "step": 5984 }, { "epoch": 0.64, "grad_norm": 1.798057996272226, "learning_rate": 3.1687611700053274e-06, "loss": 0.6243, "step": 5985 }, { "epoch": 0.64, "grad_norm": 1.1646971521248803, "learning_rate": 3.1671200833601993e-06, "loss": 0.4961, "step": 5986 }, { "epoch": 0.64, "grad_norm": 1.6579221898040495, "learning_rate": 3.165479224785182e-06, "loss": 0.5056, "step": 5987 }, { "epoch": 0.64, "grad_norm": 1.6625696330745576, "learning_rate": 3.1638385944844496e-06, "loss": 0.5064, "step": 5988 }, { "epoch": 0.64, "grad_norm": 1.8457504441708694, "learning_rate": 3.1621981926621525e-06, "loss": 0.6306, "step": 5989 }, { "epoch": 0.64, "grad_norm": 1.5902762002931596, "learning_rate": 3.160558019522409e-06, "loss": 0.5301, "step": 5990 }, { "epoch": 0.64, "grad_norm": 1.9824270747034516, "learning_rate": 3.1589180752693103e-06, "loss": 0.6542, "step": 5991 }, { "epoch": 0.64, "grad_norm": 1.7835938966802753, "learning_rate": 3.157278360106919e-06, "loss": 0.5691, "step": 5992 }, { "epoch": 0.64, "grad_norm": 1.223501335882246, "learning_rate": 3.155638874239271e-06, "loss": 0.5108, "step": 5993 }, { "epoch": 0.64, "grad_norm": 1.8443344912573159, "learning_rate": 3.153999617870369e-06, "loss": 0.5986, "step": 5994 }, { "epoch": 0.64, "grad_norm": 1.6912251133462286, "learning_rate": 3.1523605912041912e-06, "loss": 0.5592, "step": 5995 }, { "epoch": 0.64, "grad_norm": 1.722439547465638, "learning_rate": 3.1507217944446853e-06, "loss": 0.6047, "step": 5996 }, { "epoch": 0.64, "grad_norm": 1.7613280786050158, "learning_rate": 3.149083227795773e-06, "loss": 0.6322, "step": 5997 }, { "epoch": 0.64, "grad_norm": 1.6719941053587493, "learning_rate": 3.1474448914613437e-06, "loss": 0.5519, "step": 5998 }, { "epoch": 0.64, "grad_norm": 1.4828057888938442, "learning_rate": 3.1458067856452595e-06, "loss": 0.5591, "step": 5999 }, { "epoch": 0.64, "grad_norm": 1.6560855484019972, "learning_rate": 3.1441689105513566e-06, "loss": 0.5919, "step": 6000 }, { "epoch": 0.64, "grad_norm": 1.9918294481166579, "learning_rate": 3.1425312663834396e-06, "loss": 0.6114, "step": 6001 }, { "epoch": 0.64, "grad_norm": 1.6696666490476306, "learning_rate": 3.1408938533452836e-06, "loss": 0.6388, "step": 6002 }, { "epoch": 0.64, "grad_norm": 1.2064839517256138, "learning_rate": 3.139256671640637e-06, "loss": 0.5095, "step": 6003 }, { "epoch": 0.64, "grad_norm": 1.8464758477151322, "learning_rate": 3.1376197214732184e-06, "loss": 0.6603, "step": 6004 }, { "epoch": 0.64, "grad_norm": 1.8508334421239063, "learning_rate": 3.1359830030467177e-06, "loss": 0.5701, "step": 6005 }, { "epoch": 0.64, "grad_norm": 2.1480802492266333, "learning_rate": 3.134346516564797e-06, "loss": 0.6536, "step": 6006 }, { "epoch": 0.64, "grad_norm": 1.6748121421580604, "learning_rate": 3.132710262231089e-06, "loss": 0.5109, "step": 6007 }, { "epoch": 0.64, "grad_norm": 1.113241035904903, "learning_rate": 3.1310742402491947e-06, "loss": 0.4965, "step": 6008 }, { "epoch": 0.64, "grad_norm": 1.9674343157775185, "learning_rate": 3.1294384508226905e-06, "loss": 0.5214, "step": 6009 }, { "epoch": 0.64, "grad_norm": 1.5956613918666298, "learning_rate": 3.127802894155122e-06, "loss": 0.587, "step": 6010 }, { "epoch": 0.64, "grad_norm": 1.8148239741635543, "learning_rate": 3.126167570450005e-06, "loss": 0.6845, "step": 6011 }, { "epoch": 0.64, "grad_norm": 1.6633716338417457, "learning_rate": 3.1245324799108286e-06, "loss": 0.5624, "step": 6012 }, { "epoch": 0.64, "grad_norm": 1.7807576245647883, "learning_rate": 3.1228976227410503e-06, "loss": 0.6764, "step": 6013 }, { "epoch": 0.64, "grad_norm": 1.6851196694087234, "learning_rate": 3.121262999144099e-06, "loss": 0.6013, "step": 6014 }, { "epoch": 0.64, "grad_norm": 1.831834122613456, "learning_rate": 3.1196286093233774e-06, "loss": 0.662, "step": 6015 }, { "epoch": 0.64, "grad_norm": 1.7035508899342715, "learning_rate": 3.1179944534822537e-06, "loss": 0.5996, "step": 6016 }, { "epoch": 0.64, "grad_norm": 1.1184165507014117, "learning_rate": 3.116360531824074e-06, "loss": 0.5014, "step": 6017 }, { "epoch": 0.64, "grad_norm": 1.142167623262668, "learning_rate": 3.1147268445521507e-06, "loss": 0.5028, "step": 6018 }, { "epoch": 0.64, "grad_norm": 1.7424403357542673, "learning_rate": 3.1130933918697658e-06, "loss": 0.5645, "step": 6019 }, { "epoch": 0.64, "grad_norm": 1.6944480503272326, "learning_rate": 3.111460173980175e-06, "loss": 0.5721, "step": 6020 }, { "epoch": 0.64, "grad_norm": 1.168633560435798, "learning_rate": 3.1098271910866073e-06, "loss": 0.5146, "step": 6021 }, { "epoch": 0.64, "grad_norm": 1.9310514488151285, "learning_rate": 3.108194443392253e-06, "loss": 0.6738, "step": 6022 }, { "epoch": 0.64, "grad_norm": 1.7571778466762815, "learning_rate": 3.1065619311002833e-06, "loss": 0.6022, "step": 6023 }, { "epoch": 0.64, "grad_norm": 1.7998956385044405, "learning_rate": 3.104929654413835e-06, "loss": 0.5488, "step": 6024 }, { "epoch": 0.64, "grad_norm": 1.710347861021877, "learning_rate": 3.103297613536016e-06, "loss": 0.5699, "step": 6025 }, { "epoch": 0.64, "grad_norm": 1.9515803983115574, "learning_rate": 3.1016658086699064e-06, "loss": 0.6465, "step": 6026 }, { "epoch": 0.64, "grad_norm": 1.61030341075062, "learning_rate": 3.1000342400185556e-06, "loss": 0.6102, "step": 6027 }, { "epoch": 0.64, "grad_norm": 1.7723568397178582, "learning_rate": 3.0984029077849847e-06, "loss": 0.6452, "step": 6028 }, { "epoch": 0.64, "grad_norm": 1.7503626428556958, "learning_rate": 3.096771812172184e-06, "loss": 0.5961, "step": 6029 }, { "epoch": 0.64, "grad_norm": 1.7188596861126453, "learning_rate": 3.095140953383115e-06, "loss": 0.5845, "step": 6030 }, { "epoch": 0.64, "grad_norm": 1.5298856023556455, "learning_rate": 3.0935103316207104e-06, "loss": 0.4437, "step": 6031 }, { "epoch": 0.64, "grad_norm": 1.5984419306751771, "learning_rate": 3.091879947087872e-06, "loss": 0.5357, "step": 6032 }, { "epoch": 0.64, "grad_norm": 1.74031590387566, "learning_rate": 3.090249799987472e-06, "loss": 0.6055, "step": 6033 }, { "epoch": 0.64, "grad_norm": 1.2730630993035008, "learning_rate": 3.0886198905223562e-06, "loss": 0.5224, "step": 6034 }, { "epoch": 0.64, "grad_norm": 1.6327184173623495, "learning_rate": 3.086990218895338e-06, "loss": 0.5869, "step": 6035 }, { "epoch": 0.64, "grad_norm": 1.7628592268604335, "learning_rate": 3.085360785309203e-06, "loss": 0.6807, "step": 6036 }, { "epoch": 0.64, "grad_norm": 1.793483335362191, "learning_rate": 3.083731589966703e-06, "loss": 0.6618, "step": 6037 }, { "epoch": 0.64, "grad_norm": 1.8552917983277373, "learning_rate": 3.082102633070563e-06, "loss": 0.648, "step": 6038 }, { "epoch": 0.64, "grad_norm": 1.8411542347552892, "learning_rate": 3.0804739148234795e-06, "loss": 0.5691, "step": 6039 }, { "epoch": 0.64, "grad_norm": 1.8889655596164832, "learning_rate": 3.0788454354281195e-06, "loss": 0.6713, "step": 6040 }, { "epoch": 0.64, "grad_norm": 1.8171068060435334, "learning_rate": 3.0772171950871184e-06, "loss": 0.5852, "step": 6041 }, { "epoch": 0.64, "grad_norm": 2.285314628930126, "learning_rate": 3.0755891940030813e-06, "loss": 0.7411, "step": 6042 }, { "epoch": 0.64, "grad_norm": 1.742221509469946, "learning_rate": 3.073961432378586e-06, "loss": 0.6478, "step": 6043 }, { "epoch": 0.64, "grad_norm": 1.6264166032489193, "learning_rate": 3.072333910416179e-06, "loss": 0.5383, "step": 6044 }, { "epoch": 0.64, "grad_norm": 1.653119948863565, "learning_rate": 3.0707066283183764e-06, "loss": 0.5881, "step": 6045 }, { "epoch": 0.64, "grad_norm": 1.90354305052376, "learning_rate": 3.069079586287665e-06, "loss": 0.6215, "step": 6046 }, { "epoch": 0.65, "grad_norm": 1.137646908223476, "learning_rate": 3.0674527845265035e-06, "loss": 0.4755, "step": 6047 }, { "epoch": 0.65, "grad_norm": 1.7523981472916796, "learning_rate": 3.0658262232373178e-06, "loss": 0.5821, "step": 6048 }, { "epoch": 0.65, "grad_norm": 1.5835979440487782, "learning_rate": 3.0641999026225054e-06, "loss": 0.57, "step": 6049 }, { "epoch": 0.65, "grad_norm": 1.6222772095855993, "learning_rate": 3.062573822884436e-06, "loss": 0.553, "step": 6050 }, { "epoch": 0.65, "grad_norm": 1.7777731751693544, "learning_rate": 3.0609479842254435e-06, "loss": 0.5548, "step": 6051 }, { "epoch": 0.65, "grad_norm": 1.6683986565256859, "learning_rate": 3.0593223868478374e-06, "loss": 0.5641, "step": 6052 }, { "epoch": 0.65, "grad_norm": 1.7911338813716329, "learning_rate": 3.0576970309538933e-06, "loss": 0.6813, "step": 6053 }, { "epoch": 0.65, "grad_norm": 1.8129307310019973, "learning_rate": 3.056071916745861e-06, "loss": 0.5806, "step": 6054 }, { "epoch": 0.65, "grad_norm": 1.8137243337109308, "learning_rate": 3.0544470444259554e-06, "loss": 0.6621, "step": 6055 }, { "epoch": 0.65, "grad_norm": 1.7941903789648375, "learning_rate": 3.0528224141963647e-06, "loss": 0.6393, "step": 6056 }, { "epoch": 0.65, "grad_norm": 2.0052940150347416, "learning_rate": 3.0511980262592467e-06, "loss": 0.6556, "step": 6057 }, { "epoch": 0.65, "grad_norm": 2.0231172083361817, "learning_rate": 3.049573880816728e-06, "loss": 0.7003, "step": 6058 }, { "epoch": 0.65, "grad_norm": 1.587270794411459, "learning_rate": 3.047949978070905e-06, "loss": 0.5795, "step": 6059 }, { "epoch": 0.65, "grad_norm": 1.764864496175386, "learning_rate": 3.046326318223843e-06, "loss": 0.5434, "step": 6060 }, { "epoch": 0.65, "grad_norm": 1.6137226833060752, "learning_rate": 3.0447029014775814e-06, "loss": 0.4512, "step": 6061 }, { "epoch": 0.65, "grad_norm": 1.180456823727229, "learning_rate": 3.0430797280341234e-06, "loss": 0.5007, "step": 6062 }, { "epoch": 0.65, "grad_norm": 1.7790887215703413, "learning_rate": 3.0414567980954456e-06, "loss": 0.5941, "step": 6063 }, { "epoch": 0.65, "grad_norm": 1.093546148166746, "learning_rate": 3.0398341118634957e-06, "loss": 0.4745, "step": 6064 }, { "epoch": 0.65, "grad_norm": 1.7968872723094738, "learning_rate": 3.038211669540184e-06, "loss": 0.6, "step": 6065 }, { "epoch": 0.65, "grad_norm": 1.5252050249455678, "learning_rate": 3.0365894713273997e-06, "loss": 0.5858, "step": 6066 }, { "epoch": 0.65, "grad_norm": 1.5889387454772077, "learning_rate": 3.0349675174269948e-06, "loss": 0.5363, "step": 6067 }, { "epoch": 0.65, "grad_norm": 1.9874695623033516, "learning_rate": 3.033345808040794e-06, "loss": 0.5847, "step": 6068 }, { "epoch": 0.65, "grad_norm": 1.6783904592089924, "learning_rate": 3.0317243433705918e-06, "loss": 0.4783, "step": 6069 }, { "epoch": 0.65, "grad_norm": 1.6161953262321096, "learning_rate": 3.03010312361815e-06, "loss": 0.5858, "step": 6070 }, { "epoch": 0.65, "grad_norm": 1.193173488611091, "learning_rate": 3.028482148985202e-06, "loss": 0.5085, "step": 6071 }, { "epoch": 0.65, "grad_norm": 2.0564485837321893, "learning_rate": 3.02686141967345e-06, "loss": 0.5996, "step": 6072 }, { "epoch": 0.65, "grad_norm": 1.7543779488243483, "learning_rate": 3.025240935884564e-06, "loss": 0.5312, "step": 6073 }, { "epoch": 0.65, "grad_norm": 1.650137629743182, "learning_rate": 3.023620697820188e-06, "loss": 0.5423, "step": 6074 }, { "epoch": 0.65, "grad_norm": 1.791607945563046, "learning_rate": 3.0220007056819313e-06, "loss": 0.5791, "step": 6075 }, { "epoch": 0.65, "grad_norm": 1.1374129738783714, "learning_rate": 3.0203809596713727e-06, "loss": 0.5254, "step": 6076 }, { "epoch": 0.65, "grad_norm": 1.4472007193490068, "learning_rate": 3.018761459990064e-06, "loss": 0.503, "step": 6077 }, { "epoch": 0.65, "grad_norm": 1.536644971460336, "learning_rate": 3.0171422068395205e-06, "loss": 0.5246, "step": 6078 }, { "epoch": 0.65, "grad_norm": 1.666254417811011, "learning_rate": 3.015523200421234e-06, "loss": 0.6677, "step": 6079 }, { "epoch": 0.65, "grad_norm": 1.8541017411227922, "learning_rate": 3.013904440936658e-06, "loss": 0.6744, "step": 6080 }, { "epoch": 0.65, "grad_norm": 1.623020609199968, "learning_rate": 3.0122859285872214e-06, "loss": 0.5769, "step": 6081 }, { "epoch": 0.65, "grad_norm": 1.825584961648437, "learning_rate": 3.010667663574319e-06, "loss": 0.5178, "step": 6082 }, { "epoch": 0.65, "grad_norm": 1.6356657226245332, "learning_rate": 3.009049646099316e-06, "loss": 0.5921, "step": 6083 }, { "epoch": 0.65, "grad_norm": 1.8129755759986845, "learning_rate": 3.0074318763635468e-06, "loss": 0.6155, "step": 6084 }, { "epoch": 0.65, "grad_norm": 1.1046002707853728, "learning_rate": 3.005814354568315e-06, "loss": 0.4893, "step": 6085 }, { "epoch": 0.65, "grad_norm": 1.7940229570079054, "learning_rate": 3.0041970809148925e-06, "loss": 0.6057, "step": 6086 }, { "epoch": 0.65, "grad_norm": 1.8845738261472929, "learning_rate": 3.0025800556045215e-06, "loss": 0.5485, "step": 6087 }, { "epoch": 0.65, "grad_norm": 1.6101140844791553, "learning_rate": 3.0009632788384124e-06, "loss": 0.6216, "step": 6088 }, { "epoch": 0.65, "grad_norm": 1.7727310022934215, "learning_rate": 2.9993467508177436e-06, "loss": 0.6468, "step": 6089 }, { "epoch": 0.65, "grad_norm": 1.949307207898823, "learning_rate": 2.997730471743667e-06, "loss": 0.6734, "step": 6090 }, { "epoch": 0.65, "grad_norm": 2.1098022798624028, "learning_rate": 2.9961144418172993e-06, "loss": 0.6739, "step": 6091 }, { "epoch": 0.65, "grad_norm": 1.6849551649882792, "learning_rate": 2.9944986612397264e-06, "loss": 0.6412, "step": 6092 }, { "epoch": 0.65, "grad_norm": 1.086623250511351, "learning_rate": 2.9928831302120074e-06, "loss": 0.4908, "step": 6093 }, { "epoch": 0.65, "grad_norm": 1.6865184425750388, "learning_rate": 2.991267848935163e-06, "loss": 0.5876, "step": 6094 }, { "epoch": 0.65, "grad_norm": 1.7493306183050095, "learning_rate": 2.989652817610188e-06, "loss": 0.665, "step": 6095 }, { "epoch": 0.65, "grad_norm": 1.7046181516034908, "learning_rate": 2.988038036438044e-06, "loss": 0.6437, "step": 6096 }, { "epoch": 0.65, "grad_norm": 1.1123189778770992, "learning_rate": 2.9864235056196655e-06, "loss": 0.49, "step": 6097 }, { "epoch": 0.65, "grad_norm": 1.9614722997414777, "learning_rate": 2.9848092253559513e-06, "loss": 0.5014, "step": 6098 }, { "epoch": 0.65, "grad_norm": 1.6990965054272655, "learning_rate": 2.9831951958477713e-06, "loss": 0.5954, "step": 6099 }, { "epoch": 0.65, "grad_norm": 1.6248348921547386, "learning_rate": 2.9815814172959624e-06, "loss": 0.5599, "step": 6100 }, { "epoch": 0.65, "grad_norm": 1.0941118921047799, "learning_rate": 2.9799678899013317e-06, "loss": 0.5018, "step": 6101 }, { "epoch": 0.65, "grad_norm": 1.8938461939200202, "learning_rate": 2.9783546138646548e-06, "loss": 0.5408, "step": 6102 }, { "epoch": 0.65, "grad_norm": 1.798529712049797, "learning_rate": 2.976741589386676e-06, "loss": 0.7302, "step": 6103 }, { "epoch": 0.65, "grad_norm": 1.8318830878329728, "learning_rate": 2.975128816668108e-06, "loss": 0.6925, "step": 6104 }, { "epoch": 0.65, "grad_norm": 1.8332925584840478, "learning_rate": 2.973516295909632e-06, "loss": 0.6383, "step": 6105 }, { "epoch": 0.65, "grad_norm": 1.8700804115496574, "learning_rate": 2.971904027311898e-06, "loss": 0.6901, "step": 6106 }, { "epoch": 0.65, "grad_norm": 1.798695528689302, "learning_rate": 2.9702920110755275e-06, "loss": 0.5822, "step": 6107 }, { "epoch": 0.65, "grad_norm": 1.7712787284013283, "learning_rate": 2.9686802474011075e-06, "loss": 0.5886, "step": 6108 }, { "epoch": 0.65, "grad_norm": 1.846415303033265, "learning_rate": 2.9670687364891904e-06, "loss": 0.6005, "step": 6109 }, { "epoch": 0.65, "grad_norm": 1.6719518084858938, "learning_rate": 2.9654574785403028e-06, "loss": 0.5483, "step": 6110 }, { "epoch": 0.65, "grad_norm": 1.6597416540558687, "learning_rate": 2.9638464737549383e-06, "loss": 0.6173, "step": 6111 }, { "epoch": 0.65, "grad_norm": 1.8553825879474743, "learning_rate": 2.962235722333557e-06, "loss": 0.6877, "step": 6112 }, { "epoch": 0.65, "grad_norm": 1.7405980598868491, "learning_rate": 2.960625224476591e-06, "loss": 0.5981, "step": 6113 }, { "epoch": 0.65, "grad_norm": 1.8147154085944905, "learning_rate": 2.9590149803844383e-06, "loss": 0.5334, "step": 6114 }, { "epoch": 0.65, "grad_norm": 1.8038895717554149, "learning_rate": 2.9574049902574653e-06, "loss": 0.6013, "step": 6115 }, { "epoch": 0.65, "grad_norm": 1.678751562392883, "learning_rate": 2.9557952542960076e-06, "loss": 0.5143, "step": 6116 }, { "epoch": 0.65, "grad_norm": 1.8228937666892766, "learning_rate": 2.954185772700369e-06, "loss": 0.5524, "step": 6117 }, { "epoch": 0.65, "grad_norm": 1.6624721329892864, "learning_rate": 2.952576545670821e-06, "loss": 0.5788, "step": 6118 }, { "epoch": 0.65, "grad_norm": 1.88042144424168, "learning_rate": 2.9509675734076047e-06, "loss": 0.5978, "step": 6119 }, { "epoch": 0.65, "grad_norm": 1.9253144687106027, "learning_rate": 2.949358856110928e-06, "loss": 0.6685, "step": 6120 }, { "epoch": 0.65, "grad_norm": 1.2502052084274853, "learning_rate": 2.947750393980969e-06, "loss": 0.5235, "step": 6121 }, { "epoch": 0.65, "grad_norm": 1.8700006868377612, "learning_rate": 2.946142187217873e-06, "loss": 0.5882, "step": 6122 }, { "epoch": 0.65, "grad_norm": 1.1513357495334278, "learning_rate": 2.9445342360217523e-06, "loss": 0.5196, "step": 6123 }, { "epoch": 0.65, "grad_norm": 1.8129941990523446, "learning_rate": 2.9429265405926875e-06, "loss": 0.595, "step": 6124 }, { "epoch": 0.65, "grad_norm": 1.9127441322394618, "learning_rate": 2.941319101130731e-06, "loss": 0.6035, "step": 6125 }, { "epoch": 0.65, "grad_norm": 1.7384599160162002, "learning_rate": 2.939711917835899e-06, "loss": 0.5141, "step": 6126 }, { "epoch": 0.65, "grad_norm": 1.9382406367589087, "learning_rate": 2.938104990908178e-06, "loss": 0.6306, "step": 6127 }, { "epoch": 0.65, "grad_norm": 1.1920701076486708, "learning_rate": 2.9364983205475234e-06, "loss": 0.497, "step": 6128 }, { "epoch": 0.65, "grad_norm": 1.9106461344280699, "learning_rate": 2.934891906953855e-06, "loss": 0.6219, "step": 6129 }, { "epoch": 0.65, "grad_norm": 1.5687489066338642, "learning_rate": 2.933285750327065e-06, "loss": 0.563, "step": 6130 }, { "epoch": 0.65, "grad_norm": 1.589829815658472, "learning_rate": 2.9316798508670117e-06, "loss": 0.5669, "step": 6131 }, { "epoch": 0.65, "grad_norm": 1.9726955379995421, "learning_rate": 2.930074208773521e-06, "loss": 0.6378, "step": 6132 }, { "epoch": 0.65, "grad_norm": 1.9106068822009028, "learning_rate": 2.9284688242463866e-06, "loss": 0.6117, "step": 6133 }, { "epoch": 0.65, "grad_norm": 1.127936070371703, "learning_rate": 2.9268636974853713e-06, "loss": 0.4917, "step": 6134 }, { "epoch": 0.65, "grad_norm": 1.8377879836191031, "learning_rate": 2.925258828690206e-06, "loss": 0.559, "step": 6135 }, { "epoch": 0.65, "grad_norm": 1.867702557244808, "learning_rate": 2.9236542180605885e-06, "loss": 0.6288, "step": 6136 }, { "epoch": 0.65, "grad_norm": 1.8440201391903348, "learning_rate": 2.922049865796184e-06, "loss": 0.6007, "step": 6137 }, { "epoch": 0.65, "grad_norm": 1.9097177705259867, "learning_rate": 2.920445772096626e-06, "loss": 0.6302, "step": 6138 }, { "epoch": 0.65, "grad_norm": 1.6487590947385005, "learning_rate": 2.918841937161517e-06, "loss": 0.5445, "step": 6139 }, { "epoch": 0.65, "grad_norm": 1.1775740200973253, "learning_rate": 2.9172383611904265e-06, "loss": 0.4843, "step": 6140 }, { "epoch": 0.66, "grad_norm": 1.8014221706934235, "learning_rate": 2.9156350443828917e-06, "loss": 0.5964, "step": 6141 }, { "epoch": 0.66, "grad_norm": 1.6901515719666664, "learning_rate": 2.914031986938417e-06, "loss": 0.5657, "step": 6142 }, { "epoch": 0.66, "grad_norm": 1.8726266621815397, "learning_rate": 2.912429189056475e-06, "loss": 0.6124, "step": 6143 }, { "epoch": 0.66, "grad_norm": 1.794709179361045, "learning_rate": 2.9108266509365068e-06, "loss": 0.6472, "step": 6144 }, { "epoch": 0.66, "grad_norm": 1.7768586813807266, "learning_rate": 2.90922437277792e-06, "loss": 0.6505, "step": 6145 }, { "epoch": 0.66, "grad_norm": 1.6683435482256719, "learning_rate": 2.907622354780091e-06, "loss": 0.5671, "step": 6146 }, { "epoch": 0.66, "grad_norm": 1.9181658348357122, "learning_rate": 2.9060205971423615e-06, "loss": 0.6676, "step": 6147 }, { "epoch": 0.66, "grad_norm": 1.1206232024993512, "learning_rate": 2.904419100064044e-06, "loss": 0.5118, "step": 6148 }, { "epoch": 0.66, "grad_norm": 1.836960229137068, "learning_rate": 2.902817863744417e-06, "loss": 0.5599, "step": 6149 }, { "epoch": 0.66, "grad_norm": 1.7125224211752255, "learning_rate": 2.9012168883827265e-06, "loss": 0.5635, "step": 6150 }, { "epoch": 0.66, "grad_norm": 1.1651896690929815, "learning_rate": 2.8996161741781846e-06, "loss": 0.4987, "step": 6151 }, { "epoch": 0.66, "grad_norm": 1.1403992091738322, "learning_rate": 2.8980157213299746e-06, "loss": 0.4958, "step": 6152 }, { "epoch": 0.66, "grad_norm": 1.6334092302449468, "learning_rate": 2.896415530037244e-06, "loss": 0.5965, "step": 6153 }, { "epoch": 0.66, "grad_norm": 2.0354617581971346, "learning_rate": 2.894815600499109e-06, "loss": 0.6669, "step": 6154 }, { "epoch": 0.66, "grad_norm": 1.1304049550879067, "learning_rate": 2.8932159329146527e-06, "loss": 0.489, "step": 6155 }, { "epoch": 0.66, "grad_norm": 2.0877804351466995, "learning_rate": 2.891616527482927e-06, "loss": 0.6944, "step": 6156 }, { "epoch": 0.66, "grad_norm": 1.8219119798815928, "learning_rate": 2.8900173844029478e-06, "loss": 0.6741, "step": 6157 }, { "epoch": 0.66, "grad_norm": 1.719364599954579, "learning_rate": 2.8884185038737035e-06, "loss": 0.6809, "step": 6158 }, { "epoch": 0.66, "grad_norm": 1.8768587519244975, "learning_rate": 2.8868198860941464e-06, "loss": 0.6489, "step": 6159 }, { "epoch": 0.66, "grad_norm": 1.7206543148699234, "learning_rate": 2.8852215312631958e-06, "loss": 0.5972, "step": 6160 }, { "epoch": 0.66, "grad_norm": 1.7767123067663124, "learning_rate": 2.8836234395797392e-06, "loss": 0.5082, "step": 6161 }, { "epoch": 0.66, "grad_norm": 1.695699711810264, "learning_rate": 2.8820256112426325e-06, "loss": 0.6193, "step": 6162 }, { "epoch": 0.66, "grad_norm": 1.8512193381789017, "learning_rate": 2.880428046450697e-06, "loss": 0.6253, "step": 6163 }, { "epoch": 0.66, "grad_norm": 1.2087311040153168, "learning_rate": 2.878830745402722e-06, "loss": 0.4963, "step": 6164 }, { "epoch": 0.66, "grad_norm": 1.811074306840946, "learning_rate": 2.8772337082974645e-06, "loss": 0.6554, "step": 6165 }, { "epoch": 0.66, "grad_norm": 1.8191176338822732, "learning_rate": 2.8756369353336466e-06, "loss": 0.6058, "step": 6166 }, { "epoch": 0.66, "grad_norm": 1.7847944449959934, "learning_rate": 2.8740404267099604e-06, "loss": 0.6548, "step": 6167 }, { "epoch": 0.66, "grad_norm": 1.9565387996415977, "learning_rate": 2.8724441826250627e-06, "loss": 0.6161, "step": 6168 }, { "epoch": 0.66, "grad_norm": 1.8071576374524978, "learning_rate": 2.8708482032775796e-06, "loss": 0.7499, "step": 6169 }, { "epoch": 0.66, "grad_norm": 1.6212856266058862, "learning_rate": 2.869252488866102e-06, "loss": 0.662, "step": 6170 }, { "epoch": 0.66, "grad_norm": 1.716518972386025, "learning_rate": 2.867657039589189e-06, "loss": 0.5553, "step": 6171 }, { "epoch": 0.66, "grad_norm": 1.0929008173615324, "learning_rate": 2.8660618556453673e-06, "loss": 0.503, "step": 6172 }, { "epoch": 0.66, "grad_norm": 1.808358604921995, "learning_rate": 2.864466937233129e-06, "loss": 0.6401, "step": 6173 }, { "epoch": 0.66, "grad_norm": 1.9431170722673767, "learning_rate": 2.8628722845509348e-06, "loss": 0.577, "step": 6174 }, { "epoch": 0.66, "grad_norm": 1.7670821089651514, "learning_rate": 2.861277897797211e-06, "loss": 0.532, "step": 6175 }, { "epoch": 0.66, "grad_norm": 1.7048929547711265, "learning_rate": 2.8596837771703513e-06, "loss": 0.6344, "step": 6176 }, { "epoch": 0.66, "grad_norm": 1.9463509229697709, "learning_rate": 2.8580899228687164e-06, "loss": 0.592, "step": 6177 }, { "epoch": 0.66, "grad_norm": 1.851797062247751, "learning_rate": 2.856496335090635e-06, "loss": 0.6002, "step": 6178 }, { "epoch": 0.66, "grad_norm": 1.6394479482656776, "learning_rate": 2.8549030140344015e-06, "loss": 0.5242, "step": 6179 }, { "epoch": 0.66, "grad_norm": 1.5029050568955804, "learning_rate": 2.8533099598982743e-06, "loss": 0.5101, "step": 6180 }, { "epoch": 0.66, "grad_norm": 1.1597320664075885, "learning_rate": 2.851717172880481e-06, "loss": 0.5068, "step": 6181 }, { "epoch": 0.66, "grad_norm": 1.6198347094392842, "learning_rate": 2.8501246531792203e-06, "loss": 0.5083, "step": 6182 }, { "epoch": 0.66, "grad_norm": 1.7890164535825495, "learning_rate": 2.8485324009926524e-06, "loss": 0.5879, "step": 6183 }, { "epoch": 0.66, "grad_norm": 1.7979318779601048, "learning_rate": 2.846940416518904e-06, "loss": 0.5879, "step": 6184 }, { "epoch": 0.66, "grad_norm": 1.6160730738226612, "learning_rate": 2.8453486999560713e-06, "loss": 0.5517, "step": 6185 }, { "epoch": 0.66, "grad_norm": 1.751558388911888, "learning_rate": 2.843757251502215e-06, "loss": 0.4851, "step": 6186 }, { "epoch": 0.66, "grad_norm": 1.7146481165237148, "learning_rate": 2.842166071355364e-06, "loss": 0.5804, "step": 6187 }, { "epoch": 0.66, "grad_norm": 1.8148165760443968, "learning_rate": 2.8405751597135123e-06, "loss": 0.6199, "step": 6188 }, { "epoch": 0.66, "grad_norm": 1.7682564921497728, "learning_rate": 2.838984516774621e-06, "loss": 0.5558, "step": 6189 }, { "epoch": 0.66, "grad_norm": 1.6759311056670823, "learning_rate": 2.837394142736619e-06, "loss": 0.5873, "step": 6190 }, { "epoch": 0.66, "grad_norm": 1.8991992831024516, "learning_rate": 2.8358040377974004e-06, "loss": 0.5637, "step": 6191 }, { "epoch": 0.66, "grad_norm": 1.6351801685761598, "learning_rate": 2.834214202154827e-06, "loss": 0.5844, "step": 6192 }, { "epoch": 0.66, "grad_norm": 1.8928064882079663, "learning_rate": 2.832624636006724e-06, "loss": 0.6503, "step": 6193 }, { "epoch": 0.66, "grad_norm": 1.8678500142082795, "learning_rate": 2.8310353395508895e-06, "loss": 0.7501, "step": 6194 }, { "epoch": 0.66, "grad_norm": 1.697593284453819, "learning_rate": 2.8294463129850784e-06, "loss": 0.604, "step": 6195 }, { "epoch": 0.66, "grad_norm": 1.137201835074518, "learning_rate": 2.827857556507021e-06, "loss": 0.4865, "step": 6196 }, { "epoch": 0.66, "grad_norm": 1.7535524619256433, "learning_rate": 2.826269070314407e-06, "loss": 0.6007, "step": 6197 }, { "epoch": 0.66, "grad_norm": 1.6274141617074822, "learning_rate": 2.824680854604901e-06, "loss": 0.5894, "step": 6198 }, { "epoch": 0.66, "grad_norm": 1.7278050530370581, "learning_rate": 2.823092909576126e-06, "loss": 0.5611, "step": 6199 }, { "epoch": 0.66, "grad_norm": 1.6829544934824687, "learning_rate": 2.821505235425675e-06, "loss": 0.5764, "step": 6200 }, { "epoch": 0.66, "grad_norm": 1.8277271447910752, "learning_rate": 2.8199178323511064e-06, "loss": 0.6864, "step": 6201 }, { "epoch": 0.66, "grad_norm": 1.6757077587105522, "learning_rate": 2.8183307005499444e-06, "loss": 0.5682, "step": 6202 }, { "epoch": 0.66, "grad_norm": 1.5277710085300833, "learning_rate": 2.816743840219681e-06, "loss": 0.5303, "step": 6203 }, { "epoch": 0.66, "grad_norm": 1.6674541846796156, "learning_rate": 2.815157251557772e-06, "loss": 0.6613, "step": 6204 }, { "epoch": 0.66, "grad_norm": 1.817792539803366, "learning_rate": 2.8135709347616426e-06, "loss": 0.5972, "step": 6205 }, { "epoch": 0.66, "grad_norm": 1.762520269268898, "learning_rate": 2.8119848900286805e-06, "loss": 0.527, "step": 6206 }, { "epoch": 0.66, "grad_norm": 1.7181582985597972, "learning_rate": 2.810399117556243e-06, "loss": 0.6178, "step": 6207 }, { "epoch": 0.66, "grad_norm": 1.616073694830389, "learning_rate": 2.8088136175416537e-06, "loss": 0.5865, "step": 6208 }, { "epoch": 0.66, "grad_norm": 2.250852736059673, "learning_rate": 2.8072283901821962e-06, "loss": 0.6856, "step": 6209 }, { "epoch": 0.66, "grad_norm": 1.1139639544388207, "learning_rate": 2.805643435675126e-06, "loss": 0.4802, "step": 6210 }, { "epoch": 0.66, "grad_norm": 1.7376403070370103, "learning_rate": 2.8040587542176647e-06, "loss": 0.689, "step": 6211 }, { "epoch": 0.66, "grad_norm": 1.9422002767377515, "learning_rate": 2.802474346006997e-06, "loss": 0.638, "step": 6212 }, { "epoch": 0.66, "grad_norm": 1.6566361833237804, "learning_rate": 2.8008902112402756e-06, "loss": 0.5181, "step": 6213 }, { "epoch": 0.66, "grad_norm": 1.632611851874573, "learning_rate": 2.7993063501146167e-06, "loss": 0.5422, "step": 6214 }, { "epoch": 0.66, "grad_norm": 1.7355689739537152, "learning_rate": 2.797722762827108e-06, "loss": 0.5445, "step": 6215 }, { "epoch": 0.66, "grad_norm": 1.6471337677793016, "learning_rate": 2.796139449574798e-06, "loss": 0.5529, "step": 6216 }, { "epoch": 0.66, "grad_norm": 1.7330076667207621, "learning_rate": 2.794556410554702e-06, "loss": 0.6162, "step": 6217 }, { "epoch": 0.66, "grad_norm": 2.0385628108329623, "learning_rate": 2.7929736459638026e-06, "loss": 0.5865, "step": 6218 }, { "epoch": 0.66, "grad_norm": 1.9288752544107923, "learning_rate": 2.791391155999046e-06, "loss": 0.5783, "step": 6219 }, { "epoch": 0.66, "grad_norm": 1.6892607258731585, "learning_rate": 2.7898089408573458e-06, "loss": 0.6438, "step": 6220 }, { "epoch": 0.66, "grad_norm": 1.7295918189022341, "learning_rate": 2.788227000735583e-06, "loss": 0.5247, "step": 6221 }, { "epoch": 0.66, "grad_norm": 1.7667803101082749, "learning_rate": 2.7866453358306034e-06, "loss": 0.619, "step": 6222 }, { "epoch": 0.66, "grad_norm": 1.9205597034799058, "learning_rate": 2.785063946339213e-06, "loss": 0.6328, "step": 6223 }, { "epoch": 0.66, "grad_norm": 1.6754869376255501, "learning_rate": 2.783482832458192e-06, "loss": 0.6058, "step": 6224 }, { "epoch": 0.66, "grad_norm": 1.1012078896049566, "learning_rate": 2.7819019943842805e-06, "loss": 0.4809, "step": 6225 }, { "epoch": 0.66, "grad_norm": 1.8602219419728625, "learning_rate": 2.780321432314189e-06, "loss": 0.6589, "step": 6226 }, { "epoch": 0.66, "grad_norm": 1.6374876375510183, "learning_rate": 2.7787411464445887e-06, "loss": 0.5637, "step": 6227 }, { "epoch": 0.66, "grad_norm": 1.1391951475222026, "learning_rate": 2.7771611369721197e-06, "loss": 0.4825, "step": 6228 }, { "epoch": 0.66, "grad_norm": 1.6676579254091162, "learning_rate": 2.775581404093387e-06, "loss": 0.5532, "step": 6229 }, { "epoch": 0.66, "grad_norm": 1.5249357664090577, "learning_rate": 2.7740019480049606e-06, "loss": 0.5701, "step": 6230 }, { "epoch": 0.66, "grad_norm": 1.838496356033279, "learning_rate": 2.772422768903374e-06, "loss": 0.6696, "step": 6231 }, { "epoch": 0.66, "grad_norm": 1.5550556929354324, "learning_rate": 2.7708438669851335e-06, "loss": 0.4727, "step": 6232 }, { "epoch": 0.66, "grad_norm": 1.7275805842862337, "learning_rate": 2.769265242446704e-06, "loss": 0.5968, "step": 6233 }, { "epoch": 0.66, "grad_norm": 1.8690531775477606, "learning_rate": 2.767686895484517e-06, "loss": 0.6384, "step": 6234 }, { "epoch": 0.67, "grad_norm": 1.9930141464574977, "learning_rate": 2.766108826294971e-06, "loss": 0.6425, "step": 6235 }, { "epoch": 0.67, "grad_norm": 1.816665762775846, "learning_rate": 2.7645310350744296e-06, "loss": 0.5518, "step": 6236 }, { "epoch": 0.67, "grad_norm": 1.750215826897527, "learning_rate": 2.7629535220192236e-06, "loss": 0.5716, "step": 6237 }, { "epoch": 0.67, "grad_norm": 1.5642286296815398, "learning_rate": 2.7613762873256422e-06, "loss": 0.6162, "step": 6238 }, { "epoch": 0.67, "grad_norm": 1.7459314831473258, "learning_rate": 2.7597993311899484e-06, "loss": 0.6241, "step": 6239 }, { "epoch": 0.67, "grad_norm": 1.675505273139745, "learning_rate": 2.7582226538083655e-06, "loss": 0.5962, "step": 6240 }, { "epoch": 0.67, "grad_norm": 2.0470770094069506, "learning_rate": 2.7566462553770844e-06, "loss": 0.6572, "step": 6241 }, { "epoch": 0.67, "grad_norm": 1.7949906751451592, "learning_rate": 2.7550701360922603e-06, "loss": 0.5784, "step": 6242 }, { "epoch": 0.67, "grad_norm": 1.6938917493238108, "learning_rate": 2.7534942961500142e-06, "loss": 0.5926, "step": 6243 }, { "epoch": 0.67, "grad_norm": 2.021759603280043, "learning_rate": 2.751918735746432e-06, "loss": 0.689, "step": 6244 }, { "epoch": 0.67, "grad_norm": 1.5925835262607695, "learning_rate": 2.7503434550775644e-06, "loss": 0.4842, "step": 6245 }, { "epoch": 0.67, "grad_norm": 1.8386681241441227, "learning_rate": 2.748768454339429e-06, "loss": 0.6432, "step": 6246 }, { "epoch": 0.67, "grad_norm": 1.846144016460603, "learning_rate": 2.747193733728005e-06, "loss": 0.5555, "step": 6247 }, { "epoch": 0.67, "grad_norm": 1.7553536666790797, "learning_rate": 2.74561929343924e-06, "loss": 0.6163, "step": 6248 }, { "epoch": 0.67, "grad_norm": 1.895638392125985, "learning_rate": 2.7440451336690476e-06, "loss": 0.5721, "step": 6249 }, { "epoch": 0.67, "grad_norm": 1.671540987122216, "learning_rate": 2.7424712546133032e-06, "loss": 0.5596, "step": 6250 }, { "epoch": 0.67, "grad_norm": 1.720918831880632, "learning_rate": 2.7408976564678513e-06, "loss": 0.603, "step": 6251 }, { "epoch": 0.67, "grad_norm": 2.119344556157542, "learning_rate": 2.739324339428494e-06, "loss": 0.6649, "step": 6252 }, { "epoch": 0.67, "grad_norm": 1.8804126963787635, "learning_rate": 2.737751303691006e-06, "loss": 0.5889, "step": 6253 }, { "epoch": 0.67, "grad_norm": 2.1854742733936057, "learning_rate": 2.736178549451124e-06, "loss": 0.6379, "step": 6254 }, { "epoch": 0.67, "grad_norm": 1.8943048038110242, "learning_rate": 2.7346060769045504e-06, "loss": 0.621, "step": 6255 }, { "epoch": 0.67, "grad_norm": 1.6511781151585134, "learning_rate": 2.7330338862469514e-06, "loss": 0.5562, "step": 6256 }, { "epoch": 0.67, "grad_norm": 1.8995189006488964, "learning_rate": 2.731461977673959e-06, "loss": 0.5986, "step": 6257 }, { "epoch": 0.67, "grad_norm": 1.7312798432441865, "learning_rate": 2.7298903513811713e-06, "loss": 0.6234, "step": 6258 }, { "epoch": 0.67, "grad_norm": 1.7879431941340007, "learning_rate": 2.728319007564148e-06, "loss": 0.661, "step": 6259 }, { "epoch": 0.67, "grad_norm": 1.7946162967960848, "learning_rate": 2.7267479464184166e-06, "loss": 0.6274, "step": 6260 }, { "epoch": 0.67, "grad_norm": 1.8725709377703825, "learning_rate": 2.7251771681394686e-06, "loss": 0.5694, "step": 6261 }, { "epoch": 0.67, "grad_norm": 2.120607021503561, "learning_rate": 2.7236066729227595e-06, "loss": 0.6506, "step": 6262 }, { "epoch": 0.67, "grad_norm": 1.768722452174102, "learning_rate": 2.7220364609637105e-06, "loss": 0.5077, "step": 6263 }, { "epoch": 0.67, "grad_norm": 1.8414462604484634, "learning_rate": 2.720466532457707e-06, "loss": 0.5459, "step": 6264 }, { "epoch": 0.67, "grad_norm": 1.759587187152544, "learning_rate": 2.7188968876000976e-06, "loss": 0.6118, "step": 6265 }, { "epoch": 0.67, "grad_norm": 1.8429340111829366, "learning_rate": 2.717327526586203e-06, "loss": 0.6141, "step": 6266 }, { "epoch": 0.67, "grad_norm": 1.632627582193389, "learning_rate": 2.7157584496112976e-06, "loss": 0.5916, "step": 6267 }, { "epoch": 0.67, "grad_norm": 1.655627960219595, "learning_rate": 2.714189656870628e-06, "loss": 0.5227, "step": 6268 }, { "epoch": 0.67, "grad_norm": 1.7862686430553167, "learning_rate": 2.7126211485594023e-06, "loss": 0.6092, "step": 6269 }, { "epoch": 0.67, "grad_norm": 1.8711071840599902, "learning_rate": 2.7110529248727947e-06, "loss": 0.5909, "step": 6270 }, { "epoch": 0.67, "grad_norm": 1.8956650099415409, "learning_rate": 2.7094849860059424e-06, "loss": 0.5863, "step": 6271 }, { "epoch": 0.67, "grad_norm": 1.7645154729227286, "learning_rate": 2.7079173321539496e-06, "loss": 0.589, "step": 6272 }, { "epoch": 0.67, "grad_norm": 1.8650562088722598, "learning_rate": 2.706349963511884e-06, "loss": 0.5371, "step": 6273 }, { "epoch": 0.67, "grad_norm": 1.6254750623400345, "learning_rate": 2.7047828802747754e-06, "loss": 0.5592, "step": 6274 }, { "epoch": 0.67, "grad_norm": 2.003834484041923, "learning_rate": 2.7032160826376217e-06, "loss": 0.6898, "step": 6275 }, { "epoch": 0.67, "grad_norm": 1.7289058485045847, "learning_rate": 2.701649570795384e-06, "loss": 0.585, "step": 6276 }, { "epoch": 0.67, "grad_norm": 1.5896347051632993, "learning_rate": 2.700083344942985e-06, "loss": 0.5325, "step": 6277 }, { "epoch": 0.67, "grad_norm": 1.1937954711950802, "learning_rate": 2.6985174052753175e-06, "loss": 0.5048, "step": 6278 }, { "epoch": 0.67, "grad_norm": 1.6542992898515052, "learning_rate": 2.6969517519872344e-06, "loss": 0.5783, "step": 6279 }, { "epoch": 0.67, "grad_norm": 1.79790864127545, "learning_rate": 2.695386385273553e-06, "loss": 0.6493, "step": 6280 }, { "epoch": 0.67, "grad_norm": 1.704045722936237, "learning_rate": 2.6938213053290575e-06, "loss": 0.5963, "step": 6281 }, { "epoch": 0.67, "grad_norm": 1.8692623696547201, "learning_rate": 2.6922565123484938e-06, "loss": 0.601, "step": 6282 }, { "epoch": 0.67, "grad_norm": 1.7391335822751361, "learning_rate": 2.690692006526574e-06, "loss": 0.5481, "step": 6283 }, { "epoch": 0.67, "grad_norm": 1.8098189137557432, "learning_rate": 2.6891277880579737e-06, "loss": 0.6233, "step": 6284 }, { "epoch": 0.67, "grad_norm": 1.7254207470727478, "learning_rate": 2.687563857137332e-06, "loss": 0.6085, "step": 6285 }, { "epoch": 0.67, "grad_norm": 1.7999638634053021, "learning_rate": 2.6860002139592547e-06, "loss": 0.6572, "step": 6286 }, { "epoch": 0.67, "grad_norm": 2.112357001085462, "learning_rate": 2.684436858718308e-06, "loss": 0.6198, "step": 6287 }, { "epoch": 0.67, "grad_norm": 1.7408755319524303, "learning_rate": 2.682873791609025e-06, "loss": 0.6045, "step": 6288 }, { "epoch": 0.67, "grad_norm": 1.1315340394164517, "learning_rate": 2.6813110128259022e-06, "loss": 0.4733, "step": 6289 }, { "epoch": 0.67, "grad_norm": 1.7516542231717382, "learning_rate": 2.6797485225634014e-06, "loss": 0.6154, "step": 6290 }, { "epoch": 0.67, "grad_norm": 1.1095863329527424, "learning_rate": 2.6781863210159465e-06, "loss": 0.5146, "step": 6291 }, { "epoch": 0.67, "grad_norm": 1.7323965023118468, "learning_rate": 2.6766244083779266e-06, "loss": 0.441, "step": 6292 }, { "epoch": 0.67, "grad_norm": 1.7171105880041193, "learning_rate": 2.675062784843694e-06, "loss": 0.5745, "step": 6293 }, { "epoch": 0.67, "grad_norm": 1.9964036457364045, "learning_rate": 2.6735014506075664e-06, "loss": 0.6054, "step": 6294 }, { "epoch": 0.67, "grad_norm": 1.683975912491929, "learning_rate": 2.6719404058638247e-06, "loss": 0.6032, "step": 6295 }, { "epoch": 0.67, "grad_norm": 1.6826620167746336, "learning_rate": 2.6703796508067137e-06, "loss": 0.5446, "step": 6296 }, { "epoch": 0.67, "grad_norm": 1.7004320068508745, "learning_rate": 2.6688191856304426e-06, "loss": 0.5043, "step": 6297 }, { "epoch": 0.67, "grad_norm": 1.637884642061102, "learning_rate": 2.667259010529183e-06, "loss": 0.5836, "step": 6298 }, { "epoch": 0.67, "grad_norm": 1.139528781654397, "learning_rate": 2.6656991256970732e-06, "loss": 0.5176, "step": 6299 }, { "epoch": 0.67, "grad_norm": 1.7847511750921032, "learning_rate": 2.664139531328213e-06, "loss": 0.6175, "step": 6300 }, { "epoch": 0.67, "grad_norm": 1.8705145652387638, "learning_rate": 2.6625802276166668e-06, "loss": 0.716, "step": 6301 }, { "epoch": 0.67, "grad_norm": 1.9665034767123164, "learning_rate": 2.6610212147564636e-06, "loss": 0.6202, "step": 6302 }, { "epoch": 0.67, "grad_norm": 1.628607642634974, "learning_rate": 2.6594624929415944e-06, "loss": 0.4781, "step": 6303 }, { "epoch": 0.67, "grad_norm": 1.0914817700326376, "learning_rate": 2.657904062366016e-06, "loss": 0.4869, "step": 6304 }, { "epoch": 0.67, "grad_norm": 1.0712930110543348, "learning_rate": 2.656345923223648e-06, "loss": 0.4605, "step": 6305 }, { "epoch": 0.67, "grad_norm": 2.015674878937444, "learning_rate": 2.6547880757083733e-06, "loss": 0.6326, "step": 6306 }, { "epoch": 0.67, "grad_norm": 1.9756406914492424, "learning_rate": 2.6532305200140396e-06, "loss": 0.6027, "step": 6307 }, { "epoch": 0.67, "grad_norm": 1.7213136222977237, "learning_rate": 2.6516732563344564e-06, "loss": 0.7134, "step": 6308 }, { "epoch": 0.67, "grad_norm": 1.7138287704895028, "learning_rate": 2.6501162848634023e-06, "loss": 0.6044, "step": 6309 }, { "epoch": 0.67, "grad_norm": 1.8209469168582915, "learning_rate": 2.6485596057946095e-06, "loss": 0.5568, "step": 6310 }, { "epoch": 0.67, "grad_norm": 1.78073516409281, "learning_rate": 2.64700321932178e-06, "loss": 0.5341, "step": 6311 }, { "epoch": 0.67, "grad_norm": 1.8394778875871483, "learning_rate": 2.645447125638584e-06, "loss": 0.5895, "step": 6312 }, { "epoch": 0.67, "grad_norm": 1.973056689551445, "learning_rate": 2.6438913249386477e-06, "loss": 0.6209, "step": 6313 }, { "epoch": 0.67, "grad_norm": 1.9719681197745662, "learning_rate": 2.642335817415563e-06, "loss": 0.665, "step": 6314 }, { "epoch": 0.67, "grad_norm": 1.192500377014218, "learning_rate": 2.6407806032628875e-06, "loss": 0.4997, "step": 6315 }, { "epoch": 0.67, "grad_norm": 1.8093892513246195, "learning_rate": 2.639225682674139e-06, "loss": 0.5872, "step": 6316 }, { "epoch": 0.67, "grad_norm": 2.015470528715844, "learning_rate": 2.637671055842801e-06, "loss": 0.587, "step": 6317 }, { "epoch": 0.67, "grad_norm": 1.8881511611319348, "learning_rate": 2.6361167229623196e-06, "loss": 0.5066, "step": 6318 }, { "epoch": 0.67, "grad_norm": 1.854792044272147, "learning_rate": 2.634562684226105e-06, "loss": 0.6896, "step": 6319 }, { "epoch": 0.67, "grad_norm": 1.8173897010509874, "learning_rate": 2.6330089398275304e-06, "loss": 0.6302, "step": 6320 }, { "epoch": 0.67, "grad_norm": 1.8458207890861034, "learning_rate": 2.631455489959932e-06, "loss": 0.6056, "step": 6321 }, { "epoch": 0.67, "grad_norm": 1.7074378059965243, "learning_rate": 2.62990233481661e-06, "loss": 0.5854, "step": 6322 }, { "epoch": 0.67, "grad_norm": 1.7476794648369756, "learning_rate": 2.6283494745908288e-06, "loss": 0.6271, "step": 6323 }, { "epoch": 0.67, "grad_norm": 1.1532086928527367, "learning_rate": 2.6267969094758115e-06, "loss": 0.4948, "step": 6324 }, { "epoch": 0.67, "grad_norm": 1.7119137294713798, "learning_rate": 2.6252446396647503e-06, "loss": 0.6241, "step": 6325 }, { "epoch": 0.67, "grad_norm": 1.734244415610171, "learning_rate": 2.6236926653507978e-06, "loss": 0.573, "step": 6326 }, { "epoch": 0.67, "grad_norm": 1.1237355502553408, "learning_rate": 2.6221409867270703e-06, "loss": 0.5178, "step": 6327 }, { "epoch": 0.67, "grad_norm": 1.5698637434702258, "learning_rate": 2.620589603986645e-06, "loss": 0.5027, "step": 6328 }, { "epoch": 0.68, "grad_norm": 1.885427082712049, "learning_rate": 2.6190385173225697e-06, "loss": 0.6515, "step": 6329 }, { "epoch": 0.68, "grad_norm": 1.77436352001701, "learning_rate": 2.6174877269278463e-06, "loss": 0.5854, "step": 6330 }, { "epoch": 0.68, "grad_norm": 1.9852303300310084, "learning_rate": 2.6159372329954456e-06, "loss": 0.5593, "step": 6331 }, { "epoch": 0.68, "grad_norm": 1.8007671732789963, "learning_rate": 2.6143870357182988e-06, "loss": 0.6556, "step": 6332 }, { "epoch": 0.68, "grad_norm": 1.7703875135753124, "learning_rate": 2.612837135289301e-06, "loss": 0.6278, "step": 6333 }, { "epoch": 0.68, "grad_norm": 1.7762122499019364, "learning_rate": 2.611287531901311e-06, "loss": 0.6019, "step": 6334 }, { "epoch": 0.68, "grad_norm": 1.7712383371649054, "learning_rate": 2.6097382257471494e-06, "loss": 0.5964, "step": 6335 }, { "epoch": 0.68, "grad_norm": 1.1495738147443977, "learning_rate": 2.6081892170196012e-06, "loss": 0.5102, "step": 6336 }, { "epoch": 0.68, "grad_norm": 1.793451281342465, "learning_rate": 2.6066405059114147e-06, "loss": 0.6382, "step": 6337 }, { "epoch": 0.68, "grad_norm": 1.5446634801020094, "learning_rate": 2.6050920926152965e-06, "loss": 0.5458, "step": 6338 }, { "epoch": 0.68, "grad_norm": 1.7361663055151098, "learning_rate": 2.6035439773239222e-06, "loss": 0.5727, "step": 6339 }, { "epoch": 0.68, "grad_norm": 1.8785248306077325, "learning_rate": 2.6019961602299282e-06, "loss": 0.5929, "step": 6340 }, { "epoch": 0.68, "grad_norm": 1.6285625622706699, "learning_rate": 2.6004486415259124e-06, "loss": 0.61, "step": 6341 }, { "epoch": 0.68, "grad_norm": 1.8850919167117828, "learning_rate": 2.5989014214044372e-06, "loss": 0.6677, "step": 6342 }, { "epoch": 0.68, "grad_norm": 1.7110656524395975, "learning_rate": 2.597354500058027e-06, "loss": 0.565, "step": 6343 }, { "epoch": 0.68, "grad_norm": 1.7885963709867818, "learning_rate": 2.5958078776791695e-06, "loss": 0.5677, "step": 6344 }, { "epoch": 0.68, "grad_norm": 1.8391523306681423, "learning_rate": 2.5942615544603132e-06, "loss": 0.6476, "step": 6345 }, { "epoch": 0.68, "grad_norm": 1.7423973613473904, "learning_rate": 2.5927155305938755e-06, "loss": 0.5762, "step": 6346 }, { "epoch": 0.68, "grad_norm": 1.6450502026599232, "learning_rate": 2.5911698062722303e-06, "loss": 0.5539, "step": 6347 }, { "epoch": 0.68, "grad_norm": 1.907937992967138, "learning_rate": 2.5896243816877153e-06, "loss": 0.6341, "step": 6348 }, { "epoch": 0.68, "grad_norm": 1.837774372377251, "learning_rate": 2.588079257032632e-06, "loss": 0.5432, "step": 6349 }, { "epoch": 0.68, "grad_norm": 1.962394874285005, "learning_rate": 2.5865344324992447e-06, "loss": 0.5589, "step": 6350 }, { "epoch": 0.68, "grad_norm": 1.7608235136459538, "learning_rate": 2.58498990827978e-06, "loss": 0.6179, "step": 6351 }, { "epoch": 0.68, "grad_norm": 1.733973705565014, "learning_rate": 2.583445684566429e-06, "loss": 0.5521, "step": 6352 }, { "epoch": 0.68, "grad_norm": 1.2061551409039373, "learning_rate": 2.58190176155134e-06, "loss": 0.5069, "step": 6353 }, { "epoch": 0.68, "grad_norm": 1.6905066118879835, "learning_rate": 2.5803581394266282e-06, "loss": 0.6412, "step": 6354 }, { "epoch": 0.68, "grad_norm": 1.5669949444728553, "learning_rate": 2.578814818384372e-06, "loss": 0.5478, "step": 6355 }, { "epoch": 0.68, "grad_norm": 1.8781207978075205, "learning_rate": 2.5772717986166098e-06, "loss": 0.6048, "step": 6356 }, { "epoch": 0.68, "grad_norm": 1.8317382172623176, "learning_rate": 2.5757290803153446e-06, "loss": 0.5912, "step": 6357 }, { "epoch": 0.68, "grad_norm": 1.7793205619610333, "learning_rate": 2.574186663672541e-06, "loss": 0.6621, "step": 6358 }, { "epoch": 0.68, "grad_norm": 1.7432624540085468, "learning_rate": 2.5726445488801244e-06, "loss": 0.5003, "step": 6359 }, { "epoch": 0.68, "grad_norm": 1.5325537920350467, "learning_rate": 2.5711027361299865e-06, "loss": 0.4844, "step": 6360 }, { "epoch": 0.68, "grad_norm": 1.6756264925009516, "learning_rate": 2.569561225613976e-06, "loss": 0.5324, "step": 6361 }, { "epoch": 0.68, "grad_norm": 1.7240337621249726, "learning_rate": 2.56802001752391e-06, "loss": 0.5896, "step": 6362 }, { "epoch": 0.68, "grad_norm": 1.7234215672859994, "learning_rate": 2.566479112051565e-06, "loss": 0.5796, "step": 6363 }, { "epoch": 0.68, "grad_norm": 1.7885785119069433, "learning_rate": 2.5649385093886797e-06, "loss": 0.5208, "step": 6364 }, { "epoch": 0.68, "grad_norm": 1.1346796775441994, "learning_rate": 2.5633982097269542e-06, "loss": 0.4851, "step": 6365 }, { "epoch": 0.68, "grad_norm": 1.110558287839848, "learning_rate": 2.561858213258055e-06, "loss": 0.5278, "step": 6366 }, { "epoch": 0.68, "grad_norm": 1.1396054468502437, "learning_rate": 2.5603185201736038e-06, "loss": 0.506, "step": 6367 }, { "epoch": 0.68, "grad_norm": 1.8636196201232258, "learning_rate": 2.558779130665191e-06, "loss": 0.6948, "step": 6368 }, { "epoch": 0.68, "grad_norm": 1.909638170502817, "learning_rate": 2.557240044924366e-06, "loss": 0.593, "step": 6369 }, { "epoch": 0.68, "grad_norm": 1.9031501587779276, "learning_rate": 2.5557012631426416e-06, "loss": 0.6097, "step": 6370 }, { "epoch": 0.68, "grad_norm": 1.6275079894705091, "learning_rate": 2.5541627855114936e-06, "loss": 0.4817, "step": 6371 }, { "epoch": 0.68, "grad_norm": 1.7090811541367594, "learning_rate": 2.5526246122223574e-06, "loss": 0.624, "step": 6372 }, { "epoch": 0.68, "grad_norm": 1.6805383212107567, "learning_rate": 2.5510867434666325e-06, "loss": 0.5569, "step": 6373 }, { "epoch": 0.68, "grad_norm": 1.7552761843205356, "learning_rate": 2.5495491794356804e-06, "loss": 0.6194, "step": 6374 }, { "epoch": 0.68, "grad_norm": 1.72299267580918, "learning_rate": 2.5480119203208244e-06, "loss": 0.6106, "step": 6375 }, { "epoch": 0.68, "grad_norm": 1.7184947282035306, "learning_rate": 2.546474966313348e-06, "loss": 0.562, "step": 6376 }, { "epoch": 0.68, "grad_norm": 1.7058976469494136, "learning_rate": 2.5449383176045007e-06, "loss": 0.5512, "step": 6377 }, { "epoch": 0.68, "grad_norm": 1.8879203102792208, "learning_rate": 2.5434019743854888e-06, "loss": 0.545, "step": 6378 }, { "epoch": 0.68, "grad_norm": 2.0995416212317863, "learning_rate": 2.5418659368474872e-06, "loss": 0.6554, "step": 6379 }, { "epoch": 0.68, "grad_norm": 1.7218912088034222, "learning_rate": 2.540330205181629e-06, "loss": 0.5301, "step": 6380 }, { "epoch": 0.68, "grad_norm": 2.044510216463309, "learning_rate": 2.5387947795790067e-06, "loss": 0.6022, "step": 6381 }, { "epoch": 0.68, "grad_norm": 1.7272855198719221, "learning_rate": 2.537259660230679e-06, "loss": 0.582, "step": 6382 }, { "epoch": 0.68, "grad_norm": 1.7104800909435014, "learning_rate": 2.5357248473276643e-06, "loss": 0.6108, "step": 6383 }, { "epoch": 0.68, "grad_norm": 1.675348123811384, "learning_rate": 2.5341903410609426e-06, "loss": 0.5822, "step": 6384 }, { "epoch": 0.68, "grad_norm": 1.686715522786487, "learning_rate": 2.532656141621458e-06, "loss": 0.5338, "step": 6385 }, { "epoch": 0.68, "grad_norm": 1.3239513099055062, "learning_rate": 2.531122249200114e-06, "loss": 0.5043, "step": 6386 }, { "epoch": 0.68, "grad_norm": 1.3652911216981274, "learning_rate": 2.529588663987779e-06, "loss": 0.5239, "step": 6387 }, { "epoch": 0.68, "grad_norm": 1.8156621547895104, "learning_rate": 2.5280553861752784e-06, "loss": 0.5983, "step": 6388 }, { "epoch": 0.68, "grad_norm": 1.8549086837887718, "learning_rate": 2.526522415953403e-06, "loss": 0.6192, "step": 6389 }, { "epoch": 0.68, "grad_norm": 1.8263161928177267, "learning_rate": 2.5249897535129055e-06, "loss": 0.61, "step": 6390 }, { "epoch": 0.68, "grad_norm": 1.6581225091477927, "learning_rate": 2.523457399044497e-06, "loss": 0.576, "step": 6391 }, { "epoch": 0.68, "grad_norm": 1.6584950232219255, "learning_rate": 2.521925352738854e-06, "loss": 0.6181, "step": 6392 }, { "epoch": 0.68, "grad_norm": 1.692623012227944, "learning_rate": 2.520393614786613e-06, "loss": 0.6883, "step": 6393 }, { "epoch": 0.68, "grad_norm": 1.8070280863489736, "learning_rate": 2.518862185378372e-06, "loss": 0.6074, "step": 6394 }, { "epoch": 0.68, "grad_norm": 1.877415291243109, "learning_rate": 2.51733106470469e-06, "loss": 0.6908, "step": 6395 }, { "epoch": 0.68, "grad_norm": 1.6847054604221627, "learning_rate": 2.51580025295609e-06, "loss": 0.5823, "step": 6396 }, { "epoch": 0.68, "grad_norm": 1.8232427706008798, "learning_rate": 2.5142697503230533e-06, "loss": 0.5637, "step": 6397 }, { "epoch": 0.68, "grad_norm": 1.3503876376841417, "learning_rate": 2.5127395569960256e-06, "loss": 0.4941, "step": 6398 }, { "epoch": 0.68, "grad_norm": 1.219986107339937, "learning_rate": 2.5112096731654124e-06, "loss": 0.4949, "step": 6399 }, { "epoch": 0.68, "grad_norm": 1.7254553186899848, "learning_rate": 2.5096800990215818e-06, "loss": 0.5365, "step": 6400 }, { "epoch": 0.68, "grad_norm": 1.8695759032712493, "learning_rate": 2.5081508347548615e-06, "loss": 0.5939, "step": 6401 }, { "epoch": 0.68, "grad_norm": 1.5833483005353592, "learning_rate": 2.506621880555543e-06, "loss": 0.5038, "step": 6402 }, { "epoch": 0.68, "grad_norm": 1.0993462989008413, "learning_rate": 2.5050932366138782e-06, "loss": 0.4861, "step": 6403 }, { "epoch": 0.68, "grad_norm": 1.970650539103335, "learning_rate": 2.503564903120079e-06, "loss": 0.6175, "step": 6404 }, { "epoch": 0.68, "grad_norm": 1.799565857234963, "learning_rate": 2.5020368802643224e-06, "loss": 0.5655, "step": 6405 }, { "epoch": 0.68, "grad_norm": 1.6457297382892333, "learning_rate": 2.5005091682367417e-06, "loss": 0.5555, "step": 6406 }, { "epoch": 0.68, "grad_norm": 1.9502295148827506, "learning_rate": 2.4989817672274357e-06, "loss": 0.6427, "step": 6407 }, { "epoch": 0.68, "grad_norm": 1.9614343737509057, "learning_rate": 2.4974546774264625e-06, "loss": 0.5719, "step": 6408 }, { "epoch": 0.68, "grad_norm": 1.7701047416271063, "learning_rate": 2.495927899023842e-06, "loss": 0.6081, "step": 6409 }, { "epoch": 0.68, "grad_norm": 1.2664280090881683, "learning_rate": 2.4944014322095554e-06, "loss": 0.5261, "step": 6410 }, { "epoch": 0.68, "grad_norm": 1.6600895289652058, "learning_rate": 2.4928752771735446e-06, "loss": 0.4904, "step": 6411 }, { "epoch": 0.68, "grad_norm": 1.1667439647333362, "learning_rate": 2.4913494341057137e-06, "loss": 0.5021, "step": 6412 }, { "epoch": 0.68, "grad_norm": 2.036762924172915, "learning_rate": 2.4898239031959275e-06, "loss": 0.6566, "step": 6413 }, { "epoch": 0.68, "grad_norm": 1.7796339812207873, "learning_rate": 2.488298684634011e-06, "loss": 0.6009, "step": 6414 }, { "epoch": 0.68, "grad_norm": 1.7265029583671503, "learning_rate": 2.486773778609752e-06, "loss": 0.5621, "step": 6415 }, { "epoch": 0.68, "grad_norm": 1.147560029552876, "learning_rate": 2.4852491853128977e-06, "loss": 0.5061, "step": 6416 }, { "epoch": 0.68, "grad_norm": 1.6448643952441797, "learning_rate": 2.4837249049331586e-06, "loss": 0.5338, "step": 6417 }, { "epoch": 0.68, "grad_norm": 1.879098479137302, "learning_rate": 2.482200937660204e-06, "loss": 0.6534, "step": 6418 }, { "epoch": 0.68, "grad_norm": 1.7765896114010642, "learning_rate": 2.480677283683666e-06, "loss": 0.5413, "step": 6419 }, { "epoch": 0.68, "grad_norm": 1.5976539668914378, "learning_rate": 2.4791539431931356e-06, "loss": 0.504, "step": 6420 }, { "epoch": 0.68, "grad_norm": 1.6799652360930606, "learning_rate": 2.4776309163781665e-06, "loss": 0.5307, "step": 6421 }, { "epoch": 0.69, "grad_norm": 1.1362562901150308, "learning_rate": 2.476108203428274e-06, "loss": 0.4927, "step": 6422 }, { "epoch": 0.69, "grad_norm": 1.6545314562344948, "learning_rate": 2.4745858045329325e-06, "loss": 0.5611, "step": 6423 }, { "epoch": 0.69, "grad_norm": 1.851023315376458, "learning_rate": 2.473063719881578e-06, "loss": 0.579, "step": 6424 }, { "epoch": 0.69, "grad_norm": 2.1196156737556415, "learning_rate": 2.471541949663608e-06, "loss": 0.6049, "step": 6425 }, { "epoch": 0.69, "grad_norm": 1.6530603615393473, "learning_rate": 2.4700204940683798e-06, "loss": 0.5505, "step": 6426 }, { "epoch": 0.69, "grad_norm": 1.8962259517745572, "learning_rate": 2.4684993532852123e-06, "loss": 0.5973, "step": 6427 }, { "epoch": 0.69, "grad_norm": 1.7083479789335874, "learning_rate": 2.4669785275033857e-06, "loss": 0.5785, "step": 6428 }, { "epoch": 0.69, "grad_norm": 1.7347153276994138, "learning_rate": 2.4654580169121407e-06, "loss": 0.6481, "step": 6429 }, { "epoch": 0.69, "grad_norm": 1.8413205294384785, "learning_rate": 2.463937821700677e-06, "loss": 0.5918, "step": 6430 }, { "epoch": 0.69, "grad_norm": 1.818619357452727, "learning_rate": 2.4624179420581573e-06, "loss": 0.5837, "step": 6431 }, { "epoch": 0.69, "grad_norm": 1.194091937301793, "learning_rate": 2.4608983781737043e-06, "loss": 0.4941, "step": 6432 }, { "epoch": 0.69, "grad_norm": 1.1510762123762295, "learning_rate": 2.459379130236401e-06, "loss": 0.5022, "step": 6433 }, { "epoch": 0.69, "grad_norm": 1.703995073009619, "learning_rate": 2.4578601984352924e-06, "loss": 0.5671, "step": 6434 }, { "epoch": 0.69, "grad_norm": 1.6927229332769027, "learning_rate": 2.4563415829593825e-06, "loss": 0.5715, "step": 6435 }, { "epoch": 0.69, "grad_norm": 1.8457860617782402, "learning_rate": 2.4548232839976367e-06, "loss": 0.6386, "step": 6436 }, { "epoch": 0.69, "grad_norm": 1.7966308048361204, "learning_rate": 2.4533053017389807e-06, "loss": 0.6267, "step": 6437 }, { "epoch": 0.69, "grad_norm": 1.5753477074707984, "learning_rate": 2.451787636372303e-06, "loss": 0.5419, "step": 6438 }, { "epoch": 0.69, "grad_norm": 1.9228063433284508, "learning_rate": 2.4502702880864476e-06, "loss": 0.62, "step": 6439 }, { "epoch": 0.69, "grad_norm": 1.7605854873994538, "learning_rate": 2.448753257070224e-06, "loss": 0.526, "step": 6440 }, { "epoch": 0.69, "grad_norm": 1.883686503055561, "learning_rate": 2.4472365435123975e-06, "loss": 0.7186, "step": 6441 }, { "epoch": 0.69, "grad_norm": 1.1583102634356244, "learning_rate": 2.4457201476017014e-06, "loss": 0.5179, "step": 6442 }, { "epoch": 0.69, "grad_norm": 1.7861516893057714, "learning_rate": 2.444204069526823e-06, "loss": 0.6331, "step": 6443 }, { "epoch": 0.69, "grad_norm": 2.0347363416633235, "learning_rate": 2.4426883094764113e-06, "loss": 0.6017, "step": 6444 }, { "epoch": 0.69, "grad_norm": 1.9568999410087529, "learning_rate": 2.4411728676390774e-06, "loss": 0.6247, "step": 6445 }, { "epoch": 0.69, "grad_norm": 1.7202303900113067, "learning_rate": 2.43965774420339e-06, "loss": 0.6005, "step": 6446 }, { "epoch": 0.69, "grad_norm": 1.805865538820026, "learning_rate": 2.438142939357882e-06, "loss": 0.608, "step": 6447 }, { "epoch": 0.69, "grad_norm": 2.047536572656239, "learning_rate": 2.4366284532910434e-06, "loss": 0.5996, "step": 6448 }, { "epoch": 0.69, "grad_norm": 1.7030693117354117, "learning_rate": 2.4351142861913262e-06, "loss": 0.5815, "step": 6449 }, { "epoch": 0.69, "grad_norm": 1.8684387992382254, "learning_rate": 2.4336004382471417e-06, "loss": 0.5361, "step": 6450 }, { "epoch": 0.69, "grad_norm": 1.7008872629827791, "learning_rate": 2.432086909646862e-06, "loss": 0.5778, "step": 6451 }, { "epoch": 0.69, "grad_norm": 1.6456664717483938, "learning_rate": 2.430573700578822e-06, "loss": 0.6228, "step": 6452 }, { "epoch": 0.69, "grad_norm": 1.6842632648791773, "learning_rate": 2.4290608112313103e-06, "loss": 0.6991, "step": 6453 }, { "epoch": 0.69, "grad_norm": 1.8404840894207455, "learning_rate": 2.427548241792581e-06, "loss": 0.6191, "step": 6454 }, { "epoch": 0.69, "grad_norm": 1.744517416152594, "learning_rate": 2.426035992450848e-06, "loss": 0.6428, "step": 6455 }, { "epoch": 0.69, "grad_norm": 1.7582330138269118, "learning_rate": 2.424524063394284e-06, "loss": 0.5476, "step": 6456 }, { "epoch": 0.69, "grad_norm": 1.8551959715574469, "learning_rate": 2.423012454811023e-06, "loss": 0.5523, "step": 6457 }, { "epoch": 0.69, "grad_norm": 1.7711562271087606, "learning_rate": 2.4215011668891553e-06, "loss": 0.609, "step": 6458 }, { "epoch": 0.69, "grad_norm": 1.7474208236051751, "learning_rate": 2.4199901998167395e-06, "loss": 0.6224, "step": 6459 }, { "epoch": 0.69, "grad_norm": 1.7294415227345845, "learning_rate": 2.418479553781787e-06, "loss": 0.6113, "step": 6460 }, { "epoch": 0.69, "grad_norm": 1.6704608250614352, "learning_rate": 2.4169692289722707e-06, "loss": 0.5458, "step": 6461 }, { "epoch": 0.69, "grad_norm": 1.7880982821390163, "learning_rate": 2.415459225576125e-06, "loss": 0.5304, "step": 6462 }, { "epoch": 0.69, "grad_norm": 1.8067927442795881, "learning_rate": 2.4139495437812445e-06, "loss": 0.562, "step": 6463 }, { "epoch": 0.69, "grad_norm": 1.7540718235339383, "learning_rate": 2.412440183775481e-06, "loss": 0.6047, "step": 6464 }, { "epoch": 0.69, "grad_norm": 1.7706327797788948, "learning_rate": 2.4109311457466492e-06, "loss": 0.5505, "step": 6465 }, { "epoch": 0.69, "grad_norm": 1.5704120257364134, "learning_rate": 2.4094224298825237e-06, "loss": 0.5549, "step": 6466 }, { "epoch": 0.69, "grad_norm": 1.7715357774878122, "learning_rate": 2.407914036370838e-06, "loss": 0.5597, "step": 6467 }, { "epoch": 0.69, "grad_norm": 1.7326877384429822, "learning_rate": 2.4064059653992827e-06, "loss": 0.5699, "step": 6468 }, { "epoch": 0.69, "grad_norm": 2.1959573398996195, "learning_rate": 2.404898217155513e-06, "loss": 0.6683, "step": 6469 }, { "epoch": 0.69, "grad_norm": 1.8557769373564268, "learning_rate": 2.403390791827142e-06, "loss": 0.5533, "step": 6470 }, { "epoch": 0.69, "grad_norm": 1.1278898294172748, "learning_rate": 2.4018836896017426e-06, "loss": 0.5133, "step": 6471 }, { "epoch": 0.69, "grad_norm": 1.7896958031410652, "learning_rate": 2.4003769106668472e-06, "loss": 0.6419, "step": 6472 }, { "epoch": 0.69, "grad_norm": 1.847042813447593, "learning_rate": 2.3988704552099482e-06, "loss": 0.5272, "step": 6473 }, { "epoch": 0.69, "grad_norm": 1.6674110693253055, "learning_rate": 2.397364323418499e-06, "loss": 0.5159, "step": 6474 }, { "epoch": 0.69, "grad_norm": 2.806564681477031, "learning_rate": 2.3958585154799087e-06, "loss": 0.4919, "step": 6475 }, { "epoch": 0.69, "grad_norm": 1.9491487283257851, "learning_rate": 2.394353031581553e-06, "loss": 0.6287, "step": 6476 }, { "epoch": 0.69, "grad_norm": 1.1070891624996642, "learning_rate": 2.3928478719107606e-06, "loss": 0.5194, "step": 6477 }, { "epoch": 0.69, "grad_norm": 1.91444144196301, "learning_rate": 2.3913430366548235e-06, "loss": 0.6365, "step": 6478 }, { "epoch": 0.69, "grad_norm": 1.8651160879125024, "learning_rate": 2.3898385260009925e-06, "loss": 0.6717, "step": 6479 }, { "epoch": 0.69, "grad_norm": 1.1026498466282226, "learning_rate": 2.388334340136477e-06, "loss": 0.4929, "step": 6480 }, { "epoch": 0.69, "grad_norm": 1.8231812144734476, "learning_rate": 2.386830479248449e-06, "loss": 0.559, "step": 6481 }, { "epoch": 0.69, "grad_norm": 1.9081913363719774, "learning_rate": 2.385326943524035e-06, "loss": 0.5808, "step": 6482 }, { "epoch": 0.69, "grad_norm": 1.9127927075618198, "learning_rate": 2.3838237331503246e-06, "loss": 0.5423, "step": 6483 }, { "epoch": 0.69, "grad_norm": 1.7257063392691923, "learning_rate": 2.382320848314367e-06, "loss": 0.5868, "step": 6484 }, { "epoch": 0.69, "grad_norm": 1.8035569380061933, "learning_rate": 2.3808182892031688e-06, "loss": 0.6607, "step": 6485 }, { "epoch": 0.69, "grad_norm": 1.1296109014444138, "learning_rate": 2.3793160560036995e-06, "loss": 0.4968, "step": 6486 }, { "epoch": 0.69, "grad_norm": 1.883374000692052, "learning_rate": 2.377814148902885e-06, "loss": 0.5483, "step": 6487 }, { "epoch": 0.69, "grad_norm": 1.8382742134221721, "learning_rate": 2.376312568087611e-06, "loss": 0.6372, "step": 6488 }, { "epoch": 0.69, "grad_norm": 1.7349032028528417, "learning_rate": 2.3748113137447245e-06, "loss": 0.5543, "step": 6489 }, { "epoch": 0.69, "grad_norm": 1.038593057467163, "learning_rate": 2.373310386061029e-06, "loss": 0.4821, "step": 6490 }, { "epoch": 0.69, "grad_norm": 1.8287231109494417, "learning_rate": 2.3718097852232903e-06, "loss": 0.6002, "step": 6491 }, { "epoch": 0.69, "grad_norm": 1.8084637826583791, "learning_rate": 2.370309511418229e-06, "loss": 0.5448, "step": 6492 }, { "epoch": 0.69, "grad_norm": 1.6982339469535137, "learning_rate": 2.3688095648325326e-06, "loss": 0.4929, "step": 6493 }, { "epoch": 0.69, "grad_norm": 1.6876436720789352, "learning_rate": 2.367309945652842e-06, "loss": 0.5546, "step": 6494 }, { "epoch": 0.69, "grad_norm": 1.7539881739304832, "learning_rate": 2.3658106540657604e-06, "loss": 0.6246, "step": 6495 }, { "epoch": 0.69, "grad_norm": 1.7644250567301643, "learning_rate": 2.3643116902578443e-06, "loss": 0.6488, "step": 6496 }, { "epoch": 0.69, "grad_norm": 1.8313350177697265, "learning_rate": 2.362813054415616e-06, "loss": 0.6366, "step": 6497 }, { "epoch": 0.69, "grad_norm": 1.8857586811675704, "learning_rate": 2.361314746725555e-06, "loss": 0.6543, "step": 6498 }, { "epoch": 0.69, "grad_norm": 1.7342949699426902, "learning_rate": 2.359816767374099e-06, "loss": 0.4905, "step": 6499 }, { "epoch": 0.69, "grad_norm": 1.766345011019063, "learning_rate": 2.358319116547646e-06, "loss": 0.6697, "step": 6500 }, { "epoch": 0.69, "grad_norm": 1.862236117239651, "learning_rate": 2.3568217944325527e-06, "loss": 0.5451, "step": 6501 }, { "epoch": 0.69, "grad_norm": 1.7313293398053395, "learning_rate": 2.3553248012151347e-06, "loss": 0.5851, "step": 6502 }, { "epoch": 0.69, "grad_norm": 1.6507304395449989, "learning_rate": 2.3538281370816672e-06, "loss": 0.559, "step": 6503 }, { "epoch": 0.69, "grad_norm": 1.7780305242978311, "learning_rate": 2.352331802218384e-06, "loss": 0.6082, "step": 6504 }, { "epoch": 0.69, "grad_norm": 1.541802859921563, "learning_rate": 2.3508357968114777e-06, "loss": 0.6248, "step": 6505 }, { "epoch": 0.69, "grad_norm": 1.8808047601629596, "learning_rate": 2.3493401210471e-06, "loss": 0.5671, "step": 6506 }, { "epoch": 0.69, "grad_norm": 1.8234652000508895, "learning_rate": 2.347844775111362e-06, "loss": 0.6626, "step": 6507 }, { "epoch": 0.69, "grad_norm": 1.962008288643058, "learning_rate": 2.346349759190332e-06, "loss": 0.654, "step": 6508 }, { "epoch": 0.69, "grad_norm": 1.7561646897598315, "learning_rate": 2.3448550734700433e-06, "loss": 0.6768, "step": 6509 }, { "epoch": 0.69, "grad_norm": 1.5783602474988518, "learning_rate": 2.3433607181364815e-06, "loss": 0.5856, "step": 6510 }, { "epoch": 0.69, "grad_norm": 1.916153053557191, "learning_rate": 2.341866693375591e-06, "loss": 0.564, "step": 6511 }, { "epoch": 0.69, "grad_norm": 1.7732657297669836, "learning_rate": 2.3403729993732794e-06, "loss": 0.5699, "step": 6512 }, { "epoch": 0.69, "grad_norm": 1.9477315907992205, "learning_rate": 2.3388796363154097e-06, "loss": 0.5885, "step": 6513 }, { "epoch": 0.69, "grad_norm": 1.8356634931590894, "learning_rate": 2.3373866043878064e-06, "loss": 0.6026, "step": 6514 }, { "epoch": 0.69, "grad_norm": 1.8532909555310055, "learning_rate": 2.33589390377625e-06, "loss": 0.5818, "step": 6515 }, { "epoch": 0.7, "grad_norm": 1.6660298108221443, "learning_rate": 2.3344015346664823e-06, "loss": 0.5907, "step": 6516 }, { "epoch": 0.7, "grad_norm": 1.624630603022551, "learning_rate": 2.3329094972442023e-06, "loss": 0.4763, "step": 6517 }, { "epoch": 0.7, "grad_norm": 1.8767687450741894, "learning_rate": 2.331417791695068e-06, "loss": 0.5839, "step": 6518 }, { "epoch": 0.7, "grad_norm": 1.1800088214699465, "learning_rate": 2.329926418204697e-06, "loss": 0.5056, "step": 6519 }, { "epoch": 0.7, "grad_norm": 2.075232631994119, "learning_rate": 2.3284353769586643e-06, "loss": 0.5685, "step": 6520 }, { "epoch": 0.7, "grad_norm": 1.688158344645054, "learning_rate": 2.326944668142504e-06, "loss": 0.6462, "step": 6521 }, { "epoch": 0.7, "grad_norm": 1.577515304927316, "learning_rate": 2.3254542919417095e-06, "loss": 0.5885, "step": 6522 }, { "epoch": 0.7, "grad_norm": 1.0960709161975448, "learning_rate": 2.323964248541732e-06, "loss": 0.4684, "step": 6523 }, { "epoch": 0.7, "grad_norm": 1.7490825176768727, "learning_rate": 2.322474538127981e-06, "loss": 0.6395, "step": 6524 }, { "epoch": 0.7, "grad_norm": 1.7432263959906855, "learning_rate": 2.3209851608858256e-06, "loss": 0.6695, "step": 6525 }, { "epoch": 0.7, "grad_norm": 1.6320588356427792, "learning_rate": 2.3194961170005935e-06, "loss": 0.6016, "step": 6526 }, { "epoch": 0.7, "grad_norm": 1.9667400305860463, "learning_rate": 2.31800740665757e-06, "loss": 0.6922, "step": 6527 }, { "epoch": 0.7, "grad_norm": 1.7442369925068169, "learning_rate": 2.316519030041998e-06, "loss": 0.554, "step": 6528 }, { "epoch": 0.7, "grad_norm": 1.6443747737949783, "learning_rate": 2.315030987339082e-06, "loss": 0.5107, "step": 6529 }, { "epoch": 0.7, "grad_norm": 1.6837420727326409, "learning_rate": 2.3135432787339825e-06, "loss": 0.6614, "step": 6530 }, { "epoch": 0.7, "grad_norm": 2.2390317283954957, "learning_rate": 2.3120559044118185e-06, "loss": 0.5552, "step": 6531 }, { "epoch": 0.7, "grad_norm": 1.716933175651395, "learning_rate": 2.3105688645576692e-06, "loss": 0.5618, "step": 6532 }, { "epoch": 0.7, "grad_norm": 1.7722080878853634, "learning_rate": 2.309082159356569e-06, "loss": 0.5775, "step": 6533 }, { "epoch": 0.7, "grad_norm": 1.7008913997063992, "learning_rate": 2.3075957889935143e-06, "loss": 0.5965, "step": 6534 }, { "epoch": 0.7, "grad_norm": 1.7672577476857487, "learning_rate": 2.306109753653457e-06, "loss": 0.5735, "step": 6535 }, { "epoch": 0.7, "grad_norm": 1.9290323458341068, "learning_rate": 2.304624053521309e-06, "loss": 0.5925, "step": 6536 }, { "epoch": 0.7, "grad_norm": 1.1458911451599112, "learning_rate": 2.3031386887819395e-06, "loss": 0.494, "step": 6537 }, { "epoch": 0.7, "grad_norm": 1.5061115930464304, "learning_rate": 2.3016536596201773e-06, "loss": 0.483, "step": 6538 }, { "epoch": 0.7, "grad_norm": 1.840383462688336, "learning_rate": 2.300168966220807e-06, "loss": 0.6232, "step": 6539 }, { "epoch": 0.7, "grad_norm": 1.834819481536335, "learning_rate": 2.298684608768574e-06, "loss": 0.6291, "step": 6540 }, { "epoch": 0.7, "grad_norm": 1.7766171136060707, "learning_rate": 2.297200587448181e-06, "loss": 0.6333, "step": 6541 }, { "epoch": 0.7, "grad_norm": 1.715526829405008, "learning_rate": 2.2957169024442887e-06, "loss": 0.6205, "step": 6542 }, { "epoch": 0.7, "grad_norm": 1.7948211398678742, "learning_rate": 2.294233553941515e-06, "loss": 0.6828, "step": 6543 }, { "epoch": 0.7, "grad_norm": 1.7977150034716736, "learning_rate": 2.2927505421244374e-06, "loss": 0.5627, "step": 6544 }, { "epoch": 0.7, "grad_norm": 1.8888203608282088, "learning_rate": 2.291267867177591e-06, "loss": 0.5647, "step": 6545 }, { "epoch": 0.7, "grad_norm": 1.8654366768717516, "learning_rate": 2.28978552928547e-06, "loss": 0.5405, "step": 6546 }, { "epoch": 0.7, "grad_norm": 1.1574028084139014, "learning_rate": 2.2883035286325234e-06, "loss": 0.4958, "step": 6547 }, { "epoch": 0.7, "grad_norm": 1.6805506671402508, "learning_rate": 2.2868218654031626e-06, "loss": 0.5496, "step": 6548 }, { "epoch": 0.7, "grad_norm": 1.1047293005690522, "learning_rate": 2.2853405397817535e-06, "loss": 0.4927, "step": 6549 }, { "epoch": 0.7, "grad_norm": 1.8076665063131498, "learning_rate": 2.2838595519526226e-06, "loss": 0.579, "step": 6550 }, { "epoch": 0.7, "grad_norm": 1.7354731584100564, "learning_rate": 2.282378902100052e-06, "loss": 0.613, "step": 6551 }, { "epoch": 0.7, "grad_norm": 1.084591954294235, "learning_rate": 2.2808985904082832e-06, "loss": 0.5013, "step": 6552 }, { "epoch": 0.7, "grad_norm": 1.9043233242242914, "learning_rate": 2.2794186170615173e-06, "loss": 0.6484, "step": 6553 }, { "epoch": 0.7, "grad_norm": 1.6897980276488356, "learning_rate": 2.2779389822439065e-06, "loss": 0.5895, "step": 6554 }, { "epoch": 0.7, "grad_norm": 1.8108895811911339, "learning_rate": 2.27645968613957e-06, "loss": 0.6233, "step": 6555 }, { "epoch": 0.7, "grad_norm": 1.8525432040207621, "learning_rate": 2.274980728932579e-06, "loss": 0.6063, "step": 6556 }, { "epoch": 0.7, "grad_norm": 1.527458681906237, "learning_rate": 2.2735021108069642e-06, "loss": 0.4928, "step": 6557 }, { "epoch": 0.7, "grad_norm": 1.6598127791584425, "learning_rate": 2.272023831946715e-06, "loss": 0.6167, "step": 6558 }, { "epoch": 0.7, "grad_norm": 1.638041604653473, "learning_rate": 2.2705458925357756e-06, "loss": 0.5588, "step": 6559 }, { "epoch": 0.7, "grad_norm": 1.1785630700646037, "learning_rate": 2.269068292758051e-06, "loss": 0.4913, "step": 6560 }, { "epoch": 0.7, "grad_norm": 1.6777639552513472, "learning_rate": 2.2675910327974034e-06, "loss": 0.5012, "step": 6561 }, { "epoch": 0.7, "grad_norm": 1.5099942388784502, "learning_rate": 2.2661141128376505e-06, "loss": 0.5254, "step": 6562 }, { "epoch": 0.7, "grad_norm": 1.705444530145336, "learning_rate": 2.2646375330625716e-06, "loss": 0.5622, "step": 6563 }, { "epoch": 0.7, "grad_norm": 2.1043587397366013, "learning_rate": 2.2631612936558994e-06, "loss": 0.6488, "step": 6564 }, { "epoch": 0.7, "grad_norm": 2.5329851877618106, "learning_rate": 2.261685394801328e-06, "loss": 0.6629, "step": 6565 }, { "epoch": 0.7, "grad_norm": 1.7340708692238218, "learning_rate": 2.2602098366825058e-06, "loss": 0.6276, "step": 6566 }, { "epoch": 0.7, "grad_norm": 1.7425130921631782, "learning_rate": 2.2587346194830434e-06, "loss": 0.6552, "step": 6567 }, { "epoch": 0.7, "grad_norm": 1.9836425826877275, "learning_rate": 2.2572597433865017e-06, "loss": 0.5392, "step": 6568 }, { "epoch": 0.7, "grad_norm": 1.86482924450067, "learning_rate": 2.2557852085764053e-06, "loss": 0.7007, "step": 6569 }, { "epoch": 0.7, "grad_norm": 1.1047891647656474, "learning_rate": 2.2543110152362354e-06, "loss": 0.4903, "step": 6570 }, { "epoch": 0.7, "grad_norm": 1.6323313077438508, "learning_rate": 2.2528371635494267e-06, "loss": 0.5048, "step": 6571 }, { "epoch": 0.7, "grad_norm": 1.7379788397817761, "learning_rate": 2.251363653699379e-06, "loss": 0.6069, "step": 6572 }, { "epoch": 0.7, "grad_norm": 1.7136913309661566, "learning_rate": 2.2498904858694424e-06, "loss": 0.6027, "step": 6573 }, { "epoch": 0.7, "grad_norm": 1.6506116442834409, "learning_rate": 2.2484176602429282e-06, "loss": 0.531, "step": 6574 }, { "epoch": 0.7, "grad_norm": 1.9317851921848805, "learning_rate": 2.2469451770031026e-06, "loss": 0.6097, "step": 6575 }, { "epoch": 0.7, "grad_norm": 1.550862715265153, "learning_rate": 2.245473036333192e-06, "loss": 0.5375, "step": 6576 }, { "epoch": 0.7, "grad_norm": 1.7704478254532987, "learning_rate": 2.2440012384163778e-06, "loss": 0.6164, "step": 6577 }, { "epoch": 0.7, "grad_norm": 1.8806148063038768, "learning_rate": 2.2425297834358e-06, "loss": 0.6638, "step": 6578 }, { "epoch": 0.7, "grad_norm": 1.9360727508723248, "learning_rate": 2.2410586715745557e-06, "loss": 0.5553, "step": 6579 }, { "epoch": 0.7, "grad_norm": 1.1225537513697559, "learning_rate": 2.2395879030156997e-06, "loss": 0.4772, "step": 6580 }, { "epoch": 0.7, "grad_norm": 1.7475407903608358, "learning_rate": 2.238117477942243e-06, "loss": 0.6143, "step": 6581 }, { "epoch": 0.7, "grad_norm": 1.6916188733250663, "learning_rate": 2.236647396537156e-06, "loss": 0.5756, "step": 6582 }, { "epoch": 0.7, "grad_norm": 1.8678763613994298, "learning_rate": 2.2351776589833623e-06, "loss": 0.5517, "step": 6583 }, { "epoch": 0.7, "grad_norm": 1.7141185494821074, "learning_rate": 2.233708265463746e-06, "loss": 0.5672, "step": 6584 }, { "epoch": 0.7, "grad_norm": 1.0618562663549576, "learning_rate": 2.2322392161611484e-06, "loss": 0.4851, "step": 6585 }, { "epoch": 0.7, "grad_norm": 1.7385406692813778, "learning_rate": 2.230770511258367e-06, "loss": 0.6137, "step": 6586 }, { "epoch": 0.7, "grad_norm": 1.9320660048759892, "learning_rate": 2.229302150938156e-06, "loss": 0.5858, "step": 6587 }, { "epoch": 0.7, "grad_norm": 1.815659041195711, "learning_rate": 2.227834135383226e-06, "loss": 0.6099, "step": 6588 }, { "epoch": 0.7, "grad_norm": 1.7182886122634242, "learning_rate": 2.22636646477625e-06, "loss": 0.6293, "step": 6589 }, { "epoch": 0.7, "grad_norm": 2.176416857157666, "learning_rate": 2.2248991392998527e-06, "loss": 0.6098, "step": 6590 }, { "epoch": 0.7, "grad_norm": 1.8296060930161824, "learning_rate": 2.223432159136616e-06, "loss": 0.627, "step": 6591 }, { "epoch": 0.7, "grad_norm": 2.0205745618997084, "learning_rate": 2.221965524469081e-06, "loss": 0.6514, "step": 6592 }, { "epoch": 0.7, "grad_norm": 1.8406340826768155, "learning_rate": 2.220499235479745e-06, "loss": 0.6056, "step": 6593 }, { "epoch": 0.7, "grad_norm": 1.094756183220965, "learning_rate": 2.2190332923510616e-06, "loss": 0.5299, "step": 6594 }, { "epoch": 0.7, "grad_norm": 1.864785581913554, "learning_rate": 2.2175676952654423e-06, "loss": 0.653, "step": 6595 }, { "epoch": 0.7, "grad_norm": 1.6877766589718852, "learning_rate": 2.216102444405258e-06, "loss": 0.536, "step": 6596 }, { "epoch": 0.7, "grad_norm": 1.6597289636489951, "learning_rate": 2.2146375399528285e-06, "loss": 0.5365, "step": 6597 }, { "epoch": 0.7, "grad_norm": 1.880266748568519, "learning_rate": 2.2131729820904384e-06, "loss": 0.6467, "step": 6598 }, { "epoch": 0.7, "grad_norm": 1.898558839624239, "learning_rate": 2.211708771000327e-06, "loss": 0.7005, "step": 6599 }, { "epoch": 0.7, "grad_norm": 1.772848051691749, "learning_rate": 2.210244906864689e-06, "loss": 0.6035, "step": 6600 }, { "epoch": 0.7, "grad_norm": 1.8053390789945405, "learning_rate": 2.2087813898656775e-06, "loss": 0.6461, "step": 6601 }, { "epoch": 0.7, "grad_norm": 1.748613064456298, "learning_rate": 2.2073182201854016e-06, "loss": 0.6024, "step": 6602 }, { "epoch": 0.7, "grad_norm": 1.7346870996353514, "learning_rate": 2.2058553980059272e-06, "loss": 0.5416, "step": 6603 }, { "epoch": 0.7, "grad_norm": 1.8998163572211124, "learning_rate": 2.2043929235092776e-06, "loss": 0.5801, "step": 6604 }, { "epoch": 0.7, "grad_norm": 1.5536750295938775, "learning_rate": 2.20293079687743e-06, "loss": 0.5462, "step": 6605 }, { "epoch": 0.7, "grad_norm": 1.744343073873987, "learning_rate": 2.2014690182923247e-06, "loss": 0.5687, "step": 6606 }, { "epoch": 0.7, "grad_norm": 1.7780644039259434, "learning_rate": 2.2000075879358535e-06, "loss": 0.5255, "step": 6607 }, { "epoch": 0.7, "grad_norm": 1.8272672857776617, "learning_rate": 2.1985465059898645e-06, "loss": 0.6174, "step": 6608 }, { "epoch": 0.7, "grad_norm": 1.2219660869748104, "learning_rate": 2.1970857726361655e-06, "loss": 0.5087, "step": 6609 }, { "epoch": 0.71, "grad_norm": 1.8897785456309395, "learning_rate": 2.1956253880565204e-06, "loss": 0.6055, "step": 6610 }, { "epoch": 0.71, "grad_norm": 2.035934912313602, "learning_rate": 2.194165352432645e-06, "loss": 0.6251, "step": 6611 }, { "epoch": 0.71, "grad_norm": 1.6908291983852568, "learning_rate": 2.1927056659462183e-06, "loss": 0.4806, "step": 6612 }, { "epoch": 0.71, "grad_norm": 1.803934185935237, "learning_rate": 2.1912463287788716e-06, "loss": 0.586, "step": 6613 }, { "epoch": 0.71, "grad_norm": 1.8066004273239533, "learning_rate": 2.1897873411121945e-06, "loss": 0.5649, "step": 6614 }, { "epoch": 0.71, "grad_norm": 1.5882866128269333, "learning_rate": 2.1883287031277333e-06, "loss": 0.6325, "step": 6615 }, { "epoch": 0.71, "grad_norm": 1.0787597714560015, "learning_rate": 2.18687041500699e-06, "loss": 0.4938, "step": 6616 }, { "epoch": 0.71, "grad_norm": 1.690885290022315, "learning_rate": 2.1854124769314222e-06, "loss": 0.5591, "step": 6617 }, { "epoch": 0.71, "grad_norm": 1.8350106424564072, "learning_rate": 2.1839548890824463e-06, "loss": 0.6396, "step": 6618 }, { "epoch": 0.71, "grad_norm": 1.8602492421204166, "learning_rate": 2.182497651641434e-06, "loss": 0.5865, "step": 6619 }, { "epoch": 0.71, "grad_norm": 1.8915755804476824, "learning_rate": 2.181040764789712e-06, "loss": 0.6245, "step": 6620 }, { "epoch": 0.71, "grad_norm": 1.806295152430452, "learning_rate": 2.179584228708565e-06, "loss": 0.5973, "step": 6621 }, { "epoch": 0.71, "grad_norm": 1.8303275313457577, "learning_rate": 2.178128043579232e-06, "loss": 0.5625, "step": 6622 }, { "epoch": 0.71, "grad_norm": 1.618170823376766, "learning_rate": 2.176672209582914e-06, "loss": 0.5117, "step": 6623 }, { "epoch": 0.71, "grad_norm": 1.8034655074231822, "learning_rate": 2.1752167269007618e-06, "loss": 0.7101, "step": 6624 }, { "epoch": 0.71, "grad_norm": 1.158641084466733, "learning_rate": 2.1737615957138874e-06, "loss": 0.4951, "step": 6625 }, { "epoch": 0.71, "grad_norm": 1.738962523693109, "learning_rate": 2.172306816203353e-06, "loss": 0.6699, "step": 6626 }, { "epoch": 0.71, "grad_norm": 1.9721671247604897, "learning_rate": 2.1708523885501813e-06, "loss": 0.7777, "step": 6627 }, { "epoch": 0.71, "grad_norm": 1.8205238981525187, "learning_rate": 2.169398312935352e-06, "loss": 0.6242, "step": 6628 }, { "epoch": 0.71, "grad_norm": 1.0857067797884705, "learning_rate": 2.1679445895397987e-06, "loss": 0.5017, "step": 6629 }, { "epoch": 0.71, "grad_norm": 1.913194495082694, "learning_rate": 2.1664912185444127e-06, "loss": 0.6327, "step": 6630 }, { "epoch": 0.71, "grad_norm": 1.8190542708234907, "learning_rate": 2.165038200130041e-06, "loss": 0.6169, "step": 6631 }, { "epoch": 0.71, "grad_norm": 2.4749586302192963, "learning_rate": 2.1635855344774853e-06, "loss": 0.5058, "step": 6632 }, { "epoch": 0.71, "grad_norm": 1.631200631281387, "learning_rate": 2.1621332217675056e-06, "loss": 0.5346, "step": 6633 }, { "epoch": 0.71, "grad_norm": 1.774484965105909, "learning_rate": 2.1606812621808165e-06, "loss": 0.4824, "step": 6634 }, { "epoch": 0.71, "grad_norm": 1.8001065389752018, "learning_rate": 2.15922965589809e-06, "loss": 0.628, "step": 6635 }, { "epoch": 0.71, "grad_norm": 1.843403173556055, "learning_rate": 2.157778403099953e-06, "loss": 0.5952, "step": 6636 }, { "epoch": 0.71, "grad_norm": 1.9096479535549484, "learning_rate": 2.156327503966988e-06, "loss": 0.6048, "step": 6637 }, { "epoch": 0.71, "grad_norm": 1.5880706347560887, "learning_rate": 2.1548769586797354e-06, "loss": 0.4838, "step": 6638 }, { "epoch": 0.71, "grad_norm": 1.794095340089798, "learning_rate": 2.1534267674186894e-06, "loss": 0.6299, "step": 6639 }, { "epoch": 0.71, "grad_norm": 1.646968900249753, "learning_rate": 2.151976930364301e-06, "loss": 0.5737, "step": 6640 }, { "epoch": 0.71, "grad_norm": 1.7832523365479287, "learning_rate": 2.1505274476969782e-06, "loss": 0.6379, "step": 6641 }, { "epoch": 0.71, "grad_norm": 1.692757891495506, "learning_rate": 2.149078319597084e-06, "loss": 0.5785, "step": 6642 }, { "epoch": 0.71, "grad_norm": 1.8601775962283846, "learning_rate": 2.1476295462449363e-06, "loss": 0.6089, "step": 6643 }, { "epoch": 0.71, "grad_norm": 1.936705226737404, "learning_rate": 2.146181127820811e-06, "loss": 0.6396, "step": 6644 }, { "epoch": 0.71, "grad_norm": 1.244825841427278, "learning_rate": 2.1447330645049376e-06, "loss": 0.4881, "step": 6645 }, { "epoch": 0.71, "grad_norm": 1.8397347847427492, "learning_rate": 2.1432853564775027e-06, "loss": 0.5271, "step": 6646 }, { "epoch": 0.71, "grad_norm": 1.613466687187041, "learning_rate": 2.1418380039186486e-06, "loss": 0.5412, "step": 6647 }, { "epoch": 0.71, "grad_norm": 3.308245878128552, "learning_rate": 2.1403910070084733e-06, "loss": 0.5531, "step": 6648 }, { "epoch": 0.71, "grad_norm": 1.6069763390017782, "learning_rate": 2.1389443659270304e-06, "loss": 0.5163, "step": 6649 }, { "epoch": 0.71, "grad_norm": 1.8460806569754045, "learning_rate": 2.1374980808543294e-06, "loss": 0.5209, "step": 6650 }, { "epoch": 0.71, "grad_norm": 1.6680636069622992, "learning_rate": 2.1360521519703355e-06, "loss": 0.6151, "step": 6651 }, { "epoch": 0.71, "grad_norm": 1.0931049053197592, "learning_rate": 2.1346065794549686e-06, "loss": 0.5071, "step": 6652 }, { "epoch": 0.71, "grad_norm": 1.9998875938612133, "learning_rate": 2.133161363488106e-06, "loss": 0.6272, "step": 6653 }, { "epoch": 0.71, "grad_norm": 1.7117279216593944, "learning_rate": 2.131716504249579e-06, "loss": 0.5469, "step": 6654 }, { "epoch": 0.71, "grad_norm": 1.7519963529551363, "learning_rate": 2.1302720019191766e-06, "loss": 0.6193, "step": 6655 }, { "epoch": 0.71, "grad_norm": 2.100548539571435, "learning_rate": 2.1288278566766405e-06, "loss": 0.6122, "step": 6656 }, { "epoch": 0.71, "grad_norm": 1.6094737969256985, "learning_rate": 2.1273840687016706e-06, "loss": 0.5347, "step": 6657 }, { "epoch": 0.71, "grad_norm": 1.6320219938502425, "learning_rate": 2.1259406381739214e-06, "loss": 0.483, "step": 6658 }, { "epoch": 0.71, "grad_norm": 1.6526525274619912, "learning_rate": 2.1244975652730014e-06, "loss": 0.5587, "step": 6659 }, { "epoch": 0.71, "grad_norm": 2.0788329974429396, "learning_rate": 2.1230548501784774e-06, "loss": 0.5365, "step": 6660 }, { "epoch": 0.71, "grad_norm": 1.9250325116889966, "learning_rate": 2.1216124930698697e-06, "loss": 0.6418, "step": 6661 }, { "epoch": 0.71, "grad_norm": 2.0463355488394384, "learning_rate": 2.1201704941266542e-06, "loss": 0.5995, "step": 6662 }, { "epoch": 0.71, "grad_norm": 1.6580966028426665, "learning_rate": 2.118728853528264e-06, "loss": 0.4917, "step": 6663 }, { "epoch": 0.71, "grad_norm": 1.8888143467072085, "learning_rate": 2.1172875714540846e-06, "loss": 0.671, "step": 6664 }, { "epoch": 0.71, "grad_norm": 1.8639869788573564, "learning_rate": 2.11584664808346e-06, "loss": 0.6558, "step": 6665 }, { "epoch": 0.71, "grad_norm": 1.8585377687041496, "learning_rate": 2.114406083595687e-06, "loss": 0.6168, "step": 6666 }, { "epoch": 0.71, "grad_norm": 1.8379158881009465, "learning_rate": 2.1129658781700194e-06, "loss": 0.619, "step": 6667 }, { "epoch": 0.71, "grad_norm": 1.8494355376354976, "learning_rate": 2.111526031985666e-06, "loss": 0.5429, "step": 6668 }, { "epoch": 0.71, "grad_norm": 1.8507973623580776, "learning_rate": 2.11008654522179e-06, "loss": 0.6213, "step": 6669 }, { "epoch": 0.71, "grad_norm": 1.7022583637788355, "learning_rate": 2.1086474180575113e-06, "loss": 0.5452, "step": 6670 }, { "epoch": 0.71, "grad_norm": 1.8543754378705377, "learning_rate": 2.1072086506719035e-06, "loss": 0.528, "step": 6671 }, { "epoch": 0.71, "grad_norm": 1.7661636916196788, "learning_rate": 2.1057702432439976e-06, "loss": 0.5661, "step": 6672 }, { "epoch": 0.71, "grad_norm": 1.8268125824436512, "learning_rate": 2.104332195952777e-06, "loss": 0.6364, "step": 6673 }, { "epoch": 0.71, "grad_norm": 1.9931677472944618, "learning_rate": 2.102894508977182e-06, "loss": 0.5381, "step": 6674 }, { "epoch": 0.71, "grad_norm": 1.7584659139277037, "learning_rate": 2.1014571824961085e-06, "loss": 0.5302, "step": 6675 }, { "epoch": 0.71, "grad_norm": 1.7794505073928657, "learning_rate": 2.100020216688406e-06, "loss": 0.5929, "step": 6676 }, { "epoch": 0.71, "grad_norm": 1.6509697695235728, "learning_rate": 2.0985836117328805e-06, "loss": 0.4799, "step": 6677 }, { "epoch": 0.71, "grad_norm": 1.8014375033582304, "learning_rate": 2.097147367808293e-06, "loss": 0.5872, "step": 6678 }, { "epoch": 0.71, "grad_norm": 1.8116854243289278, "learning_rate": 2.095711485093358e-06, "loss": 0.607, "step": 6679 }, { "epoch": 0.71, "grad_norm": 1.1195487432164768, "learning_rate": 2.094275963766747e-06, "loss": 0.4958, "step": 6680 }, { "epoch": 0.71, "grad_norm": 1.7324036764484934, "learning_rate": 2.0928408040070853e-06, "loss": 0.5978, "step": 6681 }, { "epoch": 0.71, "grad_norm": 1.734914055162931, "learning_rate": 2.091406005992956e-06, "loss": 0.5141, "step": 6682 }, { "epoch": 0.71, "grad_norm": 1.086655750087574, "learning_rate": 2.0899715699028905e-06, "loss": 0.4756, "step": 6683 }, { "epoch": 0.71, "grad_norm": 1.822990081972234, "learning_rate": 2.088537495915382e-06, "loss": 0.5775, "step": 6684 }, { "epoch": 0.71, "grad_norm": 2.039042865366609, "learning_rate": 2.087103784208874e-06, "loss": 0.6406, "step": 6685 }, { "epoch": 0.71, "grad_norm": 1.7990058210540152, "learning_rate": 2.0856704349617707e-06, "loss": 0.6408, "step": 6686 }, { "epoch": 0.71, "grad_norm": 1.0894838185972087, "learning_rate": 2.0842374483524256e-06, "loss": 0.475, "step": 6687 }, { "epoch": 0.71, "grad_norm": 1.7816820523774006, "learning_rate": 2.0828048245591492e-06, "loss": 0.5944, "step": 6688 }, { "epoch": 0.71, "grad_norm": 1.1767908239932179, "learning_rate": 2.0813725637602068e-06, "loss": 0.5011, "step": 6689 }, { "epoch": 0.71, "grad_norm": 2.0992891960442868, "learning_rate": 2.0799406661338178e-06, "loss": 0.5765, "step": 6690 }, { "epoch": 0.71, "grad_norm": 1.9545954183867427, "learning_rate": 2.0785091318581577e-06, "loss": 0.6388, "step": 6691 }, { "epoch": 0.71, "grad_norm": 1.67643331646551, "learning_rate": 2.0770779611113556e-06, "loss": 0.6446, "step": 6692 }, { "epoch": 0.71, "grad_norm": 1.521168829965018, "learning_rate": 2.0756471540714966e-06, "loss": 0.4336, "step": 6693 }, { "epoch": 0.71, "grad_norm": 1.9595068970041711, "learning_rate": 2.074216710916619e-06, "loss": 0.5664, "step": 6694 }, { "epoch": 0.71, "grad_norm": 1.7997457841879931, "learning_rate": 2.0727866318247168e-06, "loss": 0.5817, "step": 6695 }, { "epoch": 0.71, "grad_norm": 1.7141273304630273, "learning_rate": 2.071356916973739e-06, "loss": 0.5952, "step": 6696 }, { "epoch": 0.71, "grad_norm": 1.7441701470175626, "learning_rate": 2.069927566541591e-06, "loss": 0.5764, "step": 6697 }, { "epoch": 0.71, "grad_norm": 1.7057292454443962, "learning_rate": 2.0684985807061253e-06, "loss": 0.5484, "step": 6698 }, { "epoch": 0.71, "grad_norm": 2.2154303119601946, "learning_rate": 2.0670699596451575e-06, "loss": 0.6321, "step": 6699 }, { "epoch": 0.71, "grad_norm": 1.7042512473381488, "learning_rate": 2.0656417035364547e-06, "loss": 0.6165, "step": 6700 }, { "epoch": 0.71, "grad_norm": 1.9160489184143832, "learning_rate": 2.0642138125577384e-06, "loss": 0.6208, "step": 6701 }, { "epoch": 0.71, "grad_norm": 1.9612175267856202, "learning_rate": 2.0627862868866825e-06, "loss": 0.6254, "step": 6702 }, { "epoch": 0.71, "grad_norm": 1.8567476550722806, "learning_rate": 2.0613591267009223e-06, "loss": 0.5895, "step": 6703 }, { "epoch": 0.72, "grad_norm": 1.1540074117036458, "learning_rate": 2.059932332178041e-06, "loss": 0.5095, "step": 6704 }, { "epoch": 0.72, "grad_norm": 1.9631964812965748, "learning_rate": 2.058505903495579e-06, "loss": 0.6084, "step": 6705 }, { "epoch": 0.72, "grad_norm": 1.953185410638523, "learning_rate": 2.0570798408310294e-06, "loss": 0.5887, "step": 6706 }, { "epoch": 0.72, "grad_norm": 1.627176696788143, "learning_rate": 2.0556541443618416e-06, "loss": 0.5165, "step": 6707 }, { "epoch": 0.72, "grad_norm": 1.999095077704268, "learning_rate": 2.054228814265419e-06, "loss": 0.6184, "step": 6708 }, { "epoch": 0.72, "grad_norm": 1.759135496514968, "learning_rate": 2.052803850719119e-06, "loss": 0.538, "step": 6709 }, { "epoch": 0.72, "grad_norm": 1.8204327060375753, "learning_rate": 2.051379253900253e-06, "loss": 0.6306, "step": 6710 }, { "epoch": 0.72, "grad_norm": 1.5534719887217041, "learning_rate": 2.0499550239860906e-06, "loss": 0.5358, "step": 6711 }, { "epoch": 0.72, "grad_norm": 1.581624406107951, "learning_rate": 2.0485311611538472e-06, "loss": 0.4688, "step": 6712 }, { "epoch": 0.72, "grad_norm": 2.1298714457612884, "learning_rate": 2.047107665580701e-06, "loss": 0.6023, "step": 6713 }, { "epoch": 0.72, "grad_norm": 1.0894225077143627, "learning_rate": 2.0456845374437808e-06, "loss": 0.498, "step": 6714 }, { "epoch": 0.72, "grad_norm": 1.5230098585216356, "learning_rate": 2.0442617769201695e-06, "loss": 0.479, "step": 6715 }, { "epoch": 0.72, "grad_norm": 1.0778922225262397, "learning_rate": 2.0428393841869066e-06, "loss": 0.4748, "step": 6716 }, { "epoch": 0.72, "grad_norm": 1.9461066077515519, "learning_rate": 2.0414173594209826e-06, "loss": 0.6051, "step": 6717 }, { "epoch": 0.72, "grad_norm": 1.7702283792418843, "learning_rate": 2.0399957027993427e-06, "loss": 0.5364, "step": 6718 }, { "epoch": 0.72, "grad_norm": 1.8147214815427024, "learning_rate": 2.038574414498892e-06, "loss": 0.5789, "step": 6719 }, { "epoch": 0.72, "grad_norm": 1.72677947076908, "learning_rate": 2.037153494696481e-06, "loss": 0.5127, "step": 6720 }, { "epoch": 0.72, "grad_norm": 1.8027300352688567, "learning_rate": 2.0357329435689203e-06, "loss": 0.5267, "step": 6721 }, { "epoch": 0.72, "grad_norm": 2.1264855282851847, "learning_rate": 2.0343127612929724e-06, "loss": 0.6117, "step": 6722 }, { "epoch": 0.72, "grad_norm": 1.5654730790568152, "learning_rate": 2.0328929480453547e-06, "loss": 0.6441, "step": 6723 }, { "epoch": 0.72, "grad_norm": 2.0797192313308375, "learning_rate": 2.031473504002738e-06, "loss": 0.6024, "step": 6724 }, { "epoch": 0.72, "grad_norm": 1.797485536501312, "learning_rate": 2.030054429341749e-06, "loss": 0.5353, "step": 6725 }, { "epoch": 0.72, "grad_norm": 1.8957354412109935, "learning_rate": 2.0286357242389636e-06, "loss": 0.5534, "step": 6726 }, { "epoch": 0.72, "grad_norm": 1.934916255935512, "learning_rate": 2.0272173888709174e-06, "loss": 0.6525, "step": 6727 }, { "epoch": 0.72, "grad_norm": 1.5121035152459295, "learning_rate": 2.0257994234140965e-06, "loss": 0.5343, "step": 6728 }, { "epoch": 0.72, "grad_norm": 1.9100321628010308, "learning_rate": 2.024381828044943e-06, "loss": 0.5434, "step": 6729 }, { "epoch": 0.72, "grad_norm": 1.6886167769750224, "learning_rate": 2.0229646029398513e-06, "loss": 0.613, "step": 6730 }, { "epoch": 0.72, "grad_norm": 1.6191026339380685, "learning_rate": 2.021547748275172e-06, "loss": 0.5224, "step": 6731 }, { "epoch": 0.72, "grad_norm": 1.8936842012427195, "learning_rate": 2.020131264227206e-06, "loss": 0.7025, "step": 6732 }, { "epoch": 0.72, "grad_norm": 1.7366204261695481, "learning_rate": 2.0187151509722112e-06, "loss": 0.5613, "step": 6733 }, { "epoch": 0.72, "grad_norm": 1.747568349544356, "learning_rate": 2.0172994086863984e-06, "loss": 0.5202, "step": 6734 }, { "epoch": 0.72, "grad_norm": 1.2148764737315534, "learning_rate": 2.0158840375459306e-06, "loss": 0.523, "step": 6735 }, { "epoch": 0.72, "grad_norm": 2.0313169665610102, "learning_rate": 2.0144690377269294e-06, "loss": 0.7062, "step": 6736 }, { "epoch": 0.72, "grad_norm": 1.7629060019916798, "learning_rate": 2.013054409405465e-06, "loss": 0.5413, "step": 6737 }, { "epoch": 0.72, "grad_norm": 1.7153801040251653, "learning_rate": 2.0116401527575637e-06, "loss": 0.6244, "step": 6738 }, { "epoch": 0.72, "grad_norm": 1.6699275131896039, "learning_rate": 2.010226267959205e-06, "loss": 0.5477, "step": 6739 }, { "epoch": 0.72, "grad_norm": 1.6495235598382125, "learning_rate": 2.008812755186324e-06, "loss": 0.5567, "step": 6740 }, { "epoch": 0.72, "grad_norm": 1.0872700151790875, "learning_rate": 2.007399614614805e-06, "loss": 0.4997, "step": 6741 }, { "epoch": 0.72, "grad_norm": 1.0755021190860794, "learning_rate": 2.0059868464204895e-06, "loss": 0.4944, "step": 6742 }, { "epoch": 0.72, "grad_norm": 1.0899800256049763, "learning_rate": 2.004574450779173e-06, "loss": 0.5097, "step": 6743 }, { "epoch": 0.72, "grad_norm": 1.0928532603756145, "learning_rate": 2.0031624278666036e-06, "loss": 0.4964, "step": 6744 }, { "epoch": 0.72, "grad_norm": 1.5993594957965882, "learning_rate": 2.001750777858482e-06, "loss": 0.5307, "step": 6745 }, { "epoch": 0.72, "grad_norm": 1.8092177958325482, "learning_rate": 2.0003395009304643e-06, "loss": 0.5252, "step": 6746 }, { "epoch": 0.72, "grad_norm": 1.0482965403946123, "learning_rate": 1.9989285972581603e-06, "loss": 0.5023, "step": 6747 }, { "epoch": 0.72, "grad_norm": 1.8447141214167164, "learning_rate": 1.9975180670171308e-06, "loss": 0.5584, "step": 6748 }, { "epoch": 0.72, "grad_norm": 1.7393943453603766, "learning_rate": 1.9961079103828923e-06, "loss": 0.4677, "step": 6749 }, { "epoch": 0.72, "grad_norm": 1.8769409512343995, "learning_rate": 1.994698127530915e-06, "loss": 0.6068, "step": 6750 }, { "epoch": 0.72, "grad_norm": 1.7212660450219965, "learning_rate": 1.993288718636621e-06, "loss": 0.5848, "step": 6751 }, { "epoch": 0.72, "grad_norm": 1.8053684705225523, "learning_rate": 1.991879683875386e-06, "loss": 0.5392, "step": 6752 }, { "epoch": 0.72, "grad_norm": 2.0152617122651946, "learning_rate": 1.990471023422543e-06, "loss": 0.6807, "step": 6753 }, { "epoch": 0.72, "grad_norm": 1.9397016232484956, "learning_rate": 1.989062737453376e-06, "loss": 0.6784, "step": 6754 }, { "epoch": 0.72, "grad_norm": 1.6563543486100576, "learning_rate": 1.987654826143117e-06, "loss": 0.5288, "step": 6755 }, { "epoch": 0.72, "grad_norm": 1.7089570970138013, "learning_rate": 1.986247289666959e-06, "loss": 0.6302, "step": 6756 }, { "epoch": 0.72, "grad_norm": 1.9005379601400376, "learning_rate": 1.984840128200044e-06, "loss": 0.5513, "step": 6757 }, { "epoch": 0.72, "grad_norm": 1.6261681192387785, "learning_rate": 1.9834333419174706e-06, "loss": 0.6307, "step": 6758 }, { "epoch": 0.72, "grad_norm": 1.9072732172177884, "learning_rate": 1.982026930994288e-06, "loss": 0.6635, "step": 6759 }, { "epoch": 0.72, "grad_norm": 1.6214120904576794, "learning_rate": 1.9806208956054996e-06, "loss": 0.5668, "step": 6760 }, { "epoch": 0.72, "grad_norm": 1.7712434109304418, "learning_rate": 1.9792152359260623e-06, "loss": 0.5949, "step": 6761 }, { "epoch": 0.72, "grad_norm": 1.5868744007750015, "learning_rate": 1.977809952130887e-06, "loss": 0.4339, "step": 6762 }, { "epoch": 0.72, "grad_norm": 1.607210858543944, "learning_rate": 1.976405044394835e-06, "loss": 0.4962, "step": 6763 }, { "epoch": 0.72, "grad_norm": 1.7692289264430419, "learning_rate": 1.975000512892724e-06, "loss": 0.5953, "step": 6764 }, { "epoch": 0.72, "grad_norm": 1.8875648300401844, "learning_rate": 1.9735963577993232e-06, "loss": 0.5713, "step": 6765 }, { "epoch": 0.72, "grad_norm": 1.2829956000453056, "learning_rate": 1.972192579289356e-06, "loss": 0.5042, "step": 6766 }, { "epoch": 0.72, "grad_norm": 1.8722340272602604, "learning_rate": 1.970789177537497e-06, "loss": 0.6636, "step": 6767 }, { "epoch": 0.72, "grad_norm": 1.672145285566523, "learning_rate": 1.969386152718376e-06, "loss": 0.5496, "step": 6768 }, { "epoch": 0.72, "grad_norm": 1.6814099260704973, "learning_rate": 1.9679835050065753e-06, "loss": 0.5853, "step": 6769 }, { "epoch": 0.72, "grad_norm": 1.7034437904270932, "learning_rate": 1.96658123457663e-06, "loss": 0.5098, "step": 6770 }, { "epoch": 0.72, "grad_norm": 1.9935687949742622, "learning_rate": 1.9651793416030275e-06, "loss": 0.6627, "step": 6771 }, { "epoch": 0.72, "grad_norm": 1.7116562429703261, "learning_rate": 1.9637778262602098e-06, "loss": 0.6304, "step": 6772 }, { "epoch": 0.72, "grad_norm": 1.1302321624828229, "learning_rate": 1.962376688722571e-06, "loss": 0.473, "step": 6773 }, { "epoch": 0.72, "grad_norm": 2.0276867599852273, "learning_rate": 1.960975929164459e-06, "loss": 0.6452, "step": 6774 }, { "epoch": 0.72, "grad_norm": 1.7616109608993105, "learning_rate": 1.9595755477601724e-06, "loss": 0.536, "step": 6775 }, { "epoch": 0.72, "grad_norm": 1.8277701895414455, "learning_rate": 1.9581755446839664e-06, "loss": 0.6175, "step": 6776 }, { "epoch": 0.72, "grad_norm": 1.9340718981877307, "learning_rate": 1.9567759201100456e-06, "loss": 0.5994, "step": 6777 }, { "epoch": 0.72, "grad_norm": 1.8447976312742809, "learning_rate": 1.9553766742125695e-06, "loss": 0.6083, "step": 6778 }, { "epoch": 0.72, "grad_norm": 1.9855496495650555, "learning_rate": 1.95397780716565e-06, "loss": 0.6235, "step": 6779 }, { "epoch": 0.72, "grad_norm": 1.6765007649141563, "learning_rate": 1.9525793191433516e-06, "loss": 0.5357, "step": 6780 }, { "epoch": 0.72, "grad_norm": 1.9152918433629431, "learning_rate": 1.9511812103196925e-06, "loss": 0.6036, "step": 6781 }, { "epoch": 0.72, "grad_norm": 1.526817126604618, "learning_rate": 1.9497834808686426e-06, "loss": 0.5643, "step": 6782 }, { "epoch": 0.72, "grad_norm": 1.7613487881460765, "learning_rate": 1.948386130964125e-06, "loss": 0.5821, "step": 6783 }, { "epoch": 0.72, "grad_norm": 1.0975282756910192, "learning_rate": 1.9469891607800155e-06, "loss": 0.4884, "step": 6784 }, { "epoch": 0.72, "grad_norm": 1.7247982888383895, "learning_rate": 1.945592570490144e-06, "loss": 0.5481, "step": 6785 }, { "epoch": 0.72, "grad_norm": 1.550199026619202, "learning_rate": 1.9441963602682908e-06, "loss": 0.6454, "step": 6786 }, { "epoch": 0.72, "grad_norm": 1.854605086127165, "learning_rate": 1.9428005302881897e-06, "loss": 0.5802, "step": 6787 }, { "epoch": 0.72, "grad_norm": 1.0452368210705585, "learning_rate": 1.9414050807235283e-06, "loss": 0.4692, "step": 6788 }, { "epoch": 0.72, "grad_norm": 1.7783704076812685, "learning_rate": 1.9400100117479462e-06, "loss": 0.6015, "step": 6789 }, { "epoch": 0.72, "grad_norm": 1.6350175770306699, "learning_rate": 1.9386153235350353e-06, "loss": 0.5497, "step": 6790 }, { "epoch": 0.72, "grad_norm": 1.1538761023373232, "learning_rate": 1.93722101625834e-06, "loss": 0.4859, "step": 6791 }, { "epoch": 0.72, "grad_norm": 1.1394446269872205, "learning_rate": 1.935827090091358e-06, "loss": 0.5068, "step": 6792 }, { "epoch": 0.72, "grad_norm": 1.8228684498918235, "learning_rate": 1.9344335452075393e-06, "loss": 0.6175, "step": 6793 }, { "epoch": 0.72, "grad_norm": 1.9163876377535907, "learning_rate": 1.933040381780286e-06, "loss": 0.6567, "step": 6794 }, { "epoch": 0.72, "grad_norm": 1.9788749690385745, "learning_rate": 1.9316475999829536e-06, "loss": 0.5597, "step": 6795 }, { "epoch": 0.72, "grad_norm": 1.7245833039030556, "learning_rate": 1.9302551999888497e-06, "loss": 0.6358, "step": 6796 }, { "epoch": 0.73, "grad_norm": 1.8097992694447798, "learning_rate": 1.9288631819712355e-06, "loss": 0.618, "step": 6797 }, { "epoch": 0.73, "grad_norm": 1.6888143042538213, "learning_rate": 1.927471546103318e-06, "loss": 0.5397, "step": 6798 }, { "epoch": 0.73, "grad_norm": 1.9060704870308187, "learning_rate": 1.926080292558269e-06, "loss": 0.6523, "step": 6799 }, { "epoch": 0.73, "grad_norm": 1.676920371309759, "learning_rate": 1.9246894215092028e-06, "loss": 0.6009, "step": 6800 }, { "epoch": 0.73, "grad_norm": 1.6780175483431066, "learning_rate": 1.923298933129189e-06, "loss": 0.4641, "step": 6801 }, { "epoch": 0.73, "grad_norm": 1.1215299185184338, "learning_rate": 1.9219088275912507e-06, "loss": 0.4882, "step": 6802 }, { "epoch": 0.73, "grad_norm": 1.8090276349983132, "learning_rate": 1.9205191050683613e-06, "loss": 0.5768, "step": 6803 }, { "epoch": 0.73, "grad_norm": 1.7582015094368948, "learning_rate": 1.9191297657334486e-06, "loss": 0.5987, "step": 6804 }, { "epoch": 0.73, "grad_norm": 1.6622625287462927, "learning_rate": 1.9177408097593913e-06, "loss": 0.5958, "step": 6805 }, { "epoch": 0.73, "grad_norm": 1.7550063382625183, "learning_rate": 1.9163522373190212e-06, "loss": 0.6274, "step": 6806 }, { "epoch": 0.73, "grad_norm": 1.9181939655078584, "learning_rate": 1.914964048585122e-06, "loss": 0.6533, "step": 6807 }, { "epoch": 0.73, "grad_norm": 2.0998199801231796, "learning_rate": 1.913576243730429e-06, "loss": 0.6759, "step": 6808 }, { "epoch": 0.73, "grad_norm": 1.0948114190951865, "learning_rate": 1.9121888229276315e-06, "loss": 0.461, "step": 6809 }, { "epoch": 0.73, "grad_norm": 1.587297461113675, "learning_rate": 1.9108017863493692e-06, "loss": 0.6096, "step": 6810 }, { "epoch": 0.73, "grad_norm": 1.7940646099074857, "learning_rate": 1.909415134168237e-06, "loss": 0.6044, "step": 6811 }, { "epoch": 0.73, "grad_norm": 1.7534300756091168, "learning_rate": 1.908028866556776e-06, "loss": 0.5788, "step": 6812 }, { "epoch": 0.73, "grad_norm": 1.135655165182802, "learning_rate": 1.9066429836874844e-06, "loss": 0.4841, "step": 6813 }, { "epoch": 0.73, "grad_norm": 1.621417641149861, "learning_rate": 1.905257485732812e-06, "loss": 0.6012, "step": 6814 }, { "epoch": 0.73, "grad_norm": 1.7789427239529907, "learning_rate": 1.9038723728651586e-06, "loss": 0.5905, "step": 6815 }, { "epoch": 0.73, "grad_norm": 1.5834151059230646, "learning_rate": 1.9024876452568796e-06, "loss": 0.485, "step": 6816 }, { "epoch": 0.73, "grad_norm": 1.838607788959566, "learning_rate": 1.9011033030802794e-06, "loss": 0.7184, "step": 6817 }, { "epoch": 0.73, "grad_norm": 1.631511133336715, "learning_rate": 1.8997193465076157e-06, "loss": 0.5534, "step": 6818 }, { "epoch": 0.73, "grad_norm": 1.6866886278217001, "learning_rate": 1.8983357757110977e-06, "loss": 0.5984, "step": 6819 }, { "epoch": 0.73, "grad_norm": 1.5635694705830976, "learning_rate": 1.896952590862886e-06, "loss": 0.5511, "step": 6820 }, { "epoch": 0.73, "grad_norm": 1.8067109064880598, "learning_rate": 1.8955697921350946e-06, "loss": 0.5151, "step": 6821 }, { "epoch": 0.73, "grad_norm": 1.9161185733957657, "learning_rate": 1.894187379699789e-06, "loss": 0.6427, "step": 6822 }, { "epoch": 0.73, "grad_norm": 1.1666336673398479, "learning_rate": 1.892805353728986e-06, "loss": 0.4847, "step": 6823 }, { "epoch": 0.73, "grad_norm": 1.7517496000327746, "learning_rate": 1.8914237143946552e-06, "loss": 0.5139, "step": 6824 }, { "epoch": 0.73, "grad_norm": 1.727506625404338, "learning_rate": 1.8900424618687169e-06, "loss": 0.5327, "step": 6825 }, { "epoch": 0.73, "grad_norm": 1.9781859239876491, "learning_rate": 1.888661596323047e-06, "loss": 0.5949, "step": 6826 }, { "epoch": 0.73, "grad_norm": 1.9090400737357418, "learning_rate": 1.8872811179294653e-06, "loss": 0.6574, "step": 6827 }, { "epoch": 0.73, "grad_norm": 1.635661167865736, "learning_rate": 1.8859010268597517e-06, "loss": 0.5526, "step": 6828 }, { "epoch": 0.73, "grad_norm": 1.9640570551458139, "learning_rate": 1.884521323285633e-06, "loss": 0.588, "step": 6829 }, { "epoch": 0.73, "grad_norm": 1.8597226031355656, "learning_rate": 1.8831420073787904e-06, "loss": 0.5689, "step": 6830 }, { "epoch": 0.73, "grad_norm": 1.8466224686255925, "learning_rate": 1.8817630793108555e-06, "loss": 0.6251, "step": 6831 }, { "epoch": 0.73, "grad_norm": 1.7036250314860182, "learning_rate": 1.8803845392534104e-06, "loss": 0.4702, "step": 6832 }, { "epoch": 0.73, "grad_norm": 1.6713424646788408, "learning_rate": 1.879006387377994e-06, "loss": 0.4767, "step": 6833 }, { "epoch": 0.73, "grad_norm": 1.649996438280249, "learning_rate": 1.8776286238560915e-06, "loss": 0.5419, "step": 6834 }, { "epoch": 0.73, "grad_norm": 1.7798141223037518, "learning_rate": 1.8762512488591416e-06, "loss": 0.5761, "step": 6835 }, { "epoch": 0.73, "grad_norm": 1.6687809777660148, "learning_rate": 1.8748742625585349e-06, "loss": 0.5349, "step": 6836 }, { "epoch": 0.73, "grad_norm": 1.9029652159847634, "learning_rate": 1.8734976651256131e-06, "loss": 0.6258, "step": 6837 }, { "epoch": 0.73, "grad_norm": 1.9163343616714943, "learning_rate": 1.8721214567316708e-06, "loss": 0.5327, "step": 6838 }, { "epoch": 0.73, "grad_norm": 1.6299318756374315, "learning_rate": 1.8707456375479522e-06, "loss": 0.5846, "step": 6839 }, { "epoch": 0.73, "grad_norm": 1.7461264889540304, "learning_rate": 1.8693702077456565e-06, "loss": 0.5953, "step": 6840 }, { "epoch": 0.73, "grad_norm": 1.7818722492061405, "learning_rate": 1.8679951674959286e-06, "loss": 0.6289, "step": 6841 }, { "epoch": 0.73, "grad_norm": 2.0033627511662258, "learning_rate": 1.8666205169698692e-06, "loss": 0.6064, "step": 6842 }, { "epoch": 0.73, "grad_norm": 1.1078152127476995, "learning_rate": 1.865246256338531e-06, "loss": 0.4962, "step": 6843 }, { "epoch": 0.73, "grad_norm": 1.7102177992841987, "learning_rate": 1.8638723857729162e-06, "loss": 0.5391, "step": 6844 }, { "epoch": 0.73, "grad_norm": 1.9979650964413238, "learning_rate": 1.862498905443979e-06, "loss": 0.4918, "step": 6845 }, { "epoch": 0.73, "grad_norm": 1.7205200874965594, "learning_rate": 1.8611258155226263e-06, "loss": 0.65, "step": 6846 }, { "epoch": 0.73, "grad_norm": 2.15255301422199, "learning_rate": 1.8597531161797139e-06, "loss": 0.6315, "step": 6847 }, { "epoch": 0.73, "grad_norm": 1.7703169463427357, "learning_rate": 1.858380807586051e-06, "loss": 0.5793, "step": 6848 }, { "epoch": 0.73, "grad_norm": 1.8551950042148513, "learning_rate": 1.857008889912396e-06, "loss": 0.5359, "step": 6849 }, { "epoch": 0.73, "grad_norm": 1.6169385684885285, "learning_rate": 1.8556373633294645e-06, "loss": 0.5811, "step": 6850 }, { "epoch": 0.73, "grad_norm": 1.7558988606744002, "learning_rate": 1.8542662280079154e-06, "loss": 0.5541, "step": 6851 }, { "epoch": 0.73, "grad_norm": 1.6661616175956517, "learning_rate": 1.8528954841183644e-06, "loss": 0.5492, "step": 6852 }, { "epoch": 0.73, "grad_norm": 1.7338319764953982, "learning_rate": 1.8515251318313766e-06, "loss": 0.5986, "step": 6853 }, { "epoch": 0.73, "grad_norm": 1.5569381852855408, "learning_rate": 1.8501551713174675e-06, "loss": 0.5253, "step": 6854 }, { "epoch": 0.73, "grad_norm": 1.9127147323360822, "learning_rate": 1.848785602747108e-06, "loss": 0.6129, "step": 6855 }, { "epoch": 0.73, "grad_norm": 1.9186365235408223, "learning_rate": 1.8474164262907129e-06, "loss": 0.5609, "step": 6856 }, { "epoch": 0.73, "grad_norm": 1.7811859468258755, "learning_rate": 1.8460476421186547e-06, "loss": 0.5446, "step": 6857 }, { "epoch": 0.73, "grad_norm": 1.7621688069894323, "learning_rate": 1.844679250401254e-06, "loss": 0.5648, "step": 6858 }, { "epoch": 0.73, "grad_norm": 1.7949278567408744, "learning_rate": 1.8433112513087842e-06, "loss": 0.5952, "step": 6859 }, { "epoch": 0.73, "grad_norm": 1.827788064390807, "learning_rate": 1.8419436450114692e-06, "loss": 0.5844, "step": 6860 }, { "epoch": 0.73, "grad_norm": 2.0641438716645832, "learning_rate": 1.8405764316794832e-06, "loss": 0.5409, "step": 6861 }, { "epoch": 0.73, "grad_norm": 1.6371767554570866, "learning_rate": 1.8392096114829523e-06, "loss": 0.594, "step": 6862 }, { "epoch": 0.73, "grad_norm": 1.8315197998182593, "learning_rate": 1.8378431845919536e-06, "loss": 0.6296, "step": 6863 }, { "epoch": 0.73, "grad_norm": 1.7064288069996776, "learning_rate": 1.8364771511765157e-06, "loss": 0.5672, "step": 6864 }, { "epoch": 0.73, "grad_norm": 1.9980126619692626, "learning_rate": 1.8351115114066175e-06, "loss": 0.5665, "step": 6865 }, { "epoch": 0.73, "grad_norm": 1.6421952821935222, "learning_rate": 1.833746265452187e-06, "loss": 0.6251, "step": 6866 }, { "epoch": 0.73, "grad_norm": 1.9470974720090646, "learning_rate": 1.8323814134831097e-06, "loss": 0.5954, "step": 6867 }, { "epoch": 0.73, "grad_norm": 2.0284313097500255, "learning_rate": 1.8310169556692154e-06, "loss": 0.6744, "step": 6868 }, { "epoch": 0.73, "grad_norm": 1.8323656220746831, "learning_rate": 1.8296528921802887e-06, "loss": 0.6684, "step": 6869 }, { "epoch": 0.73, "grad_norm": 1.665641950356524, "learning_rate": 1.8282892231860604e-06, "loss": 0.5791, "step": 6870 }, { "epoch": 0.73, "grad_norm": 1.8328415084466947, "learning_rate": 1.8269259488562179e-06, "loss": 0.593, "step": 6871 }, { "epoch": 0.73, "grad_norm": 1.6878800730689643, "learning_rate": 1.825563069360396e-06, "loss": 0.6258, "step": 6872 }, { "epoch": 0.73, "grad_norm": 1.7421797604053118, "learning_rate": 1.8242005848681821e-06, "loss": 0.54, "step": 6873 }, { "epoch": 0.73, "grad_norm": 1.9973828786202241, "learning_rate": 1.8228384955491136e-06, "loss": 0.7291, "step": 6874 }, { "epoch": 0.73, "grad_norm": 2.1075730170347526, "learning_rate": 1.8214768015726786e-06, "loss": 0.6128, "step": 6875 }, { "epoch": 0.73, "grad_norm": 1.0537257056800124, "learning_rate": 1.8201155031083168e-06, "loss": 0.4726, "step": 6876 }, { "epoch": 0.73, "grad_norm": 1.7486903488644099, "learning_rate": 1.8187546003254175e-06, "loss": 0.5016, "step": 6877 }, { "epoch": 0.73, "grad_norm": 1.099317523834349, "learning_rate": 1.817394093393322e-06, "loss": 0.4848, "step": 6878 }, { "epoch": 0.73, "grad_norm": 1.7976839886539864, "learning_rate": 1.8160339824813217e-06, "loss": 0.585, "step": 6879 }, { "epoch": 0.73, "grad_norm": 1.782609067748756, "learning_rate": 1.8146742677586587e-06, "loss": 0.6079, "step": 6880 }, { "epoch": 0.73, "grad_norm": 1.8417265515966246, "learning_rate": 1.8133149493945257e-06, "loss": 0.6234, "step": 6881 }, { "epoch": 0.73, "grad_norm": 1.5645952485420764, "learning_rate": 1.811956027558065e-06, "loss": 0.5523, "step": 6882 }, { "epoch": 0.73, "grad_norm": 1.8649400828489424, "learning_rate": 1.8105975024183757e-06, "loss": 0.5658, "step": 6883 }, { "epoch": 0.73, "grad_norm": 1.7593112962371595, "learning_rate": 1.809239374144498e-06, "loss": 0.6451, "step": 6884 }, { "epoch": 0.73, "grad_norm": 1.109919061092109, "learning_rate": 1.8078816429054287e-06, "loss": 0.5299, "step": 6885 }, { "epoch": 0.73, "grad_norm": 1.0764876200022135, "learning_rate": 1.8065243088701134e-06, "loss": 0.4908, "step": 6886 }, { "epoch": 0.73, "grad_norm": 1.9175509669675763, "learning_rate": 1.8051673722074497e-06, "loss": 0.5812, "step": 6887 }, { "epoch": 0.73, "grad_norm": 3.1340855944783734, "learning_rate": 1.8038108330862847e-06, "loss": 0.5435, "step": 6888 }, { "epoch": 0.73, "grad_norm": 1.814350552306336, "learning_rate": 1.802454691675416e-06, "loss": 0.5308, "step": 6889 }, { "epoch": 0.73, "grad_norm": 1.6398803446223904, "learning_rate": 1.8010989481435914e-06, "loss": 0.5323, "step": 6890 }, { "epoch": 0.74, "grad_norm": 1.7640900759374178, "learning_rate": 1.79974360265951e-06, "loss": 0.5888, "step": 6891 }, { "epoch": 0.74, "grad_norm": 1.7854649161378742, "learning_rate": 1.7983886553918212e-06, "loss": 0.6605, "step": 6892 }, { "epoch": 0.74, "grad_norm": 1.6398763981911748, "learning_rate": 1.7970341065091246e-06, "loss": 0.5489, "step": 6893 }, { "epoch": 0.74, "grad_norm": 1.7651491483700479, "learning_rate": 1.7956799561799703e-06, "loss": 0.6772, "step": 6894 }, { "epoch": 0.74, "grad_norm": 1.7295363130035153, "learning_rate": 1.7943262045728593e-06, "loss": 0.5848, "step": 6895 }, { "epoch": 0.74, "grad_norm": 1.5281023503079603, "learning_rate": 1.7929728518562412e-06, "loss": 0.5108, "step": 6896 }, { "epoch": 0.74, "grad_norm": 1.8079807299875366, "learning_rate": 1.7916198981985188e-06, "loss": 0.5675, "step": 6897 }, { "epoch": 0.74, "grad_norm": 1.0830982865056002, "learning_rate": 1.7902673437680423e-06, "loss": 0.4993, "step": 6898 }, { "epoch": 0.74, "grad_norm": 2.292539491047561, "learning_rate": 1.7889151887331146e-06, "loss": 0.6105, "step": 6899 }, { "epoch": 0.74, "grad_norm": 1.729343408051164, "learning_rate": 1.787563433261988e-06, "loss": 0.5904, "step": 6900 }, { "epoch": 0.74, "grad_norm": 1.6144854697672388, "learning_rate": 1.7862120775228641e-06, "loss": 0.5262, "step": 6901 }, { "epoch": 0.74, "grad_norm": 1.836260958155928, "learning_rate": 1.7848611216838969e-06, "loss": 0.586, "step": 6902 }, { "epoch": 0.74, "grad_norm": 2.112694883503822, "learning_rate": 1.7835105659131886e-06, "loss": 0.6407, "step": 6903 }, { "epoch": 0.74, "grad_norm": 1.1041057625696318, "learning_rate": 1.7821604103787925e-06, "loss": 0.48, "step": 6904 }, { "epoch": 0.74, "grad_norm": 1.7659129288372728, "learning_rate": 1.780810655248712e-06, "loss": 0.5874, "step": 6905 }, { "epoch": 0.74, "grad_norm": 1.878017266612733, "learning_rate": 1.7794613006909011e-06, "loss": 0.5993, "step": 6906 }, { "epoch": 0.74, "grad_norm": 1.8015317670839923, "learning_rate": 1.778112346873263e-06, "loss": 0.586, "step": 6907 }, { "epoch": 0.74, "grad_norm": 2.026122350791119, "learning_rate": 1.7767637939636524e-06, "loss": 0.6154, "step": 6908 }, { "epoch": 0.74, "grad_norm": 2.0748293661757087, "learning_rate": 1.7754156421298724e-06, "loss": 0.6622, "step": 6909 }, { "epoch": 0.74, "grad_norm": 1.8365897913204225, "learning_rate": 1.7740678915396781e-06, "loss": 0.5923, "step": 6910 }, { "epoch": 0.74, "grad_norm": 1.887948755524918, "learning_rate": 1.7727205423607729e-06, "loss": 0.5989, "step": 6911 }, { "epoch": 0.74, "grad_norm": 2.0310674352399687, "learning_rate": 1.7713735947608114e-06, "loss": 0.5771, "step": 6912 }, { "epoch": 0.74, "grad_norm": 1.8211541241266047, "learning_rate": 1.7700270489073972e-06, "loss": 0.5952, "step": 6913 }, { "epoch": 0.74, "grad_norm": 1.6693158970942674, "learning_rate": 1.7686809049680853e-06, "loss": 0.6016, "step": 6914 }, { "epoch": 0.74, "grad_norm": 1.8059580193202707, "learning_rate": 1.7673351631103802e-06, "loss": 0.5787, "step": 6915 }, { "epoch": 0.74, "grad_norm": 1.788083014169418, "learning_rate": 1.7659898235017352e-06, "loss": 0.5615, "step": 6916 }, { "epoch": 0.74, "grad_norm": 1.8034399589455337, "learning_rate": 1.764644886309555e-06, "loss": 0.4936, "step": 6917 }, { "epoch": 0.74, "grad_norm": 1.690399955798146, "learning_rate": 1.7633003517011942e-06, "loss": 0.5984, "step": 6918 }, { "epoch": 0.74, "grad_norm": 1.8862791173927163, "learning_rate": 1.761956219843956e-06, "loss": 0.4723, "step": 6919 }, { "epoch": 0.74, "grad_norm": 1.864309978674708, "learning_rate": 1.7606124909050942e-06, "loss": 0.5519, "step": 6920 }, { "epoch": 0.74, "grad_norm": 1.499335991788207, "learning_rate": 1.7592691650518135e-06, "loss": 0.5166, "step": 6921 }, { "epoch": 0.74, "grad_norm": 1.6839002001249124, "learning_rate": 1.7579262424512666e-06, "loss": 0.6082, "step": 6922 }, { "epoch": 0.74, "grad_norm": 1.7109996794538493, "learning_rate": 1.7565837232705573e-06, "loss": 0.4963, "step": 6923 }, { "epoch": 0.74, "grad_norm": 1.8246649202573058, "learning_rate": 1.7552416076767387e-06, "loss": 0.6388, "step": 6924 }, { "epoch": 0.74, "grad_norm": 1.8972628837301988, "learning_rate": 1.7538998958368141e-06, "loss": 0.5617, "step": 6925 }, { "epoch": 0.74, "grad_norm": 1.768527540370486, "learning_rate": 1.7525585879177375e-06, "loss": 0.5534, "step": 6926 }, { "epoch": 0.74, "grad_norm": 1.877899905992963, "learning_rate": 1.7512176840864086e-06, "loss": 0.5616, "step": 6927 }, { "epoch": 0.74, "grad_norm": 1.9877808723436619, "learning_rate": 1.749877184509679e-06, "loss": 0.5557, "step": 6928 }, { "epoch": 0.74, "grad_norm": 1.8245429078023498, "learning_rate": 1.7485370893543546e-06, "loss": 0.6872, "step": 6929 }, { "epoch": 0.74, "grad_norm": 2.064808687130788, "learning_rate": 1.7471973987871842e-06, "loss": 0.6748, "step": 6930 }, { "epoch": 0.74, "grad_norm": 1.9274601693777664, "learning_rate": 1.745858112974871e-06, "loss": 0.655, "step": 6931 }, { "epoch": 0.74, "grad_norm": 1.8425307174954433, "learning_rate": 1.7445192320840637e-06, "loss": 0.6041, "step": 6932 }, { "epoch": 0.74, "grad_norm": 1.6513210177772122, "learning_rate": 1.743180756281364e-06, "loss": 0.6122, "step": 6933 }, { "epoch": 0.74, "grad_norm": 1.789256290607584, "learning_rate": 1.741842685733322e-06, "loss": 0.6196, "step": 6934 }, { "epoch": 0.74, "grad_norm": 1.9608519031494958, "learning_rate": 1.7405050206064372e-06, "loss": 0.6329, "step": 6935 }, { "epoch": 0.74, "grad_norm": 1.6748765847198677, "learning_rate": 1.7391677610671588e-06, "loss": 0.5311, "step": 6936 }, { "epoch": 0.74, "grad_norm": 1.6050005088491701, "learning_rate": 1.7378309072818855e-06, "loss": 0.5663, "step": 6937 }, { "epoch": 0.74, "grad_norm": 1.102038581282271, "learning_rate": 1.7364944594169659e-06, "loss": 0.4919, "step": 6938 }, { "epoch": 0.74, "grad_norm": 1.7943993292146114, "learning_rate": 1.735158417638697e-06, "loss": 0.5887, "step": 6939 }, { "epoch": 0.74, "grad_norm": 1.6746692264086265, "learning_rate": 1.733822782113327e-06, "loss": 0.506, "step": 6940 }, { "epoch": 0.74, "grad_norm": 1.6458612851582597, "learning_rate": 1.7324875530070534e-06, "loss": 0.5877, "step": 6941 }, { "epoch": 0.74, "grad_norm": 1.6070442030192866, "learning_rate": 1.7311527304860193e-06, "loss": 0.5835, "step": 6942 }, { "epoch": 0.74, "grad_norm": 1.7498786561008346, "learning_rate": 1.7298183147163217e-06, "loss": 0.5622, "step": 6943 }, { "epoch": 0.74, "grad_norm": 1.739782712692476, "learning_rate": 1.7284843058640056e-06, "loss": 0.6191, "step": 6944 }, { "epoch": 0.74, "grad_norm": 1.1165120906772088, "learning_rate": 1.727150704095064e-06, "loss": 0.5134, "step": 6945 }, { "epoch": 0.74, "grad_norm": 1.7572197393729365, "learning_rate": 1.7258175095754432e-06, "loss": 0.5621, "step": 6946 }, { "epoch": 0.74, "grad_norm": 1.9383334014507183, "learning_rate": 1.7244847224710355e-06, "loss": 0.514, "step": 6947 }, { "epoch": 0.74, "grad_norm": 1.711421001608363, "learning_rate": 1.7231523429476814e-06, "loss": 0.5468, "step": 6948 }, { "epoch": 0.74, "grad_norm": 1.8101130824495584, "learning_rate": 1.721820371171174e-06, "loss": 0.561, "step": 6949 }, { "epoch": 0.74, "grad_norm": 1.992197830885645, "learning_rate": 1.7204888073072534e-06, "loss": 0.5433, "step": 6950 }, { "epoch": 0.74, "grad_norm": 1.1289078452173258, "learning_rate": 1.7191576515216097e-06, "loss": 0.504, "step": 6951 }, { "epoch": 0.74, "grad_norm": 1.660905098593851, "learning_rate": 1.7178269039798818e-06, "loss": 0.6089, "step": 6952 }, { "epoch": 0.74, "grad_norm": 1.7087296507772525, "learning_rate": 1.716496564847659e-06, "loss": 0.5826, "step": 6953 }, { "epoch": 0.74, "grad_norm": 1.093355905855412, "learning_rate": 1.7151666342904783e-06, "loss": 0.4962, "step": 6954 }, { "epoch": 0.74, "grad_norm": 1.0765165409763084, "learning_rate": 1.7138371124738279e-06, "loss": 0.4702, "step": 6955 }, { "epoch": 0.74, "grad_norm": 1.8210033070209355, "learning_rate": 1.712507999563141e-06, "loss": 0.5864, "step": 6956 }, { "epoch": 0.74, "grad_norm": 1.7239873128639256, "learning_rate": 1.7111792957238045e-06, "loss": 0.5445, "step": 6957 }, { "epoch": 0.74, "grad_norm": 1.801766563374782, "learning_rate": 1.7098510011211517e-06, "loss": 0.5172, "step": 6958 }, { "epoch": 0.74, "grad_norm": 1.651487278185841, "learning_rate": 1.7085231159204662e-06, "loss": 0.5595, "step": 6959 }, { "epoch": 0.74, "grad_norm": 1.8126926100935021, "learning_rate": 1.7071956402869805e-06, "loss": 0.5836, "step": 6960 }, { "epoch": 0.74, "grad_norm": 1.7752316057735993, "learning_rate": 1.705868574385876e-06, "loss": 0.5381, "step": 6961 }, { "epoch": 0.74, "grad_norm": 2.100213174796513, "learning_rate": 1.704541918382281e-06, "loss": 0.7366, "step": 6962 }, { "epoch": 0.74, "grad_norm": 1.708350708017358, "learning_rate": 1.7032156724412779e-06, "loss": 0.6051, "step": 6963 }, { "epoch": 0.74, "grad_norm": 1.8762708200010845, "learning_rate": 1.701889836727894e-06, "loss": 0.5795, "step": 6964 }, { "epoch": 0.74, "grad_norm": 1.6549424236950032, "learning_rate": 1.7005644114071063e-06, "loss": 0.6351, "step": 6965 }, { "epoch": 0.74, "grad_norm": 1.6100363534368702, "learning_rate": 1.699239396643841e-06, "loss": 0.576, "step": 6966 }, { "epoch": 0.74, "grad_norm": 1.842168327446453, "learning_rate": 1.6979147926029726e-06, "loss": 0.5673, "step": 6967 }, { "epoch": 0.74, "grad_norm": 1.7527683475639109, "learning_rate": 1.6965905994493265e-06, "loss": 0.5693, "step": 6968 }, { "epoch": 0.74, "grad_norm": 1.795901534841047, "learning_rate": 1.6952668173476756e-06, "loss": 0.5081, "step": 6969 }, { "epoch": 0.74, "grad_norm": 1.8216945937522655, "learning_rate": 1.6939434464627397e-06, "loss": 0.6176, "step": 6970 }, { "epoch": 0.74, "grad_norm": 1.7216626375894617, "learning_rate": 1.69262048695919e-06, "loss": 0.6278, "step": 6971 }, { "epoch": 0.74, "grad_norm": 1.9758719372904832, "learning_rate": 1.691297939001646e-06, "loss": 0.6843, "step": 6972 }, { "epoch": 0.74, "grad_norm": 1.5812256769031185, "learning_rate": 1.6899758027546765e-06, "loss": 0.4841, "step": 6973 }, { "epoch": 0.74, "grad_norm": 1.6053429935691987, "learning_rate": 1.6886540783827982e-06, "loss": 0.5231, "step": 6974 }, { "epoch": 0.74, "grad_norm": 1.76335929466528, "learning_rate": 1.6873327660504762e-06, "loss": 0.576, "step": 6975 }, { "epoch": 0.74, "grad_norm": 1.8774684569839135, "learning_rate": 1.6860118659221254e-06, "loss": 0.6375, "step": 6976 }, { "epoch": 0.74, "grad_norm": 2.2518512924603353, "learning_rate": 1.6846913781621088e-06, "loss": 0.5425, "step": 6977 }, { "epoch": 0.74, "grad_norm": 1.8540230335307764, "learning_rate": 1.6833713029347383e-06, "loss": 0.5585, "step": 6978 }, { "epoch": 0.74, "grad_norm": 1.6925712056213595, "learning_rate": 1.682051640404272e-06, "loss": 0.5428, "step": 6979 }, { "epoch": 0.74, "grad_norm": 1.7437991949185092, "learning_rate": 1.6807323907349233e-06, "loss": 0.6187, "step": 6980 }, { "epoch": 0.74, "grad_norm": 1.681795448601327, "learning_rate": 1.6794135540908479e-06, "loss": 0.5346, "step": 6981 }, { "epoch": 0.74, "grad_norm": 1.1280890193386413, "learning_rate": 1.678095130636152e-06, "loss": 0.4983, "step": 6982 }, { "epoch": 0.74, "grad_norm": 1.6013186239881476, "learning_rate": 1.676777120534891e-06, "loss": 0.5754, "step": 6983 }, { "epoch": 0.74, "grad_norm": 1.8431071617236845, "learning_rate": 1.6754595239510697e-06, "loss": 0.7065, "step": 6984 }, { "epoch": 0.75, "grad_norm": 1.1542806477543717, "learning_rate": 1.6741423410486364e-06, "loss": 0.4871, "step": 6985 }, { "epoch": 0.75, "grad_norm": 1.6696582623450225, "learning_rate": 1.6728255719914942e-06, "loss": 0.5335, "step": 6986 }, { "epoch": 0.75, "grad_norm": 2.011181613146188, "learning_rate": 1.6715092169434916e-06, "loss": 0.4633, "step": 6987 }, { "epoch": 0.75, "grad_norm": 1.7539441851958317, "learning_rate": 1.670193276068426e-06, "loss": 0.5652, "step": 6988 }, { "epoch": 0.75, "grad_norm": 1.0870463719665087, "learning_rate": 1.668877749530044e-06, "loss": 0.4743, "step": 6989 }, { "epoch": 0.75, "grad_norm": 1.9352581059940972, "learning_rate": 1.6675626374920396e-06, "loss": 0.553, "step": 6990 }, { "epoch": 0.75, "grad_norm": 1.862813509720816, "learning_rate": 1.6662479401180553e-06, "loss": 0.5609, "step": 6991 }, { "epoch": 0.75, "grad_norm": 1.646568687952511, "learning_rate": 1.6649336575716834e-06, "loss": 0.4926, "step": 6992 }, { "epoch": 0.75, "grad_norm": 1.8315199119382661, "learning_rate": 1.663619790016463e-06, "loss": 0.6783, "step": 6993 }, { "epoch": 0.75, "grad_norm": 1.8442176077242733, "learning_rate": 1.6623063376158827e-06, "loss": 0.5565, "step": 6994 }, { "epoch": 0.75, "grad_norm": 1.8517172759181229, "learning_rate": 1.6609933005333778e-06, "loss": 0.693, "step": 6995 }, { "epoch": 0.75, "grad_norm": 1.8108019052852327, "learning_rate": 1.6596806789323317e-06, "loss": 0.603, "step": 6996 }, { "epoch": 0.75, "grad_norm": 1.7611161179101247, "learning_rate": 1.6583684729760812e-06, "loss": 0.5372, "step": 6997 }, { "epoch": 0.75, "grad_norm": 1.914148645733231, "learning_rate": 1.6570566828279071e-06, "loss": 0.5938, "step": 6998 }, { "epoch": 0.75, "grad_norm": 1.7736726779788803, "learning_rate": 1.6557453086510362e-06, "loss": 0.574, "step": 6999 }, { "epoch": 0.75, "grad_norm": 1.8520780515944724, "learning_rate": 1.6544343506086468e-06, "loss": 0.57, "step": 7000 }, { "epoch": 0.75, "grad_norm": 1.7676131504006032, "learning_rate": 1.6531238088638663e-06, "loss": 0.6059, "step": 7001 }, { "epoch": 0.75, "grad_norm": 1.7825852819228951, "learning_rate": 1.6518136835797678e-06, "loss": 0.5774, "step": 7002 }, { "epoch": 0.75, "grad_norm": 1.7320058398525964, "learning_rate": 1.6505039749193741e-06, "loss": 0.627, "step": 7003 }, { "epoch": 0.75, "grad_norm": 1.7605595753891359, "learning_rate": 1.6491946830456556e-06, "loss": 0.585, "step": 7004 }, { "epoch": 0.75, "grad_norm": 1.9088002410600435, "learning_rate": 1.647885808121531e-06, "loss": 0.6121, "step": 7005 }, { "epoch": 0.75, "grad_norm": 1.763048052596477, "learning_rate": 1.646577350309867e-06, "loss": 0.5892, "step": 7006 }, { "epoch": 0.75, "grad_norm": 1.552306392462, "learning_rate": 1.6452693097734784e-06, "loss": 0.5097, "step": 7007 }, { "epoch": 0.75, "grad_norm": 1.6389405044167025, "learning_rate": 1.643961686675129e-06, "loss": 0.5287, "step": 7008 }, { "epoch": 0.75, "grad_norm": 1.7225694967401446, "learning_rate": 1.6426544811775285e-06, "loss": 0.6623, "step": 7009 }, { "epoch": 0.75, "grad_norm": 1.7806994757367909, "learning_rate": 1.641347693443337e-06, "loss": 0.5678, "step": 7010 }, { "epoch": 0.75, "grad_norm": 1.746327721840045, "learning_rate": 1.6400413236351608e-06, "loss": 0.6127, "step": 7011 }, { "epoch": 0.75, "grad_norm": 1.8418888207945932, "learning_rate": 1.6387353719155551e-06, "loss": 0.6074, "step": 7012 }, { "epoch": 0.75, "grad_norm": 1.8977380792307585, "learning_rate": 1.6374298384470238e-06, "loss": 0.6625, "step": 7013 }, { "epoch": 0.75, "grad_norm": 1.7931032514808773, "learning_rate": 1.636124723392017e-06, "loss": 0.6077, "step": 7014 }, { "epoch": 0.75, "grad_norm": 1.7427454846558195, "learning_rate": 1.6348200269129334e-06, "loss": 0.585, "step": 7015 }, { "epoch": 0.75, "grad_norm": 1.7183827471970308, "learning_rate": 1.6335157491721209e-06, "loss": 0.5698, "step": 7016 }, { "epoch": 0.75, "grad_norm": 1.7483616085118074, "learning_rate": 1.6322118903318734e-06, "loss": 0.5168, "step": 7017 }, { "epoch": 0.75, "grad_norm": 2.0324279264593033, "learning_rate": 1.6309084505544338e-06, "loss": 0.709, "step": 7018 }, { "epoch": 0.75, "grad_norm": 1.7535568721270136, "learning_rate": 1.629605430001992e-06, "loss": 0.5502, "step": 7019 }, { "epoch": 0.75, "grad_norm": 1.9084118831157764, "learning_rate": 1.6283028288366875e-06, "loss": 0.5192, "step": 7020 }, { "epoch": 0.75, "grad_norm": 1.616083012435352, "learning_rate": 1.6270006472206057e-06, "loss": 0.5658, "step": 7021 }, { "epoch": 0.75, "grad_norm": 1.596009624824899, "learning_rate": 1.62569888531578e-06, "loss": 0.4991, "step": 7022 }, { "epoch": 0.75, "grad_norm": 1.8613777996308198, "learning_rate": 1.6243975432841935e-06, "loss": 0.6114, "step": 7023 }, { "epoch": 0.75, "grad_norm": 1.78297651275839, "learning_rate": 1.623096621287774e-06, "loss": 0.4951, "step": 7024 }, { "epoch": 0.75, "grad_norm": 1.9278703638377166, "learning_rate": 1.6217961194884001e-06, "loss": 0.5661, "step": 7025 }, { "epoch": 0.75, "grad_norm": 1.731747191557611, "learning_rate": 1.6204960380478957e-06, "loss": 0.5609, "step": 7026 }, { "epoch": 0.75, "grad_norm": 1.9320736306335833, "learning_rate": 1.6191963771280339e-06, "loss": 0.5872, "step": 7027 }, { "epoch": 0.75, "grad_norm": 1.9223518613717956, "learning_rate": 1.6178971368905344e-06, "loss": 0.5349, "step": 7028 }, { "epoch": 0.75, "grad_norm": 1.6658999159938663, "learning_rate": 1.6165983174970663e-06, "loss": 0.6574, "step": 7029 }, { "epoch": 0.75, "grad_norm": 1.8700110613022982, "learning_rate": 1.6152999191092434e-06, "loss": 0.5738, "step": 7030 }, { "epoch": 0.75, "grad_norm": 1.6562826207640107, "learning_rate": 1.6140019418886304e-06, "loss": 0.5589, "step": 7031 }, { "epoch": 0.75, "grad_norm": 2.0334679437287506, "learning_rate": 1.6127043859967373e-06, "loss": 0.6687, "step": 7032 }, { "epoch": 0.75, "grad_norm": 1.8418717158232385, "learning_rate": 1.6114072515950225e-06, "loss": 0.6443, "step": 7033 }, { "epoch": 0.75, "grad_norm": 1.6933587864793065, "learning_rate": 1.6101105388448918e-06, "loss": 0.6262, "step": 7034 }, { "epoch": 0.75, "grad_norm": 1.7847577477615337, "learning_rate": 1.608814247907699e-06, "loss": 0.6596, "step": 7035 }, { "epoch": 0.75, "grad_norm": 2.1064964871650704, "learning_rate": 1.6075183789447446e-06, "loss": 0.6346, "step": 7036 }, { "epoch": 0.75, "grad_norm": 2.07073758707384, "learning_rate": 1.6062229321172774e-06, "loss": 0.6851, "step": 7037 }, { "epoch": 0.75, "grad_norm": 1.935170267230944, "learning_rate": 1.6049279075864932e-06, "loss": 0.6142, "step": 7038 }, { "epoch": 0.75, "grad_norm": 1.7355719091496355, "learning_rate": 1.6036333055135345e-06, "loss": 0.57, "step": 7039 }, { "epoch": 0.75, "grad_norm": 1.8942778234995705, "learning_rate": 1.6023391260594934e-06, "loss": 0.5424, "step": 7040 }, { "epoch": 0.75, "grad_norm": 1.7201591111055619, "learning_rate": 1.6010453693854088e-06, "loss": 0.5247, "step": 7041 }, { "epoch": 0.75, "grad_norm": 1.6514700229812609, "learning_rate": 1.5997520356522616e-06, "loss": 0.5484, "step": 7042 }, { "epoch": 0.75, "grad_norm": 1.6854475563079239, "learning_rate": 1.5984591250209902e-06, "loss": 0.5828, "step": 7043 }, { "epoch": 0.75, "grad_norm": 1.5591689434231881, "learning_rate": 1.5971666376524726e-06, "loss": 0.5423, "step": 7044 }, { "epoch": 0.75, "grad_norm": 1.7786642776908612, "learning_rate": 1.5958745737075366e-06, "loss": 0.5547, "step": 7045 }, { "epoch": 0.75, "grad_norm": 1.7838050951261122, "learning_rate": 1.5945829333469565e-06, "loss": 0.5523, "step": 7046 }, { "epoch": 0.75, "grad_norm": 1.690759057087285, "learning_rate": 1.593291716731456e-06, "loss": 0.5673, "step": 7047 }, { "epoch": 0.75, "grad_norm": 1.7671435682951309, "learning_rate": 1.5920009240217032e-06, "loss": 0.5463, "step": 7048 }, { "epoch": 0.75, "grad_norm": 1.9243633028389926, "learning_rate": 1.590710555378316e-06, "loss": 0.6245, "step": 7049 }, { "epoch": 0.75, "grad_norm": 1.7823186174206846, "learning_rate": 1.5894206109618576e-06, "loss": 0.6523, "step": 7050 }, { "epoch": 0.75, "grad_norm": 1.2035830219116879, "learning_rate": 1.5881310909328395e-06, "loss": 0.5078, "step": 7051 }, { "epoch": 0.75, "grad_norm": 1.9578574440492398, "learning_rate": 1.58684199545172e-06, "loss": 0.5189, "step": 7052 }, { "epoch": 0.75, "grad_norm": 1.6021988365372821, "learning_rate": 1.5855533246789045e-06, "loss": 0.5804, "step": 7053 }, { "epoch": 0.75, "grad_norm": 1.5948718584790764, "learning_rate": 1.5842650787747465e-06, "loss": 0.584, "step": 7054 }, { "epoch": 0.75, "grad_norm": 1.097606427012183, "learning_rate": 1.582977257899545e-06, "loss": 0.4931, "step": 7055 }, { "epoch": 0.75, "grad_norm": 2.169331524835489, "learning_rate": 1.5816898622135485e-06, "loss": 0.5855, "step": 7056 }, { "epoch": 0.75, "grad_norm": 2.1349231681795504, "learning_rate": 1.5804028918769488e-06, "loss": 0.5929, "step": 7057 }, { "epoch": 0.75, "grad_norm": 1.77985883031768, "learning_rate": 1.5791163470498877e-06, "loss": 0.6687, "step": 7058 }, { "epoch": 0.75, "grad_norm": 1.1693463043872134, "learning_rate": 1.5778302278924524e-06, "loss": 0.5114, "step": 7059 }, { "epoch": 0.75, "grad_norm": 1.697760798090249, "learning_rate": 1.5765445345646807e-06, "loss": 0.6274, "step": 7060 }, { "epoch": 0.75, "grad_norm": 1.1157319337239657, "learning_rate": 1.5752592672265543e-06, "loss": 0.4853, "step": 7061 }, { "epoch": 0.75, "grad_norm": 1.7962732169759754, "learning_rate": 1.5739744260380014e-06, "loss": 0.5997, "step": 7062 }, { "epoch": 0.75, "grad_norm": 1.921746811769098, "learning_rate": 1.5726900111588984e-06, "loss": 0.6127, "step": 7063 }, { "epoch": 0.75, "grad_norm": 1.816516277588312, "learning_rate": 1.5714060227490684e-06, "loss": 0.6747, "step": 7064 }, { "epoch": 0.75, "grad_norm": 1.6636988954489775, "learning_rate": 1.5701224609682819e-06, "loss": 0.6687, "step": 7065 }, { "epoch": 0.75, "grad_norm": 1.7912912622721637, "learning_rate": 1.5688393259762552e-06, "loss": 0.6715, "step": 7066 }, { "epoch": 0.75, "grad_norm": 2.0025663346346048, "learning_rate": 1.5675566179326535e-06, "loss": 0.5466, "step": 7067 }, { "epoch": 0.75, "grad_norm": 1.804681677884136, "learning_rate": 1.5662743369970857e-06, "loss": 0.6049, "step": 7068 }, { "epoch": 0.75, "grad_norm": 1.8297555508880705, "learning_rate": 1.5649924833291113e-06, "loss": 0.556, "step": 7069 }, { "epoch": 0.75, "grad_norm": 1.9647663547718095, "learning_rate": 1.5637110570882351e-06, "loss": 0.675, "step": 7070 }, { "epoch": 0.75, "grad_norm": 2.055439502328527, "learning_rate": 1.5624300584339052e-06, "loss": 0.7396, "step": 7071 }, { "epoch": 0.75, "grad_norm": 1.8568693665144815, "learning_rate": 1.5611494875255223e-06, "loss": 0.6446, "step": 7072 }, { "epoch": 0.75, "grad_norm": 1.865356213648072, "learning_rate": 1.55986934452243e-06, "loss": 0.6119, "step": 7073 }, { "epoch": 0.75, "grad_norm": 1.721047765863251, "learning_rate": 1.558589629583921e-06, "loss": 0.5784, "step": 7074 }, { "epoch": 0.75, "grad_norm": 1.7415422851856344, "learning_rate": 1.557310342869231e-06, "loss": 0.5614, "step": 7075 }, { "epoch": 0.75, "grad_norm": 1.8750229137707664, "learning_rate": 1.5560314845375486e-06, "loss": 0.5676, "step": 7076 }, { "epoch": 0.75, "grad_norm": 1.706632341572853, "learning_rate": 1.5547530547480045e-06, "loss": 0.6745, "step": 7077 }, { "epoch": 0.75, "grad_norm": 1.777544316825041, "learning_rate": 1.5534750536596759e-06, "loss": 0.5693, "step": 7078 }, { "epoch": 0.76, "grad_norm": 1.061045795295951, "learning_rate": 1.5521974814315893e-06, "loss": 0.4828, "step": 7079 }, { "epoch": 0.76, "grad_norm": 1.723588867186318, "learning_rate": 1.550920338222715e-06, "loss": 0.5632, "step": 7080 }, { "epoch": 0.76, "grad_norm": 1.7860309482116326, "learning_rate": 1.5496436241919726e-06, "loss": 0.5408, "step": 7081 }, { "epoch": 0.76, "grad_norm": 1.9137915616498362, "learning_rate": 1.5483673394982263e-06, "loss": 0.6636, "step": 7082 }, { "epoch": 0.76, "grad_norm": 1.9268174186475155, "learning_rate": 1.5470914843002876e-06, "loss": 0.617, "step": 7083 }, { "epoch": 0.76, "grad_norm": 1.63262550189934, "learning_rate": 1.5458160587569166e-06, "loss": 0.5756, "step": 7084 }, { "epoch": 0.76, "grad_norm": 1.6692883944426908, "learning_rate": 1.5445410630268143e-06, "loss": 0.6033, "step": 7085 }, { "epoch": 0.76, "grad_norm": 1.9177535680202982, "learning_rate": 1.543266497268633e-06, "loss": 0.65, "step": 7086 }, { "epoch": 0.76, "grad_norm": 1.8974534009866613, "learning_rate": 1.541992361640971e-06, "loss": 0.572, "step": 7087 }, { "epoch": 0.76, "grad_norm": 1.7562505263028376, "learning_rate": 1.5407186563023724e-06, "loss": 0.6514, "step": 7088 }, { "epoch": 0.76, "grad_norm": 1.7767507610143014, "learning_rate": 1.5394453814113268e-06, "loss": 0.622, "step": 7089 }, { "epoch": 0.76, "grad_norm": 1.1013766546272246, "learning_rate": 1.5381725371262724e-06, "loss": 0.4918, "step": 7090 }, { "epoch": 0.76, "grad_norm": 1.95199291246588, "learning_rate": 1.536900123605592e-06, "loss": 0.6339, "step": 7091 }, { "epoch": 0.76, "grad_norm": 1.6207658174954929, "learning_rate": 1.5356281410076134e-06, "loss": 0.5593, "step": 7092 }, { "epoch": 0.76, "grad_norm": 1.862509340369873, "learning_rate": 1.534356589490617e-06, "loss": 0.5687, "step": 7093 }, { "epoch": 0.76, "grad_norm": 1.9325767463390287, "learning_rate": 1.5330854692128229e-06, "loss": 0.5915, "step": 7094 }, { "epoch": 0.76, "grad_norm": 1.8712376664789836, "learning_rate": 1.5318147803324002e-06, "loss": 0.6917, "step": 7095 }, { "epoch": 0.76, "grad_norm": 1.6849243216836598, "learning_rate": 1.5305445230074635e-06, "loss": 0.5655, "step": 7096 }, { "epoch": 0.76, "grad_norm": 1.9004616836978614, "learning_rate": 1.5292746973960753e-06, "loss": 0.6774, "step": 7097 }, { "epoch": 0.76, "grad_norm": 1.6964209645659956, "learning_rate": 1.5280053036562431e-06, "loss": 0.6122, "step": 7098 }, { "epoch": 0.76, "grad_norm": 2.110368249951296, "learning_rate": 1.5267363419459219e-06, "loss": 0.6466, "step": 7099 }, { "epoch": 0.76, "grad_norm": 1.754331534303211, "learning_rate": 1.52546781242301e-06, "loss": 0.6665, "step": 7100 }, { "epoch": 0.76, "grad_norm": 1.5655137893690183, "learning_rate": 1.5241997152453542e-06, "loss": 0.5079, "step": 7101 }, { "epoch": 0.76, "grad_norm": 2.1843036846897284, "learning_rate": 1.5229320505707474e-06, "loss": 0.6357, "step": 7102 }, { "epoch": 0.76, "grad_norm": 1.7149821576839144, "learning_rate": 1.5216648185569293e-06, "loss": 0.5412, "step": 7103 }, { "epoch": 0.76, "grad_norm": 1.9518659201654331, "learning_rate": 1.5203980193615842e-06, "loss": 0.6354, "step": 7104 }, { "epoch": 0.76, "grad_norm": 1.703364541663484, "learning_rate": 1.5191316531423434e-06, "loss": 0.6299, "step": 7105 }, { "epoch": 0.76, "grad_norm": 1.7923662385597334, "learning_rate": 1.5178657200567843e-06, "loss": 0.5624, "step": 7106 }, { "epoch": 0.76, "grad_norm": 1.6239481852739064, "learning_rate": 1.5166002202624309e-06, "loss": 0.5859, "step": 7107 }, { "epoch": 0.76, "grad_norm": 1.9062457472285512, "learning_rate": 1.5153351539167517e-06, "loss": 0.6375, "step": 7108 }, { "epoch": 0.76, "grad_norm": 1.7444311410407676, "learning_rate": 1.5140705211771611e-06, "loss": 0.636, "step": 7109 }, { "epoch": 0.76, "grad_norm": 1.8409288658700986, "learning_rate": 1.5128063222010242e-06, "loss": 0.5741, "step": 7110 }, { "epoch": 0.76, "grad_norm": 1.880206217098405, "learning_rate": 1.511542557145646e-06, "loss": 0.6364, "step": 7111 }, { "epoch": 0.76, "grad_norm": 1.52149851682461, "learning_rate": 1.5102792261682813e-06, "loss": 0.5312, "step": 7112 }, { "epoch": 0.76, "grad_norm": 1.6356818659625894, "learning_rate": 1.5090163294261312e-06, "loss": 0.5112, "step": 7113 }, { "epoch": 0.76, "grad_norm": 2.047449352737496, "learning_rate": 1.5077538670763376e-06, "loss": 0.6315, "step": 7114 }, { "epoch": 0.76, "grad_norm": 1.7416846680115114, "learning_rate": 1.5064918392759937e-06, "loss": 0.5931, "step": 7115 }, { "epoch": 0.76, "grad_norm": 1.703291272253828, "learning_rate": 1.505230246182137e-06, "loss": 0.604, "step": 7116 }, { "epoch": 0.76, "grad_norm": 1.6505863619176269, "learning_rate": 1.5039690879517515e-06, "loss": 0.5381, "step": 7117 }, { "epoch": 0.76, "grad_norm": 1.1407577429910178, "learning_rate": 1.5027083647417657e-06, "loss": 0.4931, "step": 7118 }, { "epoch": 0.76, "grad_norm": 1.5599942651607266, "learning_rate": 1.5014480767090545e-06, "loss": 0.496, "step": 7119 }, { "epoch": 0.76, "grad_norm": 1.7476198787061032, "learning_rate": 1.5001882240104398e-06, "loss": 0.6253, "step": 7120 }, { "epoch": 0.76, "grad_norm": 1.151418073798663, "learning_rate": 1.4989288068026874e-06, "loss": 0.501, "step": 7121 }, { "epoch": 0.76, "grad_norm": 1.72609306623776, "learning_rate": 1.4976698252425115e-06, "loss": 0.6048, "step": 7122 }, { "epoch": 0.76, "grad_norm": 1.5906943039646273, "learning_rate": 1.4964112794865687e-06, "loss": 0.5943, "step": 7123 }, { "epoch": 0.76, "grad_norm": 1.8148310561974355, "learning_rate": 1.4951531696914639e-06, "loss": 0.5862, "step": 7124 }, { "epoch": 0.76, "grad_norm": 1.9411784227686448, "learning_rate": 1.4938954960137475e-06, "loss": 0.7441, "step": 7125 }, { "epoch": 0.76, "grad_norm": 1.8834754849058433, "learning_rate": 1.4926382586099137e-06, "loss": 0.6568, "step": 7126 }, { "epoch": 0.76, "grad_norm": 1.9313710490628082, "learning_rate": 1.4913814576364072e-06, "loss": 0.6279, "step": 7127 }, { "epoch": 0.76, "grad_norm": 1.8004156486955003, "learning_rate": 1.490125093249612e-06, "loss": 0.6297, "step": 7128 }, { "epoch": 0.76, "grad_norm": 1.620199066784469, "learning_rate": 1.4888691656058612e-06, "loss": 0.5211, "step": 7129 }, { "epoch": 0.76, "grad_norm": 1.8061957156679034, "learning_rate": 1.487613674861434e-06, "loss": 0.6162, "step": 7130 }, { "epoch": 0.76, "grad_norm": 1.8302385086482238, "learning_rate": 1.486358621172554e-06, "loss": 0.5768, "step": 7131 }, { "epoch": 0.76, "grad_norm": 1.7695459659013197, "learning_rate": 1.4851040046953913e-06, "loss": 0.6238, "step": 7132 }, { "epoch": 0.76, "grad_norm": 1.921037327807667, "learning_rate": 1.4838498255860607e-06, "loss": 0.6592, "step": 7133 }, { "epoch": 0.76, "grad_norm": 1.8396085824539814, "learning_rate": 1.4825960840006232e-06, "loss": 0.5929, "step": 7134 }, { "epoch": 0.76, "grad_norm": 1.7627851574018802, "learning_rate": 1.4813427800950852e-06, "loss": 0.5902, "step": 7135 }, { "epoch": 0.76, "grad_norm": 1.717509662794542, "learning_rate": 1.4800899140253988e-06, "loss": 0.5874, "step": 7136 }, { "epoch": 0.76, "grad_norm": 1.7856370061622953, "learning_rate": 1.4788374859474608e-06, "loss": 0.5675, "step": 7137 }, { "epoch": 0.76, "grad_norm": 1.0872523999074428, "learning_rate": 1.4775854960171155e-06, "loss": 0.4659, "step": 7138 }, { "epoch": 0.76, "grad_norm": 1.6981873960255707, "learning_rate": 1.4763339443901498e-06, "loss": 0.5711, "step": 7139 }, { "epoch": 0.76, "grad_norm": 1.72413230652536, "learning_rate": 1.475082831222298e-06, "loss": 0.5108, "step": 7140 }, { "epoch": 0.76, "grad_norm": 1.5963974717696339, "learning_rate": 1.4738321566692405e-06, "loss": 0.5436, "step": 7141 }, { "epoch": 0.76, "grad_norm": 1.7663740249192212, "learning_rate": 1.4725819208866006e-06, "loss": 0.5106, "step": 7142 }, { "epoch": 0.76, "grad_norm": 2.3094598961202406, "learning_rate": 1.4713321240299495e-06, "loss": 0.5935, "step": 7143 }, { "epoch": 0.76, "grad_norm": 1.1191124147441422, "learning_rate": 1.4700827662548018e-06, "loss": 0.5149, "step": 7144 }, { "epoch": 0.76, "grad_norm": 1.8039680052426754, "learning_rate": 1.4688338477166192e-06, "loss": 0.5977, "step": 7145 }, { "epoch": 0.76, "grad_norm": 1.7490665132330685, "learning_rate": 1.467585368570808e-06, "loss": 0.5802, "step": 7146 }, { "epoch": 0.76, "grad_norm": 1.7972134545794136, "learning_rate": 1.466337328972719e-06, "loss": 0.6153, "step": 7147 }, { "epoch": 0.76, "grad_norm": 1.8466650513027152, "learning_rate": 1.46508972907765e-06, "loss": 0.6586, "step": 7148 }, { "epoch": 0.76, "grad_norm": 1.0631060877362046, "learning_rate": 1.463842569040842e-06, "loss": 0.4689, "step": 7149 }, { "epoch": 0.76, "grad_norm": 1.1200856747041907, "learning_rate": 1.4625958490174835e-06, "loss": 0.4904, "step": 7150 }, { "epoch": 0.76, "grad_norm": 1.6708634667035893, "learning_rate": 1.4613495691627072e-06, "loss": 0.622, "step": 7151 }, { "epoch": 0.76, "grad_norm": 1.7586539239201404, "learning_rate": 1.46010372963159e-06, "loss": 0.5764, "step": 7152 }, { "epoch": 0.76, "grad_norm": 1.96292154769203, "learning_rate": 1.458858330579156e-06, "loss": 0.6425, "step": 7153 }, { "epoch": 0.76, "grad_norm": 1.6651242543441063, "learning_rate": 1.457613372160373e-06, "loss": 0.5776, "step": 7154 }, { "epoch": 0.76, "grad_norm": 1.1534378759891797, "learning_rate": 1.4563688545301546e-06, "loss": 0.5126, "step": 7155 }, { "epoch": 0.76, "grad_norm": 1.6598716661465147, "learning_rate": 1.4551247778433592e-06, "loss": 0.5504, "step": 7156 }, { "epoch": 0.76, "grad_norm": 1.6664294911812445, "learning_rate": 1.453881142254791e-06, "loss": 0.5385, "step": 7157 }, { "epoch": 0.76, "grad_norm": 1.682889400128046, "learning_rate": 1.4526379479191989e-06, "loss": 0.6088, "step": 7158 }, { "epoch": 0.76, "grad_norm": 1.8884496109620932, "learning_rate": 1.4513951949912764e-06, "loss": 0.6054, "step": 7159 }, { "epoch": 0.76, "grad_norm": 1.078312587514833, "learning_rate": 1.4501528836256628e-06, "loss": 0.4958, "step": 7160 }, { "epoch": 0.76, "grad_norm": 1.8607676939096751, "learning_rate": 1.4489110139769424e-06, "loss": 0.5315, "step": 7161 }, { "epoch": 0.76, "grad_norm": 1.7925544954729256, "learning_rate": 1.4476695861996437e-06, "loss": 0.5798, "step": 7162 }, { "epoch": 0.76, "grad_norm": 3.1056819017480506, "learning_rate": 1.446428600448242e-06, "loss": 0.6658, "step": 7163 }, { "epoch": 0.76, "grad_norm": 1.8670454090181172, "learning_rate": 1.4451880568771547e-06, "loss": 0.6237, "step": 7164 }, { "epoch": 0.76, "grad_norm": 1.6285604914886294, "learning_rate": 1.4439479556407477e-06, "loss": 0.5089, "step": 7165 }, { "epoch": 0.76, "grad_norm": 1.9165510437578768, "learning_rate": 1.4427082968933292e-06, "loss": 0.6758, "step": 7166 }, { "epoch": 0.76, "grad_norm": 1.7475341122563215, "learning_rate": 1.4414690807891534e-06, "loss": 0.5569, "step": 7167 }, { "epoch": 0.76, "grad_norm": 1.7736193795580146, "learning_rate": 1.4402303074824193e-06, "loss": 0.5493, "step": 7168 }, { "epoch": 0.76, "grad_norm": 1.886664923973382, "learning_rate": 1.4389919771272704e-06, "loss": 0.5562, "step": 7169 }, { "epoch": 0.76, "grad_norm": 1.0469330637211052, "learning_rate": 1.437754089877796e-06, "loss": 0.4777, "step": 7170 }, { "epoch": 0.76, "grad_norm": 1.7595546698802147, "learning_rate": 1.4365166458880302e-06, "loss": 0.5918, "step": 7171 }, { "epoch": 0.77, "grad_norm": 1.7959412232093273, "learning_rate": 1.4352796453119483e-06, "loss": 0.613, "step": 7172 }, { "epoch": 0.77, "grad_norm": 1.6615183743328255, "learning_rate": 1.4340430883034773e-06, "loss": 0.6088, "step": 7173 }, { "epoch": 0.77, "grad_norm": 1.8940344353361878, "learning_rate": 1.4328069750164835e-06, "loss": 0.6081, "step": 7174 }, { "epoch": 0.77, "grad_norm": 1.8072867663514374, "learning_rate": 1.4315713056047802e-06, "loss": 0.7003, "step": 7175 }, { "epoch": 0.77, "grad_norm": 1.9532703522553985, "learning_rate": 1.430336080222125e-06, "loss": 0.6197, "step": 7176 }, { "epoch": 0.77, "grad_norm": 1.1394863866826292, "learning_rate": 1.4291012990222204e-06, "loss": 0.5094, "step": 7177 }, { "epoch": 0.77, "grad_norm": 1.8494403730236233, "learning_rate": 1.4278669621587131e-06, "loss": 0.5688, "step": 7178 }, { "epoch": 0.77, "grad_norm": 1.6993734934477547, "learning_rate": 1.4266330697851955e-06, "loss": 0.5626, "step": 7179 }, { "epoch": 0.77, "grad_norm": 1.8235000505632897, "learning_rate": 1.425399622055203e-06, "loss": 0.5731, "step": 7180 }, { "epoch": 0.77, "grad_norm": 1.683079345735684, "learning_rate": 1.4241666191222182e-06, "loss": 0.5097, "step": 7181 }, { "epoch": 0.77, "grad_norm": 1.8169984016552472, "learning_rate": 1.4229340611396657e-06, "loss": 0.5349, "step": 7182 }, { "epoch": 0.77, "grad_norm": 1.844534960426272, "learning_rate": 1.4217019482609168e-06, "loss": 0.5645, "step": 7183 }, { "epoch": 0.77, "grad_norm": 1.7158952800917995, "learning_rate": 1.4204702806392862e-06, "loss": 0.5534, "step": 7184 }, { "epoch": 0.77, "grad_norm": 2.219513630515806, "learning_rate": 1.4192390584280347e-06, "loss": 0.5917, "step": 7185 }, { "epoch": 0.77, "grad_norm": 1.1343280534936873, "learning_rate": 1.4180082817803648e-06, "loss": 0.5067, "step": 7186 }, { "epoch": 0.77, "grad_norm": 2.1862712099011095, "learning_rate": 1.416777950849425e-06, "loss": 0.616, "step": 7187 }, { "epoch": 0.77, "grad_norm": 1.953007756142515, "learning_rate": 1.4155480657883103e-06, "loss": 0.618, "step": 7188 }, { "epoch": 0.77, "grad_norm": 1.7772502312440022, "learning_rate": 1.414318626750056e-06, "loss": 0.6371, "step": 7189 }, { "epoch": 0.77, "grad_norm": 2.137275097804621, "learning_rate": 1.4130896338876482e-06, "loss": 0.6327, "step": 7190 }, { "epoch": 0.77, "grad_norm": 1.8156371184371023, "learning_rate": 1.4118610873540112e-06, "loss": 0.5059, "step": 7191 }, { "epoch": 0.77, "grad_norm": 1.7395677552159523, "learning_rate": 1.4106329873020174e-06, "loss": 0.49, "step": 7192 }, { "epoch": 0.77, "grad_norm": 1.8261301099693876, "learning_rate": 1.4094053338844822e-06, "loss": 0.5952, "step": 7193 }, { "epoch": 0.77, "grad_norm": 1.8847348783615876, "learning_rate": 1.4081781272541657e-06, "loss": 0.6524, "step": 7194 }, { "epoch": 0.77, "grad_norm": 1.9569737052771823, "learning_rate": 1.4069513675637725e-06, "loss": 0.6317, "step": 7195 }, { "epoch": 0.77, "grad_norm": 1.923069189632972, "learning_rate": 1.4057250549659513e-06, "loss": 0.5846, "step": 7196 }, { "epoch": 0.77, "grad_norm": 1.73493740949964, "learning_rate": 1.4044991896132959e-06, "loss": 0.5613, "step": 7197 }, { "epoch": 0.77, "grad_norm": 1.7711457154774777, "learning_rate": 1.4032737716583432e-06, "loss": 0.5537, "step": 7198 }, { "epoch": 0.77, "grad_norm": 1.1101240328913529, "learning_rate": 1.4020488012535777e-06, "loss": 0.4989, "step": 7199 }, { "epoch": 0.77, "grad_norm": 1.6442721335083355, "learning_rate": 1.4008242785514227e-06, "loss": 0.5425, "step": 7200 }, { "epoch": 0.77, "grad_norm": 1.8888938155216726, "learning_rate": 1.399600203704249e-06, "loss": 0.6455, "step": 7201 }, { "epoch": 0.77, "grad_norm": 1.692235939063199, "learning_rate": 1.3983765768643725e-06, "loss": 0.549, "step": 7202 }, { "epoch": 0.77, "grad_norm": 1.6311839674068451, "learning_rate": 1.3971533981840523e-06, "loss": 0.5238, "step": 7203 }, { "epoch": 0.77, "grad_norm": 1.6953001096111435, "learning_rate": 1.3959306678154922e-06, "loss": 0.502, "step": 7204 }, { "epoch": 0.77, "grad_norm": 1.793852854166907, "learning_rate": 1.3947083859108384e-06, "loss": 0.5987, "step": 7205 }, { "epoch": 0.77, "grad_norm": 1.8362511300519686, "learning_rate": 1.3934865526221826e-06, "loss": 0.5511, "step": 7206 }, { "epoch": 0.77, "grad_norm": 1.5508611210730137, "learning_rate": 1.3922651681015625e-06, "loss": 0.581, "step": 7207 }, { "epoch": 0.77, "grad_norm": 1.9077952937322487, "learning_rate": 1.3910442325009582e-06, "loss": 0.6555, "step": 7208 }, { "epoch": 0.77, "grad_norm": 1.8337963292334487, "learning_rate": 1.3898237459722929e-06, "loss": 0.5656, "step": 7209 }, { "epoch": 0.77, "grad_norm": 1.9240990885165554, "learning_rate": 1.3886037086674347e-06, "loss": 0.5609, "step": 7210 }, { "epoch": 0.77, "grad_norm": 1.8095709902478583, "learning_rate": 1.387384120738197e-06, "loss": 0.5594, "step": 7211 }, { "epoch": 0.77, "grad_norm": 1.939808227475084, "learning_rate": 1.386164982336336e-06, "loss": 0.5751, "step": 7212 }, { "epoch": 0.77, "grad_norm": 1.9527763924584416, "learning_rate": 1.384946293613552e-06, "loss": 0.6808, "step": 7213 }, { "epoch": 0.77, "grad_norm": 1.9322590591664157, "learning_rate": 1.3837280547214922e-06, "loss": 0.636, "step": 7214 }, { "epoch": 0.77, "grad_norm": 1.9508922910709656, "learning_rate": 1.3825102658117406e-06, "loss": 0.6676, "step": 7215 }, { "epoch": 0.77, "grad_norm": 1.958105836251208, "learning_rate": 1.3812929270358334e-06, "loss": 0.6099, "step": 7216 }, { "epoch": 0.77, "grad_norm": 1.9248776612160052, "learning_rate": 1.3800760385452449e-06, "loss": 0.5644, "step": 7217 }, { "epoch": 0.77, "grad_norm": 2.5330916358134177, "learning_rate": 1.378859600491398e-06, "loss": 0.6613, "step": 7218 }, { "epoch": 0.77, "grad_norm": 1.6852299587331265, "learning_rate": 1.3776436130256565e-06, "loss": 0.6442, "step": 7219 }, { "epoch": 0.77, "grad_norm": 1.883244243917469, "learning_rate": 1.3764280762993288e-06, "loss": 0.6742, "step": 7220 }, { "epoch": 0.77, "grad_norm": 1.8778225967778162, "learning_rate": 1.3752129904636668e-06, "loss": 0.6567, "step": 7221 }, { "epoch": 0.77, "grad_norm": 1.8981727410996498, "learning_rate": 1.3739983556698683e-06, "loss": 0.6008, "step": 7222 }, { "epoch": 0.77, "grad_norm": 1.8396489257528528, "learning_rate": 1.3727841720690704e-06, "loss": 0.5736, "step": 7223 }, { "epoch": 0.77, "grad_norm": 1.9185527649120517, "learning_rate": 1.3715704398123613e-06, "loss": 0.5368, "step": 7224 }, { "epoch": 0.77, "grad_norm": 1.7365189346275025, "learning_rate": 1.3703571590507675e-06, "loss": 0.551, "step": 7225 }, { "epoch": 0.77, "grad_norm": 1.701979998410793, "learning_rate": 1.3691443299352602e-06, "loss": 0.5476, "step": 7226 }, { "epoch": 0.77, "grad_norm": 1.7429957182285505, "learning_rate": 1.3679319526167551e-06, "loss": 0.6657, "step": 7227 }, { "epoch": 0.77, "grad_norm": 2.0132504246777083, "learning_rate": 1.366720027246113e-06, "loss": 0.7188, "step": 7228 }, { "epoch": 0.77, "grad_norm": 1.7201716770887239, "learning_rate": 1.3655085539741335e-06, "loss": 0.4698, "step": 7229 }, { "epoch": 0.77, "grad_norm": 1.6288159861898397, "learning_rate": 1.3642975329515663e-06, "loss": 0.5393, "step": 7230 }, { "epoch": 0.77, "grad_norm": 1.0735950672461474, "learning_rate": 1.3630869643291005e-06, "loss": 0.4746, "step": 7231 }, { "epoch": 0.77, "grad_norm": 1.754219483245319, "learning_rate": 1.3618768482573713e-06, "loss": 0.6344, "step": 7232 }, { "epoch": 0.77, "grad_norm": 2.300424834525195, "learning_rate": 1.3606671848869563e-06, "loss": 0.5665, "step": 7233 }, { "epoch": 0.77, "grad_norm": 1.5806127913374257, "learning_rate": 1.3594579743683773e-06, "loss": 0.5238, "step": 7234 }, { "epoch": 0.77, "grad_norm": 1.72291501016302, "learning_rate": 1.3582492168520988e-06, "loss": 0.5499, "step": 7235 }, { "epoch": 0.77, "grad_norm": 1.9744122235133053, "learning_rate": 1.357040912488531e-06, "loss": 0.7308, "step": 7236 }, { "epoch": 0.77, "grad_norm": 1.097442870840678, "learning_rate": 1.3558330614280258e-06, "loss": 0.4852, "step": 7237 }, { "epoch": 0.77, "grad_norm": 1.784681261326236, "learning_rate": 1.3546256638208788e-06, "loss": 0.6413, "step": 7238 }, { "epoch": 0.77, "grad_norm": 2.004171103398907, "learning_rate": 1.3534187198173288e-06, "loss": 0.6365, "step": 7239 }, { "epoch": 0.77, "grad_norm": 1.4683191154788457, "learning_rate": 1.3522122295675616e-06, "loss": 0.6195, "step": 7240 }, { "epoch": 0.77, "grad_norm": 1.1386100787294364, "learning_rate": 1.3510061932217034e-06, "loss": 0.492, "step": 7241 }, { "epoch": 0.77, "grad_norm": 1.70760321353936, "learning_rate": 1.349800610929825e-06, "loss": 0.6714, "step": 7242 }, { "epoch": 0.77, "grad_norm": 1.7731068218358232, "learning_rate": 1.3485954828419372e-06, "loss": 0.5472, "step": 7243 }, { "epoch": 0.77, "grad_norm": 1.1124344095601797, "learning_rate": 1.3473908091079995e-06, "loss": 0.5101, "step": 7244 }, { "epoch": 0.77, "grad_norm": 1.764910709355728, "learning_rate": 1.3461865898779125e-06, "loss": 0.6853, "step": 7245 }, { "epoch": 0.77, "grad_norm": 1.6716776598476508, "learning_rate": 1.3449828253015201e-06, "loss": 0.4981, "step": 7246 }, { "epoch": 0.77, "grad_norm": 1.653779472610418, "learning_rate": 1.3437795155286105e-06, "loss": 0.593, "step": 7247 }, { "epoch": 0.77, "grad_norm": 1.8395260125134756, "learning_rate": 1.3425766607089135e-06, "loss": 0.6037, "step": 7248 }, { "epoch": 0.77, "grad_norm": 1.9877623060000922, "learning_rate": 1.3413742609921043e-06, "loss": 0.6064, "step": 7249 }, { "epoch": 0.77, "grad_norm": 1.7426808385655232, "learning_rate": 1.3401723165278007e-06, "loss": 0.5884, "step": 7250 }, { "epoch": 0.77, "grad_norm": 1.7118277682063354, "learning_rate": 1.3389708274655644e-06, "loss": 0.6037, "step": 7251 }, { "epoch": 0.77, "grad_norm": 1.8435399098938126, "learning_rate": 1.3377697939548983e-06, "loss": 0.6118, "step": 7252 }, { "epoch": 0.77, "grad_norm": 1.8806774605265042, "learning_rate": 1.336569216145252e-06, "loss": 0.4709, "step": 7253 }, { "epoch": 0.77, "grad_norm": 1.8529257883789263, "learning_rate": 1.3353690941860153e-06, "loss": 0.6033, "step": 7254 }, { "epoch": 0.77, "grad_norm": 1.050125958480745, "learning_rate": 1.3341694282265232e-06, "loss": 0.4927, "step": 7255 }, { "epoch": 0.77, "grad_norm": 1.5812425424210161, "learning_rate": 1.3329702184160515e-06, "loss": 0.5262, "step": 7256 }, { "epoch": 0.77, "grad_norm": 1.7188015749241188, "learning_rate": 1.3317714649038254e-06, "loss": 0.5393, "step": 7257 }, { "epoch": 0.77, "grad_norm": 1.912900181245329, "learning_rate": 1.330573167839005e-06, "loss": 0.567, "step": 7258 }, { "epoch": 0.77, "grad_norm": 1.8086243890045124, "learning_rate": 1.3293753273706988e-06, "loss": 0.6381, "step": 7259 }, { "epoch": 0.77, "grad_norm": 1.924318151893065, "learning_rate": 1.3281779436479575e-06, "loss": 0.6383, "step": 7260 }, { "epoch": 0.77, "grad_norm": 1.8107670993802258, "learning_rate": 1.3269810168197739e-06, "loss": 0.6437, "step": 7261 }, { "epoch": 0.77, "grad_norm": 1.152380360626305, "learning_rate": 1.325784547035086e-06, "loss": 0.5095, "step": 7262 }, { "epoch": 0.77, "grad_norm": 1.849078297265408, "learning_rate": 1.3245885344427728e-06, "loss": 0.6048, "step": 7263 }, { "epoch": 0.77, "grad_norm": 1.7578152345201827, "learning_rate": 1.3233929791916577e-06, "loss": 0.5479, "step": 7264 }, { "epoch": 0.77, "grad_norm": 1.7127475259459934, "learning_rate": 1.3221978814305069e-06, "loss": 0.5535, "step": 7265 }, { "epoch": 0.78, "grad_norm": 1.1311731649077896, "learning_rate": 1.3210032413080292e-06, "loss": 0.4779, "step": 7266 }, { "epoch": 0.78, "grad_norm": 1.1173727861664366, "learning_rate": 1.3198090589728774e-06, "loss": 0.5106, "step": 7267 }, { "epoch": 0.78, "grad_norm": 1.6129639704708454, "learning_rate": 1.3186153345736458e-06, "loss": 0.543, "step": 7268 }, { "epoch": 0.78, "grad_norm": 1.9490757143951527, "learning_rate": 1.317422068258874e-06, "loss": 0.576, "step": 7269 }, { "epoch": 0.78, "grad_norm": 1.9037821325701005, "learning_rate": 1.3162292601770422e-06, "loss": 0.5786, "step": 7270 }, { "epoch": 0.78, "grad_norm": 1.7342707028337092, "learning_rate": 1.3150369104765754e-06, "loss": 0.6081, "step": 7271 }, { "epoch": 0.78, "grad_norm": 1.6779240224412804, "learning_rate": 1.31384501930584e-06, "loss": 0.5877, "step": 7272 }, { "epoch": 0.78, "grad_norm": 1.7646505629525366, "learning_rate": 1.3126535868131473e-06, "loss": 0.6327, "step": 7273 }, { "epoch": 0.78, "grad_norm": 1.7237841994929204, "learning_rate": 1.3114626131467496e-06, "loss": 0.5745, "step": 7274 }, { "epoch": 0.78, "grad_norm": 1.0973369621696176, "learning_rate": 1.3102720984548433e-06, "loss": 0.4873, "step": 7275 }, { "epoch": 0.78, "grad_norm": 1.5496024810445481, "learning_rate": 1.3090820428855666e-06, "loss": 0.5437, "step": 7276 }, { "epoch": 0.78, "grad_norm": 1.8878774679453296, "learning_rate": 1.3078924465870024e-06, "loss": 0.5985, "step": 7277 }, { "epoch": 0.78, "grad_norm": 1.1227428303889426, "learning_rate": 1.3067033097071746e-06, "loss": 0.494, "step": 7278 }, { "epoch": 0.78, "grad_norm": 1.6732650653558248, "learning_rate": 1.305514632394051e-06, "loss": 0.5978, "step": 7279 }, { "epoch": 0.78, "grad_norm": 1.8932432517597724, "learning_rate": 1.3043264147955408e-06, "loss": 0.5555, "step": 7280 }, { "epoch": 0.78, "grad_norm": 1.8141580412073142, "learning_rate": 1.303138657059499e-06, "loss": 0.5756, "step": 7281 }, { "epoch": 0.78, "grad_norm": 2.1342389229502294, "learning_rate": 1.3019513593337196e-06, "loss": 0.5272, "step": 7282 }, { "epoch": 0.78, "grad_norm": 1.6936369479012356, "learning_rate": 1.300764521765942e-06, "loss": 0.5279, "step": 7283 }, { "epoch": 0.78, "grad_norm": 1.712803957179147, "learning_rate": 1.299578144503848e-06, "loss": 0.5568, "step": 7284 }, { "epoch": 0.78, "grad_norm": 1.8497974349287887, "learning_rate": 1.298392227695061e-06, "loss": 0.6474, "step": 7285 }, { "epoch": 0.78, "grad_norm": 1.7312530870118537, "learning_rate": 1.2972067714871479e-06, "loss": 0.5813, "step": 7286 }, { "epoch": 0.78, "grad_norm": 1.8602247971506518, "learning_rate": 1.2960217760276183e-06, "loss": 0.5473, "step": 7287 }, { "epoch": 0.78, "grad_norm": 1.893198102190311, "learning_rate": 1.2948372414639244e-06, "loss": 0.5906, "step": 7288 }, { "epoch": 0.78, "grad_norm": 1.8630083287284631, "learning_rate": 1.2936531679434605e-06, "loss": 0.6532, "step": 7289 }, { "epoch": 0.78, "grad_norm": 1.9037962725122402, "learning_rate": 1.292469555613564e-06, "loss": 0.611, "step": 7290 }, { "epoch": 0.78, "grad_norm": 1.808979459724165, "learning_rate": 1.2912864046215162e-06, "loss": 0.6041, "step": 7291 }, { "epoch": 0.78, "grad_norm": 1.7773356240395521, "learning_rate": 1.2901037151145385e-06, "loss": 0.5366, "step": 7292 }, { "epoch": 0.78, "grad_norm": 1.0649617761758936, "learning_rate": 1.2889214872397958e-06, "loss": 0.4904, "step": 7293 }, { "epoch": 0.78, "grad_norm": 1.759348474663686, "learning_rate": 1.287739721144397e-06, "loss": 0.5689, "step": 7294 }, { "epoch": 0.78, "grad_norm": 1.768049058099605, "learning_rate": 1.2865584169753915e-06, "loss": 0.6346, "step": 7295 }, { "epoch": 0.78, "grad_norm": 1.6028192592365158, "learning_rate": 1.2853775748797727e-06, "loss": 0.5432, "step": 7296 }, { "epoch": 0.78, "grad_norm": 1.7221550729611277, "learning_rate": 1.284197195004475e-06, "loss": 0.5287, "step": 7297 }, { "epoch": 0.78, "grad_norm": 1.7303021409002133, "learning_rate": 1.2830172774963778e-06, "loss": 0.6162, "step": 7298 }, { "epoch": 0.78, "grad_norm": 1.448027370516427, "learning_rate": 1.2818378225022998e-06, "loss": 0.4859, "step": 7299 }, { "epoch": 0.78, "grad_norm": 1.0934073531917108, "learning_rate": 1.280658830169006e-06, "loss": 0.475, "step": 7300 }, { "epoch": 0.78, "grad_norm": 1.8709221388792003, "learning_rate": 1.2794803006431984e-06, "loss": 0.6364, "step": 7301 }, { "epoch": 0.78, "grad_norm": 1.9744510232171029, "learning_rate": 1.2783022340715245e-06, "loss": 0.6991, "step": 7302 }, { "epoch": 0.78, "grad_norm": 1.753094930799586, "learning_rate": 1.2771246306005769e-06, "loss": 0.4734, "step": 7303 }, { "epoch": 0.78, "grad_norm": 1.0749113298790598, "learning_rate": 1.275947490376887e-06, "loss": 0.4778, "step": 7304 }, { "epoch": 0.78, "grad_norm": 1.6778003687179777, "learning_rate": 1.2747708135469293e-06, "loss": 0.513, "step": 7305 }, { "epoch": 0.78, "grad_norm": 1.8461689976372542, "learning_rate": 1.273594600257121e-06, "loss": 0.5847, "step": 7306 }, { "epoch": 0.78, "grad_norm": 1.593557448215429, "learning_rate": 1.272418850653821e-06, "loss": 0.6288, "step": 7307 }, { "epoch": 0.78, "grad_norm": 1.8334420650208358, "learning_rate": 1.271243564883331e-06, "loss": 0.5778, "step": 7308 }, { "epoch": 0.78, "grad_norm": 1.8725588428167248, "learning_rate": 1.2700687430918956e-06, "loss": 0.5832, "step": 7309 }, { "epoch": 0.78, "grad_norm": 1.71979362186327, "learning_rate": 1.2688943854257002e-06, "loss": 0.5314, "step": 7310 }, { "epoch": 0.78, "grad_norm": 1.6921254344961092, "learning_rate": 1.2677204920308732e-06, "loss": 0.604, "step": 7311 }, { "epoch": 0.78, "grad_norm": 1.1000166066408381, "learning_rate": 1.266547063053486e-06, "loss": 0.5, "step": 7312 }, { "epoch": 0.78, "grad_norm": 1.7220933218693577, "learning_rate": 1.2653740986395512e-06, "loss": 0.4973, "step": 7313 }, { "epoch": 0.78, "grad_norm": 1.735142276586004, "learning_rate": 1.2642015989350248e-06, "loss": 0.5601, "step": 7314 }, { "epoch": 0.78, "grad_norm": 1.7736158530003625, "learning_rate": 1.263029564085802e-06, "loss": 0.5945, "step": 7315 }, { "epoch": 0.78, "grad_norm": 1.6902398262450493, "learning_rate": 1.261857994237723e-06, "loss": 0.5736, "step": 7316 }, { "epoch": 0.78, "grad_norm": 1.8423575138861579, "learning_rate": 1.2606868895365692e-06, "loss": 0.6784, "step": 7317 }, { "epoch": 0.78, "grad_norm": 1.808759613167532, "learning_rate": 1.259516250128065e-06, "loss": 0.7207, "step": 7318 }, { "epoch": 0.78, "grad_norm": 1.8094814806059865, "learning_rate": 1.2583460761578742e-06, "loss": 0.5531, "step": 7319 }, { "epoch": 0.78, "grad_norm": 1.7961951265909324, "learning_rate": 1.2571763677716076e-06, "loss": 0.6746, "step": 7320 }, { "epoch": 0.78, "grad_norm": 1.9317283831017484, "learning_rate": 1.2560071251148143e-06, "loss": 0.6224, "step": 7321 }, { "epoch": 0.78, "grad_norm": 1.636122501490957, "learning_rate": 1.2548383483329852e-06, "loss": 0.5499, "step": 7322 }, { "epoch": 0.78, "grad_norm": 1.6956521557188056, "learning_rate": 1.2536700375715543e-06, "loss": 0.5276, "step": 7323 }, { "epoch": 0.78, "grad_norm": 2.0475058503442507, "learning_rate": 1.252502192975899e-06, "loss": 0.6128, "step": 7324 }, { "epoch": 0.78, "grad_norm": 1.5431859871392564, "learning_rate": 1.2513348146913363e-06, "loss": 0.583, "step": 7325 }, { "epoch": 0.78, "grad_norm": 1.7383275843847465, "learning_rate": 1.2501679028631259e-06, "loss": 0.595, "step": 7326 }, { "epoch": 0.78, "grad_norm": 1.817072052368523, "learning_rate": 1.24900145763647e-06, "loss": 0.6436, "step": 7327 }, { "epoch": 0.78, "grad_norm": 1.8779295381333223, "learning_rate": 1.2478354791565135e-06, "loss": 0.5836, "step": 7328 }, { "epoch": 0.78, "grad_norm": 2.0674792761859515, "learning_rate": 1.2466699675683418e-06, "loss": 0.5693, "step": 7329 }, { "epoch": 0.78, "grad_norm": 2.0848771797799266, "learning_rate": 1.2455049230169808e-06, "loss": 0.5729, "step": 7330 }, { "epoch": 0.78, "grad_norm": 1.9069193053070308, "learning_rate": 1.2443403456474017e-06, "loss": 0.553, "step": 7331 }, { "epoch": 0.78, "grad_norm": 1.68350482340949, "learning_rate": 1.2431762356045157e-06, "loss": 0.6202, "step": 7332 }, { "epoch": 0.78, "grad_norm": 1.9765957622293353, "learning_rate": 1.2420125930331755e-06, "loss": 0.6131, "step": 7333 }, { "epoch": 0.78, "grad_norm": 1.154516191311745, "learning_rate": 1.240849418078177e-06, "loss": 0.4948, "step": 7334 }, { "epoch": 0.78, "grad_norm": 1.6603745180806437, "learning_rate": 1.239686710884257e-06, "loss": 0.5762, "step": 7335 }, { "epoch": 0.78, "grad_norm": 1.646380040751837, "learning_rate": 1.2385244715960925e-06, "loss": 0.5413, "step": 7336 }, { "epoch": 0.78, "grad_norm": 1.8325895346730967, "learning_rate": 1.2373627003583077e-06, "loss": 0.6236, "step": 7337 }, { "epoch": 0.78, "grad_norm": 1.9611914267373496, "learning_rate": 1.2362013973154624e-06, "loss": 0.5703, "step": 7338 }, { "epoch": 0.78, "grad_norm": 1.6074247428511663, "learning_rate": 1.235040562612061e-06, "loss": 0.5362, "step": 7339 }, { "epoch": 0.78, "grad_norm": 1.142800280746993, "learning_rate": 1.2338801963925495e-06, "loss": 0.4963, "step": 7340 }, { "epoch": 0.78, "grad_norm": 1.8008373486911438, "learning_rate": 1.2327202988013148e-06, "loss": 0.5949, "step": 7341 }, { "epoch": 0.78, "grad_norm": 1.7293768168938661, "learning_rate": 1.231560869982687e-06, "loss": 0.57, "step": 7342 }, { "epoch": 0.78, "grad_norm": 1.7760504561841626, "learning_rate": 1.2304019100809372e-06, "loss": 0.6779, "step": 7343 }, { "epoch": 0.78, "grad_norm": 1.892383686386467, "learning_rate": 1.2292434192402752e-06, "loss": 0.632, "step": 7344 }, { "epoch": 0.78, "grad_norm": 1.865926904781998, "learning_rate": 1.2280853976048573e-06, "loss": 0.6025, "step": 7345 }, { "epoch": 0.78, "grad_norm": 1.0687751668692391, "learning_rate": 1.226927845318779e-06, "loss": 0.4711, "step": 7346 }, { "epoch": 0.78, "grad_norm": 1.5765733989937118, "learning_rate": 1.2257707625260767e-06, "loss": 0.5094, "step": 7347 }, { "epoch": 0.78, "grad_norm": 1.8346185062322669, "learning_rate": 1.2246141493707303e-06, "loss": 0.6065, "step": 7348 }, { "epoch": 0.78, "grad_norm": 1.7494769335255775, "learning_rate": 1.2234580059966599e-06, "loss": 0.5992, "step": 7349 }, { "epoch": 0.78, "grad_norm": 1.9823376661353491, "learning_rate": 1.2223023325477274e-06, "loss": 0.6066, "step": 7350 }, { "epoch": 0.78, "grad_norm": 1.697923014765599, "learning_rate": 1.2211471291677363e-06, "loss": 0.5443, "step": 7351 }, { "epoch": 0.78, "grad_norm": 1.766438651474485, "learning_rate": 1.2199923960004317e-06, "loss": 0.6419, "step": 7352 }, { "epoch": 0.78, "grad_norm": 1.1024432519017213, "learning_rate": 1.2188381331894982e-06, "loss": 0.4783, "step": 7353 }, { "epoch": 0.78, "grad_norm": 1.7832639524133551, "learning_rate": 1.2176843408785677e-06, "loss": 0.5636, "step": 7354 }, { "epoch": 0.78, "grad_norm": 1.6143189347003846, "learning_rate": 1.2165310192112073e-06, "loss": 0.5362, "step": 7355 }, { "epoch": 0.78, "grad_norm": 1.7933826530893975, "learning_rate": 1.2153781683309284e-06, "loss": 0.5676, "step": 7356 }, { "epoch": 0.78, "grad_norm": 1.836571922861242, "learning_rate": 1.2142257883811842e-06, "loss": 0.6226, "step": 7357 }, { "epoch": 0.78, "grad_norm": 1.7888820001599521, "learning_rate": 1.2130738795053664e-06, "loss": 0.6086, "step": 7358 }, { "epoch": 0.78, "grad_norm": 1.098434846519314, "learning_rate": 1.2119224418468106e-06, "loss": 0.4939, "step": 7359 }, { "epoch": 0.79, "grad_norm": 1.7573183812228559, "learning_rate": 1.2107714755487932e-06, "loss": 0.6746, "step": 7360 }, { "epoch": 0.79, "grad_norm": 1.899960110484354, "learning_rate": 1.2096209807545328e-06, "loss": 0.5826, "step": 7361 }, { "epoch": 0.79, "grad_norm": 1.9707311591849268, "learning_rate": 1.2084709576071885e-06, "loss": 0.6305, "step": 7362 }, { "epoch": 0.79, "grad_norm": 1.694432432870934, "learning_rate": 1.2073214062498595e-06, "loss": 0.5301, "step": 7363 }, { "epoch": 0.79, "grad_norm": 1.9548905503111103, "learning_rate": 1.2061723268255888e-06, "loss": 0.5818, "step": 7364 }, { "epoch": 0.79, "grad_norm": 1.0550924362767689, "learning_rate": 1.205023719477359e-06, "loss": 0.5045, "step": 7365 }, { "epoch": 0.79, "grad_norm": 1.9191595371334778, "learning_rate": 1.2038755843480943e-06, "loss": 0.5722, "step": 7366 }, { "epoch": 0.79, "grad_norm": 1.7428075747789946, "learning_rate": 1.2027279215806598e-06, "loss": 0.5281, "step": 7367 }, { "epoch": 0.79, "grad_norm": 1.833499059360867, "learning_rate": 1.2015807313178628e-06, "loss": 0.6128, "step": 7368 }, { "epoch": 0.79, "grad_norm": 1.9340185950560729, "learning_rate": 1.2004340137024512e-06, "loss": 0.5972, "step": 7369 }, { "epoch": 0.79, "grad_norm": 1.8799798587926824, "learning_rate": 1.1992877688771126e-06, "loss": 0.621, "step": 7370 }, { "epoch": 0.79, "grad_norm": 1.8756772465712286, "learning_rate": 1.1981419969844804e-06, "loss": 0.6269, "step": 7371 }, { "epoch": 0.79, "grad_norm": 1.73164500901683, "learning_rate": 1.196996698167125e-06, "loss": 0.5532, "step": 7372 }, { "epoch": 0.79, "grad_norm": 1.9006967030401356, "learning_rate": 1.1958518725675571e-06, "loss": 0.6223, "step": 7373 }, { "epoch": 0.79, "grad_norm": 1.951759219151778, "learning_rate": 1.1947075203282322e-06, "loss": 0.5674, "step": 7374 }, { "epoch": 0.79, "grad_norm": 1.8262159448597313, "learning_rate": 1.1935636415915435e-06, "loss": 0.5295, "step": 7375 }, { "epoch": 0.79, "grad_norm": 1.856345693307724, "learning_rate": 1.1924202364998284e-06, "loss": 0.6084, "step": 7376 }, { "epoch": 0.79, "grad_norm": 1.8583910669108075, "learning_rate": 1.191277305195363e-06, "loss": 0.5309, "step": 7377 }, { "epoch": 0.79, "grad_norm": 1.6837952588435356, "learning_rate": 1.1901348478203655e-06, "loss": 0.539, "step": 7378 }, { "epoch": 0.79, "grad_norm": 1.8136428205947492, "learning_rate": 1.1889928645169952e-06, "loss": 0.6024, "step": 7379 }, { "epoch": 0.79, "grad_norm": 1.9673619099116404, "learning_rate": 1.1878513554273513e-06, "loss": 0.6487, "step": 7380 }, { "epoch": 0.79, "grad_norm": 1.845807699224345, "learning_rate": 1.1867103206934756e-06, "loss": 0.6003, "step": 7381 }, { "epoch": 0.79, "grad_norm": 1.697998138615772, "learning_rate": 1.1855697604573496e-06, "loss": 0.5749, "step": 7382 }, { "epoch": 0.79, "grad_norm": 1.748790761636667, "learning_rate": 1.1844296748608969e-06, "loss": 0.5727, "step": 7383 }, { "epoch": 0.79, "grad_norm": 3.9227474626541405, "learning_rate": 1.1832900640459805e-06, "loss": 0.4853, "step": 7384 }, { "epoch": 0.79, "grad_norm": 1.7657061627868538, "learning_rate": 1.1821509281544058e-06, "loss": 0.5895, "step": 7385 }, { "epoch": 0.79, "grad_norm": 1.914108847971546, "learning_rate": 1.181012267327918e-06, "loss": 0.6262, "step": 7386 }, { "epoch": 0.79, "grad_norm": 2.000925380011607, "learning_rate": 1.1798740817082045e-06, "loss": 0.5814, "step": 7387 }, { "epoch": 0.79, "grad_norm": 2.09716189936598, "learning_rate": 1.1787363714368915e-06, "loss": 0.6021, "step": 7388 }, { "epoch": 0.79, "grad_norm": 2.128924370127195, "learning_rate": 1.1775991366555489e-06, "loss": 0.6871, "step": 7389 }, { "epoch": 0.79, "grad_norm": 1.616948760981434, "learning_rate": 1.1764623775056845e-06, "loss": 0.5315, "step": 7390 }, { "epoch": 0.79, "grad_norm": 1.7210478644094107, "learning_rate": 1.1753260941287492e-06, "loss": 0.6288, "step": 7391 }, { "epoch": 0.79, "grad_norm": 1.7818402893544887, "learning_rate": 1.1741902866661326e-06, "loss": 0.5577, "step": 7392 }, { "epoch": 0.79, "grad_norm": 1.7322541635296553, "learning_rate": 1.1730549552591675e-06, "loss": 0.599, "step": 7393 }, { "epoch": 0.79, "grad_norm": 1.9987383068952436, "learning_rate": 1.1719201000491254e-06, "loss": 0.516, "step": 7394 }, { "epoch": 0.79, "grad_norm": 1.989704484700866, "learning_rate": 1.17078572117722e-06, "loss": 0.6259, "step": 7395 }, { "epoch": 0.79, "grad_norm": 1.7856304592931171, "learning_rate": 1.169651818784604e-06, "loss": 0.5831, "step": 7396 }, { "epoch": 0.79, "grad_norm": 2.0341837489101504, "learning_rate": 1.1685183930123729e-06, "loss": 0.5631, "step": 7397 }, { "epoch": 0.79, "grad_norm": 1.757829303629003, "learning_rate": 1.1673854440015615e-06, "loss": 0.5689, "step": 7398 }, { "epoch": 0.79, "grad_norm": 1.7152857197177067, "learning_rate": 1.166252971893146e-06, "loss": 0.6507, "step": 7399 }, { "epoch": 0.79, "grad_norm": 1.8997007768175183, "learning_rate": 1.165120976828042e-06, "loss": 0.559, "step": 7400 }, { "epoch": 0.79, "grad_norm": 1.8140094985612805, "learning_rate": 1.1639894589471074e-06, "loss": 0.619, "step": 7401 }, { "epoch": 0.79, "grad_norm": 1.1132718412400813, "learning_rate": 1.1628584183911402e-06, "loss": 0.4748, "step": 7402 }, { "epoch": 0.79, "grad_norm": 1.750687744175268, "learning_rate": 1.1617278553008781e-06, "loss": 0.6048, "step": 7403 }, { "epoch": 0.79, "grad_norm": 1.7126067084272032, "learning_rate": 1.1605977698170001e-06, "loss": 0.5998, "step": 7404 }, { "epoch": 0.79, "grad_norm": 1.9001793165707053, "learning_rate": 1.1594681620801263e-06, "loss": 0.5401, "step": 7405 }, { "epoch": 0.79, "grad_norm": 1.8543854425701949, "learning_rate": 1.1583390322308164e-06, "loss": 0.562, "step": 7406 }, { "epoch": 0.79, "grad_norm": 1.692214053091297, "learning_rate": 1.1572103804095708e-06, "loss": 0.5816, "step": 7407 }, { "epoch": 0.79, "grad_norm": 1.9716087806973275, "learning_rate": 1.1560822067568312e-06, "loss": 0.5118, "step": 7408 }, { "epoch": 0.79, "grad_norm": 1.7880442989431824, "learning_rate": 1.154954511412979e-06, "loss": 0.5411, "step": 7409 }, { "epoch": 0.79, "grad_norm": 1.6724240043556668, "learning_rate": 1.153827294518336e-06, "loss": 0.5343, "step": 7410 }, { "epoch": 0.79, "grad_norm": 1.835250619233051, "learning_rate": 1.1527005562131655e-06, "loss": 0.6079, "step": 7411 }, { "epoch": 0.79, "grad_norm": 1.7781679409201254, "learning_rate": 1.1515742966376697e-06, "loss": 0.5419, "step": 7412 }, { "epoch": 0.79, "grad_norm": 1.9513877613774346, "learning_rate": 1.1504485159319922e-06, "loss": 0.6907, "step": 7413 }, { "epoch": 0.79, "grad_norm": 1.6949615502992108, "learning_rate": 1.1493232142362177e-06, "loss": 0.5585, "step": 7414 }, { "epoch": 0.79, "grad_norm": 1.8891030236680064, "learning_rate": 1.1481983916903715e-06, "loss": 0.5375, "step": 7415 }, { "epoch": 0.79, "grad_norm": 1.645807552963527, "learning_rate": 1.1470740484344134e-06, "loss": 0.505, "step": 7416 }, { "epoch": 0.79, "grad_norm": 1.1361279359791256, "learning_rate": 1.1459501846082533e-06, "loss": 0.4898, "step": 7417 }, { "epoch": 0.79, "grad_norm": 1.7086014669599798, "learning_rate": 1.144826800351735e-06, "loss": 0.5773, "step": 7418 }, { "epoch": 0.79, "grad_norm": 1.7768511552886208, "learning_rate": 1.1437038958046442e-06, "loss": 0.5324, "step": 7419 }, { "epoch": 0.79, "grad_norm": 1.8822755971897183, "learning_rate": 1.1425814711067068e-06, "loss": 0.6849, "step": 7420 }, { "epoch": 0.79, "grad_norm": 2.0093553619264743, "learning_rate": 1.1414595263975885e-06, "loss": 0.5922, "step": 7421 }, { "epoch": 0.79, "grad_norm": 1.8034836258239928, "learning_rate": 1.1403380618168974e-06, "loss": 0.6055, "step": 7422 }, { "epoch": 0.79, "grad_norm": 1.8780099684585292, "learning_rate": 1.1392170775041788e-06, "loss": 0.6026, "step": 7423 }, { "epoch": 0.79, "grad_norm": 1.0507318489372284, "learning_rate": 1.1380965735989208e-06, "loss": 0.4704, "step": 7424 }, { "epoch": 0.79, "grad_norm": 1.7846726623179834, "learning_rate": 1.1369765502405495e-06, "loss": 0.5977, "step": 7425 }, { "epoch": 0.79, "grad_norm": 1.8070623704445934, "learning_rate": 1.1358570075684339e-06, "loss": 0.5991, "step": 7426 }, { "epoch": 0.79, "grad_norm": 1.7692453259669756, "learning_rate": 1.1347379457218805e-06, "loss": 0.622, "step": 7427 }, { "epoch": 0.79, "grad_norm": 1.713238759325656, "learning_rate": 1.1336193648401372e-06, "loss": 0.5743, "step": 7428 }, { "epoch": 0.79, "grad_norm": 2.105492302287005, "learning_rate": 1.132501265062394e-06, "loss": 0.6438, "step": 7429 }, { "epoch": 0.79, "grad_norm": 1.8665573515079679, "learning_rate": 1.1313836465277755e-06, "loss": 0.6241, "step": 7430 }, { "epoch": 0.79, "grad_norm": 1.791674109708273, "learning_rate": 1.1302665093753522e-06, "loss": 0.5723, "step": 7431 }, { "epoch": 0.79, "grad_norm": 1.7944970428228684, "learning_rate": 1.1291498537441303e-06, "loss": 0.5394, "step": 7432 }, { "epoch": 0.79, "grad_norm": 1.7178915050958536, "learning_rate": 1.1280336797730613e-06, "loss": 0.5651, "step": 7433 }, { "epoch": 0.79, "grad_norm": 1.775829086252718, "learning_rate": 1.1269179876010322e-06, "loss": 0.5743, "step": 7434 }, { "epoch": 0.79, "grad_norm": 1.609298342689295, "learning_rate": 1.1258027773668717e-06, "loss": 0.5459, "step": 7435 }, { "epoch": 0.79, "grad_norm": 1.6773142938601406, "learning_rate": 1.1246880492093482e-06, "loss": 0.5463, "step": 7436 }, { "epoch": 0.79, "grad_norm": 1.7677612072643214, "learning_rate": 1.1235738032671706e-06, "loss": 0.5901, "step": 7437 }, { "epoch": 0.79, "grad_norm": 1.8364799666031344, "learning_rate": 1.122460039678987e-06, "loss": 0.6359, "step": 7438 }, { "epoch": 0.79, "grad_norm": 1.7679430090683914, "learning_rate": 1.1213467585833865e-06, "loss": 0.5903, "step": 7439 }, { "epoch": 0.79, "grad_norm": 1.7778654717345843, "learning_rate": 1.1202339601188972e-06, "loss": 0.5789, "step": 7440 }, { "epoch": 0.79, "grad_norm": 1.6736210957445967, "learning_rate": 1.1191216444239878e-06, "loss": 0.5363, "step": 7441 }, { "epoch": 0.79, "grad_norm": 1.8303048838678226, "learning_rate": 1.1180098116370664e-06, "loss": 0.5373, "step": 7442 }, { "epoch": 0.79, "grad_norm": 1.9865917556750157, "learning_rate": 1.1168984618964824e-06, "loss": 0.6799, "step": 7443 }, { "epoch": 0.79, "grad_norm": 2.0529449039314196, "learning_rate": 1.1157875953405223e-06, "loss": 0.5932, "step": 7444 }, { "epoch": 0.79, "grad_norm": 2.188212439541709, "learning_rate": 1.114677212107415e-06, "loss": 0.5963, "step": 7445 }, { "epoch": 0.79, "grad_norm": 2.274006543117729, "learning_rate": 1.113567312335328e-06, "loss": 0.5998, "step": 7446 }, { "epoch": 0.79, "grad_norm": 1.9252352032878162, "learning_rate": 1.1124578961623695e-06, "loss": 0.6263, "step": 7447 }, { "epoch": 0.79, "grad_norm": 1.1096860772498727, "learning_rate": 1.1113489637265873e-06, "loss": 0.4844, "step": 7448 }, { "epoch": 0.79, "grad_norm": 1.1226399441478572, "learning_rate": 1.1102405151659667e-06, "loss": 0.4907, "step": 7449 }, { "epoch": 0.79, "grad_norm": 1.9430569324424096, "learning_rate": 1.1091325506184387e-06, "loss": 0.6008, "step": 7450 }, { "epoch": 0.79, "grad_norm": 1.7731975833016975, "learning_rate": 1.1080250702218681e-06, "loss": 0.6236, "step": 7451 }, { "epoch": 0.79, "grad_norm": 1.8768816413325395, "learning_rate": 1.1069180741140617e-06, "loss": 0.5682, "step": 7452 }, { "epoch": 0.79, "grad_norm": 1.633122427075063, "learning_rate": 1.1058115624327664e-06, "loss": 0.6445, "step": 7453 }, { "epoch": 0.8, "grad_norm": 1.9045926287794073, "learning_rate": 1.1047055353156676e-06, "loss": 0.6223, "step": 7454 }, { "epoch": 0.8, "grad_norm": 1.8743416546923424, "learning_rate": 1.103599992900392e-06, "loss": 0.607, "step": 7455 }, { "epoch": 0.8, "grad_norm": 1.6153290766464716, "learning_rate": 1.102494935324505e-06, "loss": 0.5947, "step": 7456 }, { "epoch": 0.8, "grad_norm": 1.9119978156507234, "learning_rate": 1.1013903627255112e-06, "loss": 0.6182, "step": 7457 }, { "epoch": 0.8, "grad_norm": 1.9711975489888733, "learning_rate": 1.1002862752408578e-06, "loss": 0.6478, "step": 7458 }, { "epoch": 0.8, "grad_norm": 1.638874226796262, "learning_rate": 1.0991826730079258e-06, "loss": 0.6031, "step": 7459 }, { "epoch": 0.8, "grad_norm": 1.796536287997297, "learning_rate": 1.0980795561640418e-06, "loss": 0.5367, "step": 7460 }, { "epoch": 0.8, "grad_norm": 1.5983095035975827, "learning_rate": 1.096976924846468e-06, "loss": 0.4993, "step": 7461 }, { "epoch": 0.8, "grad_norm": 1.7313570879069182, "learning_rate": 1.095874779192409e-06, "loss": 0.5691, "step": 7462 }, { "epoch": 0.8, "grad_norm": 1.9613346716406819, "learning_rate": 1.094773119339007e-06, "loss": 0.6167, "step": 7463 }, { "epoch": 0.8, "grad_norm": 1.8274903154665803, "learning_rate": 1.0936719454233451e-06, "loss": 0.5639, "step": 7464 }, { "epoch": 0.8, "grad_norm": 1.808304944624537, "learning_rate": 1.0925712575824448e-06, "loss": 0.6332, "step": 7465 }, { "epoch": 0.8, "grad_norm": 1.6095018212685304, "learning_rate": 1.091471055953266e-06, "loss": 0.476, "step": 7466 }, { "epoch": 0.8, "grad_norm": 1.638162257891568, "learning_rate": 1.0903713406727135e-06, "loss": 0.5967, "step": 7467 }, { "epoch": 0.8, "grad_norm": 1.6270111215019403, "learning_rate": 1.089272111877625e-06, "loss": 0.5735, "step": 7468 }, { "epoch": 0.8, "grad_norm": 1.0868748540574877, "learning_rate": 1.0881733697047815e-06, "loss": 0.4827, "step": 7469 }, { "epoch": 0.8, "grad_norm": 1.801847491870643, "learning_rate": 1.0870751142909025e-06, "loss": 0.5309, "step": 7470 }, { "epoch": 0.8, "grad_norm": 1.148439074401616, "learning_rate": 1.0859773457726458e-06, "loss": 0.5017, "step": 7471 }, { "epoch": 0.8, "grad_norm": 1.932325213159809, "learning_rate": 1.0848800642866119e-06, "loss": 0.6731, "step": 7472 }, { "epoch": 0.8, "grad_norm": 1.769055551057895, "learning_rate": 1.0837832699693357e-06, "loss": 0.53, "step": 7473 }, { "epoch": 0.8, "grad_norm": 1.8225886048355981, "learning_rate": 1.082686962957295e-06, "loss": 0.6089, "step": 7474 }, { "epoch": 0.8, "grad_norm": 1.7890936870521263, "learning_rate": 1.0815911433869063e-06, "loss": 0.5013, "step": 7475 }, { "epoch": 0.8, "grad_norm": 1.8133960507880278, "learning_rate": 1.0804958113945264e-06, "loss": 0.6176, "step": 7476 }, { "epoch": 0.8, "grad_norm": 2.023235902491464, "learning_rate": 1.0794009671164484e-06, "loss": 0.6526, "step": 7477 }, { "epoch": 0.8, "grad_norm": 1.8847471392678279, "learning_rate": 1.078306610688909e-06, "loss": 0.6571, "step": 7478 }, { "epoch": 0.8, "grad_norm": 1.665956664920776, "learning_rate": 1.07721274224808e-06, "loss": 0.6191, "step": 7479 }, { "epoch": 0.8, "grad_norm": 1.8389509391454684, "learning_rate": 1.0761193619300752e-06, "loss": 0.5467, "step": 7480 }, { "epoch": 0.8, "grad_norm": 1.9036219002403438, "learning_rate": 1.0750264698709467e-06, "loss": 0.5294, "step": 7481 }, { "epoch": 0.8, "grad_norm": 1.7095203014451879, "learning_rate": 1.0739340662066855e-06, "loss": 0.6118, "step": 7482 }, { "epoch": 0.8, "grad_norm": 1.9250599829839903, "learning_rate": 1.0728421510732218e-06, "loss": 0.6052, "step": 7483 }, { "epoch": 0.8, "grad_norm": 1.8852084485649745, "learning_rate": 1.0717507246064273e-06, "loss": 0.6218, "step": 7484 }, { "epoch": 0.8, "grad_norm": 1.8224802060468448, "learning_rate": 1.070659786942111e-06, "loss": 0.6309, "step": 7485 }, { "epoch": 0.8, "grad_norm": 1.8999503641063813, "learning_rate": 1.06956933821602e-06, "loss": 0.6177, "step": 7486 }, { "epoch": 0.8, "grad_norm": 1.775321886763939, "learning_rate": 1.0684793785638431e-06, "loss": 0.571, "step": 7487 }, { "epoch": 0.8, "grad_norm": 1.6004788796625422, "learning_rate": 1.0673899081212047e-06, "loss": 0.5488, "step": 7488 }, { "epoch": 0.8, "grad_norm": 1.8653160046386597, "learning_rate": 1.0663009270236713e-06, "loss": 0.5286, "step": 7489 }, { "epoch": 0.8, "grad_norm": 1.8617943541307327, "learning_rate": 1.0652124354067483e-06, "loss": 0.5065, "step": 7490 }, { "epoch": 0.8, "grad_norm": 1.737259371367394, "learning_rate": 1.0641244334058799e-06, "loss": 0.5822, "step": 7491 }, { "epoch": 0.8, "grad_norm": 1.663495400102425, "learning_rate": 1.0630369211564479e-06, "loss": 0.5964, "step": 7492 }, { "epoch": 0.8, "grad_norm": 1.6930673765193183, "learning_rate": 1.0619498987937748e-06, "loss": 0.529, "step": 7493 }, { "epoch": 0.8, "grad_norm": 1.834970040515578, "learning_rate": 1.0608633664531226e-06, "loss": 0.6888, "step": 7494 }, { "epoch": 0.8, "grad_norm": 1.8747160620959022, "learning_rate": 1.0597773242696901e-06, "loss": 0.6053, "step": 7495 }, { "epoch": 0.8, "grad_norm": 1.7989665205514944, "learning_rate": 1.0586917723786173e-06, "loss": 0.6003, "step": 7496 }, { "epoch": 0.8, "grad_norm": 1.7316863305741936, "learning_rate": 1.0576067109149813e-06, "loss": 0.5033, "step": 7497 }, { "epoch": 0.8, "grad_norm": 1.7314215089293825, "learning_rate": 1.0565221400138003e-06, "loss": 0.5212, "step": 7498 }, { "epoch": 0.8, "grad_norm": 1.751415486609709, "learning_rate": 1.05543805981003e-06, "loss": 0.5749, "step": 7499 }, { "epoch": 0.8, "grad_norm": 1.7947085629971553, "learning_rate": 1.0543544704385634e-06, "loss": 0.5817, "step": 7500 }, { "epoch": 0.8, "grad_norm": 1.8601315253445807, "learning_rate": 1.053271372034239e-06, "loss": 0.5694, "step": 7501 }, { "epoch": 0.8, "grad_norm": 1.5739585239262592, "learning_rate": 1.0521887647318258e-06, "loss": 0.5203, "step": 7502 }, { "epoch": 0.8, "grad_norm": 1.7851030879357013, "learning_rate": 1.0511066486660366e-06, "loss": 0.564, "step": 7503 }, { "epoch": 0.8, "grad_norm": 1.8354053735058804, "learning_rate": 1.0500250239715215e-06, "loss": 0.5974, "step": 7504 }, { "epoch": 0.8, "grad_norm": 1.9867029237539977, "learning_rate": 1.0489438907828708e-06, "loss": 0.5621, "step": 7505 }, { "epoch": 0.8, "grad_norm": 1.8439454392825623, "learning_rate": 1.047863249234612e-06, "loss": 0.5918, "step": 7506 }, { "epoch": 0.8, "grad_norm": 1.59443989241496, "learning_rate": 1.0467830994612127e-06, "loss": 0.5532, "step": 7507 }, { "epoch": 0.8, "grad_norm": 1.9275587236472769, "learning_rate": 1.0457034415970785e-06, "loss": 0.6846, "step": 7508 }, { "epoch": 0.8, "grad_norm": 1.8082563058188006, "learning_rate": 1.044624275776554e-06, "loss": 0.5494, "step": 7509 }, { "epoch": 0.8, "grad_norm": 1.094442359131495, "learning_rate": 1.0435456021339236e-06, "loss": 0.4942, "step": 7510 }, { "epoch": 0.8, "grad_norm": 1.950925337228709, "learning_rate": 1.042467420803408e-06, "loss": 0.568, "step": 7511 }, { "epoch": 0.8, "grad_norm": 1.6754053562081637, "learning_rate": 1.041389731919169e-06, "loss": 0.5828, "step": 7512 }, { "epoch": 0.8, "grad_norm": 2.4536871259632824, "learning_rate": 1.0403125356153066e-06, "loss": 0.6998, "step": 7513 }, { "epoch": 0.8, "grad_norm": 1.8428819297039734, "learning_rate": 1.039235832025859e-06, "loss": 0.618, "step": 7514 }, { "epoch": 0.8, "grad_norm": 1.9016427055237433, "learning_rate": 1.0381596212848028e-06, "loss": 0.5263, "step": 7515 }, { "epoch": 0.8, "grad_norm": 1.9297263980297483, "learning_rate": 1.0370839035260538e-06, "loss": 0.6216, "step": 7516 }, { "epoch": 0.8, "grad_norm": 1.755109790136654, "learning_rate": 1.0360086788834673e-06, "loss": 0.5799, "step": 7517 }, { "epoch": 0.8, "grad_norm": 1.8010412322315528, "learning_rate": 1.0349339474908349e-06, "loss": 0.56, "step": 7518 }, { "epoch": 0.8, "grad_norm": 1.62866010261492, "learning_rate": 1.0338597094818898e-06, "loss": 0.5744, "step": 7519 }, { "epoch": 0.8, "grad_norm": 1.8831001655883657, "learning_rate": 1.0327859649903016e-06, "loss": 0.5836, "step": 7520 }, { "epoch": 0.8, "grad_norm": 1.747410987334502, "learning_rate": 1.0317127141496792e-06, "loss": 0.5658, "step": 7521 }, { "epoch": 0.8, "grad_norm": 2.0109323624318716, "learning_rate": 1.0306399570935693e-06, "loss": 0.5946, "step": 7522 }, { "epoch": 0.8, "grad_norm": 1.1138990610786188, "learning_rate": 1.0295676939554594e-06, "loss": 0.4901, "step": 7523 }, { "epoch": 0.8, "grad_norm": 1.0976126646110551, "learning_rate": 1.0284959248687731e-06, "loss": 0.478, "step": 7524 }, { "epoch": 0.8, "grad_norm": 1.8367776899293977, "learning_rate": 1.0274246499668733e-06, "loss": 0.5253, "step": 7525 }, { "epoch": 0.8, "grad_norm": 1.7699733826213733, "learning_rate": 1.0263538693830627e-06, "loss": 0.5487, "step": 7526 }, { "epoch": 0.8, "grad_norm": 1.6459210440255634, "learning_rate": 1.0252835832505797e-06, "loss": 0.5186, "step": 7527 }, { "epoch": 0.8, "grad_norm": 2.1803157404742683, "learning_rate": 1.0242137917026045e-06, "loss": 0.58, "step": 7528 }, { "epoch": 0.8, "grad_norm": 1.8518852994771027, "learning_rate": 1.023144494872253e-06, "loss": 0.575, "step": 7529 }, { "epoch": 0.8, "grad_norm": 1.7473508099271449, "learning_rate": 1.0220756928925807e-06, "loss": 0.6002, "step": 7530 }, { "epoch": 0.8, "grad_norm": 1.074673206742126, "learning_rate": 1.0210073858965825e-06, "loss": 0.4867, "step": 7531 }, { "epoch": 0.8, "grad_norm": 1.877434139167031, "learning_rate": 1.0199395740171892e-06, "loss": 0.5895, "step": 7532 }, { "epoch": 0.8, "grad_norm": 2.041957792860945, "learning_rate": 1.0188722573872723e-06, "loss": 0.6942, "step": 7533 }, { "epoch": 0.8, "grad_norm": 1.1189027473466489, "learning_rate": 1.017805436139641e-06, "loss": 0.5049, "step": 7534 }, { "epoch": 0.8, "grad_norm": 1.70700148309866, "learning_rate": 1.0167391104070423e-06, "loss": 0.6185, "step": 7535 }, { "epoch": 0.8, "grad_norm": 1.6919454942993908, "learning_rate": 1.015673280322162e-06, "loss": 0.5862, "step": 7536 }, { "epoch": 0.8, "grad_norm": 1.6600150279738404, "learning_rate": 1.0146079460176244e-06, "loss": 0.5362, "step": 7537 }, { "epoch": 0.8, "grad_norm": 1.889488848495165, "learning_rate": 1.0135431076259915e-06, "loss": 0.6278, "step": 7538 }, { "epoch": 0.8, "grad_norm": 1.7050600662210065, "learning_rate": 1.0124787652797642e-06, "loss": 0.5377, "step": 7539 }, { "epoch": 0.8, "grad_norm": 1.9664822358887044, "learning_rate": 1.0114149191113814e-06, "loss": 0.5626, "step": 7540 }, { "epoch": 0.8, "grad_norm": 1.8061811382548891, "learning_rate": 1.0103515692532206e-06, "loss": 0.5771, "step": 7541 }, { "epoch": 0.8, "grad_norm": 1.7497100328840811, "learning_rate": 1.0092887158375964e-06, "loss": 0.5626, "step": 7542 }, { "epoch": 0.8, "grad_norm": 1.8354377750577844, "learning_rate": 1.0082263589967634e-06, "loss": 0.6584, "step": 7543 }, { "epoch": 0.8, "grad_norm": 1.7723596964155217, "learning_rate": 1.0071644988629142e-06, "loss": 0.5598, "step": 7544 }, { "epoch": 0.8, "grad_norm": 1.6961709191162049, "learning_rate": 1.0061031355681766e-06, "loss": 0.5263, "step": 7545 }, { "epoch": 0.8, "grad_norm": 1.6417974893735705, "learning_rate": 1.0050422692446187e-06, "loss": 0.5929, "step": 7546 }, { "epoch": 0.81, "grad_norm": 1.8109732326778156, "learning_rate": 1.0039819000242495e-06, "loss": 0.5849, "step": 7547 }, { "epoch": 0.81, "grad_norm": 1.9466965318854155, "learning_rate": 1.0029220280390129e-06, "loss": 0.6096, "step": 7548 }, { "epoch": 0.81, "grad_norm": 1.8927298754175854, "learning_rate": 1.0018626534207903e-06, "loss": 0.5326, "step": 7549 }, { "epoch": 0.81, "grad_norm": 1.6083820025351059, "learning_rate": 1.0008037763014033e-06, "loss": 0.5502, "step": 7550 }, { "epoch": 0.81, "grad_norm": 1.742920844106581, "learning_rate": 9.997453968126102e-07, "loss": 0.6174, "step": 7551 }, { "epoch": 0.81, "grad_norm": 1.776923614109412, "learning_rate": 9.986875150861091e-07, "loss": 0.6513, "step": 7552 }, { "epoch": 0.81, "grad_norm": 1.6376570361056304, "learning_rate": 9.976301312535342e-07, "loss": 0.5829, "step": 7553 }, { "epoch": 0.81, "grad_norm": 1.87947986652423, "learning_rate": 9.965732454464583e-07, "loss": 0.5769, "step": 7554 }, { "epoch": 0.81, "grad_norm": 2.023258555190098, "learning_rate": 9.955168577963936e-07, "loss": 0.5769, "step": 7555 }, { "epoch": 0.81, "grad_norm": 1.7490151096187072, "learning_rate": 9.944609684347884e-07, "loss": 0.591, "step": 7556 }, { "epoch": 0.81, "grad_norm": 1.793871352044861, "learning_rate": 9.934055774930296e-07, "loss": 0.5312, "step": 7557 }, { "epoch": 0.81, "grad_norm": 1.9845862610029388, "learning_rate": 9.92350685102444e-07, "loss": 0.6442, "step": 7558 }, { "epoch": 0.81, "grad_norm": 2.1842187010135716, "learning_rate": 9.912962913942919e-07, "loss": 0.6208, "step": 7559 }, { "epoch": 0.81, "grad_norm": 1.8434968929160491, "learning_rate": 9.902423964997765e-07, "loss": 0.6359, "step": 7560 }, { "epoch": 0.81, "grad_norm": 1.738671426624282, "learning_rate": 9.891890005500354e-07, "loss": 0.572, "step": 7561 }, { "epoch": 0.81, "grad_norm": 1.7723412229460775, "learning_rate": 9.881361036761456e-07, "loss": 0.5363, "step": 7562 }, { "epoch": 0.81, "grad_norm": 1.6124865033844713, "learning_rate": 9.870837060091215e-07, "loss": 0.5906, "step": 7563 }, { "epoch": 0.81, "grad_norm": 1.8953290703933268, "learning_rate": 9.86031807679918e-07, "loss": 0.6513, "step": 7564 }, { "epoch": 0.81, "grad_norm": 1.811720334492033, "learning_rate": 9.849804088194237e-07, "loss": 0.6792, "step": 7565 }, { "epoch": 0.81, "grad_norm": 1.959664978208364, "learning_rate": 9.839295095584673e-07, "loss": 0.5629, "step": 7566 }, { "epoch": 0.81, "grad_norm": 1.7886592677141229, "learning_rate": 9.82879110027815e-07, "loss": 0.7172, "step": 7567 }, { "epoch": 0.81, "grad_norm": 1.0695360256379856, "learning_rate": 9.818292103581712e-07, "loss": 0.4907, "step": 7568 }, { "epoch": 0.81, "grad_norm": 1.8436784698894215, "learning_rate": 9.80779810680177e-07, "loss": 0.632, "step": 7569 }, { "epoch": 0.81, "grad_norm": 1.9132200191486728, "learning_rate": 9.797309111244123e-07, "loss": 0.599, "step": 7570 }, { "epoch": 0.81, "grad_norm": 1.7088432079663862, "learning_rate": 9.786825118213943e-07, "loss": 0.5904, "step": 7571 }, { "epoch": 0.81, "grad_norm": 1.075964145778017, "learning_rate": 9.77634612901579e-07, "loss": 0.4807, "step": 7572 }, { "epoch": 0.81, "grad_norm": 1.9142288778482581, "learning_rate": 9.765872144953592e-07, "loss": 0.5988, "step": 7573 }, { "epoch": 0.81, "grad_norm": 1.8123472633537092, "learning_rate": 9.755403167330635e-07, "loss": 0.5508, "step": 7574 }, { "epoch": 0.81, "grad_norm": 1.7342330721820298, "learning_rate": 9.744939197449615e-07, "loss": 0.5101, "step": 7575 }, { "epoch": 0.81, "grad_norm": 1.6088507649059556, "learning_rate": 9.734480236612592e-07, "loss": 0.4809, "step": 7576 }, { "epoch": 0.81, "grad_norm": 2.031762875027932, "learning_rate": 9.724026286121003e-07, "loss": 0.5947, "step": 7577 }, { "epoch": 0.81, "grad_norm": 1.779936345596462, "learning_rate": 9.71357734727566e-07, "loss": 0.6298, "step": 7578 }, { "epoch": 0.81, "grad_norm": 1.7388798274329111, "learning_rate": 9.703133421376748e-07, "loss": 0.5959, "step": 7579 }, { "epoch": 0.81, "grad_norm": 1.9494147628242462, "learning_rate": 9.69269450972382e-07, "loss": 0.5693, "step": 7580 }, { "epoch": 0.81, "grad_norm": 1.6726906703877928, "learning_rate": 9.682260613615851e-07, "loss": 0.5513, "step": 7581 }, { "epoch": 0.81, "grad_norm": 1.6416253016439524, "learning_rate": 9.671831734351133e-07, "loss": 0.5776, "step": 7582 }, { "epoch": 0.81, "grad_norm": 1.6995958465140717, "learning_rate": 9.661407873227373e-07, "loss": 0.529, "step": 7583 }, { "epoch": 0.81, "grad_norm": 1.856015062379911, "learning_rate": 9.650989031541636e-07, "loss": 0.6551, "step": 7584 }, { "epoch": 0.81, "grad_norm": 1.891639111388796, "learning_rate": 9.640575210590358e-07, "loss": 0.5616, "step": 7585 }, { "epoch": 0.81, "grad_norm": 1.7232000309635593, "learning_rate": 9.630166411669363e-07, "loss": 0.6569, "step": 7586 }, { "epoch": 0.81, "grad_norm": 1.6700908872316402, "learning_rate": 9.61976263607386e-07, "loss": 0.525, "step": 7587 }, { "epoch": 0.81, "grad_norm": 1.8889165404460673, "learning_rate": 9.609363885098393e-07, "loss": 0.6488, "step": 7588 }, { "epoch": 0.81, "grad_norm": 1.8875316382994387, "learning_rate": 9.598970160036919e-07, "loss": 0.5829, "step": 7589 }, { "epoch": 0.81, "grad_norm": 1.8042072083590845, "learning_rate": 9.58858146218275e-07, "loss": 0.5424, "step": 7590 }, { "epoch": 0.81, "grad_norm": 1.7526197785886322, "learning_rate": 9.578197792828593e-07, "loss": 0.6024, "step": 7591 }, { "epoch": 0.81, "grad_norm": 1.7697217920202135, "learning_rate": 9.5678191532665e-07, "loss": 0.6203, "step": 7592 }, { "epoch": 0.81, "grad_norm": 1.771292409480416, "learning_rate": 9.557445544787918e-07, "loss": 0.5728, "step": 7593 }, { "epoch": 0.81, "grad_norm": 1.788052631361812, "learning_rate": 9.54707696868367e-07, "loss": 0.622, "step": 7594 }, { "epoch": 0.81, "grad_norm": 1.1032036017704088, "learning_rate": 9.536713426243943e-07, "loss": 0.496, "step": 7595 }, { "epoch": 0.81, "grad_norm": 1.7205100984344492, "learning_rate": 9.526354918758279e-07, "loss": 0.512, "step": 7596 }, { "epoch": 0.81, "grad_norm": 1.800706291902871, "learning_rate": 9.516001447515644e-07, "loss": 0.537, "step": 7597 }, { "epoch": 0.81, "grad_norm": 1.0923608856135, "learning_rate": 9.505653013804339e-07, "loss": 0.5085, "step": 7598 }, { "epoch": 0.81, "grad_norm": 1.8191756663882006, "learning_rate": 9.495309618912046e-07, "loss": 0.6426, "step": 7599 }, { "epoch": 0.81, "grad_norm": 1.841858089022824, "learning_rate": 9.484971264125814e-07, "loss": 0.5311, "step": 7600 }, { "epoch": 0.81, "grad_norm": 1.860594010194824, "learning_rate": 9.474637950732096e-07, "loss": 0.5868, "step": 7601 }, { "epoch": 0.81, "grad_norm": 1.8746996650602867, "learning_rate": 9.464309680016659e-07, "loss": 0.431, "step": 7602 }, { "epoch": 0.81, "grad_norm": 1.7552965641855522, "learning_rate": 9.453986453264696e-07, "loss": 0.5696, "step": 7603 }, { "epoch": 0.81, "grad_norm": 1.7689191366758605, "learning_rate": 9.443668271760747e-07, "loss": 0.6492, "step": 7604 }, { "epoch": 0.81, "grad_norm": 1.1137060189590515, "learning_rate": 9.433355136788741e-07, "loss": 0.4895, "step": 7605 }, { "epoch": 0.81, "grad_norm": 1.7361300893410339, "learning_rate": 9.423047049631956e-07, "loss": 0.641, "step": 7606 }, { "epoch": 0.81, "grad_norm": 1.8704498344320335, "learning_rate": 9.412744011573061e-07, "loss": 0.6046, "step": 7607 }, { "epoch": 0.81, "grad_norm": 2.0000109187717787, "learning_rate": 9.40244602389409e-07, "loss": 0.6794, "step": 7608 }, { "epoch": 0.81, "grad_norm": 2.0284384909554247, "learning_rate": 9.392153087876454e-07, "loss": 0.618, "step": 7609 }, { "epoch": 0.81, "grad_norm": 2.3221064058857523, "learning_rate": 9.381865204800922e-07, "loss": 0.6231, "step": 7610 }, { "epoch": 0.81, "grad_norm": 2.0714634058089416, "learning_rate": 9.371582375947646e-07, "loss": 0.6524, "step": 7611 }, { "epoch": 0.81, "grad_norm": 1.7400380244624056, "learning_rate": 9.361304602596144e-07, "loss": 0.584, "step": 7612 }, { "epoch": 0.81, "grad_norm": 1.7329631309014097, "learning_rate": 9.351031886025297e-07, "loss": 0.6126, "step": 7613 }, { "epoch": 0.81, "grad_norm": 1.8613899095666877, "learning_rate": 9.340764227513388e-07, "loss": 0.5863, "step": 7614 }, { "epoch": 0.81, "grad_norm": 1.9078620517596976, "learning_rate": 9.330501628338046e-07, "loss": 0.5163, "step": 7615 }, { "epoch": 0.81, "grad_norm": 1.7077535646747921, "learning_rate": 9.320244089776271e-07, "loss": 0.6093, "step": 7616 }, { "epoch": 0.81, "grad_norm": 1.6869598932862577, "learning_rate": 9.309991613104419e-07, "loss": 0.5538, "step": 7617 }, { "epoch": 0.81, "grad_norm": 1.7327208359351658, "learning_rate": 9.299744199598249e-07, "loss": 0.5111, "step": 7618 }, { "epoch": 0.81, "grad_norm": 1.6758085899432833, "learning_rate": 9.289501850532867e-07, "loss": 0.5789, "step": 7619 }, { "epoch": 0.81, "grad_norm": 1.8848883191531927, "learning_rate": 9.279264567182756e-07, "loss": 0.7038, "step": 7620 }, { "epoch": 0.81, "grad_norm": 1.634565744693695, "learning_rate": 9.269032350821772e-07, "loss": 0.5544, "step": 7621 }, { "epoch": 0.81, "grad_norm": 1.4288837739265625, "learning_rate": 9.258805202723136e-07, "loss": 0.5001, "step": 7622 }, { "epoch": 0.81, "grad_norm": 1.561362894395804, "learning_rate": 9.248583124159438e-07, "loss": 0.4459, "step": 7623 }, { "epoch": 0.81, "grad_norm": 1.9912013370963273, "learning_rate": 9.238366116402636e-07, "loss": 0.5905, "step": 7624 }, { "epoch": 0.81, "grad_norm": 1.7137239669325015, "learning_rate": 9.228154180724069e-07, "loss": 0.5634, "step": 7625 }, { "epoch": 0.81, "grad_norm": 1.0447230613272216, "learning_rate": 9.217947318394421e-07, "loss": 0.4567, "step": 7626 }, { "epoch": 0.81, "grad_norm": 1.7081473602810122, "learning_rate": 9.20774553068377e-07, "loss": 0.5575, "step": 7627 }, { "epoch": 0.81, "grad_norm": 1.8637417467177821, "learning_rate": 9.197548818861546e-07, "loss": 0.5896, "step": 7628 }, { "epoch": 0.81, "grad_norm": 2.172313939076588, "learning_rate": 9.187357184196561e-07, "loss": 0.5312, "step": 7629 }, { "epoch": 0.81, "grad_norm": 1.7731070575380126, "learning_rate": 9.177170627956972e-07, "loss": 0.4782, "step": 7630 }, { "epoch": 0.81, "grad_norm": 1.7067651220520885, "learning_rate": 9.166989151410332e-07, "loss": 0.6317, "step": 7631 }, { "epoch": 0.81, "grad_norm": 1.7446847917072092, "learning_rate": 9.156812755823547e-07, "loss": 0.5222, "step": 7632 }, { "epoch": 0.81, "grad_norm": 1.7976507715456573, "learning_rate": 9.146641442462889e-07, "loss": 0.6054, "step": 7633 }, { "epoch": 0.81, "grad_norm": 1.8623685036027358, "learning_rate": 9.136475212594004e-07, "loss": 0.5502, "step": 7634 }, { "epoch": 0.81, "grad_norm": 1.7511560642204997, "learning_rate": 9.126314067481906e-07, "loss": 0.5885, "step": 7635 }, { "epoch": 0.81, "grad_norm": 1.784682009813474, "learning_rate": 9.116158008390963e-07, "loss": 0.5924, "step": 7636 }, { "epoch": 0.81, "grad_norm": 1.7823683700417219, "learning_rate": 9.10600703658493e-07, "loss": 0.6528, "step": 7637 }, { "epoch": 0.81, "grad_norm": 1.8540441857258592, "learning_rate": 9.095861153326919e-07, "loss": 0.5687, "step": 7638 }, { "epoch": 0.81, "grad_norm": 1.879681367995253, "learning_rate": 9.085720359879408e-07, "loss": 0.5692, "step": 7639 }, { "epoch": 0.81, "grad_norm": 1.7048588272495366, "learning_rate": 9.075584657504239e-07, "loss": 0.5666, "step": 7640 }, { "epoch": 0.82, "grad_norm": 1.7830832546700814, "learning_rate": 9.065454047462624e-07, "loss": 0.5431, "step": 7641 }, { "epoch": 0.82, "grad_norm": 1.9408454779254023, "learning_rate": 9.055328531015145e-07, "loss": 0.6161, "step": 7642 }, { "epoch": 0.82, "grad_norm": 1.798095328499514, "learning_rate": 9.045208109421749e-07, "loss": 0.5976, "step": 7643 }, { "epoch": 0.82, "grad_norm": 1.764583681269586, "learning_rate": 9.035092783941746e-07, "loss": 0.6279, "step": 7644 }, { "epoch": 0.82, "grad_norm": 1.8166833456447602, "learning_rate": 9.024982555833806e-07, "loss": 0.6531, "step": 7645 }, { "epoch": 0.82, "grad_norm": 1.4820908474078656, "learning_rate": 9.014877426355984e-07, "loss": 0.5221, "step": 7646 }, { "epoch": 0.82, "grad_norm": 1.7408133065025215, "learning_rate": 9.004777396765674e-07, "loss": 0.6091, "step": 7647 }, { "epoch": 0.82, "grad_norm": 1.7790193641407106, "learning_rate": 8.99468246831966e-07, "loss": 0.56, "step": 7648 }, { "epoch": 0.82, "grad_norm": 1.8269059266440237, "learning_rate": 8.984592642274071e-07, "loss": 0.5865, "step": 7649 }, { "epoch": 0.82, "grad_norm": 1.8794356157784688, "learning_rate": 8.974507919884423e-07, "loss": 0.5687, "step": 7650 }, { "epoch": 0.82, "grad_norm": 1.7627568709046504, "learning_rate": 8.964428302405575e-07, "loss": 0.6554, "step": 7651 }, { "epoch": 0.82, "grad_norm": 1.0665436030788353, "learning_rate": 8.954353791091769e-07, "loss": 0.4742, "step": 7652 }, { "epoch": 0.82, "grad_norm": 1.8015841219819015, "learning_rate": 8.944284387196594e-07, "loss": 0.567, "step": 7653 }, { "epoch": 0.82, "grad_norm": 2.03441165998867, "learning_rate": 8.934220091973017e-07, "loss": 0.6225, "step": 7654 }, { "epoch": 0.82, "grad_norm": 1.74119638618815, "learning_rate": 8.924160906673362e-07, "loss": 0.5347, "step": 7655 }, { "epoch": 0.82, "grad_norm": 1.7085056860537287, "learning_rate": 8.914106832549329e-07, "loss": 0.5655, "step": 7656 }, { "epoch": 0.82, "grad_norm": 1.6739024896410593, "learning_rate": 8.904057870851962e-07, "loss": 0.5886, "step": 7657 }, { "epoch": 0.82, "grad_norm": 1.094386955328981, "learning_rate": 8.894014022831687e-07, "loss": 0.5032, "step": 7658 }, { "epoch": 0.82, "grad_norm": 1.5486827955920581, "learning_rate": 8.883975289738283e-07, "loss": 0.5289, "step": 7659 }, { "epoch": 0.82, "grad_norm": 1.0434911644279456, "learning_rate": 8.873941672820901e-07, "loss": 0.474, "step": 7660 }, { "epoch": 0.82, "grad_norm": 1.706529777988319, "learning_rate": 8.863913173328043e-07, "loss": 0.5891, "step": 7661 }, { "epoch": 0.82, "grad_norm": 1.7245708091007532, "learning_rate": 8.853889792507592e-07, "loss": 0.6226, "step": 7662 }, { "epoch": 0.82, "grad_norm": 1.709026456346846, "learning_rate": 8.843871531606773e-07, "loss": 0.5527, "step": 7663 }, { "epoch": 0.82, "grad_norm": 1.9493191478516085, "learning_rate": 8.833858391872191e-07, "loss": 0.5844, "step": 7664 }, { "epoch": 0.82, "grad_norm": 1.8081466781253421, "learning_rate": 8.823850374549808e-07, "loss": 0.5441, "step": 7665 }, { "epoch": 0.82, "grad_norm": 2.1345522648623265, "learning_rate": 8.813847480884946e-07, "loss": 0.5403, "step": 7666 }, { "epoch": 0.82, "grad_norm": 1.9432895449110317, "learning_rate": 8.803849712122292e-07, "loss": 0.6264, "step": 7667 }, { "epoch": 0.82, "grad_norm": 1.0905575990405152, "learning_rate": 8.79385706950589e-07, "loss": 0.4956, "step": 7668 }, { "epoch": 0.82, "grad_norm": 1.691050437726653, "learning_rate": 8.783869554279162e-07, "loss": 0.5893, "step": 7669 }, { "epoch": 0.82, "grad_norm": 1.0486793483302064, "learning_rate": 8.773887167684875e-07, "loss": 0.4787, "step": 7670 }, { "epoch": 0.82, "grad_norm": 1.7053110873516093, "learning_rate": 8.763909910965163e-07, "loss": 0.5329, "step": 7671 }, { "epoch": 0.82, "grad_norm": 1.0859015661893825, "learning_rate": 8.753937785361522e-07, "loss": 0.4745, "step": 7672 }, { "epoch": 0.82, "grad_norm": 1.8938978250861342, "learning_rate": 8.743970792114819e-07, "loss": 0.5664, "step": 7673 }, { "epoch": 0.82, "grad_norm": 1.763509289035765, "learning_rate": 8.73400893246526e-07, "loss": 0.6139, "step": 7674 }, { "epoch": 0.82, "grad_norm": 1.0877407585216126, "learning_rate": 8.724052207652428e-07, "loss": 0.4736, "step": 7675 }, { "epoch": 0.82, "grad_norm": 1.9917801795309735, "learning_rate": 8.714100618915261e-07, "loss": 0.5414, "step": 7676 }, { "epoch": 0.82, "grad_norm": 1.103934813569463, "learning_rate": 8.704154167492074e-07, "loss": 0.5116, "step": 7677 }, { "epoch": 0.82, "grad_norm": 1.778542262701834, "learning_rate": 8.694212854620532e-07, "loss": 0.4763, "step": 7678 }, { "epoch": 0.82, "grad_norm": 1.8401271579534926, "learning_rate": 8.684276681537646e-07, "loss": 0.6362, "step": 7679 }, { "epoch": 0.82, "grad_norm": 1.994026305070927, "learning_rate": 8.674345649479809e-07, "loss": 0.6578, "step": 7680 }, { "epoch": 0.82, "grad_norm": 2.1490362631434112, "learning_rate": 8.664419759682763e-07, "loss": 0.6242, "step": 7681 }, { "epoch": 0.82, "grad_norm": 2.038797410353039, "learning_rate": 8.654499013381613e-07, "loss": 0.611, "step": 7682 }, { "epoch": 0.82, "grad_norm": 1.107324957995358, "learning_rate": 8.644583411810825e-07, "loss": 0.4795, "step": 7683 }, { "epoch": 0.82, "grad_norm": 1.7419890640016413, "learning_rate": 8.634672956204221e-07, "loss": 0.6284, "step": 7684 }, { "epoch": 0.82, "grad_norm": 1.8835211606431013, "learning_rate": 8.624767647794985e-07, "loss": 0.6223, "step": 7685 }, { "epoch": 0.82, "grad_norm": 1.9422121180892002, "learning_rate": 8.614867487815659e-07, "loss": 0.5909, "step": 7686 }, { "epoch": 0.82, "grad_norm": 1.96663202401575, "learning_rate": 8.604972477498158e-07, "loss": 0.6136, "step": 7687 }, { "epoch": 0.82, "grad_norm": 1.7642192744658407, "learning_rate": 8.595082618073741e-07, "loss": 0.536, "step": 7688 }, { "epoch": 0.82, "grad_norm": 1.6943320245765925, "learning_rate": 8.585197910773013e-07, "loss": 0.6344, "step": 7689 }, { "epoch": 0.82, "grad_norm": 1.7879731834881836, "learning_rate": 8.575318356825968e-07, "loss": 0.5994, "step": 7690 }, { "epoch": 0.82, "grad_norm": 1.758032158445081, "learning_rate": 8.565443957461944e-07, "loss": 0.5418, "step": 7691 }, { "epoch": 0.82, "grad_norm": 1.5519754563026573, "learning_rate": 8.555574713909631e-07, "loss": 0.4766, "step": 7692 }, { "epoch": 0.82, "grad_norm": 1.6058116581121784, "learning_rate": 8.545710627397086e-07, "loss": 0.4943, "step": 7693 }, { "epoch": 0.82, "grad_norm": 1.8281903563446815, "learning_rate": 8.535851699151737e-07, "loss": 0.6056, "step": 7694 }, { "epoch": 0.82, "grad_norm": 1.7230051152185266, "learning_rate": 8.525997930400348e-07, "loss": 0.6485, "step": 7695 }, { "epoch": 0.82, "grad_norm": 1.7465375180484084, "learning_rate": 8.516149322369055e-07, "loss": 0.5201, "step": 7696 }, { "epoch": 0.82, "grad_norm": 1.6067005729852053, "learning_rate": 8.506305876283339e-07, "loss": 0.6571, "step": 7697 }, { "epoch": 0.82, "grad_norm": 1.8671708069094193, "learning_rate": 8.496467593368046e-07, "loss": 0.5772, "step": 7698 }, { "epoch": 0.82, "grad_norm": 1.8647645640213264, "learning_rate": 8.486634474847383e-07, "loss": 0.555, "step": 7699 }, { "epoch": 0.82, "grad_norm": 1.0747349293067496, "learning_rate": 8.476806521944913e-07, "loss": 0.486, "step": 7700 }, { "epoch": 0.82, "grad_norm": 1.6679678040111354, "learning_rate": 8.466983735883554e-07, "loss": 0.4629, "step": 7701 }, { "epoch": 0.82, "grad_norm": 1.939929390915223, "learning_rate": 8.457166117885585e-07, "loss": 0.6099, "step": 7702 }, { "epoch": 0.82, "grad_norm": 1.0696168652292457, "learning_rate": 8.447353669172626e-07, "loss": 0.4751, "step": 7703 }, { "epoch": 0.82, "grad_norm": 1.1043067126220756, "learning_rate": 8.437546390965667e-07, "loss": 0.4741, "step": 7704 }, { "epoch": 0.82, "grad_norm": 1.7393338732641381, "learning_rate": 8.427744284485062e-07, "loss": 0.5853, "step": 7705 }, { "epoch": 0.82, "grad_norm": 1.5918036266660505, "learning_rate": 8.417947350950506e-07, "loss": 0.5407, "step": 7706 }, { "epoch": 0.82, "grad_norm": 1.8736245000588918, "learning_rate": 8.408155591581063e-07, "loss": 0.6499, "step": 7707 }, { "epoch": 0.82, "grad_norm": 1.8984660942992746, "learning_rate": 8.398369007595148e-07, "loss": 0.6231, "step": 7708 }, { "epoch": 0.82, "grad_norm": 1.8654224585722503, "learning_rate": 8.388587600210529e-07, "loss": 0.5897, "step": 7709 }, { "epoch": 0.82, "grad_norm": 1.0588109006516864, "learning_rate": 8.378811370644325e-07, "loss": 0.486, "step": 7710 }, { "epoch": 0.82, "grad_norm": 1.8819454901156634, "learning_rate": 8.369040320113031e-07, "loss": 0.6038, "step": 7711 }, { "epoch": 0.82, "grad_norm": 1.0731608167844329, "learning_rate": 8.359274449832483e-07, "loss": 0.4913, "step": 7712 }, { "epoch": 0.82, "grad_norm": 1.8665768403684289, "learning_rate": 8.349513761017868e-07, "loss": 0.5607, "step": 7713 }, { "epoch": 0.82, "grad_norm": 1.7743933501088598, "learning_rate": 8.339758254883745e-07, "loss": 0.516, "step": 7714 }, { "epoch": 0.82, "grad_norm": 1.75812830908967, "learning_rate": 8.330007932644002e-07, "loss": 0.5287, "step": 7715 }, { "epoch": 0.82, "grad_norm": 1.85923416811108, "learning_rate": 8.320262795511924e-07, "loss": 0.5927, "step": 7716 }, { "epoch": 0.82, "grad_norm": 2.036997838319863, "learning_rate": 8.310522844700086e-07, "loss": 0.5882, "step": 7717 }, { "epoch": 0.82, "grad_norm": 1.1015310159308693, "learning_rate": 8.300788081420486e-07, "loss": 0.4998, "step": 7718 }, { "epoch": 0.82, "grad_norm": 1.888365341123657, "learning_rate": 8.291058506884425e-07, "loss": 0.665, "step": 7719 }, { "epoch": 0.82, "grad_norm": 1.7895209736258932, "learning_rate": 8.281334122302598e-07, "loss": 0.6188, "step": 7720 }, { "epoch": 0.82, "grad_norm": 1.7667824433237755, "learning_rate": 8.27161492888503e-07, "loss": 0.5848, "step": 7721 }, { "epoch": 0.82, "grad_norm": 1.786546908154206, "learning_rate": 8.261900927841099e-07, "loss": 0.5617, "step": 7722 }, { "epoch": 0.82, "grad_norm": 1.7623768405617106, "learning_rate": 8.252192120379554e-07, "loss": 0.6051, "step": 7723 }, { "epoch": 0.82, "grad_norm": 2.0574299719183484, "learning_rate": 8.242488507708479e-07, "loss": 0.7036, "step": 7724 }, { "epoch": 0.82, "grad_norm": 1.7305104233450839, "learning_rate": 8.23279009103533e-07, "loss": 0.6556, "step": 7725 }, { "epoch": 0.82, "grad_norm": 1.808288674145468, "learning_rate": 8.223096871566899e-07, "loss": 0.6519, "step": 7726 }, { "epoch": 0.82, "grad_norm": 1.7840346618670353, "learning_rate": 8.21340885050933e-07, "loss": 0.5619, "step": 7727 }, { "epoch": 0.82, "grad_norm": 1.8467330573070175, "learning_rate": 8.203726029068149e-07, "loss": 0.6635, "step": 7728 }, { "epoch": 0.82, "grad_norm": 1.7508315363248785, "learning_rate": 8.194048408448213e-07, "loss": 0.5736, "step": 7729 }, { "epoch": 0.82, "grad_norm": 1.6376247303101634, "learning_rate": 8.184375989853722e-07, "loss": 0.5823, "step": 7730 }, { "epoch": 0.82, "grad_norm": 1.0756291014097608, "learning_rate": 8.174708774488261e-07, "loss": 0.4731, "step": 7731 }, { "epoch": 0.82, "grad_norm": 1.9025004097069633, "learning_rate": 8.165046763554718e-07, "loss": 0.5883, "step": 7732 }, { "epoch": 0.82, "grad_norm": 1.5316960847579597, "learning_rate": 8.155389958255383e-07, "loss": 0.5181, "step": 7733 }, { "epoch": 0.82, "grad_norm": 1.0660377079072478, "learning_rate": 8.145738359791866e-07, "loss": 0.4918, "step": 7734 }, { "epoch": 0.83, "grad_norm": 1.140392024142969, "learning_rate": 8.13609196936515e-07, "loss": 0.4893, "step": 7735 }, { "epoch": 0.83, "grad_norm": 1.825675526665528, "learning_rate": 8.126450788175561e-07, "loss": 0.5893, "step": 7736 }, { "epoch": 0.83, "grad_norm": 1.8269057218111435, "learning_rate": 8.116814817422769e-07, "loss": 0.5692, "step": 7737 }, { "epoch": 0.83, "grad_norm": 1.0885509961679651, "learning_rate": 8.107184058305811e-07, "loss": 0.4984, "step": 7738 }, { "epoch": 0.83, "grad_norm": 1.617691160753596, "learning_rate": 8.097558512023063e-07, "loss": 0.5194, "step": 7739 }, { "epoch": 0.83, "grad_norm": 1.668327552786671, "learning_rate": 8.087938179772265e-07, "loss": 0.6122, "step": 7740 }, { "epoch": 0.83, "grad_norm": 1.7801862913906168, "learning_rate": 8.078323062750488e-07, "loss": 0.6, "step": 7741 }, { "epoch": 0.83, "grad_norm": 1.6137016051049888, "learning_rate": 8.068713162154179e-07, "loss": 0.5593, "step": 7742 }, { "epoch": 0.83, "grad_norm": 1.7014240986921323, "learning_rate": 8.059108479179112e-07, "loss": 0.6211, "step": 7743 }, { "epoch": 0.83, "grad_norm": 1.6713258456463846, "learning_rate": 8.049509015020424e-07, "loss": 0.5573, "step": 7744 }, { "epoch": 0.83, "grad_norm": 1.0298181071748131, "learning_rate": 8.039914770872626e-07, "loss": 0.4809, "step": 7745 }, { "epoch": 0.83, "grad_norm": 1.597065741710745, "learning_rate": 8.030325747929524e-07, "loss": 0.4742, "step": 7746 }, { "epoch": 0.83, "grad_norm": 1.774367223689511, "learning_rate": 8.02074194738432e-07, "loss": 0.667, "step": 7747 }, { "epoch": 0.83, "grad_norm": 1.6226702822198609, "learning_rate": 8.011163370429548e-07, "loss": 0.5979, "step": 7748 }, { "epoch": 0.83, "grad_norm": 1.9976787809663437, "learning_rate": 8.0015900182571e-07, "loss": 0.5648, "step": 7749 }, { "epoch": 0.83, "grad_norm": 1.633598502545854, "learning_rate": 7.992021892058204e-07, "loss": 0.5683, "step": 7750 }, { "epoch": 0.83, "grad_norm": 1.8163130821731488, "learning_rate": 7.982458993023456e-07, "loss": 0.525, "step": 7751 }, { "epoch": 0.83, "grad_norm": 1.8234440830238599, "learning_rate": 7.972901322342796e-07, "loss": 0.6573, "step": 7752 }, { "epoch": 0.83, "grad_norm": 1.8934238402789136, "learning_rate": 7.963348881205502e-07, "loss": 0.5488, "step": 7753 }, { "epoch": 0.83, "grad_norm": 1.5688460934281505, "learning_rate": 7.953801670800215e-07, "loss": 0.5086, "step": 7754 }, { "epoch": 0.83, "grad_norm": 1.8508901883052131, "learning_rate": 7.944259692314915e-07, "loss": 0.6143, "step": 7755 }, { "epoch": 0.83, "grad_norm": 1.8871368937054527, "learning_rate": 7.93472294693694e-07, "loss": 0.6365, "step": 7756 }, { "epoch": 0.83, "grad_norm": 1.7492422905486953, "learning_rate": 7.925191435852969e-07, "loss": 0.6224, "step": 7757 }, { "epoch": 0.83, "grad_norm": 1.0728681117394363, "learning_rate": 7.915665160249037e-07, "loss": 0.476, "step": 7758 }, { "epoch": 0.83, "grad_norm": 1.7677621038498461, "learning_rate": 7.906144121310516e-07, "loss": 0.5547, "step": 7759 }, { "epoch": 0.83, "grad_norm": 1.7030520778941252, "learning_rate": 7.896628320222144e-07, "loss": 0.5075, "step": 7760 }, { "epoch": 0.83, "grad_norm": 1.8240416174823457, "learning_rate": 7.887117758167995e-07, "loss": 0.7127, "step": 7761 }, { "epoch": 0.83, "grad_norm": 1.8478633654616257, "learning_rate": 7.877612436331489e-07, "loss": 0.6496, "step": 7762 }, { "epoch": 0.83, "grad_norm": 1.7620314894942706, "learning_rate": 7.868112355895402e-07, "loss": 0.5839, "step": 7763 }, { "epoch": 0.83, "grad_norm": 1.8359770639471864, "learning_rate": 7.858617518041856e-07, "loss": 0.6295, "step": 7764 }, { "epoch": 0.83, "grad_norm": 1.6286794799142417, "learning_rate": 7.84912792395231e-07, "loss": 0.5593, "step": 7765 }, { "epoch": 0.83, "grad_norm": 1.7918605502531169, "learning_rate": 7.839643574807593e-07, "loss": 0.5424, "step": 7766 }, { "epoch": 0.83, "grad_norm": 2.191524976755367, "learning_rate": 7.830164471787855e-07, "loss": 0.6535, "step": 7767 }, { "epoch": 0.83, "grad_norm": 1.7468114243827868, "learning_rate": 7.820690616072612e-07, "loss": 0.5104, "step": 7768 }, { "epoch": 0.83, "grad_norm": 1.6204254592989917, "learning_rate": 7.811222008840719e-07, "loss": 0.4951, "step": 7769 }, { "epoch": 0.83, "grad_norm": 1.869624071182901, "learning_rate": 7.801758651270386e-07, "loss": 0.5477, "step": 7770 }, { "epoch": 0.83, "grad_norm": 2.056636357706203, "learning_rate": 7.792300544539155e-07, "loss": 0.6309, "step": 7771 }, { "epoch": 0.83, "grad_norm": 1.7528576656614168, "learning_rate": 7.782847689823925e-07, "loss": 0.5268, "step": 7772 }, { "epoch": 0.83, "grad_norm": 1.7352075749834812, "learning_rate": 7.773400088300948e-07, "loss": 0.5716, "step": 7773 }, { "epoch": 0.83, "grad_norm": 1.5564512257531042, "learning_rate": 7.7639577411458e-07, "loss": 0.5363, "step": 7774 }, { "epoch": 0.83, "grad_norm": 1.924862769797192, "learning_rate": 7.754520649533431e-07, "loss": 0.7034, "step": 7775 }, { "epoch": 0.83, "grad_norm": 1.85485826308178, "learning_rate": 7.745088814638118e-07, "loss": 0.5666, "step": 7776 }, { "epoch": 0.83, "grad_norm": 1.8184323231716915, "learning_rate": 7.73566223763349e-07, "loss": 0.6301, "step": 7777 }, { "epoch": 0.83, "grad_norm": 1.745911321596363, "learning_rate": 7.726240919692513e-07, "loss": 0.6288, "step": 7778 }, { "epoch": 0.83, "grad_norm": 1.683646119447724, "learning_rate": 7.716824861987521e-07, "loss": 0.5219, "step": 7779 }, { "epoch": 0.83, "grad_norm": 1.6747016479810422, "learning_rate": 7.707414065690167e-07, "loss": 0.4658, "step": 7780 }, { "epoch": 0.83, "grad_norm": 1.7075527843320244, "learning_rate": 7.698008531971469e-07, "loss": 0.5971, "step": 7781 }, { "epoch": 0.83, "grad_norm": 1.7654803500918528, "learning_rate": 7.688608262001773e-07, "loss": 0.6498, "step": 7782 }, { "epoch": 0.83, "grad_norm": 1.8310934733351685, "learning_rate": 7.67921325695079e-07, "loss": 0.6067, "step": 7783 }, { "epoch": 0.83, "grad_norm": 1.7215555749259992, "learning_rate": 7.669823517987563e-07, "loss": 0.5246, "step": 7784 }, { "epoch": 0.83, "grad_norm": 1.7446498158886687, "learning_rate": 7.660439046280477e-07, "loss": 0.5731, "step": 7785 }, { "epoch": 0.83, "grad_norm": 1.7198287808712631, "learning_rate": 7.651059842997272e-07, "loss": 0.625, "step": 7786 }, { "epoch": 0.83, "grad_norm": 1.9405085968026934, "learning_rate": 7.641685909305024e-07, "loss": 0.6146, "step": 7787 }, { "epoch": 0.83, "grad_norm": 1.8029576431699625, "learning_rate": 7.632317246370174e-07, "loss": 0.5749, "step": 7788 }, { "epoch": 0.83, "grad_norm": 1.8172764219810391, "learning_rate": 7.622953855358456e-07, "loss": 0.618, "step": 7789 }, { "epoch": 0.83, "grad_norm": 1.9431034211256974, "learning_rate": 7.613595737434987e-07, "loss": 0.5189, "step": 7790 }, { "epoch": 0.83, "grad_norm": 1.7314537378839636, "learning_rate": 7.604242893764252e-07, "loss": 0.5605, "step": 7791 }, { "epoch": 0.83, "grad_norm": 1.6542641632189672, "learning_rate": 7.594895325510027e-07, "loss": 0.5353, "step": 7792 }, { "epoch": 0.83, "grad_norm": 1.0903328303302053, "learning_rate": 7.585553033835464e-07, "loss": 0.4913, "step": 7793 }, { "epoch": 0.83, "grad_norm": 2.094405860748831, "learning_rate": 7.576216019903038e-07, "loss": 0.6835, "step": 7794 }, { "epoch": 0.83, "grad_norm": 1.718970877019577, "learning_rate": 7.566884284874593e-07, "loss": 0.508, "step": 7795 }, { "epoch": 0.83, "grad_norm": 1.9770151581923023, "learning_rate": 7.557557829911289e-07, "loss": 0.622, "step": 7796 }, { "epoch": 0.83, "grad_norm": 1.7312884039719894, "learning_rate": 7.548236656173652e-07, "loss": 0.5729, "step": 7797 }, { "epoch": 0.83, "grad_norm": 1.7168989122752063, "learning_rate": 7.538920764821533e-07, "loss": 0.6249, "step": 7798 }, { "epoch": 0.83, "grad_norm": 1.8697902835860205, "learning_rate": 7.529610157014133e-07, "loss": 0.6534, "step": 7799 }, { "epoch": 0.83, "grad_norm": 1.8340558871993422, "learning_rate": 7.520304833910003e-07, "loss": 0.5485, "step": 7800 }, { "epoch": 0.83, "grad_norm": 1.6853428522759837, "learning_rate": 7.511004796667015e-07, "loss": 0.6083, "step": 7801 }, { "epoch": 0.83, "grad_norm": 1.656808736943504, "learning_rate": 7.501710046442412e-07, "loss": 0.5855, "step": 7802 }, { "epoch": 0.83, "grad_norm": 1.7219537611953533, "learning_rate": 7.492420584392774e-07, "loss": 0.6172, "step": 7803 }, { "epoch": 0.83, "grad_norm": 1.8868823153915162, "learning_rate": 7.48313641167398e-07, "loss": 0.643, "step": 7804 }, { "epoch": 0.83, "grad_norm": 1.7339706258384309, "learning_rate": 7.473857529441303e-07, "loss": 0.5565, "step": 7805 }, { "epoch": 0.83, "grad_norm": 1.6909787818254474, "learning_rate": 7.464583938849329e-07, "loss": 0.5533, "step": 7806 }, { "epoch": 0.83, "grad_norm": 1.7503599379765054, "learning_rate": 7.455315641052013e-07, "loss": 0.6102, "step": 7807 }, { "epoch": 0.83, "grad_norm": 1.9094572474745222, "learning_rate": 7.446052637202628e-07, "loss": 0.735, "step": 7808 }, { "epoch": 0.83, "grad_norm": 1.848267424778384, "learning_rate": 7.436794928453794e-07, "loss": 0.5437, "step": 7809 }, { "epoch": 0.83, "grad_norm": 1.8970260977959694, "learning_rate": 7.427542515957464e-07, "loss": 0.6578, "step": 7810 }, { "epoch": 0.83, "grad_norm": 1.6739056597653095, "learning_rate": 7.41829540086495e-07, "loss": 0.6311, "step": 7811 }, { "epoch": 0.83, "grad_norm": 2.0438058408538775, "learning_rate": 7.409053584326891e-07, "loss": 0.5521, "step": 7812 }, { "epoch": 0.83, "grad_norm": 1.6494233770407494, "learning_rate": 7.39981706749327e-07, "loss": 0.5556, "step": 7813 }, { "epoch": 0.83, "grad_norm": 1.7398663800202434, "learning_rate": 7.390585851513416e-07, "loss": 0.532, "step": 7814 }, { "epoch": 0.83, "grad_norm": 1.6495694660015987, "learning_rate": 7.381359937535981e-07, "loss": 0.6067, "step": 7815 }, { "epoch": 0.83, "grad_norm": 1.7809660607916378, "learning_rate": 7.372139326708988e-07, "loss": 0.5323, "step": 7816 }, { "epoch": 0.83, "grad_norm": 1.0253908570724075, "learning_rate": 7.362924020179779e-07, "loss": 0.4668, "step": 7817 }, { "epoch": 0.83, "grad_norm": 1.7480816562126338, "learning_rate": 7.353714019095026e-07, "loss": 0.5872, "step": 7818 }, { "epoch": 0.83, "grad_norm": 1.7540209085191545, "learning_rate": 7.344509324600757e-07, "loss": 0.5901, "step": 7819 }, { "epoch": 0.83, "grad_norm": 1.8322595698754989, "learning_rate": 7.335309937842339e-07, "loss": 0.59, "step": 7820 }, { "epoch": 0.83, "grad_norm": 1.6594290955966329, "learning_rate": 7.326115859964484e-07, "loss": 0.632, "step": 7821 }, { "epoch": 0.83, "grad_norm": 1.73361171848459, "learning_rate": 7.316927092111226e-07, "loss": 0.6477, "step": 7822 }, { "epoch": 0.83, "grad_norm": 1.705945854999435, "learning_rate": 7.307743635425934e-07, "loss": 0.5704, "step": 7823 }, { "epoch": 0.83, "grad_norm": 1.956429813532821, "learning_rate": 7.298565491051363e-07, "loss": 0.5941, "step": 7824 }, { "epoch": 0.83, "grad_norm": 1.8228494172697578, "learning_rate": 7.289392660129557e-07, "loss": 0.5698, "step": 7825 }, { "epoch": 0.83, "grad_norm": 1.7488369031103594, "learning_rate": 7.280225143801917e-07, "loss": 0.5427, "step": 7826 }, { "epoch": 0.83, "grad_norm": 1.5775050288232217, "learning_rate": 7.271062943209179e-07, "loss": 0.5318, "step": 7827 }, { "epoch": 0.83, "grad_norm": 1.8874340460145156, "learning_rate": 7.261906059491425e-07, "loss": 0.561, "step": 7828 }, { "epoch": 0.84, "grad_norm": 1.7432964080671185, "learning_rate": 7.252754493788061e-07, "loss": 0.5817, "step": 7829 }, { "epoch": 0.84, "grad_norm": 1.1026486857980227, "learning_rate": 7.243608247237849e-07, "loss": 0.4804, "step": 7830 }, { "epoch": 0.84, "grad_norm": 2.200197913966377, "learning_rate": 7.234467320978889e-07, "loss": 0.5684, "step": 7831 }, { "epoch": 0.84, "grad_norm": 1.7450774672233937, "learning_rate": 7.225331716148587e-07, "loss": 0.5802, "step": 7832 }, { "epoch": 0.84, "grad_norm": 1.8992220127241433, "learning_rate": 7.21620143388373e-07, "loss": 0.5963, "step": 7833 }, { "epoch": 0.84, "grad_norm": 1.8959198073451908, "learning_rate": 7.207076475320407e-07, "loss": 0.5138, "step": 7834 }, { "epoch": 0.84, "grad_norm": 1.1309248706236572, "learning_rate": 7.197956841594078e-07, "loss": 0.4871, "step": 7835 }, { "epoch": 0.84, "grad_norm": 1.7081170416291491, "learning_rate": 7.188842533839513e-07, "loss": 0.5562, "step": 7836 }, { "epoch": 0.84, "grad_norm": 1.6947867835593746, "learning_rate": 7.179733553190832e-07, "loss": 0.5242, "step": 7837 }, { "epoch": 0.84, "grad_norm": 1.7796045028210759, "learning_rate": 7.170629900781489e-07, "loss": 0.5911, "step": 7838 }, { "epoch": 0.84, "grad_norm": 1.8972695121882022, "learning_rate": 7.16153157774428e-07, "loss": 0.587, "step": 7839 }, { "epoch": 0.84, "grad_norm": 1.9314220259124875, "learning_rate": 7.152438585211313e-07, "loss": 0.542, "step": 7840 }, { "epoch": 0.84, "grad_norm": 1.5878494200009758, "learning_rate": 7.143350924314085e-07, "loss": 0.557, "step": 7841 }, { "epoch": 0.84, "grad_norm": 1.6946541159871376, "learning_rate": 7.13426859618338e-07, "loss": 0.475, "step": 7842 }, { "epoch": 0.84, "grad_norm": 1.8575837076988997, "learning_rate": 7.125191601949338e-07, "loss": 0.5427, "step": 7843 }, { "epoch": 0.84, "grad_norm": 1.6327122264364344, "learning_rate": 7.11611994274144e-07, "loss": 0.4408, "step": 7844 }, { "epoch": 0.84, "grad_norm": 2.375284024611666, "learning_rate": 7.107053619688487e-07, "loss": 0.6302, "step": 7845 }, { "epoch": 0.84, "grad_norm": 2.206729769515375, "learning_rate": 7.097992633918644e-07, "loss": 0.618, "step": 7846 }, { "epoch": 0.84, "grad_norm": 2.019398585143374, "learning_rate": 7.088936986559364e-07, "loss": 0.6132, "step": 7847 }, { "epoch": 0.84, "grad_norm": 1.7038527113195516, "learning_rate": 7.07988667873748e-07, "loss": 0.5555, "step": 7848 }, { "epoch": 0.84, "grad_norm": 1.9573940257775246, "learning_rate": 7.070841711579152e-07, "loss": 0.5903, "step": 7849 }, { "epoch": 0.84, "grad_norm": 1.722243908466952, "learning_rate": 7.061802086209857e-07, "loss": 0.606, "step": 7850 }, { "epoch": 0.84, "grad_norm": 1.6484943782091086, "learning_rate": 7.052767803754429e-07, "loss": 0.583, "step": 7851 }, { "epoch": 0.84, "grad_norm": 1.8174912227027311, "learning_rate": 7.043738865337025e-07, "loss": 0.5655, "step": 7852 }, { "epoch": 0.84, "grad_norm": 1.749424911823883, "learning_rate": 7.034715272081138e-07, "loss": 0.5951, "step": 7853 }, { "epoch": 0.84, "grad_norm": 1.8701953329439116, "learning_rate": 7.025697025109596e-07, "loss": 0.5683, "step": 7854 }, { "epoch": 0.84, "grad_norm": 1.790486485987993, "learning_rate": 7.01668412554457e-07, "loss": 0.6175, "step": 7855 }, { "epoch": 0.84, "grad_norm": 1.8469818447626738, "learning_rate": 7.007676574507555e-07, "loss": 0.5445, "step": 7856 }, { "epoch": 0.84, "grad_norm": 1.5107024043379904, "learning_rate": 6.998674373119374e-07, "loss": 0.4988, "step": 7857 }, { "epoch": 0.84, "grad_norm": 1.7569635100579961, "learning_rate": 6.989677522500221e-07, "loss": 0.5793, "step": 7858 }, { "epoch": 0.84, "grad_norm": 1.7567668165529424, "learning_rate": 6.980686023769578e-07, "loss": 0.4774, "step": 7859 }, { "epoch": 0.84, "grad_norm": 1.737702097747887, "learning_rate": 6.971699878046301e-07, "loss": 0.5383, "step": 7860 }, { "epoch": 0.84, "grad_norm": 1.8733256421643987, "learning_rate": 6.962719086448527e-07, "loss": 0.5975, "step": 7861 }, { "epoch": 0.84, "grad_norm": 1.8550991095513862, "learning_rate": 6.95374365009378e-07, "loss": 0.5143, "step": 7862 }, { "epoch": 0.84, "grad_norm": 1.7339683993922244, "learning_rate": 6.944773570098895e-07, "loss": 0.5611, "step": 7863 }, { "epoch": 0.84, "grad_norm": 1.97374486243275, "learning_rate": 6.935808847580044e-07, "loss": 0.6051, "step": 7864 }, { "epoch": 0.84, "grad_norm": 1.8304338928000081, "learning_rate": 6.926849483652731e-07, "loss": 0.6722, "step": 7865 }, { "epoch": 0.84, "grad_norm": 1.9706941915483418, "learning_rate": 6.917895479431791e-07, "loss": 0.5725, "step": 7866 }, { "epoch": 0.84, "grad_norm": 1.8944107353983701, "learning_rate": 6.908946836031393e-07, "loss": 0.6192, "step": 7867 }, { "epoch": 0.84, "grad_norm": 1.8425681549741275, "learning_rate": 6.900003554565049e-07, "loss": 0.5875, "step": 7868 }, { "epoch": 0.84, "grad_norm": 1.8352594011698529, "learning_rate": 6.891065636145583e-07, "loss": 0.5398, "step": 7869 }, { "epoch": 0.84, "grad_norm": 1.807145863709577, "learning_rate": 6.882133081885178e-07, "loss": 0.6324, "step": 7870 }, { "epoch": 0.84, "grad_norm": 1.8488618870460831, "learning_rate": 6.873205892895319e-07, "loss": 0.6706, "step": 7871 }, { "epoch": 0.84, "grad_norm": 1.7662275610673537, "learning_rate": 6.864284070286853e-07, "loss": 0.582, "step": 7872 }, { "epoch": 0.84, "grad_norm": 1.731581313773953, "learning_rate": 6.855367615169945e-07, "loss": 0.5699, "step": 7873 }, { "epoch": 0.84, "grad_norm": 1.1451061705661547, "learning_rate": 6.846456528654083e-07, "loss": 0.4971, "step": 7874 }, { "epoch": 0.84, "grad_norm": 1.8438968868664167, "learning_rate": 6.83755081184811e-07, "loss": 0.6391, "step": 7875 }, { "epoch": 0.84, "grad_norm": 1.8480362144553304, "learning_rate": 6.828650465860176e-07, "loss": 0.627, "step": 7876 }, { "epoch": 0.84, "grad_norm": 2.01310050136342, "learning_rate": 6.819755491797786e-07, "loss": 0.6584, "step": 7877 }, { "epoch": 0.84, "grad_norm": 2.228067572998492, "learning_rate": 6.810865890767754e-07, "loss": 0.6004, "step": 7878 }, { "epoch": 0.84, "grad_norm": 1.8831544406227323, "learning_rate": 6.801981663876245e-07, "loss": 0.635, "step": 7879 }, { "epoch": 0.84, "grad_norm": 1.629881423781757, "learning_rate": 6.793102812228747e-07, "loss": 0.5125, "step": 7880 }, { "epoch": 0.84, "grad_norm": 1.896305059144138, "learning_rate": 6.784229336930076e-07, "loss": 0.5074, "step": 7881 }, { "epoch": 0.84, "grad_norm": 2.195018509858337, "learning_rate": 6.775361239084377e-07, "loss": 0.7019, "step": 7882 }, { "epoch": 0.84, "grad_norm": 1.7513897694122207, "learning_rate": 6.766498519795145e-07, "loss": 0.4997, "step": 7883 }, { "epoch": 0.84, "grad_norm": 1.9336846854720424, "learning_rate": 6.757641180165176e-07, "loss": 0.6104, "step": 7884 }, { "epoch": 0.84, "grad_norm": 1.7604648987396678, "learning_rate": 6.74878922129662e-07, "loss": 0.5225, "step": 7885 }, { "epoch": 0.84, "grad_norm": 1.6178685903317054, "learning_rate": 6.739942644290953e-07, "loss": 0.5549, "step": 7886 }, { "epoch": 0.84, "grad_norm": 1.517500292485633, "learning_rate": 6.731101450248973e-07, "loss": 0.5084, "step": 7887 }, { "epoch": 0.84, "grad_norm": 1.8479819333084464, "learning_rate": 6.722265640270808e-07, "loss": 0.5237, "step": 7888 }, { "epoch": 0.84, "grad_norm": 1.0820926931960384, "learning_rate": 6.713435215455938e-07, "loss": 0.4613, "step": 7889 }, { "epoch": 0.84, "grad_norm": 1.8198550918858445, "learning_rate": 6.704610176903137e-07, "loss": 0.5625, "step": 7890 }, { "epoch": 0.84, "grad_norm": 2.113438108346685, "learning_rate": 6.695790525710538e-07, "loss": 0.5961, "step": 7891 }, { "epoch": 0.84, "grad_norm": 1.8008121473587697, "learning_rate": 6.686976262975597e-07, "loss": 0.6634, "step": 7892 }, { "epoch": 0.84, "grad_norm": 1.9182399067091105, "learning_rate": 6.678167389795087e-07, "loss": 0.6757, "step": 7893 }, { "epoch": 0.84, "grad_norm": 1.6469336629518152, "learning_rate": 6.669363907265125e-07, "loss": 0.6108, "step": 7894 }, { "epoch": 0.84, "grad_norm": 1.7123855890423427, "learning_rate": 6.660565816481151e-07, "loss": 0.6136, "step": 7895 }, { "epoch": 0.84, "grad_norm": 1.7154656041402427, "learning_rate": 6.651773118537935e-07, "loss": 0.6967, "step": 7896 }, { "epoch": 0.84, "grad_norm": 1.0928260626780146, "learning_rate": 6.642985814529579e-07, "loss": 0.4869, "step": 7897 }, { "epoch": 0.84, "grad_norm": 1.6188298048020682, "learning_rate": 6.634203905549502e-07, "loss": 0.5332, "step": 7898 }, { "epoch": 0.84, "grad_norm": 1.8469767151628083, "learning_rate": 6.625427392690464e-07, "loss": 0.6222, "step": 7899 }, { "epoch": 0.84, "grad_norm": 1.8854307166953526, "learning_rate": 6.616656277044553e-07, "loss": 0.6374, "step": 7900 }, { "epoch": 0.84, "grad_norm": 1.7418609191307979, "learning_rate": 6.607890559703178e-07, "loss": 0.5313, "step": 7901 }, { "epoch": 0.84, "grad_norm": 1.7857929986287824, "learning_rate": 6.599130241757079e-07, "loss": 0.5829, "step": 7902 }, { "epoch": 0.84, "grad_norm": 1.6814804314334302, "learning_rate": 6.590375324296333e-07, "loss": 0.5606, "step": 7903 }, { "epoch": 0.84, "grad_norm": 1.6385501340026694, "learning_rate": 6.581625808410335e-07, "loss": 0.5417, "step": 7904 }, { "epoch": 0.84, "grad_norm": 1.0654009599547818, "learning_rate": 6.572881695187805e-07, "loss": 0.4882, "step": 7905 }, { "epoch": 0.84, "grad_norm": 1.9085321708931666, "learning_rate": 6.564142985716804e-07, "loss": 0.65, "step": 7906 }, { "epoch": 0.84, "grad_norm": 1.8046933512578587, "learning_rate": 6.555409681084707e-07, "loss": 0.5677, "step": 7907 }, { "epoch": 0.84, "grad_norm": 1.6381961846160755, "learning_rate": 6.546681782378222e-07, "loss": 0.5623, "step": 7908 }, { "epoch": 0.84, "grad_norm": 2.034337093295765, "learning_rate": 6.537959290683393e-07, "loss": 0.6037, "step": 7909 }, { "epoch": 0.84, "grad_norm": 1.658676627361127, "learning_rate": 6.529242207085567e-07, "loss": 0.5881, "step": 7910 }, { "epoch": 0.84, "grad_norm": 1.6964618507131553, "learning_rate": 6.52053053266945e-07, "loss": 0.6621, "step": 7911 }, { "epoch": 0.84, "grad_norm": 1.663420019138942, "learning_rate": 6.511824268519052e-07, "loss": 0.5384, "step": 7912 }, { "epoch": 0.84, "grad_norm": 1.7244352455334464, "learning_rate": 6.503123415717716e-07, "loss": 0.6309, "step": 7913 }, { "epoch": 0.84, "grad_norm": 1.0344261195089408, "learning_rate": 6.494427975348116e-07, "loss": 0.4646, "step": 7914 }, { "epoch": 0.84, "grad_norm": 1.040404874080583, "learning_rate": 6.485737948492237e-07, "loss": 0.4719, "step": 7915 }, { "epoch": 0.84, "grad_norm": 1.116753546972583, "learning_rate": 6.477053336231421e-07, "loss": 0.4898, "step": 7916 }, { "epoch": 0.84, "grad_norm": 1.9128729229080248, "learning_rate": 6.4683741396463e-07, "loss": 0.5468, "step": 7917 }, { "epoch": 0.84, "grad_norm": 1.8154739765151122, "learning_rate": 6.459700359816867e-07, "loss": 0.6248, "step": 7918 }, { "epoch": 0.84, "grad_norm": 1.8450148887573743, "learning_rate": 6.451031997822405e-07, "loss": 0.5155, "step": 7919 }, { "epoch": 0.84, "grad_norm": 1.7404842023756462, "learning_rate": 6.442369054741537e-07, "loss": 0.6188, "step": 7920 }, { "epoch": 0.84, "grad_norm": 1.7849592135159753, "learning_rate": 6.433711531652243e-07, "loss": 0.574, "step": 7921 }, { "epoch": 0.85, "grad_norm": 1.7701070947880233, "learning_rate": 6.42505942963178e-07, "loss": 0.5698, "step": 7922 }, { "epoch": 0.85, "grad_norm": 1.7565201980704397, "learning_rate": 6.416412749756762e-07, "loss": 0.6192, "step": 7923 }, { "epoch": 0.85, "grad_norm": 1.7241290549217925, "learning_rate": 6.407771493103116e-07, "loss": 0.6156, "step": 7924 }, { "epoch": 0.85, "grad_norm": 1.8600638808472838, "learning_rate": 6.399135660746097e-07, "loss": 0.6049, "step": 7925 }, { "epoch": 0.85, "grad_norm": 1.9283945271859955, "learning_rate": 6.390505253760281e-07, "loss": 0.6013, "step": 7926 }, { "epoch": 0.85, "grad_norm": 2.077172448459539, "learning_rate": 6.381880273219576e-07, "loss": 0.5405, "step": 7927 }, { "epoch": 0.85, "grad_norm": 1.6497205784225628, "learning_rate": 6.373260720197206e-07, "loss": 0.4969, "step": 7928 }, { "epoch": 0.85, "grad_norm": 1.6322352898386312, "learning_rate": 6.364646595765728e-07, "loss": 0.6245, "step": 7929 }, { "epoch": 0.85, "grad_norm": 1.8571324723895772, "learning_rate": 6.356037900997025e-07, "loss": 0.5982, "step": 7930 }, { "epoch": 0.85, "grad_norm": 1.7414193580667456, "learning_rate": 6.347434636962297e-07, "loss": 0.5396, "step": 7931 }, { "epoch": 0.85, "grad_norm": 1.5647571150263195, "learning_rate": 6.338836804732074e-07, "loss": 0.505, "step": 7932 }, { "epoch": 0.85, "grad_norm": 1.5336130306085192, "learning_rate": 6.330244405376196e-07, "loss": 0.5706, "step": 7933 }, { "epoch": 0.85, "grad_norm": 1.855173075503662, "learning_rate": 6.321657439963846e-07, "loss": 0.5754, "step": 7934 }, { "epoch": 0.85, "grad_norm": 1.9425963340356294, "learning_rate": 6.313075909563521e-07, "loss": 0.6078, "step": 7935 }, { "epoch": 0.85, "grad_norm": 1.7907763434814552, "learning_rate": 6.304499815243043e-07, "loss": 0.7034, "step": 7936 }, { "epoch": 0.85, "grad_norm": 1.7562480756947165, "learning_rate": 6.295929158069547e-07, "loss": 0.6171, "step": 7937 }, { "epoch": 0.85, "grad_norm": 1.9143411033136393, "learning_rate": 6.287363939109525e-07, "loss": 0.6619, "step": 7938 }, { "epoch": 0.85, "grad_norm": 1.7667020920824292, "learning_rate": 6.278804159428764e-07, "loss": 0.5701, "step": 7939 }, { "epoch": 0.85, "grad_norm": 1.8803759297988787, "learning_rate": 6.270249820092372e-07, "loss": 0.6393, "step": 7940 }, { "epoch": 0.85, "grad_norm": 1.779065985720067, "learning_rate": 6.261700922164799e-07, "loss": 0.5383, "step": 7941 }, { "epoch": 0.85, "grad_norm": 1.7947658235057182, "learning_rate": 6.253157466709792e-07, "loss": 0.6827, "step": 7942 }, { "epoch": 0.85, "grad_norm": 1.7191275744540784, "learning_rate": 6.244619454790446e-07, "loss": 0.5629, "step": 7943 }, { "epoch": 0.85, "grad_norm": 1.6749647229896119, "learning_rate": 6.236086887469173e-07, "loss": 0.6692, "step": 7944 }, { "epoch": 0.85, "grad_norm": 1.9752438077232066, "learning_rate": 6.22755976580769e-07, "loss": 0.6115, "step": 7945 }, { "epoch": 0.85, "grad_norm": 1.900090097413398, "learning_rate": 6.219038090867069e-07, "loss": 0.6089, "step": 7946 }, { "epoch": 0.85, "grad_norm": 1.610468487011185, "learning_rate": 6.210521863707658e-07, "loss": 0.5695, "step": 7947 }, { "epoch": 0.85, "grad_norm": 1.0607721944915884, "learning_rate": 6.20201108538917e-07, "loss": 0.4721, "step": 7948 }, { "epoch": 0.85, "grad_norm": 1.7762275629082054, "learning_rate": 6.193505756970619e-07, "loss": 0.5926, "step": 7949 }, { "epoch": 0.85, "grad_norm": 1.8151638921370554, "learning_rate": 6.185005879510347e-07, "loss": 0.5966, "step": 7950 }, { "epoch": 0.85, "grad_norm": 1.9188760657608457, "learning_rate": 6.176511454066014e-07, "loss": 0.5445, "step": 7951 }, { "epoch": 0.85, "grad_norm": 1.6263647060932787, "learning_rate": 6.168022481694613e-07, "loss": 0.5462, "step": 7952 }, { "epoch": 0.85, "grad_norm": 1.658122671854276, "learning_rate": 6.159538963452427e-07, "loss": 0.6394, "step": 7953 }, { "epoch": 0.85, "grad_norm": 1.7795711384236674, "learning_rate": 6.151060900395118e-07, "loss": 0.5767, "step": 7954 }, { "epoch": 0.85, "grad_norm": 1.8312266395427008, "learning_rate": 6.142588293577606e-07, "loss": 0.6182, "step": 7955 }, { "epoch": 0.85, "grad_norm": 2.1999204622216357, "learning_rate": 6.134121144054172e-07, "loss": 0.6446, "step": 7956 }, { "epoch": 0.85, "grad_norm": 1.6450230081704138, "learning_rate": 6.125659452878402e-07, "loss": 0.5284, "step": 7957 }, { "epoch": 0.85, "grad_norm": 1.739705048579163, "learning_rate": 6.117203221103201e-07, "loss": 0.5617, "step": 7958 }, { "epoch": 0.85, "grad_norm": 1.7216748706488239, "learning_rate": 6.108752449780814e-07, "loss": 0.5165, "step": 7959 }, { "epoch": 0.85, "grad_norm": 1.9054047449103284, "learning_rate": 6.100307139962786e-07, "loss": 0.5741, "step": 7960 }, { "epoch": 0.85, "grad_norm": 1.8625761326249246, "learning_rate": 6.091867292699993e-07, "loss": 0.5821, "step": 7961 }, { "epoch": 0.85, "grad_norm": 1.7796700663587264, "learning_rate": 6.08343290904262e-07, "loss": 0.5417, "step": 7962 }, { "epoch": 0.85, "grad_norm": 2.177080605495494, "learning_rate": 6.075003990040173e-07, "loss": 0.5514, "step": 7963 }, { "epoch": 0.85, "grad_norm": 1.9613000277115458, "learning_rate": 6.066580536741501e-07, "loss": 0.5275, "step": 7964 }, { "epoch": 0.85, "grad_norm": 1.7889213620347206, "learning_rate": 6.058162550194752e-07, "loss": 0.6104, "step": 7965 }, { "epoch": 0.85, "grad_norm": 1.8753064991840476, "learning_rate": 6.049750031447394e-07, "loss": 0.6351, "step": 7966 }, { "epoch": 0.85, "grad_norm": 1.8726586176908069, "learning_rate": 6.041342981546222e-07, "loss": 0.5996, "step": 7967 }, { "epoch": 0.85, "grad_norm": 1.864575524408213, "learning_rate": 6.032941401537345e-07, "loss": 0.5721, "step": 7968 }, { "epoch": 0.85, "grad_norm": 1.6097931811669148, "learning_rate": 6.0245452924662e-07, "loss": 0.5683, "step": 7969 }, { "epoch": 0.85, "grad_norm": 1.8364522986771048, "learning_rate": 6.016154655377521e-07, "loss": 0.5919, "step": 7970 }, { "epoch": 0.85, "grad_norm": 1.7644200291788328, "learning_rate": 6.007769491315401e-07, "loss": 0.594, "step": 7971 }, { "epoch": 0.85, "grad_norm": 1.7121650487867472, "learning_rate": 5.999389801323219e-07, "loss": 0.5871, "step": 7972 }, { "epoch": 0.85, "grad_norm": 1.8533831938636645, "learning_rate": 5.991015586443671e-07, "loss": 0.6192, "step": 7973 }, { "epoch": 0.85, "grad_norm": 1.7917111104893697, "learning_rate": 5.982646847718798e-07, "loss": 0.554, "step": 7974 }, { "epoch": 0.85, "grad_norm": 1.6868634939151959, "learning_rate": 5.97428358618995e-07, "loss": 0.5226, "step": 7975 }, { "epoch": 0.85, "grad_norm": 1.778490996288345, "learning_rate": 5.965925802897765e-07, "loss": 0.6752, "step": 7976 }, { "epoch": 0.85, "grad_norm": 1.874801205994332, "learning_rate": 5.957573498882236e-07, "loss": 0.5371, "step": 7977 }, { "epoch": 0.85, "grad_norm": 1.62865583496033, "learning_rate": 5.949226675182662e-07, "loss": 0.5796, "step": 7978 }, { "epoch": 0.85, "grad_norm": 1.7303407636484132, "learning_rate": 5.940885332837665e-07, "loss": 0.5867, "step": 7979 }, { "epoch": 0.85, "grad_norm": 1.769081294007518, "learning_rate": 5.932549472885174e-07, "loss": 0.5292, "step": 7980 }, { "epoch": 0.85, "grad_norm": 1.7790783036997702, "learning_rate": 5.924219096362443e-07, "loss": 0.5876, "step": 7981 }, { "epoch": 0.85, "grad_norm": 1.9571422400559457, "learning_rate": 5.91589420430605e-07, "loss": 0.5847, "step": 7982 }, { "epoch": 0.85, "grad_norm": 1.8416173335178436, "learning_rate": 5.907574797751874e-07, "loss": 0.5647, "step": 7983 }, { "epoch": 0.85, "grad_norm": 1.0782137722849636, "learning_rate": 5.899260877735131e-07, "loss": 0.4847, "step": 7984 }, { "epoch": 0.85, "grad_norm": 1.0603181832190283, "learning_rate": 5.890952445290332e-07, "loss": 0.4926, "step": 7985 }, { "epoch": 0.85, "grad_norm": 1.806967958635067, "learning_rate": 5.882649501451326e-07, "loss": 0.5659, "step": 7986 }, { "epoch": 0.85, "grad_norm": 1.8006997895110526, "learning_rate": 5.874352047251258e-07, "loss": 0.5583, "step": 7987 }, { "epoch": 0.85, "grad_norm": 1.924647607722237, "learning_rate": 5.866060083722624e-07, "loss": 0.5576, "step": 7988 }, { "epoch": 0.85, "grad_norm": 1.8158808710679826, "learning_rate": 5.857773611897211e-07, "loss": 0.5882, "step": 7989 }, { "epoch": 0.85, "grad_norm": 1.8606016010100863, "learning_rate": 5.849492632806109e-07, "loss": 0.5589, "step": 7990 }, { "epoch": 0.85, "grad_norm": 1.9089553366311554, "learning_rate": 5.841217147479756e-07, "loss": 0.6068, "step": 7991 }, { "epoch": 0.85, "grad_norm": 1.8337926818533141, "learning_rate": 5.832947156947883e-07, "loss": 0.5664, "step": 7992 }, { "epoch": 0.85, "grad_norm": 1.5865984349958375, "learning_rate": 5.824682662239556e-07, "loss": 0.5471, "step": 7993 }, { "epoch": 0.85, "grad_norm": 1.0575034486938535, "learning_rate": 5.816423664383142e-07, "loss": 0.4672, "step": 7994 }, { "epoch": 0.85, "grad_norm": 2.070586668261946, "learning_rate": 5.808170164406335e-07, "loss": 0.7357, "step": 7995 }, { "epoch": 0.85, "grad_norm": 1.0962402259813708, "learning_rate": 5.799922163336135e-07, "loss": 0.4715, "step": 7996 }, { "epoch": 0.85, "grad_norm": 1.810895988131029, "learning_rate": 5.791679662198862e-07, "loss": 0.5291, "step": 7997 }, { "epoch": 0.85, "grad_norm": 1.5506682335927071, "learning_rate": 5.783442662020161e-07, "loss": 0.4551, "step": 7998 }, { "epoch": 0.85, "grad_norm": 1.7170038947701627, "learning_rate": 5.775211163824979e-07, "loss": 0.5972, "step": 7999 }, { "epoch": 0.85, "grad_norm": 1.7092418593359142, "learning_rate": 5.766985168637579e-07, "loss": 0.6342, "step": 8000 }, { "epoch": 0.85, "grad_norm": 1.7485343519803969, "learning_rate": 5.758764677481543e-07, "loss": 0.5165, "step": 8001 }, { "epoch": 0.85, "grad_norm": 1.9304388163708914, "learning_rate": 5.750549691379781e-07, "loss": 0.5507, "step": 8002 }, { "epoch": 0.85, "grad_norm": 1.7337103674608818, "learning_rate": 5.742340211354491e-07, "loss": 0.682, "step": 8003 }, { "epoch": 0.85, "grad_norm": 1.789210704412878, "learning_rate": 5.734136238427207e-07, "loss": 0.564, "step": 8004 }, { "epoch": 0.85, "grad_norm": 1.0963129080397496, "learning_rate": 5.725937773618767e-07, "loss": 0.4875, "step": 8005 }, { "epoch": 0.85, "grad_norm": 1.8780127948990326, "learning_rate": 5.717744817949339e-07, "loss": 0.6602, "step": 8006 }, { "epoch": 0.85, "grad_norm": 1.5653949248480274, "learning_rate": 5.709557372438379e-07, "loss": 0.4781, "step": 8007 }, { "epoch": 0.85, "grad_norm": 1.7140504125521419, "learning_rate": 5.701375438104684e-07, "loss": 0.5729, "step": 8008 }, { "epoch": 0.85, "grad_norm": 1.841021314998048, "learning_rate": 5.693199015966344e-07, "loss": 0.5534, "step": 8009 }, { "epoch": 0.85, "grad_norm": 1.9092559528395567, "learning_rate": 5.68502810704078e-07, "loss": 0.5849, "step": 8010 }, { "epoch": 0.85, "grad_norm": 1.0961889998201269, "learning_rate": 5.676862712344722e-07, "loss": 0.4792, "step": 8011 }, { "epoch": 0.85, "grad_norm": 1.7433064825884799, "learning_rate": 5.668702832894207e-07, "loss": 0.6906, "step": 8012 }, { "epoch": 0.85, "grad_norm": 1.6132617026600373, "learning_rate": 5.660548469704591e-07, "loss": 0.4663, "step": 8013 }, { "epoch": 0.85, "grad_norm": 1.804156803824273, "learning_rate": 5.652399623790538e-07, "loss": 0.5867, "step": 8014 }, { "epoch": 0.85, "grad_norm": 1.866270585831936, "learning_rate": 5.644256296166045e-07, "loss": 0.5512, "step": 8015 }, { "epoch": 0.86, "grad_norm": 1.5065434314016417, "learning_rate": 5.636118487844394e-07, "loss": 0.4807, "step": 8016 }, { "epoch": 0.86, "grad_norm": 1.7037321190148091, "learning_rate": 5.627986199838198e-07, "loss": 0.5118, "step": 8017 }, { "epoch": 0.86, "grad_norm": 1.7888860833485378, "learning_rate": 5.61985943315938e-07, "loss": 0.5897, "step": 8018 }, { "epoch": 0.86, "grad_norm": 1.7262597480474366, "learning_rate": 5.611738188819177e-07, "loss": 0.4654, "step": 8019 }, { "epoch": 0.86, "grad_norm": 1.8290207557366323, "learning_rate": 5.603622467828135e-07, "loss": 0.4637, "step": 8020 }, { "epoch": 0.86, "grad_norm": 1.838566706390034, "learning_rate": 5.595512271196113e-07, "loss": 0.5866, "step": 8021 }, { "epoch": 0.86, "grad_norm": 1.9130757397971252, "learning_rate": 5.587407599932287e-07, "loss": 0.644, "step": 8022 }, { "epoch": 0.86, "grad_norm": 1.8336613908581694, "learning_rate": 5.579308455045141e-07, "loss": 0.69, "step": 8023 }, { "epoch": 0.86, "grad_norm": 1.701400113178309, "learning_rate": 5.571214837542477e-07, "loss": 0.5567, "step": 8024 }, { "epoch": 0.86, "grad_norm": 1.676225299175756, "learning_rate": 5.563126748431402e-07, "loss": 0.6635, "step": 8025 }, { "epoch": 0.86, "grad_norm": 1.9972626137877774, "learning_rate": 5.555044188718334e-07, "loss": 0.5427, "step": 8026 }, { "epoch": 0.86, "grad_norm": 1.9044520590118073, "learning_rate": 5.546967159409017e-07, "loss": 0.6288, "step": 8027 }, { "epoch": 0.86, "grad_norm": 1.6653620472356185, "learning_rate": 5.538895661508492e-07, "loss": 0.5285, "step": 8028 }, { "epoch": 0.86, "grad_norm": 1.922663183143071, "learning_rate": 5.530829696021111e-07, "loss": 0.5912, "step": 8029 }, { "epoch": 0.86, "grad_norm": 1.7835106447065774, "learning_rate": 5.522769263950556e-07, "loss": 0.592, "step": 8030 }, { "epoch": 0.86, "grad_norm": 1.7676168871578701, "learning_rate": 5.514714366299801e-07, "loss": 0.5576, "step": 8031 }, { "epoch": 0.86, "grad_norm": 2.020805034974309, "learning_rate": 5.506665004071143e-07, "loss": 0.57, "step": 8032 }, { "epoch": 0.86, "grad_norm": 1.9887557928876864, "learning_rate": 5.498621178266167e-07, "loss": 0.6265, "step": 8033 }, { "epoch": 0.86, "grad_norm": 1.6752621256840088, "learning_rate": 5.490582889885809e-07, "loss": 0.6063, "step": 8034 }, { "epoch": 0.86, "grad_norm": 1.7983776030659506, "learning_rate": 5.482550139930293e-07, "loss": 0.5756, "step": 8035 }, { "epoch": 0.86, "grad_norm": 2.101976857746194, "learning_rate": 5.474522929399146e-07, "loss": 0.5371, "step": 8036 }, { "epoch": 0.86, "grad_norm": 1.8452186321645498, "learning_rate": 5.466501259291213e-07, "loss": 0.6102, "step": 8037 }, { "epoch": 0.86, "grad_norm": 1.8305434810393166, "learning_rate": 5.458485130604662e-07, "loss": 0.6029, "step": 8038 }, { "epoch": 0.86, "grad_norm": 1.8170310946818862, "learning_rate": 5.450474544336959e-07, "loss": 0.5459, "step": 8039 }, { "epoch": 0.86, "grad_norm": 1.781591574476724, "learning_rate": 5.442469501484881e-07, "loss": 0.5588, "step": 8040 }, { "epoch": 0.86, "grad_norm": 1.1067478651772265, "learning_rate": 5.434470003044512e-07, "loss": 0.483, "step": 8041 }, { "epoch": 0.86, "grad_norm": 1.0983627775484646, "learning_rate": 5.426476050011254e-07, "loss": 0.5022, "step": 8042 }, { "epoch": 0.86, "grad_norm": 1.5769998151331444, "learning_rate": 5.418487643379821e-07, "loss": 0.493, "step": 8043 }, { "epoch": 0.86, "grad_norm": 1.0533085907441815, "learning_rate": 5.410504784144221e-07, "loss": 0.4738, "step": 8044 }, { "epoch": 0.86, "grad_norm": 1.6261112916304796, "learning_rate": 5.402527473297797e-07, "loss": 0.4791, "step": 8045 }, { "epoch": 0.86, "grad_norm": 1.8484359257104523, "learning_rate": 5.394555711833172e-07, "loss": 0.645, "step": 8046 }, { "epoch": 0.86, "grad_norm": 1.7303101085006547, "learning_rate": 5.386589500742312e-07, "loss": 0.5757, "step": 8047 }, { "epoch": 0.86, "grad_norm": 1.7554410357771921, "learning_rate": 5.378628841016453e-07, "loss": 0.5794, "step": 8048 }, { "epoch": 0.86, "grad_norm": 1.8266772006335599, "learning_rate": 5.370673733646165e-07, "loss": 0.5585, "step": 8049 }, { "epoch": 0.86, "grad_norm": 1.7811755140047607, "learning_rate": 5.362724179621321e-07, "loss": 0.6281, "step": 8050 }, { "epoch": 0.86, "grad_norm": 1.8334659191515748, "learning_rate": 5.354780179931123e-07, "loss": 0.5205, "step": 8051 }, { "epoch": 0.86, "grad_norm": 1.7776923053325435, "learning_rate": 5.346841735564052e-07, "loss": 0.5601, "step": 8052 }, { "epoch": 0.86, "grad_norm": 1.8206297595641963, "learning_rate": 5.338908847507907e-07, "loss": 0.6262, "step": 8053 }, { "epoch": 0.86, "grad_norm": 1.818915588913456, "learning_rate": 5.330981516749805e-07, "loss": 0.6174, "step": 8054 }, { "epoch": 0.86, "grad_norm": 1.6839465840725887, "learning_rate": 5.32305974427616e-07, "loss": 0.6413, "step": 8055 }, { "epoch": 0.86, "grad_norm": 1.8926281713976076, "learning_rate": 5.315143531072703e-07, "loss": 0.5523, "step": 8056 }, { "epoch": 0.86, "grad_norm": 1.9072111758156003, "learning_rate": 5.307232878124468e-07, "loss": 0.545, "step": 8057 }, { "epoch": 0.86, "grad_norm": 1.7829674439633771, "learning_rate": 5.299327786415792e-07, "loss": 0.5479, "step": 8058 }, { "epoch": 0.86, "grad_norm": 2.1685036417422983, "learning_rate": 5.291428256930336e-07, "loss": 0.6264, "step": 8059 }, { "epoch": 0.86, "grad_norm": 1.9326884476424613, "learning_rate": 5.28353429065106e-07, "loss": 0.5884, "step": 8060 }, { "epoch": 0.86, "grad_norm": 1.7472053294841317, "learning_rate": 5.275645888560233e-07, "loss": 0.5678, "step": 8061 }, { "epoch": 0.86, "grad_norm": 1.9118059966030592, "learning_rate": 5.267763051639407e-07, "loss": 0.5384, "step": 8062 }, { "epoch": 0.86, "grad_norm": 2.175220700432871, "learning_rate": 5.259885780869489e-07, "loss": 0.5496, "step": 8063 }, { "epoch": 0.86, "grad_norm": 1.6598830206819615, "learning_rate": 5.252014077230661e-07, "loss": 0.6181, "step": 8064 }, { "epoch": 0.86, "grad_norm": 1.7135788492574702, "learning_rate": 5.244147941702415e-07, "loss": 0.5493, "step": 8065 }, { "epoch": 0.86, "grad_norm": 1.103356289223282, "learning_rate": 5.23628737526356e-07, "loss": 0.5097, "step": 8066 }, { "epoch": 0.86, "grad_norm": 1.7481999362503626, "learning_rate": 5.228432378892201e-07, "loss": 0.5956, "step": 8067 }, { "epoch": 0.86, "grad_norm": 1.6634305693556335, "learning_rate": 5.220582953565767e-07, "loss": 0.5375, "step": 8068 }, { "epoch": 0.86, "grad_norm": 1.908107341135217, "learning_rate": 5.212739100260983e-07, "loss": 0.702, "step": 8069 }, { "epoch": 0.86, "grad_norm": 1.790808957152384, "learning_rate": 5.204900819953868e-07, "loss": 0.5336, "step": 8070 }, { "epoch": 0.86, "grad_norm": 1.1007924686306492, "learning_rate": 5.197068113619774e-07, "loss": 0.4985, "step": 8071 }, { "epoch": 0.86, "grad_norm": 1.5719720820008938, "learning_rate": 5.189240982233335e-07, "loss": 0.4614, "step": 8072 }, { "epoch": 0.86, "grad_norm": 1.090829797401113, "learning_rate": 5.181419426768509e-07, "loss": 0.4802, "step": 8073 }, { "epoch": 0.86, "grad_norm": 1.7529183064825837, "learning_rate": 5.173603448198545e-07, "loss": 0.5402, "step": 8074 }, { "epoch": 0.86, "grad_norm": 1.9939939433488156, "learning_rate": 5.165793047496009e-07, "loss": 0.6737, "step": 8075 }, { "epoch": 0.86, "grad_norm": 1.8077753431269694, "learning_rate": 5.157988225632781e-07, "loss": 0.6306, "step": 8076 }, { "epoch": 0.86, "grad_norm": 1.7991569015918651, "learning_rate": 5.150188983580024e-07, "loss": 0.5413, "step": 8077 }, { "epoch": 0.86, "grad_norm": 1.661669556474685, "learning_rate": 5.142395322308207e-07, "loss": 0.5061, "step": 8078 }, { "epoch": 0.86, "grad_norm": 1.8674281726348305, "learning_rate": 5.134607242787137e-07, "loss": 0.6216, "step": 8079 }, { "epoch": 0.86, "grad_norm": 1.7905107162089688, "learning_rate": 5.126824745985897e-07, "loss": 0.5766, "step": 8080 }, { "epoch": 0.86, "grad_norm": 1.753880166551617, "learning_rate": 5.119047832872887e-07, "loss": 0.5966, "step": 8081 }, { "epoch": 0.86, "grad_norm": 1.061886596055992, "learning_rate": 5.111276504415802e-07, "loss": 0.4863, "step": 8082 }, { "epoch": 0.86, "grad_norm": 1.8305950891825669, "learning_rate": 5.103510761581654e-07, "loss": 0.5805, "step": 8083 }, { "epoch": 0.86, "grad_norm": 1.9626198409183961, "learning_rate": 5.095750605336741e-07, "loss": 0.6843, "step": 8084 }, { "epoch": 0.86, "grad_norm": 1.8740108971566665, "learning_rate": 5.087996036646708e-07, "loss": 0.5261, "step": 8085 }, { "epoch": 0.86, "grad_norm": 1.8329071617800827, "learning_rate": 5.080247056476456e-07, "loss": 0.5967, "step": 8086 }, { "epoch": 0.86, "grad_norm": 1.8102329619082511, "learning_rate": 5.072503665790223e-07, "loss": 0.6156, "step": 8087 }, { "epoch": 0.86, "grad_norm": 2.6215745790952143, "learning_rate": 5.064765865551529e-07, "loss": 0.638, "step": 8088 }, { "epoch": 0.86, "grad_norm": 1.0930246244159603, "learning_rate": 5.057033656723215e-07, "loss": 0.4733, "step": 8089 }, { "epoch": 0.86, "grad_norm": 1.63573547470342, "learning_rate": 5.049307040267427e-07, "loss": 0.5357, "step": 8090 }, { "epoch": 0.86, "grad_norm": 1.7517041987704827, "learning_rate": 5.041586017145588e-07, "loss": 0.5701, "step": 8091 }, { "epoch": 0.86, "grad_norm": 1.956921470733695, "learning_rate": 5.033870588318463e-07, "loss": 0.6092, "step": 8092 }, { "epoch": 0.86, "grad_norm": 1.6965856480535297, "learning_rate": 5.02616075474609e-07, "loss": 0.5644, "step": 8093 }, { "epoch": 0.86, "grad_norm": 1.9325363118767995, "learning_rate": 5.018456517387837e-07, "loss": 0.6293, "step": 8094 }, { "epoch": 0.86, "grad_norm": 1.0694413757903303, "learning_rate": 5.010757877202355e-07, "loss": 0.4753, "step": 8095 }, { "epoch": 0.86, "grad_norm": 1.8405807717099425, "learning_rate": 5.003064835147609e-07, "loss": 0.629, "step": 8096 }, { "epoch": 0.86, "grad_norm": 2.04914825299714, "learning_rate": 4.995377392180867e-07, "loss": 0.6076, "step": 8097 }, { "epoch": 0.86, "grad_norm": 1.0674709442813661, "learning_rate": 4.987695549258693e-07, "loss": 0.4992, "step": 8098 }, { "epoch": 0.86, "grad_norm": 1.889197039058755, "learning_rate": 4.980019307336959e-07, "loss": 0.5698, "step": 8099 }, { "epoch": 0.86, "grad_norm": 1.821160771644125, "learning_rate": 4.972348667370846e-07, "loss": 0.5912, "step": 8100 }, { "epoch": 0.86, "grad_norm": 2.113333970878579, "learning_rate": 4.964683630314815e-07, "loss": 0.5795, "step": 8101 }, { "epoch": 0.86, "grad_norm": 1.0693815119719208, "learning_rate": 4.957024197122673e-07, "loss": 0.4599, "step": 8102 }, { "epoch": 0.86, "grad_norm": 1.6528911701854663, "learning_rate": 4.949370368747486e-07, "loss": 0.4629, "step": 8103 }, { "epoch": 0.86, "grad_norm": 1.575637841381994, "learning_rate": 4.941722146141659e-07, "loss": 0.5835, "step": 8104 }, { "epoch": 0.86, "grad_norm": 1.7375994472078855, "learning_rate": 4.934079530256858e-07, "loss": 0.5833, "step": 8105 }, { "epoch": 0.86, "grad_norm": 1.0618171266097498, "learning_rate": 4.926442522044079e-07, "loss": 0.4738, "step": 8106 }, { "epoch": 0.86, "grad_norm": 1.0805963207832279, "learning_rate": 4.91881112245362e-07, "loss": 0.4789, "step": 8107 }, { "epoch": 0.86, "grad_norm": 1.5661734476062725, "learning_rate": 4.911185332435075e-07, "loss": 0.4967, "step": 8108 }, { "epoch": 0.86, "grad_norm": 1.7475404517422568, "learning_rate": 4.903565152937345e-07, "loss": 0.6119, "step": 8109 }, { "epoch": 0.87, "grad_norm": 1.7775587615320274, "learning_rate": 4.895950584908621e-07, "loss": 0.5577, "step": 8110 }, { "epoch": 0.87, "grad_norm": 1.571984602195803, "learning_rate": 4.888341629296406e-07, "loss": 0.4895, "step": 8111 }, { "epoch": 0.87, "grad_norm": 1.8331120273098749, "learning_rate": 4.88073828704751e-07, "loss": 0.5773, "step": 8112 }, { "epoch": 0.87, "grad_norm": 1.9606477738975363, "learning_rate": 4.87314055910803e-07, "loss": 0.5543, "step": 8113 }, { "epoch": 0.87, "grad_norm": 1.8056693429189812, "learning_rate": 4.865548446423374e-07, "loss": 0.6231, "step": 8114 }, { "epoch": 0.87, "grad_norm": 1.9197286790816177, "learning_rate": 4.857961949938251e-07, "loss": 0.649, "step": 8115 }, { "epoch": 0.87, "grad_norm": 1.7173729052791535, "learning_rate": 4.85038107059666e-07, "loss": 0.613, "step": 8116 }, { "epoch": 0.87, "grad_norm": 2.004592285322513, "learning_rate": 4.842805809341911e-07, "loss": 0.5601, "step": 8117 }, { "epoch": 0.87, "grad_norm": 1.9249722550159876, "learning_rate": 4.835236167116625e-07, "loss": 0.6936, "step": 8118 }, { "epoch": 0.87, "grad_norm": 1.6968493536858524, "learning_rate": 4.827672144862722e-07, "loss": 0.549, "step": 8119 }, { "epoch": 0.87, "grad_norm": 1.8303342347590434, "learning_rate": 4.820113743521381e-07, "loss": 0.5472, "step": 8120 }, { "epoch": 0.87, "grad_norm": 1.7052882107689042, "learning_rate": 4.812560964033136e-07, "loss": 0.6269, "step": 8121 }, { "epoch": 0.87, "grad_norm": 1.6901115544051948, "learning_rate": 4.805013807337788e-07, "loss": 0.5292, "step": 8122 }, { "epoch": 0.87, "grad_norm": 1.7793625222987894, "learning_rate": 4.797472274374465e-07, "loss": 0.6498, "step": 8123 }, { "epoch": 0.87, "grad_norm": 1.8461532267146865, "learning_rate": 4.789936366081566e-07, "loss": 0.6188, "step": 8124 }, { "epoch": 0.87, "grad_norm": 1.8841620709370788, "learning_rate": 4.782406083396807e-07, "loss": 0.505, "step": 8125 }, { "epoch": 0.87, "grad_norm": 1.840343521596104, "learning_rate": 4.774881427257205e-07, "loss": 0.636, "step": 8126 }, { "epoch": 0.87, "grad_norm": 1.069915943793114, "learning_rate": 4.767362398599068e-07, "loss": 0.4916, "step": 8127 }, { "epoch": 0.87, "grad_norm": 1.0853316937994901, "learning_rate": 4.7598489983580133e-07, "loss": 0.4844, "step": 8128 }, { "epoch": 0.87, "grad_norm": 1.0923568402389765, "learning_rate": 4.752341227468954e-07, "loss": 0.5062, "step": 8129 }, { "epoch": 0.87, "grad_norm": 1.9250267319284071, "learning_rate": 4.7448390868660934e-07, "loss": 0.5843, "step": 8130 }, { "epoch": 0.87, "grad_norm": 1.8197037857550702, "learning_rate": 4.737342577482956e-07, "loss": 0.6397, "step": 8131 }, { "epoch": 0.87, "grad_norm": 1.7357340568786146, "learning_rate": 4.729851700252341e-07, "loss": 0.5576, "step": 8132 }, { "epoch": 0.87, "grad_norm": 1.814134033986635, "learning_rate": 4.7223664561063643e-07, "loss": 0.5838, "step": 8133 }, { "epoch": 0.87, "grad_norm": 1.6868539960564, "learning_rate": 4.71488684597643e-07, "loss": 0.5924, "step": 8134 }, { "epoch": 0.87, "grad_norm": 1.7267798645702264, "learning_rate": 4.7074128707932485e-07, "loss": 0.5885, "step": 8135 }, { "epoch": 0.87, "grad_norm": 1.7450898770184444, "learning_rate": 4.6999445314868276e-07, "loss": 0.6075, "step": 8136 }, { "epoch": 0.87, "grad_norm": 1.9960993496279558, "learning_rate": 4.692481828986473e-07, "loss": 0.6509, "step": 8137 }, { "epoch": 0.87, "grad_norm": 1.8639553295056617, "learning_rate": 4.6850247642207803e-07, "loss": 0.6844, "step": 8138 }, { "epoch": 0.87, "grad_norm": 1.9051640370035734, "learning_rate": 4.677573338117664e-07, "loss": 0.6375, "step": 8139 }, { "epoch": 0.87, "grad_norm": 1.8228391246741995, "learning_rate": 4.67012755160432e-07, "loss": 0.6164, "step": 8140 }, { "epoch": 0.87, "grad_norm": 1.6958020004768772, "learning_rate": 4.6626874056072415e-07, "loss": 0.5468, "step": 8141 }, { "epoch": 0.87, "grad_norm": 1.9374713576140812, "learning_rate": 4.6552529010522375e-07, "loss": 0.5691, "step": 8142 }, { "epoch": 0.87, "grad_norm": 1.1068055751428592, "learning_rate": 4.6478240388643903e-07, "loss": 0.5117, "step": 8143 }, { "epoch": 0.87, "grad_norm": 1.8571666985876536, "learning_rate": 4.6404008199680997e-07, "loss": 0.6069, "step": 8144 }, { "epoch": 0.87, "grad_norm": 1.9959785239982106, "learning_rate": 4.6329832452870593e-07, "loss": 0.5103, "step": 8145 }, { "epoch": 0.87, "grad_norm": 1.7571887939637303, "learning_rate": 4.625571315744248e-07, "loss": 0.5727, "step": 8146 }, { "epoch": 0.87, "grad_norm": 1.8244593613997684, "learning_rate": 4.618165032261962e-07, "loss": 0.6435, "step": 8147 }, { "epoch": 0.87, "grad_norm": 2.1462337814428385, "learning_rate": 4.6107643957617796e-07, "loss": 0.5686, "step": 8148 }, { "epoch": 0.87, "grad_norm": 1.9072819678383883, "learning_rate": 4.603369407164582e-07, "loss": 0.6026, "step": 8149 }, { "epoch": 0.87, "grad_norm": 1.930316829841476, "learning_rate": 4.5959800673905486e-07, "loss": 0.6191, "step": 8150 }, { "epoch": 0.87, "grad_norm": 1.7377086434074664, "learning_rate": 4.588596377359156e-07, "loss": 0.5888, "step": 8151 }, { "epoch": 0.87, "grad_norm": 1.113874765725909, "learning_rate": 4.581218337989168e-07, "loss": 0.4984, "step": 8152 }, { "epoch": 0.87, "grad_norm": 1.8318411130091383, "learning_rate": 4.5738459501986634e-07, "loss": 0.5936, "step": 8153 }, { "epoch": 0.87, "grad_norm": 1.8286547393522035, "learning_rate": 4.566479214905006e-07, "loss": 0.5632, "step": 8154 }, { "epoch": 0.87, "grad_norm": 1.8005194805262694, "learning_rate": 4.5591181330248534e-07, "loss": 0.5512, "step": 8155 }, { "epoch": 0.87, "grad_norm": 1.6755792283989654, "learning_rate": 4.551762705474172e-07, "loss": 0.5444, "step": 8156 }, { "epoch": 0.87, "grad_norm": 2.0500945555763685, "learning_rate": 4.544412933168207e-07, "loss": 0.643, "step": 8157 }, { "epoch": 0.87, "grad_norm": 1.7308616987020902, "learning_rate": 4.5370688170215227e-07, "loss": 0.4843, "step": 8158 }, { "epoch": 0.87, "grad_norm": 1.9594020735319715, "learning_rate": 4.529730357947953e-07, "loss": 0.6258, "step": 8159 }, { "epoch": 0.87, "grad_norm": 1.56348397309677, "learning_rate": 4.522397556860653e-07, "loss": 0.5834, "step": 8160 }, { "epoch": 0.87, "grad_norm": 1.8347262076567323, "learning_rate": 4.515070414672057e-07, "loss": 0.5593, "step": 8161 }, { "epoch": 0.87, "grad_norm": 1.6439152920022468, "learning_rate": 4.5077489322939147e-07, "loss": 0.5338, "step": 8162 }, { "epoch": 0.87, "grad_norm": 1.6851667869830838, "learning_rate": 4.5004331106372257e-07, "loss": 0.4876, "step": 8163 }, { "epoch": 0.87, "grad_norm": 1.6640965484692523, "learning_rate": 4.493122950612344e-07, "loss": 0.5462, "step": 8164 }, { "epoch": 0.87, "grad_norm": 1.8430790350762676, "learning_rate": 4.485818453128882e-07, "loss": 0.5553, "step": 8165 }, { "epoch": 0.87, "grad_norm": 1.9303620325098807, "learning_rate": 4.478519619095767e-07, "loss": 0.5218, "step": 8166 }, { "epoch": 0.87, "grad_norm": 1.7773463363462243, "learning_rate": 4.471226449421201e-07, "loss": 0.5498, "step": 8167 }, { "epoch": 0.87, "grad_norm": 1.8802677087861461, "learning_rate": 4.463938945012697e-07, "loss": 0.629, "step": 8168 }, { "epoch": 0.87, "grad_norm": 1.8374435340845467, "learning_rate": 4.4566571067770624e-07, "loss": 0.6246, "step": 8169 }, { "epoch": 0.87, "grad_norm": 1.7725852838171399, "learning_rate": 4.44938093562039e-07, "loss": 0.6239, "step": 8170 }, { "epoch": 0.87, "grad_norm": 1.8360842863831905, "learning_rate": 4.4421104324480714e-07, "loss": 0.6126, "step": 8171 }, { "epoch": 0.87, "grad_norm": 1.9641100052329936, "learning_rate": 4.4348455981648054e-07, "loss": 0.5721, "step": 8172 }, { "epoch": 0.87, "grad_norm": 1.7574275898616734, "learning_rate": 4.427586433674563e-07, "loss": 0.5914, "step": 8173 }, { "epoch": 0.87, "grad_norm": 1.854706631029951, "learning_rate": 4.4203329398806227e-07, "loss": 0.6178, "step": 8174 }, { "epoch": 0.87, "grad_norm": 1.9327353342865483, "learning_rate": 4.413085117685567e-07, "loss": 0.626, "step": 8175 }, { "epoch": 0.87, "grad_norm": 1.8379720698043376, "learning_rate": 4.4058429679912583e-07, "loss": 0.6341, "step": 8176 }, { "epoch": 0.87, "grad_norm": 1.7236157630812357, "learning_rate": 4.3986064916988426e-07, "loss": 0.5881, "step": 8177 }, { "epoch": 0.87, "grad_norm": 1.0298255032163153, "learning_rate": 4.3913756897087824e-07, "loss": 0.4773, "step": 8178 }, { "epoch": 0.87, "grad_norm": 1.7400117623747446, "learning_rate": 4.3841505629208247e-07, "loss": 0.49, "step": 8179 }, { "epoch": 0.87, "grad_norm": 1.8441528034815997, "learning_rate": 4.3769311122340066e-07, "loss": 0.6434, "step": 8180 }, { "epoch": 0.87, "grad_norm": 1.9186553261150032, "learning_rate": 4.3697173385466806e-07, "loss": 0.593, "step": 8181 }, { "epoch": 0.87, "grad_norm": 1.9933894105214423, "learning_rate": 4.362509242756463e-07, "loss": 0.6657, "step": 8182 }, { "epoch": 0.87, "grad_norm": 1.771527250373362, "learning_rate": 4.355306825760275e-07, "loss": 0.6356, "step": 8183 }, { "epoch": 0.87, "grad_norm": 1.8376574827321444, "learning_rate": 4.348110088454338e-07, "loss": 0.6856, "step": 8184 }, { "epoch": 0.87, "grad_norm": 1.8311762804616942, "learning_rate": 4.3409190317341587e-07, "loss": 0.5488, "step": 8185 }, { "epoch": 0.87, "grad_norm": 1.9112847413590741, "learning_rate": 4.333733656494538e-07, "loss": 0.586, "step": 8186 }, { "epoch": 0.87, "grad_norm": 1.6153445456384412, "learning_rate": 4.3265539636295763e-07, "loss": 0.5897, "step": 8187 }, { "epoch": 0.87, "grad_norm": 1.708279548653858, "learning_rate": 4.319379954032654e-07, "loss": 0.5066, "step": 8188 }, { "epoch": 0.87, "grad_norm": 1.904220244153371, "learning_rate": 4.3122116285964574e-07, "loss": 0.5538, "step": 8189 }, { "epoch": 0.87, "grad_norm": 1.8878622907104154, "learning_rate": 4.305048988212968e-07, "loss": 0.5325, "step": 8190 }, { "epoch": 0.87, "grad_norm": 1.7574734465185706, "learning_rate": 4.297892033773432e-07, "loss": 0.6471, "step": 8191 }, { "epoch": 0.87, "grad_norm": 2.083850101239209, "learning_rate": 4.290740766168422e-07, "loss": 0.5689, "step": 8192 }, { "epoch": 0.87, "grad_norm": 2.013027043406548, "learning_rate": 4.2835951862877866e-07, "loss": 0.5549, "step": 8193 }, { "epoch": 0.87, "grad_norm": 1.8564149367569647, "learning_rate": 4.276455295020665e-07, "loss": 0.6768, "step": 8194 }, { "epoch": 0.87, "grad_norm": 1.6894412360476954, "learning_rate": 4.269321093255502e-07, "loss": 0.6004, "step": 8195 }, { "epoch": 0.87, "grad_norm": 1.742580794715506, "learning_rate": 4.26219258188002e-07, "loss": 0.6049, "step": 8196 }, { "epoch": 0.87, "grad_norm": 1.9125178334502242, "learning_rate": 4.2550697617812276e-07, "loss": 0.6, "step": 8197 }, { "epoch": 0.87, "grad_norm": 1.6862937605670387, "learning_rate": 4.2479526338454593e-07, "loss": 0.6399, "step": 8198 }, { "epoch": 0.87, "grad_norm": 1.8705590678000878, "learning_rate": 4.240841198958301e-07, "loss": 0.5841, "step": 8199 }, { "epoch": 0.87, "grad_norm": 1.8107308119283363, "learning_rate": 4.233735458004656e-07, "loss": 0.6383, "step": 8200 }, { "epoch": 0.87, "grad_norm": 1.8464482060501897, "learning_rate": 4.226635411868707e-07, "loss": 0.6395, "step": 8201 }, { "epoch": 0.87, "grad_norm": 1.8435328836615381, "learning_rate": 4.2195410614339295e-07, "loss": 0.6325, "step": 8202 }, { "epoch": 0.87, "grad_norm": 1.0109969138103232, "learning_rate": 4.212452407583101e-07, "loss": 0.5082, "step": 8203 }, { "epoch": 0.88, "grad_norm": 1.6298477154933801, "learning_rate": 4.205369451198271e-07, "loss": 0.5442, "step": 8204 }, { "epoch": 0.88, "grad_norm": 1.8429685933222837, "learning_rate": 4.1982921931608024e-07, "loss": 0.6395, "step": 8205 }, { "epoch": 0.88, "grad_norm": 1.6131395653978553, "learning_rate": 4.191220634351323e-07, "loss": 0.5628, "step": 8206 }, { "epoch": 0.88, "grad_norm": 1.6855740908461, "learning_rate": 4.184154775649768e-07, "loss": 0.5373, "step": 8207 }, { "epoch": 0.88, "grad_norm": 1.85997811872384, "learning_rate": 4.177094617935368e-07, "loss": 0.5538, "step": 8208 }, { "epoch": 0.88, "grad_norm": 1.754172909282759, "learning_rate": 4.170040162086636e-07, "loss": 0.4908, "step": 8209 }, { "epoch": 0.88, "grad_norm": 1.9092545096923996, "learning_rate": 4.1629914089813703e-07, "loss": 0.6006, "step": 8210 }, { "epoch": 0.88, "grad_norm": 1.5343070900704032, "learning_rate": 4.1559483594966755e-07, "loss": 0.5573, "step": 8211 }, { "epoch": 0.88, "grad_norm": 2.045732075323456, "learning_rate": 4.1489110145089327e-07, "loss": 0.5842, "step": 8212 }, { "epoch": 0.88, "grad_norm": 1.932857341083053, "learning_rate": 4.141879374893809e-07, "loss": 0.5227, "step": 8213 }, { "epoch": 0.88, "grad_norm": 1.9631873576637415, "learning_rate": 4.1348534415262764e-07, "loss": 0.6565, "step": 8214 }, { "epoch": 0.88, "grad_norm": 1.8568327211122166, "learning_rate": 4.1278332152805966e-07, "loss": 0.6067, "step": 8215 }, { "epoch": 0.88, "grad_norm": 1.8452666390844021, "learning_rate": 4.1208186970303097e-07, "loss": 0.5762, "step": 8216 }, { "epoch": 0.88, "grad_norm": 1.7225810875850285, "learning_rate": 4.113809887648251e-07, "loss": 0.6541, "step": 8217 }, { "epoch": 0.88, "grad_norm": 1.8109866666172731, "learning_rate": 4.106806788006551e-07, "loss": 0.5495, "step": 8218 }, { "epoch": 0.88, "grad_norm": 1.9307404110892552, "learning_rate": 4.099809398976623e-07, "loss": 0.6008, "step": 8219 }, { "epoch": 0.88, "grad_norm": 1.8397115859799413, "learning_rate": 4.092817721429154e-07, "loss": 0.5892, "step": 8220 }, { "epoch": 0.88, "grad_norm": 1.7082822000161135, "learning_rate": 4.0858317562341545e-07, "loss": 0.5922, "step": 8221 }, { "epoch": 0.88, "grad_norm": 2.0483770236522933, "learning_rate": 4.0788515042608946e-07, "loss": 0.5579, "step": 8222 }, { "epoch": 0.88, "grad_norm": 2.3894178087923774, "learning_rate": 4.071876966377958e-07, "loss": 0.5451, "step": 8223 }, { "epoch": 0.88, "grad_norm": 1.8069833641615554, "learning_rate": 4.0649081434532e-07, "loss": 0.6507, "step": 8224 }, { "epoch": 0.88, "grad_norm": 1.5837786056888599, "learning_rate": 4.057945036353761e-07, "loss": 0.5733, "step": 8225 }, { "epoch": 0.88, "grad_norm": 1.9328167056923675, "learning_rate": 4.050987645946092e-07, "loss": 0.597, "step": 8226 }, { "epoch": 0.88, "grad_norm": 1.9062792477540043, "learning_rate": 4.044035973095917e-07, "loss": 0.6028, "step": 8227 }, { "epoch": 0.88, "grad_norm": 1.069228380789463, "learning_rate": 4.037090018668244e-07, "loss": 0.4619, "step": 8228 }, { "epoch": 0.88, "grad_norm": 1.0815434960146062, "learning_rate": 4.0301497835273815e-07, "loss": 0.4763, "step": 8229 }, { "epoch": 0.88, "grad_norm": 1.7556840633604083, "learning_rate": 4.023215268536923e-07, "loss": 0.6299, "step": 8230 }, { "epoch": 0.88, "grad_norm": 1.7508818362892506, "learning_rate": 4.016286474559733e-07, "loss": 0.5358, "step": 8231 }, { "epoch": 0.88, "grad_norm": 1.115939042719256, "learning_rate": 4.0093634024580054e-07, "loss": 0.5172, "step": 8232 }, { "epoch": 0.88, "grad_norm": 1.9414343477542204, "learning_rate": 4.002446053093184e-07, "loss": 0.6688, "step": 8233 }, { "epoch": 0.88, "grad_norm": 1.8989105483590136, "learning_rate": 3.99553442732602e-07, "loss": 0.419, "step": 8234 }, { "epoch": 0.88, "grad_norm": 1.8408554594222541, "learning_rate": 3.9886285260165357e-07, "loss": 0.6934, "step": 8235 }, { "epoch": 0.88, "grad_norm": 1.9226613561435268, "learning_rate": 3.9817283500240445e-07, "loss": 0.6414, "step": 8236 }, { "epoch": 0.88, "grad_norm": 1.7252527187483564, "learning_rate": 3.97483390020717e-07, "loss": 0.6232, "step": 8237 }, { "epoch": 0.88, "grad_norm": 1.0388458927910111, "learning_rate": 3.967945177423793e-07, "loss": 0.4766, "step": 8238 }, { "epoch": 0.88, "grad_norm": 1.8158952355996563, "learning_rate": 3.9610621825311055e-07, "loss": 0.5611, "step": 8239 }, { "epoch": 0.88, "grad_norm": 1.8624541909466632, "learning_rate": 3.954184916385573e-07, "loss": 0.6568, "step": 8240 }, { "epoch": 0.88, "grad_norm": 1.6368310971603215, "learning_rate": 3.9473133798429487e-07, "loss": 0.5414, "step": 8241 }, { "epoch": 0.88, "grad_norm": 1.1133349336463085, "learning_rate": 3.9404475737582825e-07, "loss": 0.478, "step": 8242 }, { "epoch": 0.88, "grad_norm": 1.9073048239127093, "learning_rate": 3.9335874989859024e-07, "loss": 0.6111, "step": 8243 }, { "epoch": 0.88, "grad_norm": 1.7158424316796885, "learning_rate": 3.9267331563794254e-07, "loss": 0.5922, "step": 8244 }, { "epoch": 0.88, "grad_norm": 1.9714496330780749, "learning_rate": 3.919884546791752e-07, "loss": 0.6981, "step": 8245 }, { "epoch": 0.88, "grad_norm": 2.098417739421138, "learning_rate": 3.9130416710750795e-07, "loss": 0.655, "step": 8246 }, { "epoch": 0.88, "grad_norm": 1.8493481251312567, "learning_rate": 3.9062045300808815e-07, "loss": 0.6435, "step": 8247 }, { "epoch": 0.88, "grad_norm": 1.8109358989062834, "learning_rate": 3.899373124659922e-07, "loss": 0.5054, "step": 8248 }, { "epoch": 0.88, "grad_norm": 1.0593080594043436, "learning_rate": 3.892547455662249e-07, "loss": 0.4664, "step": 8249 }, { "epoch": 0.88, "grad_norm": 1.869674211674543, "learning_rate": 3.8857275239371994e-07, "loss": 0.5967, "step": 8250 }, { "epoch": 0.88, "grad_norm": 1.7790284103524057, "learning_rate": 3.8789133303334004e-07, "loss": 0.613, "step": 8251 }, { "epoch": 0.88, "grad_norm": 1.8391440221996538, "learning_rate": 3.8721048756987567e-07, "loss": 0.4935, "step": 8252 }, { "epoch": 0.88, "grad_norm": 1.7590754616937914, "learning_rate": 3.8653021608804574e-07, "loss": 0.6359, "step": 8253 }, { "epoch": 0.88, "grad_norm": 1.9596583639297986, "learning_rate": 3.858505186724987e-07, "loss": 0.6022, "step": 8254 }, { "epoch": 0.88, "grad_norm": 2.0093276825275104, "learning_rate": 3.8517139540781124e-07, "loss": 0.6382, "step": 8255 }, { "epoch": 0.88, "grad_norm": 1.8067179925992403, "learning_rate": 3.844928463784886e-07, "loss": 0.6638, "step": 8256 }, { "epoch": 0.88, "grad_norm": 1.6939846324202006, "learning_rate": 3.838148716689638e-07, "loss": 0.5697, "step": 8257 }, { "epoch": 0.88, "grad_norm": 2.002654951027388, "learning_rate": 3.8313747136359933e-07, "loss": 0.6232, "step": 8258 }, { "epoch": 0.88, "grad_norm": 1.78720271381317, "learning_rate": 3.8246064554668614e-07, "loss": 0.5867, "step": 8259 }, { "epoch": 0.88, "grad_norm": 1.7761706457649609, "learning_rate": 3.817843943024435e-07, "loss": 0.5884, "step": 8260 }, { "epoch": 0.88, "grad_norm": 1.5342261092805851, "learning_rate": 3.811087177150186e-07, "loss": 0.5053, "step": 8261 }, { "epoch": 0.88, "grad_norm": 1.8757621386715104, "learning_rate": 3.8043361586848793e-07, "loss": 0.5883, "step": 8262 }, { "epoch": 0.88, "grad_norm": 1.827149694551272, "learning_rate": 3.7975908884685606e-07, "loss": 0.5389, "step": 8263 }, { "epoch": 0.88, "grad_norm": 1.6077721236104476, "learning_rate": 3.79085136734057e-07, "loss": 0.5527, "step": 8264 }, { "epoch": 0.88, "grad_norm": 1.7866354757328256, "learning_rate": 3.784117596139514e-07, "loss": 0.5602, "step": 8265 }, { "epoch": 0.88, "grad_norm": 1.761079892395379, "learning_rate": 3.7773895757032996e-07, "loss": 0.6264, "step": 8266 }, { "epoch": 0.88, "grad_norm": 1.7807536980948568, "learning_rate": 3.770667306869108e-07, "loss": 0.5585, "step": 8267 }, { "epoch": 0.88, "grad_norm": 1.6774292371219521, "learning_rate": 3.7639507904734083e-07, "loss": 0.5164, "step": 8268 }, { "epoch": 0.88, "grad_norm": 1.738046010555232, "learning_rate": 3.7572400273519605e-07, "loss": 0.5824, "step": 8269 }, { "epoch": 0.88, "grad_norm": 1.7668819754404816, "learning_rate": 3.750535018339801e-07, "loss": 0.5756, "step": 8270 }, { "epoch": 0.88, "grad_norm": 1.7007824766402073, "learning_rate": 3.743835764271247e-07, "loss": 0.5889, "step": 8271 }, { "epoch": 0.88, "grad_norm": 1.951049586583048, "learning_rate": 3.7371422659799097e-07, "loss": 0.6564, "step": 8272 }, { "epoch": 0.88, "grad_norm": 1.5634828074957703, "learning_rate": 3.730454524298677e-07, "loss": 0.4966, "step": 8273 }, { "epoch": 0.88, "grad_norm": 1.9341029537705254, "learning_rate": 3.7237725400597227e-07, "loss": 0.6818, "step": 8274 }, { "epoch": 0.88, "grad_norm": 1.8091993166684148, "learning_rate": 3.717096314094504e-07, "loss": 0.5802, "step": 8275 }, { "epoch": 0.88, "grad_norm": 1.8209306381453016, "learning_rate": 3.7104258472337564e-07, "loss": 0.5775, "step": 8276 }, { "epoch": 0.88, "grad_norm": 1.7667495398656548, "learning_rate": 3.70376114030751e-07, "loss": 0.5858, "step": 8277 }, { "epoch": 0.88, "grad_norm": 1.8690170019216865, "learning_rate": 3.697102194145069e-07, "loss": 0.5687, "step": 8278 }, { "epoch": 0.88, "grad_norm": 1.4859138209555458, "learning_rate": 3.690449009575026e-07, "loss": 0.4656, "step": 8279 }, { "epoch": 0.88, "grad_norm": 1.8372955082681237, "learning_rate": 3.683801587425251e-07, "loss": 0.4899, "step": 8280 }, { "epoch": 0.88, "grad_norm": 1.6412069102110673, "learning_rate": 3.677159928522905e-07, "loss": 0.5287, "step": 8281 }, { "epoch": 0.88, "grad_norm": 1.8104544730109777, "learning_rate": 3.6705240336944224e-07, "loss": 0.6096, "step": 8282 }, { "epoch": 0.88, "grad_norm": 1.8150766310627549, "learning_rate": 3.663893903765531e-07, "loss": 0.5919, "step": 8283 }, { "epoch": 0.88, "grad_norm": 1.6821418475337822, "learning_rate": 3.657269539561231e-07, "loss": 0.6145, "step": 8284 }, { "epoch": 0.88, "grad_norm": 1.5965166633770214, "learning_rate": 3.650650941905809e-07, "loss": 0.524, "step": 8285 }, { "epoch": 0.88, "grad_norm": 1.72611126131685, "learning_rate": 3.6440381116228385e-07, "loss": 0.524, "step": 8286 }, { "epoch": 0.88, "grad_norm": 1.8578637360948014, "learning_rate": 3.637431049535167e-07, "loss": 0.5804, "step": 8287 }, { "epoch": 0.88, "grad_norm": 1.6165598042081493, "learning_rate": 3.6308297564649317e-07, "loss": 0.4495, "step": 8288 }, { "epoch": 0.88, "grad_norm": 1.5762388629187485, "learning_rate": 3.624234233233548e-07, "loss": 0.6462, "step": 8289 }, { "epoch": 0.88, "grad_norm": 2.126216321276487, "learning_rate": 3.617644480661714e-07, "loss": 0.674, "step": 8290 }, { "epoch": 0.88, "grad_norm": 1.8849037600220824, "learning_rate": 3.611060499569419e-07, "loss": 0.566, "step": 8291 }, { "epoch": 0.88, "grad_norm": 1.820759433975365, "learning_rate": 3.6044822907759125e-07, "loss": 0.5829, "step": 8292 }, { "epoch": 0.88, "grad_norm": 1.7068630231451403, "learning_rate": 3.59790985509974e-07, "loss": 0.6472, "step": 8293 }, { "epoch": 0.88, "grad_norm": 1.745489134570055, "learning_rate": 3.591343193358721e-07, "loss": 0.6093, "step": 8294 }, { "epoch": 0.88, "grad_norm": 1.7498997473789528, "learning_rate": 3.5847823063699827e-07, "loss": 0.6469, "step": 8295 }, { "epoch": 0.88, "grad_norm": 1.1141394061335792, "learning_rate": 3.5782271949499015e-07, "loss": 0.485, "step": 8296 }, { "epoch": 0.89, "grad_norm": 1.8923190696268317, "learning_rate": 3.5716778599141466e-07, "loss": 0.6523, "step": 8297 }, { "epoch": 0.89, "grad_norm": 1.7524626500866591, "learning_rate": 3.565134302077672e-07, "loss": 0.5129, "step": 8298 }, { "epoch": 0.89, "grad_norm": 1.9700512451037273, "learning_rate": 3.558596522254709e-07, "loss": 0.6572, "step": 8299 }, { "epoch": 0.89, "grad_norm": 1.9132651984305062, "learning_rate": 3.5520645212587747e-07, "loss": 0.5741, "step": 8300 }, { "epoch": 0.89, "grad_norm": 1.7663264913019046, "learning_rate": 3.5455382999026566e-07, "loss": 0.5332, "step": 8301 }, { "epoch": 0.89, "grad_norm": 1.7592053971437314, "learning_rate": 3.5390178589984283e-07, "loss": 0.5951, "step": 8302 }, { "epoch": 0.89, "grad_norm": 1.6244175523927902, "learning_rate": 3.532503199357457e-07, "loss": 0.6059, "step": 8303 }, { "epoch": 0.89, "grad_norm": 1.985689672270542, "learning_rate": 3.525994321790366e-07, "loss": 0.6359, "step": 8304 }, { "epoch": 0.89, "grad_norm": 1.7943738813745511, "learning_rate": 3.5194912271070915e-07, "loss": 0.5361, "step": 8305 }, { "epoch": 0.89, "grad_norm": 1.7222247578563998, "learning_rate": 3.512993916116808e-07, "loss": 0.5947, "step": 8306 }, { "epoch": 0.89, "grad_norm": 1.9614970008811794, "learning_rate": 3.506502389627997e-07, "loss": 0.6436, "step": 8307 }, { "epoch": 0.89, "grad_norm": 1.8013958452808363, "learning_rate": 3.500016648448429e-07, "loss": 0.5638, "step": 8308 }, { "epoch": 0.89, "grad_norm": 1.9104530899483276, "learning_rate": 3.49353669338513e-07, "loss": 0.6684, "step": 8309 }, { "epoch": 0.89, "grad_norm": 1.845843859497185, "learning_rate": 3.487062525244428e-07, "loss": 0.6412, "step": 8310 }, { "epoch": 0.89, "grad_norm": 1.982285978599143, "learning_rate": 3.480594144831906e-07, "loss": 0.5363, "step": 8311 }, { "epoch": 0.89, "grad_norm": 2.3621314271008527, "learning_rate": 3.474131552952459e-07, "loss": 0.6209, "step": 8312 }, { "epoch": 0.89, "grad_norm": 2.02662290771181, "learning_rate": 3.4676747504102326e-07, "loss": 0.656, "step": 8313 }, { "epoch": 0.89, "grad_norm": 1.9424638790801636, "learning_rate": 3.4612237380086733e-07, "loss": 0.6151, "step": 8314 }, { "epoch": 0.89, "grad_norm": 1.6947569174762436, "learning_rate": 3.454778516550494e-07, "loss": 0.549, "step": 8315 }, { "epoch": 0.89, "grad_norm": 1.7385213939179818, "learning_rate": 3.4483390868376876e-07, "loss": 0.4948, "step": 8316 }, { "epoch": 0.89, "grad_norm": 1.7379201454492355, "learning_rate": 3.441905449671529e-07, "loss": 0.5967, "step": 8317 }, { "epoch": 0.89, "grad_norm": 1.881389520380953, "learning_rate": 3.4354776058525774e-07, "loss": 0.6645, "step": 8318 }, { "epoch": 0.89, "grad_norm": 1.6584404952542762, "learning_rate": 3.429055556180666e-07, "loss": 0.5597, "step": 8319 }, { "epoch": 0.89, "grad_norm": 1.7291371213961277, "learning_rate": 3.422639301454911e-07, "loss": 0.5822, "step": 8320 }, { "epoch": 0.89, "grad_norm": 1.7187348125815, "learning_rate": 3.4162288424736903e-07, "loss": 0.5852, "step": 8321 }, { "epoch": 0.89, "grad_norm": 2.091087917548234, "learning_rate": 3.409824180034682e-07, "loss": 0.6331, "step": 8322 }, { "epoch": 0.89, "grad_norm": 1.832553413816717, "learning_rate": 3.4034253149348384e-07, "loss": 0.5965, "step": 8323 }, { "epoch": 0.89, "grad_norm": 1.6817623698206783, "learning_rate": 3.397032247970378e-07, "loss": 0.5617, "step": 8324 }, { "epoch": 0.89, "grad_norm": 1.7698961865828793, "learning_rate": 3.390644979936819e-07, "loss": 0.5107, "step": 8325 }, { "epoch": 0.89, "grad_norm": 1.1055224329131934, "learning_rate": 3.3842635116289433e-07, "loss": 0.5129, "step": 8326 }, { "epoch": 0.89, "grad_norm": 1.752913107834856, "learning_rate": 3.3778878438407994e-07, "loss": 0.7001, "step": 8327 }, { "epoch": 0.89, "grad_norm": 1.7730389759793266, "learning_rate": 3.371517977365746e-07, "loss": 0.5925, "step": 8328 }, { "epoch": 0.89, "grad_norm": 1.729071945804133, "learning_rate": 3.365153912996405e-07, "loss": 0.5839, "step": 8329 }, { "epoch": 0.89, "grad_norm": 1.8121998024880952, "learning_rate": 3.35879565152466e-07, "loss": 0.5883, "step": 8330 }, { "epoch": 0.89, "grad_norm": 1.8394187771810355, "learning_rate": 3.3524431937416947e-07, "loss": 0.5816, "step": 8331 }, { "epoch": 0.89, "grad_norm": 1.7356104517873978, "learning_rate": 3.3460965404379543e-07, "loss": 0.5931, "step": 8332 }, { "epoch": 0.89, "grad_norm": 1.076826524328636, "learning_rate": 3.339755692403185e-07, "loss": 0.4722, "step": 8333 }, { "epoch": 0.89, "grad_norm": 1.6905730896652578, "learning_rate": 3.3334206504263833e-07, "loss": 0.5717, "step": 8334 }, { "epoch": 0.89, "grad_norm": 1.8886840325988161, "learning_rate": 3.3270914152958355e-07, "loss": 0.5753, "step": 8335 }, { "epoch": 0.89, "grad_norm": 1.9404617047072679, "learning_rate": 3.3207679877991064e-07, "loss": 0.5564, "step": 8336 }, { "epoch": 0.89, "grad_norm": 1.7556887337781193, "learning_rate": 3.3144503687230376e-07, "loss": 0.6214, "step": 8337 }, { "epoch": 0.89, "grad_norm": 1.907492689774368, "learning_rate": 3.308138558853746e-07, "loss": 0.619, "step": 8338 }, { "epoch": 0.89, "grad_norm": 1.8862572598188865, "learning_rate": 3.301832558976631e-07, "loss": 0.566, "step": 8339 }, { "epoch": 0.89, "grad_norm": 1.7278713052120607, "learning_rate": 3.295532369876359e-07, "loss": 0.5547, "step": 8340 }, { "epoch": 0.89, "grad_norm": 1.7804452155835422, "learning_rate": 3.28923799233688e-07, "loss": 0.5979, "step": 8341 }, { "epoch": 0.89, "grad_norm": 1.7326469484043836, "learning_rate": 3.2829494271414244e-07, "loss": 0.5665, "step": 8342 }, { "epoch": 0.89, "grad_norm": 1.7709024721930415, "learning_rate": 3.276666675072493e-07, "loss": 0.6335, "step": 8343 }, { "epoch": 0.89, "grad_norm": 2.071263001440736, "learning_rate": 3.270389736911861e-07, "loss": 0.6508, "step": 8344 }, { "epoch": 0.89, "grad_norm": 2.5119689402138268, "learning_rate": 3.264118613440592e-07, "loss": 0.661, "step": 8345 }, { "epoch": 0.89, "grad_norm": 1.0610626080570495, "learning_rate": 3.2578533054390183e-07, "loss": 0.4775, "step": 8346 }, { "epoch": 0.89, "grad_norm": 1.8164246852869121, "learning_rate": 3.2515938136867433e-07, "loss": 0.5654, "step": 8347 }, { "epoch": 0.89, "grad_norm": 2.0600808043815424, "learning_rate": 3.2453401389626606e-07, "loss": 0.6141, "step": 8348 }, { "epoch": 0.89, "grad_norm": 1.7781671875537026, "learning_rate": 3.23909228204492e-07, "loss": 0.5616, "step": 8349 }, { "epoch": 0.89, "grad_norm": 1.7407419765873582, "learning_rate": 3.2328502437109666e-07, "loss": 0.5507, "step": 8350 }, { "epoch": 0.89, "grad_norm": 1.6846925936564099, "learning_rate": 3.2266140247375067e-07, "loss": 0.5883, "step": 8351 }, { "epoch": 0.89, "grad_norm": 1.9978459425565844, "learning_rate": 3.220383625900536e-07, "loss": 0.6202, "step": 8352 }, { "epoch": 0.89, "grad_norm": 1.7343079553133407, "learning_rate": 3.214159047975324e-07, "loss": 0.4964, "step": 8353 }, { "epoch": 0.89, "grad_norm": 1.688967233199075, "learning_rate": 3.2079402917364e-07, "loss": 0.5032, "step": 8354 }, { "epoch": 0.89, "grad_norm": 1.8809877241926745, "learning_rate": 3.2017273579575845e-07, "loss": 0.6156, "step": 8355 }, { "epoch": 0.89, "grad_norm": 1.7412745085418553, "learning_rate": 3.195520247411971e-07, "loss": 0.6114, "step": 8356 }, { "epoch": 0.89, "grad_norm": 1.811922410992411, "learning_rate": 3.189318960871929e-07, "loss": 0.5513, "step": 8357 }, { "epoch": 0.89, "grad_norm": 1.9658948622811803, "learning_rate": 3.1831234991090976e-07, "loss": 0.5685, "step": 8358 }, { "epoch": 0.89, "grad_norm": 1.9492321946528575, "learning_rate": 3.1769338628943935e-07, "loss": 0.6357, "step": 8359 }, { "epoch": 0.89, "grad_norm": 1.8308388465103598, "learning_rate": 3.170750052998012e-07, "loss": 0.4966, "step": 8360 }, { "epoch": 0.89, "grad_norm": 2.0244065712081976, "learning_rate": 3.1645720701894145e-07, "loss": 0.6323, "step": 8361 }, { "epoch": 0.89, "grad_norm": 1.758516249895883, "learning_rate": 3.158399915237359e-07, "loss": 0.6291, "step": 8362 }, { "epoch": 0.89, "grad_norm": 1.7631515414720684, "learning_rate": 3.152233588909859e-07, "loss": 0.5175, "step": 8363 }, { "epoch": 0.89, "grad_norm": 1.6165952333420908, "learning_rate": 3.146073091974189e-07, "loss": 0.5475, "step": 8364 }, { "epoch": 0.89, "grad_norm": 1.7606332485151308, "learning_rate": 3.139918425196936e-07, "loss": 0.5797, "step": 8365 }, { "epoch": 0.89, "grad_norm": 1.781172417415338, "learning_rate": 3.133769589343932e-07, "loss": 0.7114, "step": 8366 }, { "epoch": 0.89, "grad_norm": 1.764160253877458, "learning_rate": 3.127626585180293e-07, "loss": 0.4885, "step": 8367 }, { "epoch": 0.89, "grad_norm": 1.71315776837885, "learning_rate": 3.1214894134704133e-07, "loss": 0.5923, "step": 8368 }, { "epoch": 0.89, "grad_norm": 1.7282314004867156, "learning_rate": 3.115358074977953e-07, "loss": 0.5932, "step": 8369 }, { "epoch": 0.89, "grad_norm": 2.066565164520505, "learning_rate": 3.109232570465853e-07, "loss": 0.553, "step": 8370 }, { "epoch": 0.89, "grad_norm": 1.0635309833021411, "learning_rate": 3.103112900696331e-07, "loss": 0.4862, "step": 8371 }, { "epoch": 0.89, "grad_norm": 1.5730752806824422, "learning_rate": 3.096999066430867e-07, "loss": 0.6216, "step": 8372 }, { "epoch": 0.89, "grad_norm": 1.704115410959276, "learning_rate": 3.090891068430224e-07, "loss": 0.5342, "step": 8373 }, { "epoch": 0.89, "grad_norm": 2.0029038388400955, "learning_rate": 3.084788907454433e-07, "loss": 0.6207, "step": 8374 }, { "epoch": 0.89, "grad_norm": 1.7222969131133772, "learning_rate": 3.07869258426281e-07, "loss": 0.5977, "step": 8375 }, { "epoch": 0.89, "grad_norm": 1.7106016148257872, "learning_rate": 3.072602099613925e-07, "loss": 0.6138, "step": 8376 }, { "epoch": 0.89, "grad_norm": 1.8454185091396227, "learning_rate": 3.0665174542656493e-07, "loss": 0.5506, "step": 8377 }, { "epoch": 0.89, "grad_norm": 1.7619839077149961, "learning_rate": 3.060438648975095e-07, "loss": 0.5154, "step": 8378 }, { "epoch": 0.89, "grad_norm": 1.7505437115871065, "learning_rate": 3.0543656844986725e-07, "loss": 0.6128, "step": 8379 }, { "epoch": 0.89, "grad_norm": 1.06510488085967, "learning_rate": 3.048298561592056e-07, "loss": 0.4621, "step": 8380 }, { "epoch": 0.89, "grad_norm": 1.9858787609729867, "learning_rate": 3.042237281010196e-07, "loss": 0.6563, "step": 8381 }, { "epoch": 0.89, "grad_norm": 1.8137341462174845, "learning_rate": 3.0361818435073077e-07, "loss": 0.5522, "step": 8382 }, { "epoch": 0.89, "grad_norm": 1.7609669230581226, "learning_rate": 3.030132249836887e-07, "loss": 0.5739, "step": 8383 }, { "epoch": 0.89, "grad_norm": 1.7402023222070648, "learning_rate": 3.02408850075171e-07, "loss": 0.6314, "step": 8384 }, { "epoch": 0.89, "grad_norm": 1.7193242858183866, "learning_rate": 3.018050597003802e-07, "loss": 0.628, "step": 8385 }, { "epoch": 0.89, "grad_norm": 1.9253772786900756, "learning_rate": 3.01201853934448e-07, "loss": 0.683, "step": 8386 }, { "epoch": 0.89, "grad_norm": 2.090632779776845, "learning_rate": 3.0059923285243373e-07, "loss": 0.5684, "step": 8387 }, { "epoch": 0.89, "grad_norm": 1.9761821914042244, "learning_rate": 2.9999719652932246e-07, "loss": 0.5548, "step": 8388 }, { "epoch": 0.89, "grad_norm": 2.4199920182976573, "learning_rate": 2.99395745040027e-07, "loss": 0.6035, "step": 8389 }, { "epoch": 0.89, "grad_norm": 1.7798621886247175, "learning_rate": 2.987948784593875e-07, "loss": 0.6014, "step": 8390 }, { "epoch": 0.9, "grad_norm": 1.9731681756198027, "learning_rate": 2.981945968621719e-07, "loss": 0.6037, "step": 8391 }, { "epoch": 0.9, "grad_norm": 1.8675405760326924, "learning_rate": 2.9759490032307493e-07, "loss": 0.5505, "step": 8392 }, { "epoch": 0.9, "grad_norm": 1.0349110837011952, "learning_rate": 2.9699578891671797e-07, "loss": 0.4777, "step": 8393 }, { "epoch": 0.9, "grad_norm": 1.913512983256914, "learning_rate": 2.963972627176498e-07, "loss": 0.7129, "step": 8394 }, { "epoch": 0.9, "grad_norm": 1.6591730953481219, "learning_rate": 2.957993218003469e-07, "loss": 0.5704, "step": 8395 }, { "epoch": 0.9, "grad_norm": 1.7716573990449611, "learning_rate": 2.9520196623921304e-07, "loss": 0.6219, "step": 8396 }, { "epoch": 0.9, "grad_norm": 1.9294726291321196, "learning_rate": 2.946051961085783e-07, "loss": 0.5891, "step": 8397 }, { "epoch": 0.9, "grad_norm": 1.8012662723121355, "learning_rate": 2.94009011482701e-07, "loss": 0.6326, "step": 8398 }, { "epoch": 0.9, "grad_norm": 1.765066951021953, "learning_rate": 2.934134124357646e-07, "loss": 0.5248, "step": 8399 }, { "epoch": 0.9, "grad_norm": 1.8722719557034904, "learning_rate": 2.9281839904188256e-07, "loss": 0.5629, "step": 8400 }, { "epoch": 0.9, "grad_norm": 1.7173493881003852, "learning_rate": 2.922239713750935e-07, "loss": 0.5399, "step": 8401 }, { "epoch": 0.9, "grad_norm": 1.1393197539256017, "learning_rate": 2.916301295093632e-07, "loss": 0.4889, "step": 8402 }, { "epoch": 0.9, "grad_norm": 1.7425890741481143, "learning_rate": 2.910368735185848e-07, "loss": 0.6399, "step": 8403 }, { "epoch": 0.9, "grad_norm": 1.9732112965844784, "learning_rate": 2.9044420347657975e-07, "loss": 0.6402, "step": 8404 }, { "epoch": 0.9, "grad_norm": 1.8792196970696906, "learning_rate": 2.898521194570952e-07, "loss": 0.5706, "step": 8405 }, { "epoch": 0.9, "grad_norm": 1.616877121285599, "learning_rate": 2.8926062153380563e-07, "loss": 0.5444, "step": 8406 }, { "epoch": 0.9, "grad_norm": 1.9379745051433017, "learning_rate": 2.886697097803115e-07, "loss": 0.6307, "step": 8407 }, { "epoch": 0.9, "grad_norm": 1.9492825592663066, "learning_rate": 2.880793842701435e-07, "loss": 0.613, "step": 8408 }, { "epoch": 0.9, "grad_norm": 1.8151055183129035, "learning_rate": 2.8748964507675614e-07, "loss": 0.6144, "step": 8409 }, { "epoch": 0.9, "grad_norm": 2.039511302618967, "learning_rate": 2.8690049227353303e-07, "loss": 0.6417, "step": 8410 }, { "epoch": 0.9, "grad_norm": 1.6154170478753827, "learning_rate": 2.863119259337832e-07, "loss": 0.5561, "step": 8411 }, { "epoch": 0.9, "grad_norm": 1.780103887617853, "learning_rate": 2.8572394613074426e-07, "loss": 0.565, "step": 8412 }, { "epoch": 0.9, "grad_norm": 1.9140293131009158, "learning_rate": 2.8513655293757935e-07, "loss": 0.6586, "step": 8413 }, { "epoch": 0.9, "grad_norm": 1.0481260545096984, "learning_rate": 2.8454974642738055e-07, "loss": 0.4567, "step": 8414 }, { "epoch": 0.9, "grad_norm": 1.7721079930725847, "learning_rate": 2.839635266731644e-07, "loss": 0.6527, "step": 8415 }, { "epoch": 0.9, "grad_norm": 1.9807510754692046, "learning_rate": 2.8337789374787596e-07, "loss": 0.6442, "step": 8416 }, { "epoch": 0.9, "grad_norm": 1.77150945784181, "learning_rate": 2.82792847724388e-07, "loss": 0.5198, "step": 8417 }, { "epoch": 0.9, "grad_norm": 1.6277199957482775, "learning_rate": 2.8220838867549894e-07, "loss": 0.5258, "step": 8418 }, { "epoch": 0.9, "grad_norm": 1.051115248929194, "learning_rate": 2.8162451667393396e-07, "loss": 0.4724, "step": 8419 }, { "epoch": 0.9, "grad_norm": 2.0561338278109034, "learning_rate": 2.8104123179234776e-07, "loss": 0.635, "step": 8420 }, { "epoch": 0.9, "grad_norm": 1.7715526319338928, "learning_rate": 2.8045853410331725e-07, "loss": 0.554, "step": 8421 }, { "epoch": 0.9, "grad_norm": 1.8175074251984138, "learning_rate": 2.7987642367935053e-07, "loss": 0.5223, "step": 8422 }, { "epoch": 0.9, "grad_norm": 2.0674068720235947, "learning_rate": 2.7929490059288076e-07, "loss": 0.6376, "step": 8423 }, { "epoch": 0.9, "grad_norm": 1.7012554345624264, "learning_rate": 2.7871396491626843e-07, "loss": 0.581, "step": 8424 }, { "epoch": 0.9, "grad_norm": 1.8148257252457594, "learning_rate": 2.781336167218013e-07, "loss": 0.5595, "step": 8425 }, { "epoch": 0.9, "grad_norm": 1.8047152812629166, "learning_rate": 2.7755385608169374e-07, "loss": 0.567, "step": 8426 }, { "epoch": 0.9, "grad_norm": 1.8596694095531274, "learning_rate": 2.769746830680864e-07, "loss": 0.5233, "step": 8427 }, { "epoch": 0.9, "grad_norm": 1.7064160379183102, "learning_rate": 2.7639609775304775e-07, "loss": 0.6127, "step": 8428 }, { "epoch": 0.9, "grad_norm": 1.1112654191466724, "learning_rate": 2.758181002085719e-07, "loss": 0.5051, "step": 8429 }, { "epoch": 0.9, "grad_norm": 1.9347453368522898, "learning_rate": 2.7524069050658185e-07, "loss": 0.5862, "step": 8430 }, { "epoch": 0.9, "grad_norm": 1.0628711623925864, "learning_rate": 2.7466386871892514e-07, "loss": 0.4652, "step": 8431 }, { "epoch": 0.9, "grad_norm": 1.0805239507252127, "learning_rate": 2.740876349173777e-07, "loss": 0.4835, "step": 8432 }, { "epoch": 0.9, "grad_norm": 1.7080596208774232, "learning_rate": 2.7351198917364205e-07, "loss": 0.5462, "step": 8433 }, { "epoch": 0.9, "grad_norm": 2.1151910855687026, "learning_rate": 2.729369315593466e-07, "loss": 0.4981, "step": 8434 }, { "epoch": 0.9, "grad_norm": 1.764525581242589, "learning_rate": 2.7236246214604857e-07, "loss": 0.6263, "step": 8435 }, { "epoch": 0.9, "grad_norm": 2.038141924891695, "learning_rate": 2.7178858100522954e-07, "loss": 0.6358, "step": 8436 }, { "epoch": 0.9, "grad_norm": 2.353546410231477, "learning_rate": 2.7121528820829914e-07, "loss": 0.5533, "step": 8437 }, { "epoch": 0.9, "grad_norm": 1.6200029409181875, "learning_rate": 2.706425838265936e-07, "loss": 0.5562, "step": 8438 }, { "epoch": 0.9, "grad_norm": 1.806413815028556, "learning_rate": 2.7007046793137703e-07, "loss": 0.537, "step": 8439 }, { "epoch": 0.9, "grad_norm": 1.6157853711304235, "learning_rate": 2.6949894059383806e-07, "loss": 0.4691, "step": 8440 }, { "epoch": 0.9, "grad_norm": 1.6983838673217864, "learning_rate": 2.689280018850937e-07, "loss": 0.5412, "step": 8441 }, { "epoch": 0.9, "grad_norm": 1.8165572007805126, "learning_rate": 2.6835765187618825e-07, "loss": 0.5712, "step": 8442 }, { "epoch": 0.9, "grad_norm": 1.7865490165662947, "learning_rate": 2.6778789063809153e-07, "loss": 0.533, "step": 8443 }, { "epoch": 0.9, "grad_norm": 1.8101057553685993, "learning_rate": 2.672187182417002e-07, "loss": 0.581, "step": 8444 }, { "epoch": 0.9, "grad_norm": 1.8060252416726355, "learning_rate": 2.6665013475783766e-07, "loss": 0.6357, "step": 8445 }, { "epoch": 0.9, "grad_norm": 1.668134940942651, "learning_rate": 2.6608214025725443e-07, "loss": 0.5398, "step": 8446 }, { "epoch": 0.9, "grad_norm": 1.9735560836251114, "learning_rate": 2.6551473481062797e-07, "loss": 0.5681, "step": 8447 }, { "epoch": 0.9, "grad_norm": 1.8260129489628514, "learning_rate": 2.6494791848856174e-07, "loss": 0.5794, "step": 8448 }, { "epoch": 0.9, "grad_norm": 1.6573975189363503, "learning_rate": 2.6438169136158654e-07, "loss": 0.5673, "step": 8449 }, { "epoch": 0.9, "grad_norm": 1.6306985142669563, "learning_rate": 2.6381605350015884e-07, "loss": 0.533, "step": 8450 }, { "epoch": 0.9, "grad_norm": 1.9905185171133977, "learning_rate": 2.6325100497466225e-07, "loss": 0.5521, "step": 8451 }, { "epoch": 0.9, "grad_norm": 1.863856814539577, "learning_rate": 2.626865458554084e-07, "loss": 0.7372, "step": 8452 }, { "epoch": 0.9, "grad_norm": 1.8651934763149625, "learning_rate": 2.621226762126333e-07, "loss": 0.5675, "step": 8453 }, { "epoch": 0.9, "grad_norm": 1.8218802345658283, "learning_rate": 2.615593961165014e-07, "loss": 0.573, "step": 8454 }, { "epoch": 0.9, "grad_norm": 1.779916318613806, "learning_rate": 2.609967056371032e-07, "loss": 0.5736, "step": 8455 }, { "epoch": 0.9, "grad_norm": 1.7795683252699765, "learning_rate": 2.6043460484445504e-07, "loss": 0.5015, "step": 8456 }, { "epoch": 0.9, "grad_norm": 1.634712744164094, "learning_rate": 2.598730938085015e-07, "loss": 0.5717, "step": 8457 }, { "epoch": 0.9, "grad_norm": 1.8867807448580989, "learning_rate": 2.593121725991121e-07, "loss": 0.6535, "step": 8458 }, { "epoch": 0.9, "grad_norm": 1.8838049030229054, "learning_rate": 2.5875184128608455e-07, "loss": 0.5345, "step": 8459 }, { "epoch": 0.9, "grad_norm": 1.9300161846698611, "learning_rate": 2.5819209993914185e-07, "loss": 0.4778, "step": 8460 }, { "epoch": 0.9, "grad_norm": 1.9408869155035064, "learning_rate": 2.5763294862793444e-07, "loss": 0.5648, "step": 8461 }, { "epoch": 0.9, "grad_norm": 1.863066537665721, "learning_rate": 2.570743874220388e-07, "loss": 0.5784, "step": 8462 }, { "epoch": 0.9, "grad_norm": 1.9876462540269413, "learning_rate": 2.565164163909589e-07, "loss": 0.719, "step": 8463 }, { "epoch": 0.9, "grad_norm": 1.8083713014220897, "learning_rate": 2.55959035604123e-07, "loss": 0.5669, "step": 8464 }, { "epoch": 0.9, "grad_norm": 1.6691705483046964, "learning_rate": 2.554022451308885e-07, "loss": 0.5359, "step": 8465 }, { "epoch": 0.9, "grad_norm": 1.6621304298114357, "learning_rate": 2.5484604504053824e-07, "loss": 0.5515, "step": 8466 }, { "epoch": 0.9, "grad_norm": 1.7345698522833426, "learning_rate": 2.542904354022813e-07, "loss": 0.5688, "step": 8467 }, { "epoch": 0.9, "grad_norm": 1.8087994056184282, "learning_rate": 2.53735416285254e-07, "loss": 0.5636, "step": 8468 }, { "epoch": 0.9, "grad_norm": 1.716314199619152, "learning_rate": 2.53180987758519e-07, "loss": 0.574, "step": 8469 }, { "epoch": 0.9, "grad_norm": 1.8270980280371638, "learning_rate": 2.526271498910643e-07, "loss": 0.5727, "step": 8470 }, { "epoch": 0.9, "grad_norm": 1.9995967811716837, "learning_rate": 2.520739027518071e-07, "loss": 0.5983, "step": 8471 }, { "epoch": 0.9, "grad_norm": 1.6032969287960677, "learning_rate": 2.5152124640958785e-07, "loss": 0.5132, "step": 8472 }, { "epoch": 0.9, "grad_norm": 2.0059499869858786, "learning_rate": 2.50969180933176e-07, "loss": 0.5476, "step": 8473 }, { "epoch": 0.9, "grad_norm": 1.8578115858552346, "learning_rate": 2.504177063912655e-07, "loss": 0.6645, "step": 8474 }, { "epoch": 0.9, "grad_norm": 1.0602334313659072, "learning_rate": 2.498668228524792e-07, "loss": 0.4981, "step": 8475 }, { "epoch": 0.9, "grad_norm": 1.8236367269744678, "learning_rate": 2.4931653038536396e-07, "loss": 0.6812, "step": 8476 }, { "epoch": 0.9, "grad_norm": 1.9326914163100084, "learning_rate": 2.48766829058395e-07, "loss": 0.6578, "step": 8477 }, { "epoch": 0.9, "grad_norm": 1.091120457613356, "learning_rate": 2.482177189399732e-07, "loss": 0.4642, "step": 8478 }, { "epoch": 0.9, "grad_norm": 1.7549540868484326, "learning_rate": 2.4766920009842433e-07, "loss": 0.4603, "step": 8479 }, { "epoch": 0.9, "grad_norm": 1.8445159764521277, "learning_rate": 2.471212726020028e-07, "loss": 0.4808, "step": 8480 }, { "epoch": 0.9, "grad_norm": 1.720913579095462, "learning_rate": 2.465739365188885e-07, "loss": 0.4277, "step": 8481 }, { "epoch": 0.9, "grad_norm": 1.873159495517888, "learning_rate": 2.460271919171886e-07, "loss": 0.6815, "step": 8482 }, { "epoch": 0.9, "grad_norm": 1.5299808904375034, "learning_rate": 2.454810388649359e-07, "loss": 0.5806, "step": 8483 }, { "epoch": 0.9, "grad_norm": 1.8964188849843726, "learning_rate": 2.4493547743008884e-07, "loss": 0.5941, "step": 8484 }, { "epoch": 0.91, "grad_norm": 2.945886851375551, "learning_rate": 2.443905076805336e-07, "loss": 0.6724, "step": 8485 }, { "epoch": 0.91, "grad_norm": 1.7404039868233745, "learning_rate": 2.4384612968408217e-07, "loss": 0.5811, "step": 8486 }, { "epoch": 0.91, "grad_norm": 1.801451163486229, "learning_rate": 2.43302343508473e-07, "loss": 0.5788, "step": 8487 }, { "epoch": 0.91, "grad_norm": 1.8281306003614846, "learning_rate": 2.42759149221371e-07, "loss": 0.5954, "step": 8488 }, { "epoch": 0.91, "grad_norm": 1.8479483481089878, "learning_rate": 2.42216546890367e-07, "loss": 0.6588, "step": 8489 }, { "epoch": 0.91, "grad_norm": 1.7227241385160392, "learning_rate": 2.416745365829781e-07, "loss": 0.5276, "step": 8490 }, { "epoch": 0.91, "grad_norm": 1.8353190056918136, "learning_rate": 2.411331183666482e-07, "loss": 0.6036, "step": 8491 }, { "epoch": 0.91, "grad_norm": 1.6240656118530432, "learning_rate": 2.4059229230874945e-07, "loss": 0.5845, "step": 8492 }, { "epoch": 0.91, "grad_norm": 1.9226837562111274, "learning_rate": 2.400520584765753e-07, "loss": 0.6843, "step": 8493 }, { "epoch": 0.91, "grad_norm": 1.0589137915877243, "learning_rate": 2.395124169373497e-07, "loss": 0.4853, "step": 8494 }, { "epoch": 0.91, "grad_norm": 1.096071024500593, "learning_rate": 2.3897336775822235e-07, "loss": 0.4961, "step": 8495 }, { "epoch": 0.91, "grad_norm": 1.7685248165426637, "learning_rate": 2.384349110062673e-07, "loss": 0.5302, "step": 8496 }, { "epoch": 0.91, "grad_norm": 1.6299053238197228, "learning_rate": 2.3789704674848703e-07, "loss": 0.4814, "step": 8497 }, { "epoch": 0.91, "grad_norm": 2.0413288252865702, "learning_rate": 2.3735977505180917e-07, "loss": 0.5555, "step": 8498 }, { "epoch": 0.91, "grad_norm": 1.0492760569530422, "learning_rate": 2.368230959830875e-07, "loss": 0.4652, "step": 8499 }, { "epoch": 0.91, "grad_norm": 1.8158791630414708, "learning_rate": 2.36287009609103e-07, "loss": 0.6957, "step": 8500 }, { "epoch": 0.91, "grad_norm": 1.816622405476813, "learning_rate": 2.3575151599656231e-07, "loss": 0.5398, "step": 8501 }, { "epoch": 0.91, "grad_norm": 1.708069936781629, "learning_rate": 2.352166152120977e-07, "loss": 0.5798, "step": 8502 }, { "epoch": 0.91, "grad_norm": 1.7854041976135369, "learning_rate": 2.346823073222687e-07, "loss": 0.5954, "step": 8503 }, { "epoch": 0.91, "grad_norm": 1.8347506927155222, "learning_rate": 2.3414859239356104e-07, "loss": 0.6577, "step": 8504 }, { "epoch": 0.91, "grad_norm": 1.9433116712803946, "learning_rate": 2.3361547049238542e-07, "loss": 0.6405, "step": 8505 }, { "epoch": 0.91, "grad_norm": 1.7383528548675926, "learning_rate": 2.330829416850805e-07, "loss": 0.5999, "step": 8506 }, { "epoch": 0.91, "grad_norm": 1.1295407027196354, "learning_rate": 2.3255100603790935e-07, "loss": 0.4826, "step": 8507 }, { "epoch": 0.91, "grad_norm": 1.707019248880475, "learning_rate": 2.3201966361706295e-07, "loss": 0.635, "step": 8508 }, { "epoch": 0.91, "grad_norm": 2.1744743029310065, "learning_rate": 2.314889144886573e-07, "loss": 0.6544, "step": 8509 }, { "epoch": 0.91, "grad_norm": 1.792903311752425, "learning_rate": 2.3095875871873508e-07, "loss": 0.6431, "step": 8510 }, { "epoch": 0.91, "grad_norm": 1.7574575152958682, "learning_rate": 2.3042919637326466e-07, "loss": 0.5932, "step": 8511 }, { "epoch": 0.91, "grad_norm": 1.7258633101432028, "learning_rate": 2.299002275181411e-07, "loss": 0.5476, "step": 8512 }, { "epoch": 0.91, "grad_norm": 1.9184755895721899, "learning_rate": 2.2937185221918567e-07, "loss": 0.5533, "step": 8513 }, { "epoch": 0.91, "grad_norm": 1.6702336056924916, "learning_rate": 2.2884407054214463e-07, "loss": 0.4882, "step": 8514 }, { "epoch": 0.91, "grad_norm": 1.9082220399381706, "learning_rate": 2.2831688255269214e-07, "loss": 0.601, "step": 8515 }, { "epoch": 0.91, "grad_norm": 1.7092199407704183, "learning_rate": 2.2779028831642736e-07, "loss": 0.5727, "step": 8516 }, { "epoch": 0.91, "grad_norm": 1.0780911164499183, "learning_rate": 2.2726428789887568e-07, "loss": 0.4874, "step": 8517 }, { "epoch": 0.91, "grad_norm": 1.71341971573532, "learning_rate": 2.267388813654886e-07, "loss": 0.5865, "step": 8518 }, { "epoch": 0.91, "grad_norm": 1.8548322961621697, "learning_rate": 2.2621406878164388e-07, "loss": 0.5972, "step": 8519 }, { "epoch": 0.91, "grad_norm": 1.6746571824274155, "learning_rate": 2.256898502126459e-07, "loss": 0.5523, "step": 8520 }, { "epoch": 0.91, "grad_norm": 1.8549333934787353, "learning_rate": 2.2516622572372416e-07, "loss": 0.5641, "step": 8521 }, { "epoch": 0.91, "grad_norm": 1.828230115287745, "learning_rate": 2.2464319538003433e-07, "loss": 0.5657, "step": 8522 }, { "epoch": 0.91, "grad_norm": 1.2097391351651485, "learning_rate": 2.2412075924665877e-07, "loss": 0.4611, "step": 8523 }, { "epoch": 0.91, "grad_norm": 1.0689764022601107, "learning_rate": 2.2359891738860607e-07, "loss": 0.4883, "step": 8524 }, { "epoch": 0.91, "grad_norm": 1.8941955096004213, "learning_rate": 2.2307766987080927e-07, "loss": 0.5658, "step": 8525 }, { "epoch": 0.91, "grad_norm": 1.8678960727142033, "learning_rate": 2.225570167581298e-07, "loss": 0.6507, "step": 8526 }, { "epoch": 0.91, "grad_norm": 1.6908782891965606, "learning_rate": 2.2203695811535307e-07, "loss": 0.5312, "step": 8527 }, { "epoch": 0.91, "grad_norm": 1.9826265925881894, "learning_rate": 2.2151749400719182e-07, "loss": 0.5952, "step": 8528 }, { "epoch": 0.91, "grad_norm": 1.8557948285807642, "learning_rate": 2.2099862449828425e-07, "loss": 0.6984, "step": 8529 }, { "epoch": 0.91, "grad_norm": 1.8226469892612878, "learning_rate": 2.2048034965319432e-07, "loss": 0.5986, "step": 8530 }, { "epoch": 0.91, "grad_norm": 1.9469952222301397, "learning_rate": 2.1996266953641266e-07, "loss": 0.6419, "step": 8531 }, { "epoch": 0.91, "grad_norm": 1.7589588241685292, "learning_rate": 2.1944558421235552e-07, "loss": 0.7008, "step": 8532 }, { "epoch": 0.91, "grad_norm": 1.150296621025415, "learning_rate": 2.1892909374536475e-07, "loss": 0.4954, "step": 8533 }, { "epoch": 0.91, "grad_norm": 1.0655860679795708, "learning_rate": 2.1841319819970953e-07, "loss": 0.4511, "step": 8534 }, { "epoch": 0.91, "grad_norm": 1.9972202963558081, "learning_rate": 2.1789789763958458e-07, "loss": 0.5583, "step": 8535 }, { "epoch": 0.91, "grad_norm": 1.09115646049752, "learning_rate": 2.173831921291081e-07, "loss": 0.4876, "step": 8536 }, { "epoch": 0.91, "grad_norm": 1.6780918612730042, "learning_rate": 2.1686908173232712e-07, "loss": 0.5478, "step": 8537 }, { "epoch": 0.91, "grad_norm": 1.1089564249059867, "learning_rate": 2.1635556651321443e-07, "loss": 0.491, "step": 8538 }, { "epoch": 0.91, "grad_norm": 1.773114382111301, "learning_rate": 2.1584264653566723e-07, "loss": 0.5096, "step": 8539 }, { "epoch": 0.91, "grad_norm": 1.7595505294327685, "learning_rate": 2.1533032186351055e-07, "loss": 0.5713, "step": 8540 }, { "epoch": 0.91, "grad_norm": 1.7224518610766963, "learning_rate": 2.1481859256049342e-07, "loss": 0.6034, "step": 8541 }, { "epoch": 0.91, "grad_norm": 1.6702828259210942, "learning_rate": 2.1430745869029213e-07, "loss": 0.5056, "step": 8542 }, { "epoch": 0.91, "grad_norm": 1.0343242275432567, "learning_rate": 2.1379692031650802e-07, "loss": 0.4747, "step": 8543 }, { "epoch": 0.91, "grad_norm": 1.105498820716025, "learning_rate": 2.1328697750266913e-07, "loss": 0.4931, "step": 8544 }, { "epoch": 0.91, "grad_norm": 1.537319376675437, "learning_rate": 2.1277763031222864e-07, "loss": 0.4885, "step": 8545 }, { "epoch": 0.91, "grad_norm": 1.9051312629724173, "learning_rate": 2.122688788085664e-07, "loss": 0.5923, "step": 8546 }, { "epoch": 0.91, "grad_norm": 1.6375057383172358, "learning_rate": 2.1176072305498786e-07, "loss": 0.6046, "step": 8547 }, { "epoch": 0.91, "grad_norm": 1.0504615108421569, "learning_rate": 2.11253163114723e-07, "loss": 0.4679, "step": 8548 }, { "epoch": 0.91, "grad_norm": 1.1188489791092178, "learning_rate": 2.107461990509302e-07, "loss": 0.4811, "step": 8549 }, { "epoch": 0.91, "grad_norm": 1.981442533673345, "learning_rate": 2.1023983092669176e-07, "loss": 0.6651, "step": 8550 }, { "epoch": 0.91, "grad_norm": 1.710888190782267, "learning_rate": 2.0973405880501674e-07, "loss": 0.6577, "step": 8551 }, { "epoch": 0.91, "grad_norm": 1.8110560319070892, "learning_rate": 2.092288827488387e-07, "loss": 0.5525, "step": 8552 }, { "epoch": 0.91, "grad_norm": 1.783186482906126, "learning_rate": 2.0872430282101897e-07, "loss": 0.582, "step": 8553 }, { "epoch": 0.91, "grad_norm": 1.9117865856926035, "learning_rate": 2.0822031908434292e-07, "loss": 0.5978, "step": 8554 }, { "epoch": 0.91, "grad_norm": 1.8813576104430978, "learning_rate": 2.0771693160152372e-07, "loss": 0.6477, "step": 8555 }, { "epoch": 0.91, "grad_norm": 1.8950156305936399, "learning_rate": 2.0721414043519904e-07, "loss": 0.5322, "step": 8556 }, { "epoch": 0.91, "grad_norm": 1.9474128196370166, "learning_rate": 2.0671194564793162e-07, "loss": 0.6093, "step": 8557 }, { "epoch": 0.91, "grad_norm": 1.7596239613423665, "learning_rate": 2.0621034730221202e-07, "loss": 0.5468, "step": 8558 }, { "epoch": 0.91, "grad_norm": 2.0572885460837904, "learning_rate": 2.0570934546045475e-07, "loss": 0.5791, "step": 8559 }, { "epoch": 0.91, "grad_norm": 1.7997421922125487, "learning_rate": 2.0520894018500048e-07, "loss": 0.5194, "step": 8560 }, { "epoch": 0.91, "grad_norm": 2.033216199644314, "learning_rate": 2.0470913153811668e-07, "loss": 0.6067, "step": 8561 }, { "epoch": 0.91, "grad_norm": 1.8396837260003531, "learning_rate": 2.0420991958199575e-07, "loss": 0.6596, "step": 8562 }, { "epoch": 0.91, "grad_norm": 1.8262910809916149, "learning_rate": 2.0371130437875585e-07, "loss": 0.6569, "step": 8563 }, { "epoch": 0.91, "grad_norm": 1.7379337330515539, "learning_rate": 2.0321328599044122e-07, "loss": 0.5287, "step": 8564 }, { "epoch": 0.91, "grad_norm": 1.7969464992537978, "learning_rate": 2.0271586447902115e-07, "loss": 0.5354, "step": 8565 }, { "epoch": 0.91, "grad_norm": 1.5500681777103227, "learning_rate": 2.0221903990639058e-07, "loss": 0.5375, "step": 8566 }, { "epoch": 0.91, "grad_norm": 1.5032744532275426, "learning_rate": 2.017228123343723e-07, "loss": 0.4924, "step": 8567 }, { "epoch": 0.91, "grad_norm": 1.7882831381371949, "learning_rate": 2.012271818247119e-07, "loss": 0.6174, "step": 8568 }, { "epoch": 0.91, "grad_norm": 1.7293793508484605, "learning_rate": 2.0073214843908227e-07, "loss": 0.5312, "step": 8569 }, { "epoch": 0.91, "grad_norm": 1.8837390921000357, "learning_rate": 2.002377122390825e-07, "loss": 0.6355, "step": 8570 }, { "epoch": 0.91, "grad_norm": 2.0213506115567816, "learning_rate": 1.9974387328623502e-07, "loss": 0.5384, "step": 8571 }, { "epoch": 0.91, "grad_norm": 1.6098281166911321, "learning_rate": 1.992506316419912e-07, "loss": 0.4963, "step": 8572 }, { "epoch": 0.91, "grad_norm": 1.8123291075634245, "learning_rate": 1.9875798736772534e-07, "loss": 0.5428, "step": 8573 }, { "epoch": 0.91, "grad_norm": 1.8416500704078789, "learning_rate": 1.9826594052473946e-07, "loss": 0.5848, "step": 8574 }, { "epoch": 0.91, "grad_norm": 1.8809055870906386, "learning_rate": 1.9777449117425907e-07, "loss": 0.633, "step": 8575 }, { "epoch": 0.91, "grad_norm": 1.8345499278938773, "learning_rate": 1.9728363937743745e-07, "loss": 0.5329, "step": 8576 }, { "epoch": 0.91, "grad_norm": 1.8110888674820365, "learning_rate": 1.967933851953524e-07, "loss": 0.5896, "step": 8577 }, { "epoch": 0.91, "grad_norm": 1.945339034892437, "learning_rate": 1.9630372868900794e-07, "loss": 0.5951, "step": 8578 }, { "epoch": 0.92, "grad_norm": 1.7447864406527562, "learning_rate": 1.9581466991933195e-07, "loss": 0.6082, "step": 8579 }, { "epoch": 0.92, "grad_norm": 1.0491598511063969, "learning_rate": 1.9532620894718024e-07, "loss": 0.4752, "step": 8580 }, { "epoch": 0.92, "grad_norm": 1.7556812964791875, "learning_rate": 1.9483834583333306e-07, "loss": 0.4823, "step": 8581 }, { "epoch": 0.92, "grad_norm": 1.851068259373852, "learning_rate": 1.9435108063849684e-07, "loss": 0.5952, "step": 8582 }, { "epoch": 0.92, "grad_norm": 1.9694204471449972, "learning_rate": 1.9386441342330253e-07, "loss": 0.585, "step": 8583 }, { "epoch": 0.92, "grad_norm": 1.0766189866136755, "learning_rate": 1.9337834424830837e-07, "loss": 0.4673, "step": 8584 }, { "epoch": 0.92, "grad_norm": 1.7761881924779164, "learning_rate": 1.9289287317399708e-07, "loss": 0.5758, "step": 8585 }, { "epoch": 0.92, "grad_norm": 1.8323638024274986, "learning_rate": 1.92408000260777e-07, "loss": 0.6727, "step": 8586 }, { "epoch": 0.92, "grad_norm": 1.629504346325203, "learning_rate": 1.9192372556898154e-07, "loss": 0.4775, "step": 8587 }, { "epoch": 0.92, "grad_norm": 1.6572363582333558, "learning_rate": 1.9144004915887026e-07, "loss": 0.5791, "step": 8588 }, { "epoch": 0.92, "grad_norm": 1.1198504949203898, "learning_rate": 1.909569710906295e-07, "loss": 0.4929, "step": 8589 }, { "epoch": 0.92, "grad_norm": 1.0541696612360958, "learning_rate": 1.9047449142436946e-07, "loss": 0.4817, "step": 8590 }, { "epoch": 0.92, "grad_norm": 1.8159849731173814, "learning_rate": 1.8999261022012606e-07, "loss": 0.6454, "step": 8591 }, { "epoch": 0.92, "grad_norm": 1.7338615046665218, "learning_rate": 1.895113275378607e-07, "loss": 0.5672, "step": 8592 }, { "epoch": 0.92, "grad_norm": 1.760500183652359, "learning_rate": 1.8903064343746226e-07, "loss": 0.5598, "step": 8593 }, { "epoch": 0.92, "grad_norm": 1.7666252134315368, "learning_rate": 1.8855055797874166e-07, "loss": 0.4353, "step": 8594 }, { "epoch": 0.92, "grad_norm": 1.1486328121740703, "learning_rate": 1.8807107122143787e-07, "loss": 0.4969, "step": 8595 }, { "epoch": 0.92, "grad_norm": 1.0843252544072828, "learning_rate": 1.8759218322521476e-07, "loss": 0.4831, "step": 8596 }, { "epoch": 0.92, "grad_norm": 1.7330864316996222, "learning_rate": 1.8711389404966197e-07, "loss": 0.5134, "step": 8597 }, { "epoch": 0.92, "grad_norm": 1.649924851350376, "learning_rate": 1.8663620375429348e-07, "loss": 0.4719, "step": 8598 }, { "epoch": 0.92, "grad_norm": 2.171307299696437, "learning_rate": 1.861591123985501e-07, "loss": 0.5455, "step": 8599 }, { "epoch": 0.92, "grad_norm": 1.5804178923111691, "learning_rate": 1.8568262004179772e-07, "loss": 0.5672, "step": 8600 }, { "epoch": 0.92, "grad_norm": 2.0692010477711777, "learning_rate": 1.8520672674332772e-07, "loss": 0.627, "step": 8601 }, { "epoch": 0.92, "grad_norm": 1.747890814268438, "learning_rate": 1.8473143256235615e-07, "loss": 0.5986, "step": 8602 }, { "epoch": 0.92, "grad_norm": 1.1088686981277744, "learning_rate": 1.842567375580251e-07, "loss": 0.4922, "step": 8603 }, { "epoch": 0.92, "grad_norm": 1.5924568937409769, "learning_rate": 1.837826417894023e-07, "loss": 0.5527, "step": 8604 }, { "epoch": 0.92, "grad_norm": 1.7687994564122624, "learning_rate": 1.833091453154806e-07, "loss": 0.5883, "step": 8605 }, { "epoch": 0.92, "grad_norm": 1.9089688527919784, "learning_rate": 1.8283624819517898e-07, "loss": 0.6835, "step": 8606 }, { "epoch": 0.92, "grad_norm": 1.7421431176686772, "learning_rate": 1.82363950487342e-07, "loss": 0.6162, "step": 8607 }, { "epoch": 0.92, "grad_norm": 1.924994145189674, "learning_rate": 1.8189225225073714e-07, "loss": 0.5844, "step": 8608 }, { "epoch": 0.92, "grad_norm": 1.6513624319255347, "learning_rate": 1.8142115354405966e-07, "loss": 0.5435, "step": 8609 }, { "epoch": 0.92, "grad_norm": 1.9803673196273612, "learning_rate": 1.8095065442592986e-07, "loss": 0.6231, "step": 8610 }, { "epoch": 0.92, "grad_norm": 1.063380144202687, "learning_rate": 1.8048075495489314e-07, "loss": 0.4728, "step": 8611 }, { "epoch": 0.92, "grad_norm": 1.7787857762822836, "learning_rate": 1.8001145518941999e-07, "loss": 0.5526, "step": 8612 }, { "epoch": 0.92, "grad_norm": 2.034740460259039, "learning_rate": 1.7954275518790697e-07, "loss": 0.631, "step": 8613 }, { "epoch": 0.92, "grad_norm": 1.03856019145238, "learning_rate": 1.7907465500867637e-07, "loss": 0.4954, "step": 8614 }, { "epoch": 0.92, "grad_norm": 1.9777905859216538, "learning_rate": 1.7860715470997381e-07, "loss": 0.5159, "step": 8615 }, { "epoch": 0.92, "grad_norm": 1.6124685804176384, "learning_rate": 1.7814025434997218e-07, "loss": 0.534, "step": 8616 }, { "epoch": 0.92, "grad_norm": 1.604823275831762, "learning_rate": 1.776739539867689e-07, "loss": 0.5836, "step": 8617 }, { "epoch": 0.92, "grad_norm": 1.052125187234725, "learning_rate": 1.7720825367838757e-07, "loss": 0.4779, "step": 8618 }, { "epoch": 0.92, "grad_norm": 1.9336683344728032, "learning_rate": 1.7674315348277626e-07, "loss": 0.6389, "step": 8619 }, { "epoch": 0.92, "grad_norm": 1.80819534830162, "learning_rate": 1.7627865345780815e-07, "loss": 0.5195, "step": 8620 }, { "epoch": 0.92, "grad_norm": 1.8095453757178015, "learning_rate": 1.7581475366128253e-07, "loss": 0.6418, "step": 8621 }, { "epoch": 0.92, "grad_norm": 1.584165387769682, "learning_rate": 1.753514541509238e-07, "loss": 0.552, "step": 8622 }, { "epoch": 0.92, "grad_norm": 1.921973305897257, "learning_rate": 1.7488875498438197e-07, "loss": 0.6041, "step": 8623 }, { "epoch": 0.92, "grad_norm": 1.7814633990224293, "learning_rate": 1.7442665621923095e-07, "loss": 0.5612, "step": 8624 }, { "epoch": 0.92, "grad_norm": 1.744919603109565, "learning_rate": 1.7396515791297141e-07, "loss": 0.526, "step": 8625 }, { "epoch": 0.92, "grad_norm": 1.7436654785139478, "learning_rate": 1.7350426012302858e-07, "loss": 0.5543, "step": 8626 }, { "epoch": 0.92, "grad_norm": 1.908688574960412, "learning_rate": 1.7304396290675374e-07, "loss": 0.5911, "step": 8627 }, { "epoch": 0.92, "grad_norm": 1.0888317755312402, "learning_rate": 1.7258426632142223e-07, "loss": 0.4677, "step": 8628 }, { "epoch": 0.92, "grad_norm": 1.7120404898517607, "learning_rate": 1.7212517042423605e-07, "loss": 0.5983, "step": 8629 }, { "epoch": 0.92, "grad_norm": 1.890444878941099, "learning_rate": 1.7166667527232063e-07, "loss": 0.606, "step": 8630 }, { "epoch": 0.92, "grad_norm": 2.0229270903817267, "learning_rate": 1.712087809227292e-07, "loss": 0.6538, "step": 8631 }, { "epoch": 0.92, "grad_norm": 1.6125490405614744, "learning_rate": 1.707514874324373e-07, "loss": 0.5372, "step": 8632 }, { "epoch": 0.92, "grad_norm": 1.8910059783470063, "learning_rate": 1.7029479485834833e-07, "loss": 0.622, "step": 8633 }, { "epoch": 0.92, "grad_norm": 1.911160908235178, "learning_rate": 1.69838703257289e-07, "loss": 0.554, "step": 8634 }, { "epoch": 0.92, "grad_norm": 1.6490588615997268, "learning_rate": 1.6938321268601233e-07, "loss": 0.5899, "step": 8635 }, { "epoch": 0.92, "grad_norm": 1.715404026716129, "learning_rate": 1.6892832320119623e-07, "loss": 0.577, "step": 8636 }, { "epoch": 0.92, "grad_norm": 1.8488545798653893, "learning_rate": 1.684740348594438e-07, "loss": 0.5899, "step": 8637 }, { "epoch": 0.92, "grad_norm": 1.72498986272147, "learning_rate": 1.6802034771728315e-07, "loss": 0.6088, "step": 8638 }, { "epoch": 0.92, "grad_norm": 1.7311998238287747, "learning_rate": 1.67567261831168e-07, "loss": 0.5435, "step": 8639 }, { "epoch": 0.92, "grad_norm": 2.116052870929029, "learning_rate": 1.671147772574766e-07, "loss": 0.703, "step": 8640 }, { "epoch": 0.92, "grad_norm": 1.8554628262769903, "learning_rate": 1.666628940525139e-07, "loss": 0.6011, "step": 8641 }, { "epoch": 0.92, "grad_norm": 1.066562530092402, "learning_rate": 1.662116122725077e-07, "loss": 0.4715, "step": 8642 }, { "epoch": 0.92, "grad_norm": 1.8779575862002709, "learning_rate": 1.6576093197361253e-07, "loss": 0.5962, "step": 8643 }, { "epoch": 0.92, "grad_norm": 1.7867830133440494, "learning_rate": 1.65310853211908e-07, "loss": 0.6252, "step": 8644 }, { "epoch": 0.92, "grad_norm": 1.6578977787427995, "learning_rate": 1.6486137604339813e-07, "loss": 0.5437, "step": 8645 }, { "epoch": 0.92, "grad_norm": 1.7962741874327657, "learning_rate": 1.6441250052401324e-07, "loss": 0.5964, "step": 8646 }, { "epoch": 0.92, "grad_norm": 1.0929323800955602, "learning_rate": 1.639642267096081e-07, "loss": 0.4513, "step": 8647 }, { "epoch": 0.92, "grad_norm": 1.767321330497635, "learning_rate": 1.6351655465596195e-07, "loss": 0.5598, "step": 8648 }, { "epoch": 0.92, "grad_norm": 1.8494083468320266, "learning_rate": 1.6306948441877968e-07, "loss": 0.6317, "step": 8649 }, { "epoch": 0.92, "grad_norm": 2.034065914316354, "learning_rate": 1.626230160536929e-07, "loss": 0.5232, "step": 8650 }, { "epoch": 0.92, "grad_norm": 2.230103100483359, "learning_rate": 1.6217714961625498e-07, "loss": 0.5713, "step": 8651 }, { "epoch": 0.92, "grad_norm": 1.0802132648504785, "learning_rate": 1.6173188516194705e-07, "loss": 0.4681, "step": 8652 }, { "epoch": 0.92, "grad_norm": 1.6497239267003514, "learning_rate": 1.6128722274617482e-07, "loss": 0.524, "step": 8653 }, { "epoch": 0.92, "grad_norm": 1.8009725914289063, "learning_rate": 1.6084316242426844e-07, "loss": 0.5368, "step": 8654 }, { "epoch": 0.92, "grad_norm": 1.7413741735657027, "learning_rate": 1.603997042514843e-07, "loss": 0.5093, "step": 8655 }, { "epoch": 0.92, "grad_norm": 2.073608837467154, "learning_rate": 1.599568482830016e-07, "loss": 0.5693, "step": 8656 }, { "epoch": 0.92, "grad_norm": 1.8823306305476013, "learning_rate": 1.5951459457392736e-07, "loss": 0.5968, "step": 8657 }, { "epoch": 0.92, "grad_norm": 1.764049388704785, "learning_rate": 1.59072943179292e-07, "loss": 0.6038, "step": 8658 }, { "epoch": 0.92, "grad_norm": 1.7171663228424852, "learning_rate": 1.5863189415405155e-07, "loss": 0.5086, "step": 8659 }, { "epoch": 0.92, "grad_norm": 1.056759491938735, "learning_rate": 1.5819144755308657e-07, "loss": 0.4699, "step": 8660 }, { "epoch": 0.92, "grad_norm": 1.7768915912390113, "learning_rate": 1.5775160343120265e-07, "loss": 0.6927, "step": 8661 }, { "epoch": 0.92, "grad_norm": 1.0586341138564022, "learning_rate": 1.5731236184313158e-07, "loss": 0.481, "step": 8662 }, { "epoch": 0.92, "grad_norm": 1.9755593666628353, "learning_rate": 1.568737228435291e-07, "loss": 0.5079, "step": 8663 }, { "epoch": 0.92, "grad_norm": 1.8693009786349744, "learning_rate": 1.5643568648697648e-07, "loss": 0.5378, "step": 8664 }, { "epoch": 0.92, "grad_norm": 1.7978131722372304, "learning_rate": 1.5599825282797908e-07, "loss": 0.5912, "step": 8665 }, { "epoch": 0.92, "grad_norm": 1.0985070504020162, "learning_rate": 1.555614219209678e-07, "loss": 0.4885, "step": 8666 }, { "epoch": 0.92, "grad_norm": 1.1003258633763293, "learning_rate": 1.5512519382029968e-07, "loss": 0.475, "step": 8667 }, { "epoch": 0.92, "grad_norm": 1.1216783805817951, "learning_rate": 1.5468956858025408e-07, "loss": 0.4813, "step": 8668 }, { "epoch": 0.92, "grad_norm": 1.7980200374850426, "learning_rate": 1.5425454625503878e-07, "loss": 0.5618, "step": 8669 }, { "epoch": 0.92, "grad_norm": 1.7068951189268309, "learning_rate": 1.5382012689878435e-07, "loss": 0.6078, "step": 8670 }, { "epoch": 0.92, "grad_norm": 1.82060126585663, "learning_rate": 1.5338631056554644e-07, "loss": 0.5605, "step": 8671 }, { "epoch": 0.93, "grad_norm": 1.6507249728970426, "learning_rate": 1.5295309730930574e-07, "loss": 0.5831, "step": 8672 }, { "epoch": 0.93, "grad_norm": 1.8794729465766034, "learning_rate": 1.525204871839686e-07, "loss": 0.6342, "step": 8673 }, { "epoch": 0.93, "grad_norm": 1.7090961617536726, "learning_rate": 1.5208848024336532e-07, "loss": 0.59, "step": 8674 }, { "epoch": 0.93, "grad_norm": 1.6797921774237687, "learning_rate": 1.5165707654125228e-07, "loss": 0.6198, "step": 8675 }, { "epoch": 0.93, "grad_norm": 1.7577298435322435, "learning_rate": 1.512262761313099e-07, "loss": 0.539, "step": 8676 }, { "epoch": 0.93, "grad_norm": 1.7748582226860063, "learning_rate": 1.5079607906714366e-07, "loss": 0.639, "step": 8677 }, { "epoch": 0.93, "grad_norm": 1.7668534364093083, "learning_rate": 1.503664854022846e-07, "loss": 0.5373, "step": 8678 }, { "epoch": 0.93, "grad_norm": 1.7236560501565477, "learning_rate": 1.4993749519018775e-07, "loss": 0.5377, "step": 8679 }, { "epoch": 0.93, "grad_norm": 1.8878901474251721, "learning_rate": 1.4950910848423371e-07, "loss": 0.6301, "step": 8680 }, { "epoch": 0.93, "grad_norm": 1.8793099419029935, "learning_rate": 1.4908132533772768e-07, "loss": 0.5644, "step": 8681 }, { "epoch": 0.93, "grad_norm": 1.8551311079680064, "learning_rate": 1.4865414580389925e-07, "loss": 0.6137, "step": 8682 }, { "epoch": 0.93, "grad_norm": 1.7917795357808624, "learning_rate": 1.482275699359048e-07, "loss": 0.6407, "step": 8683 }, { "epoch": 0.93, "grad_norm": 1.7600622295364627, "learning_rate": 1.4780159778682247e-07, "loss": 0.6154, "step": 8684 }, { "epoch": 0.93, "grad_norm": 1.8571176259610307, "learning_rate": 1.4737622940965925e-07, "loss": 0.5294, "step": 8685 }, { "epoch": 0.93, "grad_norm": 1.0631962724923223, "learning_rate": 1.4695146485734334e-07, "loss": 0.4941, "step": 8686 }, { "epoch": 0.93, "grad_norm": 1.8476868314973223, "learning_rate": 1.465273041827303e-07, "loss": 0.4428, "step": 8687 }, { "epoch": 0.93, "grad_norm": 1.941029505705887, "learning_rate": 1.4610374743859846e-07, "loss": 0.5963, "step": 8688 }, { "epoch": 0.93, "grad_norm": 1.818283165731952, "learning_rate": 1.456807946776534e-07, "loss": 0.5806, "step": 8689 }, { "epoch": 0.93, "grad_norm": 1.734147048082268, "learning_rate": 1.4525844595252303e-07, "loss": 0.555, "step": 8690 }, { "epoch": 0.93, "grad_norm": 1.7356215488963787, "learning_rate": 1.4483670131576254e-07, "loss": 0.6933, "step": 8691 }, { "epoch": 0.93, "grad_norm": 2.5284834768690736, "learning_rate": 1.4441556081984942e-07, "loss": 0.53, "step": 8692 }, { "epoch": 0.93, "grad_norm": 1.6880991587303404, "learning_rate": 1.4399502451718894e-07, "loss": 0.532, "step": 8693 }, { "epoch": 0.93, "grad_norm": 1.8381559858660959, "learning_rate": 1.4357509246010814e-07, "loss": 0.5223, "step": 8694 }, { "epoch": 0.93, "grad_norm": 1.9201068301510593, "learning_rate": 1.4315576470086024e-07, "loss": 0.5546, "step": 8695 }, { "epoch": 0.93, "grad_norm": 1.8612343289721733, "learning_rate": 1.4273704129162403e-07, "loss": 0.5712, "step": 8696 }, { "epoch": 0.93, "grad_norm": 1.5940172777762145, "learning_rate": 1.4231892228450172e-07, "loss": 0.5026, "step": 8697 }, { "epoch": 0.93, "grad_norm": 1.8107717249876951, "learning_rate": 1.4190140773152173e-07, "loss": 0.6104, "step": 8698 }, { "epoch": 0.93, "grad_norm": 1.8269238887580768, "learning_rate": 1.4148449768463635e-07, "loss": 0.5488, "step": 8699 }, { "epoch": 0.93, "grad_norm": 1.8245961131227681, "learning_rate": 1.4106819219572243e-07, "loss": 0.6209, "step": 8700 }, { "epoch": 0.93, "grad_norm": 1.6792343207177456, "learning_rate": 1.406524913165813e-07, "loss": 0.5494, "step": 8701 }, { "epoch": 0.93, "grad_norm": 1.6341618352985212, "learning_rate": 1.402373950989411e-07, "loss": 0.5663, "step": 8702 }, { "epoch": 0.93, "grad_norm": 1.9174817183707213, "learning_rate": 1.3982290359445316e-07, "loss": 0.653, "step": 8703 }, { "epoch": 0.93, "grad_norm": 1.7261519886952115, "learning_rate": 1.39409016854693e-07, "loss": 0.6056, "step": 8704 }, { "epoch": 0.93, "grad_norm": 1.85353039733232, "learning_rate": 1.389957349311627e-07, "loss": 0.6047, "step": 8705 }, { "epoch": 0.93, "grad_norm": 1.7215178443993413, "learning_rate": 1.385830578752867e-07, "loss": 0.5512, "step": 8706 }, { "epoch": 0.93, "grad_norm": 1.8419069731547606, "learning_rate": 1.3817098573841614e-07, "loss": 0.6304, "step": 8707 }, { "epoch": 0.93, "grad_norm": 1.7467740685085187, "learning_rate": 1.3775951857182724e-07, "loss": 0.5362, "step": 8708 }, { "epoch": 0.93, "grad_norm": 1.7760227668460804, "learning_rate": 1.3734865642671846e-07, "loss": 0.5818, "step": 8709 }, { "epoch": 0.93, "grad_norm": 1.71623550982108, "learning_rate": 1.3693839935421448e-07, "loss": 0.5097, "step": 8710 }, { "epoch": 0.93, "grad_norm": 2.0069028304495227, "learning_rate": 1.365287474053656e-07, "loss": 0.6511, "step": 8711 }, { "epoch": 0.93, "grad_norm": 1.8477139044024449, "learning_rate": 1.3611970063114543e-07, "loss": 0.5794, "step": 8712 }, { "epoch": 0.93, "grad_norm": 1.7240417759837292, "learning_rate": 1.3571125908245276e-07, "loss": 0.5488, "step": 8713 }, { "epoch": 0.93, "grad_norm": 1.676361612632889, "learning_rate": 1.3530342281011134e-07, "loss": 0.5591, "step": 8714 }, { "epoch": 0.93, "grad_norm": 1.6735982418482616, "learning_rate": 1.3489619186486892e-07, "loss": 0.6347, "step": 8715 }, { "epoch": 0.93, "grad_norm": 1.7548277523150217, "learning_rate": 1.344895662973983e-07, "loss": 0.5694, "step": 8716 }, { "epoch": 0.93, "grad_norm": 1.0700512750871156, "learning_rate": 1.3408354615829732e-07, "loss": 0.4818, "step": 8717 }, { "epoch": 0.93, "grad_norm": 1.7304989600750174, "learning_rate": 1.3367813149808728e-07, "loss": 0.5952, "step": 8718 }, { "epoch": 0.93, "grad_norm": 1.7220539231277834, "learning_rate": 1.332733223672167e-07, "loss": 0.6116, "step": 8719 }, { "epoch": 0.93, "grad_norm": 1.9446225092877782, "learning_rate": 1.3286911881605523e-07, "loss": 0.6871, "step": 8720 }, { "epoch": 0.93, "grad_norm": 1.8735181205568623, "learning_rate": 1.324655208949005e-07, "loss": 0.552, "step": 8721 }, { "epoch": 0.93, "grad_norm": 1.7733172504469705, "learning_rate": 1.3206252865397285e-07, "loss": 0.5287, "step": 8722 }, { "epoch": 0.93, "grad_norm": 1.6375947588823256, "learning_rate": 1.3166014214341715e-07, "loss": 0.589, "step": 8723 }, { "epoch": 0.93, "grad_norm": 1.875105579307663, "learning_rate": 1.3125836141330338e-07, "loss": 0.6381, "step": 8724 }, { "epoch": 0.93, "grad_norm": 1.8021615307585732, "learning_rate": 1.3085718651362654e-07, "loss": 0.48, "step": 8725 }, { "epoch": 0.93, "grad_norm": 1.7894117363303386, "learning_rate": 1.3045661749430617e-07, "loss": 0.5289, "step": 8726 }, { "epoch": 0.93, "grad_norm": 2.175586029728143, "learning_rate": 1.3005665440518567e-07, "loss": 0.64, "step": 8727 }, { "epoch": 0.93, "grad_norm": 1.779134780489601, "learning_rate": 1.296572972960336e-07, "loss": 0.6133, "step": 8728 }, { "epoch": 0.93, "grad_norm": 1.873264195010904, "learning_rate": 1.29258546216543e-07, "loss": 0.6342, "step": 8729 }, { "epoch": 0.93, "grad_norm": 1.8508644835838086, "learning_rate": 1.2886040121633137e-07, "loss": 0.5956, "step": 8730 }, { "epoch": 0.93, "grad_norm": 2.0484296638578736, "learning_rate": 1.284628623449413e-07, "loss": 0.5698, "step": 8731 }, { "epoch": 0.93, "grad_norm": 1.8025442780701482, "learning_rate": 1.280659296518394e-07, "loss": 0.5523, "step": 8732 }, { "epoch": 0.93, "grad_norm": 1.607711282048244, "learning_rate": 1.2766960318641776e-07, "loss": 0.4906, "step": 8733 }, { "epoch": 0.93, "grad_norm": 1.730021793078135, "learning_rate": 1.2727388299799082e-07, "loss": 0.6251, "step": 8734 }, { "epoch": 0.93, "grad_norm": 1.7544492140619132, "learning_rate": 1.2687876913580034e-07, "loss": 0.5239, "step": 8735 }, { "epoch": 0.93, "grad_norm": 1.6138045903244467, "learning_rate": 1.264842616490114e-07, "loss": 0.5377, "step": 8736 }, { "epoch": 0.93, "grad_norm": 1.7856698600842238, "learning_rate": 1.2609036058671253e-07, "loss": 0.5801, "step": 8737 }, { "epoch": 0.93, "grad_norm": 1.766727288060211, "learning_rate": 1.2569706599791896e-07, "loss": 0.5226, "step": 8738 }, { "epoch": 0.93, "grad_norm": 1.8522144661636164, "learning_rate": 1.2530437793156936e-07, "loss": 0.593, "step": 8739 }, { "epoch": 0.93, "grad_norm": 1.9222812391039636, "learning_rate": 1.2491229643652625e-07, "loss": 0.625, "step": 8740 }, { "epoch": 0.93, "grad_norm": 2.1157748625941206, "learning_rate": 1.2452082156157786e-07, "loss": 0.6194, "step": 8741 }, { "epoch": 0.93, "grad_norm": 1.9817732484188975, "learning_rate": 1.2412995335543575e-07, "loss": 0.6155, "step": 8742 }, { "epoch": 0.93, "grad_norm": 1.621894960961775, "learning_rate": 1.2373969186673773e-07, "loss": 0.5212, "step": 8743 }, { "epoch": 0.93, "grad_norm": 1.6635541017143265, "learning_rate": 1.2335003714404436e-07, "loss": 0.512, "step": 8744 }, { "epoch": 0.93, "grad_norm": 1.9413372264187867, "learning_rate": 1.2296098923584187e-07, "loss": 0.6512, "step": 8745 }, { "epoch": 0.93, "grad_norm": 1.5968294958284197, "learning_rate": 1.2257254819054043e-07, "loss": 0.5317, "step": 8746 }, { "epoch": 0.93, "grad_norm": 1.7572421288143167, "learning_rate": 1.2218471405647415e-07, "loss": 0.6295, "step": 8747 }, { "epoch": 0.93, "grad_norm": 1.6616620525170256, "learning_rate": 1.2179748688190329e-07, "loss": 0.4909, "step": 8748 }, { "epoch": 0.93, "grad_norm": 1.633550356349484, "learning_rate": 1.2141086671501047e-07, "loss": 0.5605, "step": 8749 }, { "epoch": 0.93, "grad_norm": 1.5520818991349734, "learning_rate": 1.2102485360390493e-07, "loss": 0.5581, "step": 8750 }, { "epoch": 0.93, "grad_norm": 1.1168062408649655, "learning_rate": 1.2063944759661828e-07, "loss": 0.4946, "step": 8751 }, { "epoch": 0.93, "grad_norm": 1.8829625351211026, "learning_rate": 1.202546487411088e-07, "loss": 0.5605, "step": 8752 }, { "epoch": 0.93, "grad_norm": 1.816456649349982, "learning_rate": 1.1987045708525658e-07, "loss": 0.5802, "step": 8753 }, { "epoch": 0.93, "grad_norm": 1.772685359877321, "learning_rate": 1.1948687267686887e-07, "loss": 0.6485, "step": 8754 }, { "epoch": 0.93, "grad_norm": 2.1828854103421187, "learning_rate": 1.191038955636753e-07, "loss": 0.5397, "step": 8755 }, { "epoch": 0.93, "grad_norm": 1.906365255066863, "learning_rate": 1.1872152579333162e-07, "loss": 0.6486, "step": 8756 }, { "epoch": 0.93, "grad_norm": 1.7569580747485212, "learning_rate": 1.1833976341341591e-07, "loss": 0.5897, "step": 8757 }, { "epoch": 0.93, "grad_norm": 1.8117795517734963, "learning_rate": 1.1795860847143293e-07, "loss": 0.5951, "step": 8758 }, { "epoch": 0.93, "grad_norm": 1.0330025165833192, "learning_rate": 1.1757806101481028e-07, "loss": 0.4714, "step": 8759 }, { "epoch": 0.93, "grad_norm": 2.2198372963482766, "learning_rate": 1.1719812109090013e-07, "loss": 0.5962, "step": 8760 }, { "epoch": 0.93, "grad_norm": 1.9361243610160541, "learning_rate": 1.1681878874698017e-07, "loss": 0.6795, "step": 8761 }, { "epoch": 0.93, "grad_norm": 1.6170700798575253, "learning_rate": 1.1644006403025154e-07, "loss": 0.5147, "step": 8762 }, { "epoch": 0.93, "grad_norm": 1.8841580397040154, "learning_rate": 1.160619469878399e-07, "loss": 0.5434, "step": 8763 }, { "epoch": 0.93, "grad_norm": 1.9250657893811245, "learning_rate": 1.1568443766679482e-07, "loss": 0.6003, "step": 8764 }, { "epoch": 0.93, "grad_norm": 1.8449015763941918, "learning_rate": 1.1530753611409151e-07, "loss": 0.6345, "step": 8765 }, { "epoch": 0.94, "grad_norm": 1.8244269914140319, "learning_rate": 1.1493124237662856e-07, "loss": 0.6044, "step": 8766 }, { "epoch": 0.94, "grad_norm": 1.9055605622797096, "learning_rate": 1.1455555650122963e-07, "loss": 0.6309, "step": 8767 }, { "epoch": 0.94, "grad_norm": 1.7217669899062156, "learning_rate": 1.1418047853464176e-07, "loss": 0.5734, "step": 8768 }, { "epoch": 0.94, "grad_norm": 1.5561712274363235, "learning_rate": 1.1380600852353652e-07, "loss": 0.4744, "step": 8769 }, { "epoch": 0.94, "grad_norm": 1.90111921250644, "learning_rate": 1.1343214651451106e-07, "loss": 0.5724, "step": 8770 }, { "epoch": 0.94, "grad_norm": 1.6531914706442417, "learning_rate": 1.1305889255408542e-07, "loss": 0.5447, "step": 8771 }, { "epoch": 0.94, "grad_norm": 1.8523691669428375, "learning_rate": 1.126862466887052e-07, "loss": 0.6584, "step": 8772 }, { "epoch": 0.94, "grad_norm": 1.6106886158914184, "learning_rate": 1.1231420896473943e-07, "loss": 0.5378, "step": 8773 }, { "epoch": 0.94, "grad_norm": 1.0609343682667398, "learning_rate": 1.1194277942848164e-07, "loss": 0.4656, "step": 8774 }, { "epoch": 0.94, "grad_norm": 1.733764981465794, "learning_rate": 1.115719581261493e-07, "loss": 0.5314, "step": 8775 }, { "epoch": 0.94, "grad_norm": 1.8291277402225292, "learning_rate": 1.1120174510388604e-07, "loss": 0.6114, "step": 8776 }, { "epoch": 0.94, "grad_norm": 1.927403955777475, "learning_rate": 1.1083214040775725e-07, "loss": 0.5149, "step": 8777 }, { "epoch": 0.94, "grad_norm": 1.81652784207674, "learning_rate": 1.1046314408375391e-07, "loss": 0.5979, "step": 8778 }, { "epoch": 0.94, "grad_norm": 1.0603875952353288, "learning_rate": 1.1009475617779208e-07, "loss": 0.4726, "step": 8779 }, { "epoch": 0.94, "grad_norm": 1.8245611365755101, "learning_rate": 1.0972697673571064e-07, "loss": 0.5533, "step": 8780 }, { "epoch": 0.94, "grad_norm": 1.6332343375334635, "learning_rate": 1.0935980580327243e-07, "loss": 0.6397, "step": 8781 }, { "epoch": 0.94, "grad_norm": 1.8140360610067534, "learning_rate": 1.0899324342616702e-07, "loss": 0.6406, "step": 8782 }, { "epoch": 0.94, "grad_norm": 1.8179316384918147, "learning_rate": 1.0862728965000624e-07, "loss": 0.5553, "step": 8783 }, { "epoch": 0.94, "grad_norm": 1.819743711568231, "learning_rate": 1.0826194452032645e-07, "loss": 0.6782, "step": 8784 }, { "epoch": 0.94, "grad_norm": 2.015017376272247, "learning_rate": 1.0789720808258908e-07, "loss": 0.5846, "step": 8785 }, { "epoch": 0.94, "grad_norm": 1.8409443531998047, "learning_rate": 1.0753308038217891e-07, "loss": 0.6068, "step": 8786 }, { "epoch": 0.94, "grad_norm": 1.7845052106748518, "learning_rate": 1.0716956146440471e-07, "loss": 0.5976, "step": 8787 }, { "epoch": 0.94, "grad_norm": 1.8665138358701368, "learning_rate": 1.0680665137450142e-07, "loss": 0.6549, "step": 8788 }, { "epoch": 0.94, "grad_norm": 1.9237915489688846, "learning_rate": 1.0644435015762567e-07, "loss": 0.6375, "step": 8789 }, { "epoch": 0.94, "grad_norm": 1.8822381779081878, "learning_rate": 1.0608265785885974e-07, "loss": 0.6296, "step": 8790 }, { "epoch": 0.94, "grad_norm": 1.8279257062039498, "learning_rate": 1.0572157452321097e-07, "loss": 0.5378, "step": 8791 }, { "epoch": 0.94, "grad_norm": 2.2377242858648008, "learning_rate": 1.0536110019560841e-07, "loss": 0.5457, "step": 8792 }, { "epoch": 0.94, "grad_norm": 1.8929777908206418, "learning_rate": 1.0500123492090841e-07, "loss": 0.5482, "step": 8793 }, { "epoch": 0.94, "grad_norm": 1.6475273441989169, "learning_rate": 1.0464197874388904e-07, "loss": 0.5517, "step": 8794 }, { "epoch": 0.94, "grad_norm": 1.8547529339306548, "learning_rate": 1.0428333170925342e-07, "loss": 0.6384, "step": 8795 }, { "epoch": 0.94, "grad_norm": 2.0863211738644964, "learning_rate": 1.039252938616292e-07, "loss": 0.7103, "step": 8796 }, { "epoch": 0.94, "grad_norm": 1.6422037999027592, "learning_rate": 1.0356786524556795e-07, "loss": 0.5976, "step": 8797 }, { "epoch": 0.94, "grad_norm": 1.7023528172064128, "learning_rate": 1.032110459055452e-07, "loss": 0.5443, "step": 8798 }, { "epoch": 0.94, "grad_norm": 1.8160851974585976, "learning_rate": 1.0285483588596156e-07, "loss": 0.6472, "step": 8799 }, { "epoch": 0.94, "grad_norm": 1.8649956913654313, "learning_rate": 1.0249923523114103e-07, "loss": 0.607, "step": 8800 }, { "epoch": 0.94, "grad_norm": 2.011152118812041, "learning_rate": 1.0214424398533095e-07, "loss": 0.6049, "step": 8801 }, { "epoch": 0.94, "grad_norm": 1.7178598189864651, "learning_rate": 1.0178986219270548e-07, "loss": 0.5448, "step": 8802 }, { "epoch": 0.94, "grad_norm": 1.7432390459932237, "learning_rate": 1.0143608989735987e-07, "loss": 0.5869, "step": 8803 }, { "epoch": 0.94, "grad_norm": 1.078785622401343, "learning_rate": 1.0108292714331558e-07, "loss": 0.461, "step": 8804 }, { "epoch": 0.94, "grad_norm": 1.8477172624355611, "learning_rate": 1.0073037397451802e-07, "loss": 0.5551, "step": 8805 }, { "epoch": 0.94, "grad_norm": 1.09330831982078, "learning_rate": 1.0037843043483486e-07, "loss": 0.4713, "step": 8806 }, { "epoch": 0.94, "grad_norm": 1.753274240092838, "learning_rate": 1.0002709656806109e-07, "loss": 0.604, "step": 8807 }, { "epoch": 0.94, "grad_norm": 1.923774851051709, "learning_rate": 9.967637241791339e-08, "loss": 0.6637, "step": 8808 }, { "epoch": 0.94, "grad_norm": 1.633724896044541, "learning_rate": 9.932625802803297e-08, "loss": 0.5772, "step": 8809 }, { "epoch": 0.94, "grad_norm": 1.6172791225343843, "learning_rate": 9.897675344198554e-08, "loss": 0.5329, "step": 8810 }, { "epoch": 0.94, "grad_norm": 1.851101379894602, "learning_rate": 9.862785870326131e-08, "loss": 0.6166, "step": 8811 }, { "epoch": 0.94, "grad_norm": 1.0532099272745845, "learning_rate": 9.827957385527387e-08, "loss": 0.4879, "step": 8812 }, { "epoch": 0.94, "grad_norm": 1.5378993689612952, "learning_rate": 9.793189894136135e-08, "loss": 0.5848, "step": 8813 }, { "epoch": 0.94, "grad_norm": 1.6604131507258344, "learning_rate": 9.75848340047858e-08, "loss": 0.5784, "step": 8814 }, { "epoch": 0.94, "grad_norm": 1.9176789189851566, "learning_rate": 9.723837908873324e-08, "loss": 0.5948, "step": 8815 }, { "epoch": 0.94, "grad_norm": 1.6505771501781283, "learning_rate": 9.689253423631417e-08, "loss": 0.5569, "step": 8816 }, { "epoch": 0.94, "grad_norm": 1.7506809072188965, "learning_rate": 9.654729949056363e-08, "loss": 0.6344, "step": 8817 }, { "epoch": 0.94, "grad_norm": 2.0327030234470858, "learning_rate": 9.62026748944389e-08, "loss": 0.5106, "step": 8818 }, { "epoch": 0.94, "grad_norm": 1.0874719520483163, "learning_rate": 9.585866049082348e-08, "loss": 0.4576, "step": 8819 }, { "epoch": 0.94, "grad_norm": 1.8553861658912243, "learning_rate": 9.551525632252367e-08, "loss": 0.5812, "step": 8820 }, { "epoch": 0.94, "grad_norm": 1.0422785779200743, "learning_rate": 9.517246243226975e-08, "loss": 0.4758, "step": 8821 }, { "epoch": 0.94, "grad_norm": 1.8001434866427566, "learning_rate": 9.483027886271756e-08, "loss": 0.558, "step": 8822 }, { "epoch": 0.94, "grad_norm": 1.936886262311575, "learning_rate": 9.448870565644475e-08, "loss": 0.5849, "step": 8823 }, { "epoch": 0.94, "grad_norm": 1.8751226602725546, "learning_rate": 9.414774285595451e-08, "loss": 0.5133, "step": 8824 }, { "epoch": 0.94, "grad_norm": 1.6818663299433054, "learning_rate": 9.380739050367349e-08, "loss": 0.5826, "step": 8825 }, { "epoch": 0.94, "grad_norm": 1.9626871706761722, "learning_rate": 9.346764864195335e-08, "loss": 0.5436, "step": 8826 }, { "epoch": 0.94, "grad_norm": 1.9091827401899026, "learning_rate": 9.312851731306916e-08, "loss": 0.6401, "step": 8827 }, { "epoch": 0.94, "grad_norm": 1.0750128828824048, "learning_rate": 9.278999655921883e-08, "loss": 0.4979, "step": 8828 }, { "epoch": 0.94, "grad_norm": 1.8030678765817194, "learning_rate": 9.245208642252646e-08, "loss": 0.578, "step": 8829 }, { "epoch": 0.94, "grad_norm": 1.9571934836699592, "learning_rate": 9.21147869450384e-08, "loss": 0.6538, "step": 8830 }, { "epoch": 0.94, "grad_norm": 1.7614429229278852, "learning_rate": 9.177809816872607e-08, "loss": 0.5626, "step": 8831 }, { "epoch": 0.94, "grad_norm": 1.8396511710837005, "learning_rate": 9.144202013548431e-08, "loss": 0.6015, "step": 8832 }, { "epoch": 0.94, "grad_norm": 1.7739305805523673, "learning_rate": 9.1106552887133e-08, "loss": 0.62, "step": 8833 }, { "epoch": 0.94, "grad_norm": 1.807688947715087, "learning_rate": 9.077169646541428e-08, "loss": 0.6153, "step": 8834 }, { "epoch": 0.94, "grad_norm": 1.868449608445938, "learning_rate": 9.043745091199541e-08, "loss": 0.5402, "step": 8835 }, { "epoch": 0.94, "grad_norm": 1.0565711309821397, "learning_rate": 9.010381626846753e-08, "loss": 0.4815, "step": 8836 }, { "epoch": 0.94, "grad_norm": 1.7192933307479807, "learning_rate": 8.977079257634635e-08, "loss": 0.6161, "step": 8837 }, { "epoch": 0.94, "grad_norm": 1.7703323170252154, "learning_rate": 8.943837987707038e-08, "loss": 0.6234, "step": 8838 }, { "epoch": 0.94, "grad_norm": 1.0986789887367463, "learning_rate": 8.910657821200209e-08, "loss": 0.5025, "step": 8839 }, { "epoch": 0.94, "grad_norm": 2.4488789234950348, "learning_rate": 8.877538762242844e-08, "loss": 0.6099, "step": 8840 }, { "epoch": 0.94, "grad_norm": 1.943505692141168, "learning_rate": 8.844480814956147e-08, "loss": 0.6219, "step": 8841 }, { "epoch": 0.94, "grad_norm": 1.7252731796834033, "learning_rate": 8.811483983453495e-08, "loss": 0.6809, "step": 8842 }, { "epoch": 0.94, "grad_norm": 1.8944011914115673, "learning_rate": 8.778548271840826e-08, "loss": 0.57, "step": 8843 }, { "epoch": 0.94, "grad_norm": 1.7396194334772515, "learning_rate": 8.745673684216416e-08, "loss": 0.5624, "step": 8844 }, { "epoch": 0.94, "grad_norm": 1.067804053754608, "learning_rate": 8.712860224670938e-08, "loss": 0.4883, "step": 8845 }, { "epoch": 0.94, "grad_norm": 2.133293579654441, "learning_rate": 8.680107897287404e-08, "loss": 0.5624, "step": 8846 }, { "epoch": 0.94, "grad_norm": 1.7736282152586749, "learning_rate": 8.647416706141331e-08, "loss": 0.6194, "step": 8847 }, { "epoch": 0.94, "grad_norm": 1.6373761261841844, "learning_rate": 8.614786655300522e-08, "loss": 0.5451, "step": 8848 }, { "epoch": 0.94, "grad_norm": 1.6774336096338578, "learning_rate": 8.582217748825284e-08, "loss": 0.5093, "step": 8849 }, { "epoch": 0.94, "grad_norm": 2.0907475268527276, "learning_rate": 8.549709990768263e-08, "loss": 0.6307, "step": 8850 }, { "epoch": 0.94, "grad_norm": 1.0924030483404295, "learning_rate": 8.517263385174446e-08, "loss": 0.491, "step": 8851 }, { "epoch": 0.94, "grad_norm": 1.747695317680053, "learning_rate": 8.484877936081215e-08, "loss": 0.4881, "step": 8852 }, { "epoch": 0.94, "grad_norm": 1.7043809451469416, "learning_rate": 8.452553647518458e-08, "loss": 0.4994, "step": 8853 }, { "epoch": 0.94, "grad_norm": 1.9531329543048093, "learning_rate": 8.420290523508289e-08, "loss": 0.6124, "step": 8854 }, { "epoch": 0.94, "grad_norm": 1.9210029665175448, "learning_rate": 8.388088568065278e-08, "loss": 0.5937, "step": 8855 }, { "epoch": 0.94, "grad_norm": 1.7565627902245338, "learning_rate": 8.35594778519655e-08, "loss": 0.5033, "step": 8856 }, { "epoch": 0.94, "grad_norm": 1.7401281260257426, "learning_rate": 8.323868178901295e-08, "loss": 0.5484, "step": 8857 }, { "epoch": 0.94, "grad_norm": 1.788680521367777, "learning_rate": 8.291849753171377e-08, "loss": 0.564, "step": 8858 }, { "epoch": 0.94, "grad_norm": 2.3821476447113197, "learning_rate": 8.259892511990886e-08, "loss": 0.5474, "step": 8859 }, { "epoch": 0.95, "grad_norm": 1.8190306894514028, "learning_rate": 8.227996459336362e-08, "loss": 0.5216, "step": 8860 }, { "epoch": 0.95, "grad_norm": 1.8734070318555973, "learning_rate": 8.196161599176688e-08, "loss": 0.5727, "step": 8861 }, { "epoch": 0.95, "grad_norm": 1.7716877066128023, "learning_rate": 8.164387935473194e-08, "loss": 0.5864, "step": 8862 }, { "epoch": 0.95, "grad_norm": 1.7519101793209333, "learning_rate": 8.132675472179552e-08, "loss": 0.5186, "step": 8863 }, { "epoch": 0.95, "grad_norm": 1.8720413792556285, "learning_rate": 8.101024213241826e-08, "loss": 0.5834, "step": 8864 }, { "epoch": 0.95, "grad_norm": 1.6007558214995765, "learning_rate": 8.069434162598422e-08, "loss": 0.5246, "step": 8865 }, { "epoch": 0.95, "grad_norm": 1.7233958827214628, "learning_rate": 8.037905324180307e-08, "loss": 0.4949, "step": 8866 }, { "epoch": 0.95, "grad_norm": 1.7971618568462215, "learning_rate": 8.006437701910563e-08, "loss": 0.5834, "step": 8867 }, { "epoch": 0.95, "grad_norm": 1.8370308478080117, "learning_rate": 7.975031299704839e-08, "loss": 0.5481, "step": 8868 }, { "epoch": 0.95, "grad_norm": 1.120578850320735, "learning_rate": 7.943686121471116e-08, "loss": 0.5105, "step": 8869 }, { "epoch": 0.95, "grad_norm": 1.631066000526976, "learning_rate": 7.912402171109723e-08, "loss": 0.5481, "step": 8870 }, { "epoch": 0.95, "grad_norm": 1.0994388264609865, "learning_rate": 7.881179452513433e-08, "loss": 0.474, "step": 8871 }, { "epoch": 0.95, "grad_norm": 1.0913784732395815, "learning_rate": 7.850017969567414e-08, "loss": 0.4994, "step": 8872 }, { "epoch": 0.95, "grad_norm": 1.9106016655774638, "learning_rate": 7.818917726149123e-08, "loss": 0.5829, "step": 8873 }, { "epoch": 0.95, "grad_norm": 1.0793003934851655, "learning_rate": 7.787878726128461e-08, "loss": 0.4854, "step": 8874 }, { "epoch": 0.95, "grad_norm": 1.6875387844999636, "learning_rate": 7.756900973367676e-08, "loss": 0.5368, "step": 8875 }, { "epoch": 0.95, "grad_norm": 1.7082515374026679, "learning_rate": 7.725984471721404e-08, "loss": 0.5508, "step": 8876 }, { "epoch": 0.95, "grad_norm": 1.8401254701005774, "learning_rate": 7.695129225036679e-08, "loss": 0.5514, "step": 8877 }, { "epoch": 0.95, "grad_norm": 1.6480767791099973, "learning_rate": 7.664335237152931e-08, "loss": 0.5132, "step": 8878 }, { "epoch": 0.95, "grad_norm": 1.7681489155651517, "learning_rate": 7.633602511901927e-08, "loss": 0.5222, "step": 8879 }, { "epoch": 0.95, "grad_norm": 1.750773021339989, "learning_rate": 7.602931053107831e-08, "loss": 0.601, "step": 8880 }, { "epoch": 0.95, "grad_norm": 1.891594714325655, "learning_rate": 7.57232086458709e-08, "loss": 0.6614, "step": 8881 }, { "epoch": 0.95, "grad_norm": 1.6603393169481229, "learning_rate": 7.541771950148713e-08, "loss": 0.5353, "step": 8882 }, { "epoch": 0.95, "grad_norm": 2.005413201344653, "learning_rate": 7.511284313593936e-08, "loss": 0.5009, "step": 8883 }, { "epoch": 0.95, "grad_norm": 1.9528192105781148, "learning_rate": 7.480857958716447e-08, "loss": 0.6384, "step": 8884 }, { "epoch": 0.95, "grad_norm": 1.8691160994730136, "learning_rate": 7.450492889302219e-08, "loss": 0.5834, "step": 8885 }, { "epoch": 0.95, "grad_norm": 1.8535192975237738, "learning_rate": 7.420189109129727e-08, "loss": 0.5476, "step": 8886 }, { "epoch": 0.95, "grad_norm": 1.8313937702271126, "learning_rate": 7.389946621969679e-08, "loss": 0.611, "step": 8887 }, { "epoch": 0.95, "grad_norm": 1.7830388365015166, "learning_rate": 7.359765431585287e-08, "loss": 0.6005, "step": 8888 }, { "epoch": 0.95, "grad_norm": 1.9066905260492208, "learning_rate": 7.329645541732045e-08, "loss": 0.6316, "step": 8889 }, { "epoch": 0.95, "grad_norm": 1.8469420661703273, "learning_rate": 7.299586956157901e-08, "loss": 0.605, "step": 8890 }, { "epoch": 0.95, "grad_norm": 1.7754899655209426, "learning_rate": 7.269589678603028e-08, "loss": 0.6175, "step": 8891 }, { "epoch": 0.95, "grad_norm": 1.6667277294263165, "learning_rate": 7.239653712800166e-08, "loss": 0.5169, "step": 8892 }, { "epoch": 0.95, "grad_norm": 1.7529414200154068, "learning_rate": 7.209779062474276e-08, "loss": 0.4877, "step": 8893 }, { "epoch": 0.95, "grad_norm": 1.7866043896545278, "learning_rate": 7.179965731342775e-08, "loss": 0.6168, "step": 8894 }, { "epoch": 0.95, "grad_norm": 1.8268159221509286, "learning_rate": 7.150213723115362e-08, "loss": 0.5794, "step": 8895 }, { "epoch": 0.95, "grad_norm": 1.8554000941825985, "learning_rate": 7.120523041494132e-08, "loss": 0.5005, "step": 8896 }, { "epoch": 0.95, "grad_norm": 1.7076885786607572, "learning_rate": 7.090893690173683e-08, "loss": 0.6133, "step": 8897 }, { "epoch": 0.95, "grad_norm": 1.870399489761148, "learning_rate": 7.061325672840846e-08, "loss": 0.6393, "step": 8898 }, { "epoch": 0.95, "grad_norm": 1.7492263613547894, "learning_rate": 7.031818993174788e-08, "loss": 0.5429, "step": 8899 }, { "epoch": 0.95, "grad_norm": 1.7229772380781256, "learning_rate": 7.002373654847128e-08, "loss": 0.5884, "step": 8900 }, { "epoch": 0.95, "grad_norm": 1.0447449117347571, "learning_rate": 6.972989661521878e-08, "loss": 0.4929, "step": 8901 }, { "epoch": 0.95, "grad_norm": 1.8044962567763665, "learning_rate": 6.943667016855282e-08, "loss": 0.5281, "step": 8902 }, { "epoch": 0.95, "grad_norm": 1.806793414377022, "learning_rate": 6.914405724496088e-08, "loss": 0.623, "step": 8903 }, { "epoch": 0.95, "grad_norm": 1.5816281773366068, "learning_rate": 6.885205788085325e-08, "loss": 0.4952, "step": 8904 }, { "epoch": 0.95, "grad_norm": 1.082155519157694, "learning_rate": 6.856067211256479e-08, "loss": 0.4592, "step": 8905 }, { "epoch": 0.95, "grad_norm": 1.717452679452474, "learning_rate": 6.826989997635258e-08, "loss": 0.6089, "step": 8906 }, { "epoch": 0.95, "grad_norm": 1.6857897708072496, "learning_rate": 6.797974150839881e-08, "loss": 0.4892, "step": 8907 }, { "epoch": 0.95, "grad_norm": 1.8156976699064653, "learning_rate": 6.769019674480847e-08, "loss": 0.5954, "step": 8908 }, { "epoch": 0.95, "grad_norm": 1.6850665032946994, "learning_rate": 6.74012657216111e-08, "loss": 0.5183, "step": 8909 }, { "epoch": 0.95, "grad_norm": 1.735499920212808, "learning_rate": 6.711294847475736e-08, "loss": 0.7078, "step": 8910 }, { "epoch": 0.95, "grad_norm": 1.7378517706652887, "learning_rate": 6.68252450401241e-08, "loss": 0.5209, "step": 8911 }, { "epoch": 0.95, "grad_norm": 1.9451179269283456, "learning_rate": 6.653815545351216e-08, "loss": 0.5994, "step": 8912 }, { "epoch": 0.95, "grad_norm": 1.775170597742767, "learning_rate": 6.62516797506435e-08, "loss": 0.6012, "step": 8913 }, { "epoch": 0.95, "grad_norm": 1.8731377514913543, "learning_rate": 6.596581796716572e-08, "loss": 0.555, "step": 8914 }, { "epoch": 0.95, "grad_norm": 1.6714052989210326, "learning_rate": 6.568057013864925e-08, "loss": 0.5807, "step": 8915 }, { "epoch": 0.95, "grad_norm": 1.84031888137426, "learning_rate": 6.539593630058849e-08, "loss": 0.6189, "step": 8916 }, { "epoch": 0.95, "grad_norm": 1.8384414001983924, "learning_rate": 6.511191648840065e-08, "loss": 0.5658, "step": 8917 }, { "epoch": 0.95, "grad_norm": 1.7127073633710572, "learning_rate": 6.482851073742691e-08, "loss": 0.5826, "step": 8918 }, { "epoch": 0.95, "grad_norm": 1.8454942040777689, "learning_rate": 6.454571908293294e-08, "loss": 0.5713, "step": 8919 }, { "epoch": 0.95, "grad_norm": 1.0401811484883943, "learning_rate": 6.426354156010672e-08, "loss": 0.4684, "step": 8920 }, { "epoch": 0.95, "grad_norm": 1.8101794639686417, "learning_rate": 6.398197820406072e-08, "loss": 0.5683, "step": 8921 }, { "epoch": 0.95, "grad_norm": 1.6001523355959462, "learning_rate": 6.370102904983022e-08, "loss": 0.5466, "step": 8922 }, { "epoch": 0.95, "grad_norm": 1.0681231600584131, "learning_rate": 6.342069413237506e-08, "loss": 0.4794, "step": 8923 }, { "epoch": 0.95, "grad_norm": 1.7224508500112743, "learning_rate": 6.314097348657733e-08, "loss": 0.5439, "step": 8924 }, { "epoch": 0.95, "grad_norm": 1.9271465691697744, "learning_rate": 6.286186714724419e-08, "loss": 0.5964, "step": 8925 }, { "epoch": 0.95, "grad_norm": 1.8926881050132514, "learning_rate": 6.25833751491045e-08, "loss": 0.5923, "step": 8926 }, { "epoch": 0.95, "grad_norm": 1.757285249317737, "learning_rate": 6.230549752681281e-08, "loss": 0.6119, "step": 8927 }, { "epoch": 0.95, "grad_norm": 1.632981661934781, "learning_rate": 6.202823431494476e-08, "loss": 0.5211, "step": 8928 }, { "epoch": 0.95, "grad_norm": 1.598426334000295, "learning_rate": 6.175158554800276e-08, "loss": 0.5838, "step": 8929 }, { "epoch": 0.95, "grad_norm": 1.7347400663555024, "learning_rate": 6.147555126041038e-08, "loss": 0.57, "step": 8930 }, { "epoch": 0.95, "grad_norm": 1.6370084092994028, "learning_rate": 6.12001314865146e-08, "loss": 0.567, "step": 8931 }, { "epoch": 0.95, "grad_norm": 1.836751780344726, "learning_rate": 6.092532626058745e-08, "loss": 0.588, "step": 8932 }, { "epoch": 0.95, "grad_norm": 1.9402242682988056, "learning_rate": 6.065113561682323e-08, "loss": 0.6111, "step": 8933 }, { "epoch": 0.95, "grad_norm": 1.7364989689312063, "learning_rate": 6.037755958934022e-08, "loss": 0.4679, "step": 8934 }, { "epoch": 0.95, "grad_norm": 1.8439056107032448, "learning_rate": 6.010459821218006e-08, "loss": 0.5626, "step": 8935 }, { "epoch": 0.95, "grad_norm": 1.6491639737502817, "learning_rate": 5.983225151930838e-08, "loss": 0.5506, "step": 8936 }, { "epoch": 0.95, "grad_norm": 1.7696801778941527, "learning_rate": 5.9560519544614725e-08, "loss": 0.5833, "step": 8937 }, { "epoch": 0.95, "grad_norm": 1.9131524717361619, "learning_rate": 5.928940232190983e-08, "loss": 0.6029, "step": 8938 }, { "epoch": 0.95, "grad_norm": 1.781341293810892, "learning_rate": 5.901889988493003e-08, "loss": 0.6037, "step": 8939 }, { "epoch": 0.95, "grad_norm": 1.7005741422912077, "learning_rate": 5.8749012267335095e-08, "loss": 0.5807, "step": 8940 }, { "epoch": 0.95, "grad_norm": 1.5851587280726087, "learning_rate": 5.8479739502708136e-08, "loss": 0.5015, "step": 8941 }, { "epoch": 0.95, "grad_norm": 1.680145720898511, "learning_rate": 5.8211081624554576e-08, "loss": 0.6558, "step": 8942 }, { "epoch": 0.95, "grad_norm": 1.7868260252937844, "learning_rate": 5.7943038666304906e-08, "loss": 0.5456, "step": 8943 }, { "epoch": 0.95, "grad_norm": 1.926453196143927, "learning_rate": 5.767561066131189e-08, "loss": 0.6518, "step": 8944 }, { "epoch": 0.95, "grad_norm": 1.6914415347532594, "learning_rate": 5.740879764285279e-08, "loss": 0.6055, "step": 8945 }, { "epoch": 0.95, "grad_norm": 1.0377142021097743, "learning_rate": 5.714259964412772e-08, "loss": 0.4738, "step": 8946 }, { "epoch": 0.95, "grad_norm": 1.8806919119891279, "learning_rate": 5.687701669826018e-08, "loss": 0.6557, "step": 8947 }, { "epoch": 0.95, "grad_norm": 2.0458144151321935, "learning_rate": 5.661204883829763e-08, "loss": 0.6236, "step": 8948 }, { "epoch": 0.95, "grad_norm": 1.7778430076642635, "learning_rate": 5.634769609721091e-08, "loss": 0.6024, "step": 8949 }, { "epoch": 0.95, "grad_norm": 1.0871105633752818, "learning_rate": 5.608395850789372e-08, "loss": 0.4904, "step": 8950 }, { "epoch": 0.95, "grad_norm": 1.76623929368699, "learning_rate": 5.582083610316369e-08, "loss": 0.593, "step": 8951 }, { "epoch": 0.95, "grad_norm": 1.7246338050539847, "learning_rate": 5.5558328915762404e-08, "loss": 0.5072, "step": 8952 }, { "epoch": 0.95, "grad_norm": 1.6772741609703181, "learning_rate": 5.5296436978353184e-08, "loss": 0.533, "step": 8953 }, { "epoch": 0.96, "grad_norm": 1.8751597149395192, "learning_rate": 5.503516032352496e-08, "loss": 0.5603, "step": 8954 }, { "epoch": 0.96, "grad_norm": 1.093512970262508, "learning_rate": 5.477449898378895e-08, "loss": 0.4799, "step": 8955 }, { "epoch": 0.96, "grad_norm": 1.7480560193988148, "learning_rate": 5.4514452991579205e-08, "loss": 0.5735, "step": 8956 }, { "epoch": 0.96, "grad_norm": 1.8558547730374788, "learning_rate": 5.425502237925428e-08, "loss": 0.5637, "step": 8957 }, { "epoch": 0.96, "grad_norm": 1.7948772965889643, "learning_rate": 5.399620717909615e-08, "loss": 0.5882, "step": 8958 }, { "epoch": 0.96, "grad_norm": 2.039590723778881, "learning_rate": 5.373800742331015e-08, "loss": 0.5577, "step": 8959 }, { "epoch": 0.96, "grad_norm": 1.7715108156680248, "learning_rate": 5.348042314402391e-08, "loss": 0.6968, "step": 8960 }, { "epoch": 0.96, "grad_norm": 1.8655240906968857, "learning_rate": 5.3223454373289576e-08, "loss": 0.6708, "step": 8961 }, { "epoch": 0.96, "grad_norm": 1.822431851832914, "learning_rate": 5.2967101143082674e-08, "loss": 0.5602, "step": 8962 }, { "epoch": 0.96, "grad_norm": 2.006549436807063, "learning_rate": 5.271136348530159e-08, "loss": 0.6594, "step": 8963 }, { "epoch": 0.96, "grad_norm": 1.8762202365731735, "learning_rate": 5.245624143176919e-08, "loss": 0.7663, "step": 8964 }, { "epoch": 0.96, "grad_norm": 1.9211878814575225, "learning_rate": 5.220173501423065e-08, "loss": 0.6185, "step": 8965 }, { "epoch": 0.96, "grad_norm": 1.8354374317357833, "learning_rate": 5.194784426435451e-08, "loss": 0.6475, "step": 8966 }, { "epoch": 0.96, "grad_norm": 1.7255457031357677, "learning_rate": 5.169456921373328e-08, "loss": 0.6012, "step": 8967 }, { "epoch": 0.96, "grad_norm": 1.7432716877805579, "learning_rate": 5.144190989388231e-08, "loss": 0.5898, "step": 8968 }, { "epoch": 0.96, "grad_norm": 1.85051444952502, "learning_rate": 5.1189866336241455e-08, "loss": 0.5852, "step": 8969 }, { "epoch": 0.96, "grad_norm": 1.736059046091686, "learning_rate": 5.093843857217229e-08, "loss": 0.5164, "step": 8970 }, { "epoch": 0.96, "grad_norm": 1.675803606580672, "learning_rate": 5.0687626632961454e-08, "loss": 0.6216, "step": 8971 }, { "epoch": 0.96, "grad_norm": 1.7781544526340225, "learning_rate": 5.043743054981731e-08, "loss": 0.5567, "step": 8972 }, { "epoch": 0.96, "grad_norm": 2.08120884613917, "learning_rate": 5.01878503538733e-08, "loss": 0.642, "step": 8973 }, { "epoch": 0.96, "grad_norm": 1.8282496725654165, "learning_rate": 4.9938886076184577e-08, "loss": 0.5409, "step": 8974 }, { "epoch": 0.96, "grad_norm": 1.8472658585042268, "learning_rate": 4.969053774773081e-08, "loss": 0.5301, "step": 8975 }, { "epoch": 0.96, "grad_norm": 1.6375575726006601, "learning_rate": 4.9442805399414484e-08, "loss": 0.5522, "step": 8976 }, { "epoch": 0.96, "grad_norm": 1.1052350814538174, "learning_rate": 4.919568906206207e-08, "loss": 0.4666, "step": 8977 }, { "epoch": 0.96, "grad_norm": 1.8188842827294331, "learning_rate": 4.8949188766422295e-08, "loss": 0.5743, "step": 8978 }, { "epoch": 0.96, "grad_norm": 1.635283538763649, "learning_rate": 4.870330454316785e-08, "loss": 0.5236, "step": 8979 }, { "epoch": 0.96, "grad_norm": 1.7831628441609912, "learning_rate": 4.8458036422895375e-08, "loss": 0.6153, "step": 8980 }, { "epoch": 0.96, "grad_norm": 1.8239163864928931, "learning_rate": 4.821338443612378e-08, "loss": 0.5049, "step": 8981 }, { "epoch": 0.96, "grad_norm": 1.6062580113663638, "learning_rate": 4.796934861329594e-08, "loss": 0.5885, "step": 8982 }, { "epoch": 0.96, "grad_norm": 1.1026524339995456, "learning_rate": 4.772592898477757e-08, "loss": 0.4745, "step": 8983 }, { "epoch": 0.96, "grad_norm": 1.717476120307421, "learning_rate": 4.748312558085832e-08, "loss": 0.5501, "step": 8984 }, { "epoch": 0.96, "grad_norm": 1.5963343313451503, "learning_rate": 4.7240938431750705e-08, "loss": 0.5272, "step": 8985 }, { "epoch": 0.96, "grad_norm": 1.9506824793671185, "learning_rate": 4.69993675675906e-08, "loss": 0.6627, "step": 8986 }, { "epoch": 0.96, "grad_norm": 1.793778758770802, "learning_rate": 4.6758413018437866e-08, "loss": 0.5862, "step": 8987 }, { "epoch": 0.96, "grad_norm": 1.659767641438996, "learning_rate": 4.6518074814274614e-08, "loss": 0.6026, "step": 8988 }, { "epoch": 0.96, "grad_norm": 1.7364790268494121, "learning_rate": 4.627835298500694e-08, "loss": 0.6087, "step": 8989 }, { "epoch": 0.96, "grad_norm": 1.8177528259562168, "learning_rate": 4.603924756046374e-08, "loss": 0.5957, "step": 8990 }, { "epoch": 0.96, "grad_norm": 1.6412701390505169, "learning_rate": 4.58007585703979e-08, "loss": 0.5033, "step": 8991 }, { "epoch": 0.96, "grad_norm": 1.7365465109590061, "learning_rate": 4.556288604448511e-08, "loss": 0.5544, "step": 8992 }, { "epoch": 0.96, "grad_norm": 1.5564508049276113, "learning_rate": 4.532563001232448e-08, "loss": 0.4998, "step": 8993 }, { "epoch": 0.96, "grad_norm": 1.7872460397395522, "learning_rate": 4.508899050343851e-08, "loss": 0.6035, "step": 8994 }, { "epoch": 0.96, "grad_norm": 1.710707612381357, "learning_rate": 4.485296754727309e-08, "loss": 0.5675, "step": 8995 }, { "epoch": 0.96, "grad_norm": 1.7855449004960702, "learning_rate": 4.4617561173196935e-08, "loss": 0.5678, "step": 8996 }, { "epoch": 0.96, "grad_norm": 1.7543835827642438, "learning_rate": 4.4382771410501625e-08, "loss": 0.5514, "step": 8997 }, { "epoch": 0.96, "grad_norm": 1.7801063419443326, "learning_rate": 4.4148598288403785e-08, "loss": 0.6486, "step": 8998 }, { "epoch": 0.96, "grad_norm": 1.7924679469110882, "learning_rate": 4.3915041836041785e-08, "loss": 0.5626, "step": 8999 }, { "epoch": 0.96, "grad_norm": 1.6858842472258015, "learning_rate": 4.3682102082477365e-08, "loss": 0.59, "step": 9000 }, { "epoch": 0.96, "grad_norm": 1.7423565003143229, "learning_rate": 4.344977905669623e-08, "loss": 0.6347, "step": 9001 }, { "epoch": 0.96, "grad_norm": 1.7690215654896262, "learning_rate": 4.3218072787606366e-08, "loss": 0.5759, "step": 9002 }, { "epoch": 0.96, "grad_norm": 1.7354511543665516, "learning_rate": 4.298698330404028e-08, "loss": 0.5251, "step": 9003 }, { "epoch": 0.96, "grad_norm": 1.831994223227113, "learning_rate": 4.275651063475328e-08, "loss": 0.6027, "step": 9004 }, { "epoch": 0.96, "grad_norm": 1.7241455020044636, "learning_rate": 4.252665480842244e-08, "loss": 0.6039, "step": 9005 }, { "epoch": 0.96, "grad_norm": 1.7914069227168996, "learning_rate": 4.229741585365043e-08, "loss": 0.5927, "step": 9006 }, { "epoch": 0.96, "grad_norm": 1.8242969511631384, "learning_rate": 4.206879379896223e-08, "loss": 0.5548, "step": 9007 }, { "epoch": 0.96, "grad_norm": 1.7545113906321428, "learning_rate": 4.18407886728045e-08, "loss": 0.5716, "step": 9008 }, { "epoch": 0.96, "grad_norm": 1.6024433828839644, "learning_rate": 4.1613400503550114e-08, "loss": 0.5549, "step": 9009 }, { "epoch": 0.96, "grad_norm": 1.754718259201899, "learning_rate": 4.138662931949255e-08, "loss": 0.5822, "step": 9010 }, { "epoch": 0.96, "grad_norm": 1.9761652143767265, "learning_rate": 4.1160475148849796e-08, "loss": 0.6677, "step": 9011 }, { "epoch": 0.96, "grad_norm": 1.0614354194570859, "learning_rate": 4.0934938019763227e-08, "loss": 0.4786, "step": 9012 }, { "epoch": 0.96, "grad_norm": 2.0198775225961754, "learning_rate": 4.07100179602965e-08, "loss": 0.5264, "step": 9013 }, { "epoch": 0.96, "grad_norm": 1.076989019587464, "learning_rate": 4.048571499843723e-08, "loss": 0.479, "step": 9014 }, { "epoch": 0.96, "grad_norm": 1.7852457902833345, "learning_rate": 4.0262029162095874e-08, "loss": 0.5277, "step": 9015 }, { "epoch": 0.96, "grad_norm": 1.869575317505751, "learning_rate": 4.003896047910683e-08, "loss": 0.6636, "step": 9016 }, { "epoch": 0.96, "grad_norm": 1.877214413817109, "learning_rate": 3.981650897722622e-08, "loss": 0.5725, "step": 9017 }, { "epoch": 0.96, "grad_norm": 1.7851799950247953, "learning_rate": 3.959467468413525e-08, "loss": 0.6608, "step": 9018 }, { "epoch": 0.96, "grad_norm": 1.9218663142704626, "learning_rate": 3.9373457627436276e-08, "loss": 0.6592, "step": 9019 }, { "epoch": 0.96, "grad_norm": 1.482736015050427, "learning_rate": 3.915285783465672e-08, "loss": 0.5931, "step": 9020 }, { "epoch": 0.96, "grad_norm": 1.8683004078792878, "learning_rate": 3.89328753332463e-08, "loss": 0.5834, "step": 9021 }, { "epoch": 0.96, "grad_norm": 1.822425266567074, "learning_rate": 3.871351015057756e-08, "loss": 0.6558, "step": 9022 }, { "epoch": 0.96, "grad_norm": 1.6796483092011762, "learning_rate": 3.849476231394755e-08, "loss": 0.5263, "step": 9023 }, { "epoch": 0.96, "grad_norm": 1.7845376385276088, "learning_rate": 3.8276631850575064e-08, "loss": 0.557, "step": 9024 }, { "epoch": 0.96, "grad_norm": 2.1123485871750205, "learning_rate": 3.8059118787602265e-08, "loss": 0.6685, "step": 9025 }, { "epoch": 0.96, "grad_norm": 1.7927654911014719, "learning_rate": 3.784222315209529e-08, "loss": 0.5689, "step": 9026 }, { "epoch": 0.96, "grad_norm": 1.03358996111053, "learning_rate": 3.762594497104311e-08, "loss": 0.4738, "step": 9027 }, { "epoch": 0.96, "grad_norm": 1.8192439547833608, "learning_rate": 3.741028427135807e-08, "loss": 0.6235, "step": 9028 }, { "epoch": 0.96, "grad_norm": 1.987349780957812, "learning_rate": 3.719524107987482e-08, "loss": 0.6322, "step": 9029 }, { "epoch": 0.96, "grad_norm": 1.59156600194315, "learning_rate": 3.698081542335252e-08, "loss": 0.5183, "step": 9030 }, { "epoch": 0.96, "grad_norm": 1.7653941068620653, "learning_rate": 3.6767007328471495e-08, "loss": 0.59, "step": 9031 }, { "epoch": 0.96, "grad_norm": 1.9538184267757905, "learning_rate": 3.655381682183767e-08, "loss": 0.5767, "step": 9032 }, { "epoch": 0.96, "grad_norm": 2.3240099006149655, "learning_rate": 3.634124392997873e-08, "loss": 0.5612, "step": 9033 }, { "epoch": 0.96, "grad_norm": 1.7603705016913282, "learning_rate": 3.612928867934462e-08, "loss": 0.5436, "step": 9034 }, { "epoch": 0.96, "grad_norm": 1.0666100705585277, "learning_rate": 3.591795109631091e-08, "loss": 0.4752, "step": 9035 }, { "epoch": 0.96, "grad_norm": 1.7945112873896534, "learning_rate": 3.5707231207173784e-08, "loss": 0.5725, "step": 9036 }, { "epoch": 0.96, "grad_norm": 1.076825195055526, "learning_rate": 3.5497129038153946e-08, "loss": 0.4998, "step": 9037 }, { "epoch": 0.96, "grad_norm": 1.8972375290111387, "learning_rate": 3.528764461539602e-08, "loss": 0.5733, "step": 9038 }, { "epoch": 0.96, "grad_norm": 1.1110257565872388, "learning_rate": 3.507877796496528e-08, "loss": 0.4837, "step": 9039 }, { "epoch": 0.96, "grad_norm": 1.7585018252409934, "learning_rate": 3.4870529112852046e-08, "loss": 0.5292, "step": 9040 }, { "epoch": 0.96, "grad_norm": 1.0686026357541567, "learning_rate": 3.4662898084968924e-08, "loss": 0.4686, "step": 9041 }, { "epoch": 0.96, "grad_norm": 1.8464648671017574, "learning_rate": 3.4455884907153014e-08, "loss": 0.5503, "step": 9042 }, { "epoch": 0.96, "grad_norm": 1.6504738326479573, "learning_rate": 3.4249489605162615e-08, "loss": 0.5625, "step": 9043 }, { "epoch": 0.96, "grad_norm": 1.8820654203925196, "learning_rate": 3.40437122046805e-08, "loss": 0.6611, "step": 9044 }, { "epoch": 0.96, "grad_norm": 1.704672970217638, "learning_rate": 3.383855273131231e-08, "loss": 0.6405, "step": 9045 }, { "epoch": 0.96, "grad_norm": 1.8499807668968997, "learning_rate": 3.36340112105854e-08, "loss": 0.6269, "step": 9046 }, { "epoch": 0.97, "grad_norm": 2.3181099428908825, "learning_rate": 3.3430087667952745e-08, "loss": 0.6284, "step": 9047 }, { "epoch": 0.97, "grad_norm": 1.6333882532394721, "learning_rate": 3.3226782128788495e-08, "loss": 0.5426, "step": 9048 }, { "epoch": 0.97, "grad_norm": 1.809316954178753, "learning_rate": 3.302409461839073e-08, "loss": 0.5678, "step": 9049 }, { "epoch": 0.97, "grad_norm": 1.7857587980077647, "learning_rate": 3.2822025161979857e-08, "loss": 0.6083, "step": 9050 }, { "epoch": 0.97, "grad_norm": 1.7749119791019894, "learning_rate": 3.2620573784700737e-08, "loss": 0.5714, "step": 9051 }, { "epoch": 0.97, "grad_norm": 1.8945176230879675, "learning_rate": 3.241974051162e-08, "loss": 0.5678, "step": 9052 }, { "epoch": 0.97, "grad_norm": 1.0844564039967381, "learning_rate": 3.221952536772766e-08, "loss": 0.4881, "step": 9053 }, { "epoch": 0.97, "grad_norm": 1.866002077136909, "learning_rate": 3.20199283779371e-08, "loss": 0.5578, "step": 9054 }, { "epoch": 0.97, "grad_norm": 1.6938217262523554, "learning_rate": 3.182094956708515e-08, "loss": 0.5305, "step": 9055 }, { "epoch": 0.97, "grad_norm": 1.806789022591572, "learning_rate": 3.162258895993142e-08, "loss": 0.5343, "step": 9056 }, { "epoch": 0.97, "grad_norm": 1.6795073322638734, "learning_rate": 3.1424846581157295e-08, "loss": 0.5769, "step": 9057 }, { "epoch": 0.97, "grad_norm": 1.7242261774070204, "learning_rate": 3.1227722455369205e-08, "loss": 0.5086, "step": 9058 }, { "epoch": 0.97, "grad_norm": 1.1004605996940522, "learning_rate": 3.103121660709585e-08, "loss": 0.4707, "step": 9059 }, { "epoch": 0.97, "grad_norm": 1.7149380031993364, "learning_rate": 3.0835329060789345e-08, "loss": 0.5894, "step": 9060 }, { "epoch": 0.97, "grad_norm": 1.895922229718578, "learning_rate": 3.0640059840823525e-08, "loss": 0.6069, "step": 9061 }, { "epoch": 0.97, "grad_norm": 1.6580931395793115, "learning_rate": 3.0445408971496726e-08, "loss": 0.5917, "step": 9062 }, { "epoch": 0.97, "grad_norm": 1.8174062258720516, "learning_rate": 3.0251376477030134e-08, "loss": 0.5554, "step": 9063 }, { "epoch": 0.97, "grad_norm": 1.7471890100646774, "learning_rate": 3.005796238156722e-08, "loss": 0.4763, "step": 9064 }, { "epoch": 0.97, "grad_norm": 1.0630297355959115, "learning_rate": 2.986516670917539e-08, "loss": 0.4778, "step": 9065 }, { "epoch": 0.97, "grad_norm": 1.859871127383822, "learning_rate": 2.9672989483844893e-08, "loss": 0.6065, "step": 9066 }, { "epoch": 0.97, "grad_norm": 1.740601589256556, "learning_rate": 2.9481430729488282e-08, "loss": 0.4845, "step": 9067 }, { "epoch": 0.97, "grad_norm": 1.7631519007248366, "learning_rate": 2.9290490469942035e-08, "loss": 0.6566, "step": 9068 }, { "epoch": 0.97, "grad_norm": 1.8432813757449036, "learning_rate": 2.9100168728965484e-08, "loss": 0.5994, "step": 9069 }, { "epoch": 0.97, "grad_norm": 1.7596487411325268, "learning_rate": 2.8910465530240793e-08, "loss": 0.5409, "step": 9070 }, { "epoch": 0.97, "grad_norm": 1.6988514267109456, "learning_rate": 2.8721380897372974e-08, "loss": 0.5573, "step": 9071 }, { "epoch": 0.97, "grad_norm": 1.8413427245548613, "learning_rate": 2.8532914853890424e-08, "loss": 0.7352, "step": 9072 }, { "epoch": 0.97, "grad_norm": 1.8998673893536129, "learning_rate": 2.834506742324439e-08, "loss": 0.577, "step": 9073 }, { "epoch": 0.97, "grad_norm": 1.6336366571709302, "learning_rate": 2.8157838628809498e-08, "loss": 0.6026, "step": 9074 }, { "epoch": 0.97, "grad_norm": 1.8175700390771152, "learning_rate": 2.797122849388323e-08, "loss": 0.5528, "step": 9075 }, { "epoch": 0.97, "grad_norm": 1.783802425120659, "learning_rate": 2.7785237041685343e-08, "loss": 0.5988, "step": 9076 }, { "epoch": 0.97, "grad_norm": 1.715307825928524, "learning_rate": 2.7599864295360103e-08, "loss": 0.6099, "step": 9077 }, { "epoch": 0.97, "grad_norm": 1.9158418652280167, "learning_rate": 2.74151102779735e-08, "loss": 0.6767, "step": 9078 }, { "epoch": 0.97, "grad_norm": 1.7580546972453461, "learning_rate": 2.7230975012514928e-08, "loss": 0.5686, "step": 9079 }, { "epoch": 0.97, "grad_norm": 1.7978634931638142, "learning_rate": 2.704745852189661e-08, "loss": 0.5489, "step": 9080 }, { "epoch": 0.97, "grad_norm": 1.6328738851608955, "learning_rate": 2.686456082895472e-08, "loss": 0.547, "step": 9081 }, { "epoch": 0.97, "grad_norm": 1.7479135287743832, "learning_rate": 2.6682281956446622e-08, "loss": 0.5839, "step": 9082 }, { "epoch": 0.97, "grad_norm": 1.744037596807959, "learning_rate": 2.6500621927054716e-08, "loss": 0.5014, "step": 9083 }, { "epoch": 0.97, "grad_norm": 1.7041882180945616, "learning_rate": 2.6319580763383145e-08, "loss": 0.6049, "step": 9084 }, { "epoch": 0.97, "grad_norm": 1.711213291420486, "learning_rate": 2.613915848795945e-08, "loss": 0.596, "step": 9085 }, { "epoch": 0.97, "grad_norm": 1.7387627698829478, "learning_rate": 2.5959355123233442e-08, "loss": 0.6308, "step": 9086 }, { "epoch": 0.97, "grad_norm": 1.803341730939231, "learning_rate": 2.5780170691579453e-08, "loss": 0.5127, "step": 9087 }, { "epoch": 0.97, "grad_norm": 1.7306985746073074, "learning_rate": 2.560160521529298e-08, "loss": 0.5172, "step": 9088 }, { "epoch": 0.97, "grad_norm": 1.5944619507930766, "learning_rate": 2.5423658716594023e-08, "loss": 0.5382, "step": 9089 }, { "epoch": 0.97, "grad_norm": 1.8300843721315379, "learning_rate": 2.524633121762432e-08, "loss": 0.5821, "step": 9090 }, { "epoch": 0.97, "grad_norm": 1.8136024646608395, "learning_rate": 2.5069622740450107e-08, "loss": 0.6103, "step": 9091 }, { "epoch": 0.97, "grad_norm": 1.7297853800490586, "learning_rate": 2.489353330705879e-08, "loss": 0.5617, "step": 9092 }, { "epoch": 0.97, "grad_norm": 2.0018296454318, "learning_rate": 2.4718062939362854e-08, "loss": 0.6019, "step": 9093 }, { "epoch": 0.97, "grad_norm": 1.7860921470620619, "learning_rate": 2.454321165919482e-08, "loss": 0.548, "step": 9094 }, { "epoch": 0.97, "grad_norm": 1.8583259632930222, "learning_rate": 2.4368979488313403e-08, "loss": 0.6891, "step": 9095 }, { "epoch": 0.97, "grad_norm": 1.7953240742104744, "learning_rate": 2.4195366448398483e-08, "loss": 0.5567, "step": 9096 }, { "epoch": 0.97, "grad_norm": 1.7598222637958911, "learning_rate": 2.4022372561052222e-08, "loss": 0.6218, "step": 9097 }, { "epoch": 0.97, "grad_norm": 1.755953542685822, "learning_rate": 2.3849997847801842e-08, "loss": 0.57, "step": 9098 }, { "epoch": 0.97, "grad_norm": 2.1325653265590323, "learning_rate": 2.367824233009519e-08, "loss": 0.6347, "step": 9099 }, { "epoch": 0.97, "grad_norm": 1.9403316128552988, "learning_rate": 2.3507106029305726e-08, "loss": 0.6469, "step": 9100 }, { "epoch": 0.97, "grad_norm": 1.7902765587605816, "learning_rate": 2.3336588966727524e-08, "loss": 0.5502, "step": 9101 }, { "epoch": 0.97, "grad_norm": 1.852733670990895, "learning_rate": 2.3166691163578615e-08, "loss": 0.6103, "step": 9102 }, { "epoch": 0.97, "grad_norm": 2.0828687413545093, "learning_rate": 2.2997412640999862e-08, "loss": 0.6355, "step": 9103 }, { "epoch": 0.97, "grad_norm": 1.0197201066728432, "learning_rate": 2.2828753420054418e-08, "loss": 0.463, "step": 9104 }, { "epoch": 0.97, "grad_norm": 1.8631145211612956, "learning_rate": 2.2660713521730494e-08, "loss": 0.6549, "step": 9105 }, { "epoch": 0.97, "grad_norm": 1.1163176539477944, "learning_rate": 2.2493292966936366e-08, "loss": 0.5049, "step": 9106 }, { "epoch": 0.97, "grad_norm": 1.8582060563132472, "learning_rate": 2.2326491776505366e-08, "loss": 0.5389, "step": 9107 }, { "epoch": 0.97, "grad_norm": 1.9169938901759094, "learning_rate": 2.2160309971192562e-08, "loss": 0.5392, "step": 9108 }, { "epoch": 0.97, "grad_norm": 1.7749748941742578, "learning_rate": 2.1994747571675855e-08, "loss": 0.6363, "step": 9109 }, { "epoch": 0.97, "grad_norm": 1.692811036114262, "learning_rate": 2.1829804598558212e-08, "loss": 0.6571, "step": 9110 }, { "epoch": 0.97, "grad_norm": 1.7153425244589429, "learning_rate": 2.1665481072363216e-08, "loss": 0.5536, "step": 9111 }, { "epoch": 0.97, "grad_norm": 1.077227756236168, "learning_rate": 2.1501777013537285e-08, "loss": 0.4764, "step": 9112 }, { "epoch": 0.97, "grad_norm": 1.120630546698538, "learning_rate": 2.1338692442451347e-08, "loss": 0.517, "step": 9113 }, { "epoch": 0.97, "grad_norm": 1.758281522301169, "learning_rate": 2.1176227379398617e-08, "loss": 0.6101, "step": 9114 }, { "epoch": 0.97, "grad_norm": 1.8357675061869188, "learning_rate": 2.101438184459459e-08, "loss": 0.6103, "step": 9115 }, { "epoch": 0.97, "grad_norm": 1.0878993348211807, "learning_rate": 2.08531558581776e-08, "loss": 0.4869, "step": 9116 }, { "epoch": 0.97, "grad_norm": 1.692121803647363, "learning_rate": 2.069254944021104e-08, "loss": 0.6095, "step": 9117 }, { "epoch": 0.97, "grad_norm": 1.6254158411548496, "learning_rate": 2.053256261067782e-08, "loss": 0.4553, "step": 9118 }, { "epoch": 0.97, "grad_norm": 1.7951499845183128, "learning_rate": 2.037319538948701e-08, "loss": 0.6757, "step": 9119 }, { "epoch": 0.97, "grad_norm": 1.8461027237434195, "learning_rate": 2.0214447796467752e-08, "loss": 0.5515, "step": 9120 }, { "epoch": 0.97, "grad_norm": 1.6786886867461777, "learning_rate": 2.005631985137424e-08, "loss": 0.5182, "step": 9121 }, { "epoch": 0.97, "grad_norm": 2.8731551575863956, "learning_rate": 1.9898811573882958e-08, "loss": 0.535, "step": 9122 }, { "epoch": 0.97, "grad_norm": 2.1345950628602735, "learning_rate": 1.9741922983592675e-08, "loss": 0.6647, "step": 9123 }, { "epoch": 0.97, "grad_norm": 1.8862555605177307, "learning_rate": 1.958565410002555e-08, "loss": 0.5787, "step": 9124 }, { "epoch": 0.97, "grad_norm": 1.1045802332749943, "learning_rate": 1.9430004942626036e-08, "loss": 0.4906, "step": 9125 }, { "epoch": 0.97, "grad_norm": 1.670415425714682, "learning_rate": 1.9274975530763075e-08, "loss": 0.5553, "step": 9126 }, { "epoch": 0.97, "grad_norm": 1.9234502906835746, "learning_rate": 1.912056588372624e-08, "loss": 0.6594, "step": 9127 }, { "epoch": 0.97, "grad_norm": 1.8288737633240422, "learning_rate": 1.8966776020729605e-08, "loss": 0.536, "step": 9128 }, { "epoch": 0.97, "grad_norm": 1.7874015563208108, "learning_rate": 1.8813605960910087e-08, "loss": 0.5694, "step": 9129 }, { "epoch": 0.97, "grad_norm": 1.0733426295786133, "learning_rate": 1.8661055723326328e-08, "loss": 0.478, "step": 9130 }, { "epoch": 0.97, "grad_norm": 2.0097221658544693, "learning_rate": 1.850912532696092e-08, "loss": 0.6073, "step": 9131 }, { "epoch": 0.97, "grad_norm": 1.8391044969978667, "learning_rate": 1.8357814790719297e-08, "loss": 0.5716, "step": 9132 }, { "epoch": 0.97, "grad_norm": 1.7036226692480771, "learning_rate": 1.8207124133428622e-08, "loss": 0.5779, "step": 9133 }, { "epoch": 0.97, "grad_norm": 1.7670010044345859, "learning_rate": 1.805705337384056e-08, "loss": 0.6158, "step": 9134 }, { "epoch": 0.97, "grad_norm": 1.758963308479215, "learning_rate": 1.7907602530627955e-08, "loss": 0.5164, "step": 9135 }, { "epoch": 0.97, "grad_norm": 2.816203604777965, "learning_rate": 1.775877162238815e-08, "loss": 0.5736, "step": 9136 }, { "epoch": 0.97, "grad_norm": 1.0927473688463287, "learning_rate": 1.7610560667639664e-08, "loss": 0.4777, "step": 9137 }, { "epoch": 0.97, "grad_norm": 1.8983494906923706, "learning_rate": 1.746296968482608e-08, "loss": 0.6653, "step": 9138 }, { "epoch": 0.97, "grad_norm": 2.213963107139528, "learning_rate": 1.7315998692311597e-08, "loss": 0.6273, "step": 9139 }, { "epoch": 0.97, "grad_norm": 1.9010217409014256, "learning_rate": 1.7169647708384362e-08, "loss": 0.6403, "step": 9140 }, { "epoch": 0.98, "grad_norm": 1.9327315296160545, "learning_rate": 1.7023916751255366e-08, "loss": 0.589, "step": 9141 }, { "epoch": 0.98, "grad_norm": 1.8546813338785146, "learning_rate": 1.6878805839057876e-08, "loss": 0.4401, "step": 9142 }, { "epoch": 0.98, "grad_norm": 2.0916276112365213, "learning_rate": 1.673431498984912e-08, "loss": 0.6926, "step": 9143 }, { "epoch": 0.98, "grad_norm": 1.759742553425574, "learning_rate": 1.659044422160805e-08, "loss": 0.5635, "step": 9144 }, { "epoch": 0.98, "grad_norm": 1.7805126665172504, "learning_rate": 1.6447193552237007e-08, "loss": 0.6061, "step": 9145 }, { "epoch": 0.98, "grad_norm": 1.6968222906350283, "learning_rate": 1.6304562999560624e-08, "loss": 0.6113, "step": 9146 }, { "epoch": 0.98, "grad_norm": 2.066684103349132, "learning_rate": 1.6162552581327483e-08, "loss": 0.68, "step": 9147 }, { "epoch": 0.98, "grad_norm": 1.6065345318263864, "learning_rate": 1.602116231520845e-08, "loss": 0.5854, "step": 9148 }, { "epoch": 0.98, "grad_norm": 1.056651410854777, "learning_rate": 1.5880392218796114e-08, "loss": 0.4613, "step": 9149 }, { "epoch": 0.98, "grad_norm": 1.7970220210198664, "learning_rate": 1.5740242309607577e-08, "loss": 0.6258, "step": 9150 }, { "epoch": 0.98, "grad_norm": 1.6973637199320928, "learning_rate": 1.5600712605081668e-08, "loss": 0.6335, "step": 9151 }, { "epoch": 0.98, "grad_norm": 1.070956661650149, "learning_rate": 1.5461803122581165e-08, "loss": 0.4831, "step": 9152 }, { "epoch": 0.98, "grad_norm": 1.9152426760915315, "learning_rate": 1.532351387939002e-08, "loss": 0.5187, "step": 9153 }, { "epoch": 0.98, "grad_norm": 1.8819682305214196, "learning_rate": 1.5185844892716684e-08, "loss": 0.6533, "step": 9154 }, { "epoch": 0.98, "grad_norm": 1.5391536589065846, "learning_rate": 1.5048796179690793e-08, "loss": 0.5485, "step": 9155 }, { "epoch": 0.98, "grad_norm": 1.7535677635345075, "learning_rate": 1.4912367757366485e-08, "loss": 0.5327, "step": 9156 }, { "epoch": 0.98, "grad_norm": 1.7739958907324322, "learning_rate": 1.4776559642720179e-08, "loss": 0.5007, "step": 9157 }, { "epoch": 0.98, "grad_norm": 1.6872640321172585, "learning_rate": 1.4641371852649467e-08, "loss": 0.6598, "step": 9158 }, { "epoch": 0.98, "grad_norm": 1.8574641826360396, "learning_rate": 1.4506804403977559e-08, "loss": 0.6348, "step": 9159 }, { "epoch": 0.98, "grad_norm": 1.8540897754012355, "learning_rate": 1.4372857313448286e-08, "loss": 0.5718, "step": 9160 }, { "epoch": 0.98, "grad_norm": 1.6677087807223863, "learning_rate": 1.4239530597729424e-08, "loss": 0.4899, "step": 9161 }, { "epoch": 0.98, "grad_norm": 1.8840752502692637, "learning_rate": 1.4106824273411036e-08, "loss": 0.6164, "step": 9162 }, { "epoch": 0.98, "grad_norm": 1.9893449523921374, "learning_rate": 1.3974738357005468e-08, "loss": 0.6196, "step": 9163 }, { "epoch": 0.98, "grad_norm": 1.7002902563811253, "learning_rate": 1.3843272864949574e-08, "loss": 0.5717, "step": 9164 }, { "epoch": 0.98, "grad_norm": 1.7401214672638854, "learning_rate": 1.3712427813601382e-08, "loss": 0.6779, "step": 9165 }, { "epoch": 0.98, "grad_norm": 1.9158697801984161, "learning_rate": 1.3582203219242306e-08, "loss": 0.5798, "step": 9166 }, { "epoch": 0.98, "grad_norm": 1.75400861210763, "learning_rate": 1.3452599098077168e-08, "loss": 0.558, "step": 9167 }, { "epoch": 0.98, "grad_norm": 1.8107742569545737, "learning_rate": 1.3323615466231953e-08, "loss": 0.5088, "step": 9168 }, { "epoch": 0.98, "grad_norm": 1.7694316571868547, "learning_rate": 1.3195252339756604e-08, "loss": 0.5768, "step": 9169 }, { "epoch": 0.98, "grad_norm": 1.6612077632612772, "learning_rate": 1.3067509734624451e-08, "loss": 0.5151, "step": 9170 }, { "epoch": 0.98, "grad_norm": 2.202254836232026, "learning_rate": 1.2940387666730003e-08, "loss": 0.5786, "step": 9171 }, { "epoch": 0.98, "grad_norm": 1.903292388870818, "learning_rate": 1.281388615189172e-08, "loss": 0.6329, "step": 9172 }, { "epoch": 0.98, "grad_norm": 2.07118799379817, "learning_rate": 1.2688005205850896e-08, "loss": 0.6279, "step": 9173 }, { "epoch": 0.98, "grad_norm": 1.8493086285489317, "learning_rate": 1.2562744844270559e-08, "loss": 0.5676, "step": 9174 }, { "epoch": 0.98, "grad_norm": 1.9487073971463826, "learning_rate": 1.2438105082737684e-08, "loss": 0.5111, "step": 9175 }, { "epoch": 0.98, "grad_norm": 2.1692045300500524, "learning_rate": 1.2314085936761532e-08, "loss": 0.6002, "step": 9176 }, { "epoch": 0.98, "grad_norm": 1.7766279682674568, "learning_rate": 1.219068742177365e-08, "loss": 0.506, "step": 9177 }, { "epoch": 0.98, "grad_norm": 1.964609629004181, "learning_rate": 1.2067909553128975e-08, "loss": 0.6605, "step": 9178 }, { "epoch": 0.98, "grad_norm": 1.866561804830239, "learning_rate": 1.1945752346105843e-08, "loss": 0.5614, "step": 9179 }, { "epoch": 0.98, "grad_norm": 1.7831959671028745, "learning_rate": 1.1824215815903761e-08, "loss": 0.597, "step": 9180 }, { "epoch": 0.98, "grad_norm": 1.7860337911420172, "learning_rate": 1.1703299977646187e-08, "loss": 0.5107, "step": 9181 }, { "epoch": 0.98, "grad_norm": 1.7081875589790196, "learning_rate": 1.1583004846378864e-08, "loss": 0.6353, "step": 9182 }, { "epoch": 0.98, "grad_norm": 1.6423374973346008, "learning_rate": 1.1463330437070375e-08, "loss": 0.6184, "step": 9183 }, { "epoch": 0.98, "grad_norm": 1.793943087417965, "learning_rate": 1.1344276764612693e-08, "loss": 0.635, "step": 9184 }, { "epoch": 0.98, "grad_norm": 1.6809339656163793, "learning_rate": 1.1225843843818974e-08, "loss": 0.5259, "step": 9185 }, { "epoch": 0.98, "grad_norm": 2.022533075920287, "learning_rate": 1.1108031689426868e-08, "loss": 0.6207, "step": 9186 }, { "epoch": 0.98, "grad_norm": 1.7390844947234199, "learning_rate": 1.0990840316095763e-08, "loss": 0.5615, "step": 9187 }, { "epoch": 0.98, "grad_norm": 1.4931750019309749, "learning_rate": 1.0874269738408438e-08, "loss": 0.5394, "step": 9188 }, { "epoch": 0.98, "grad_norm": 1.7050315466086337, "learning_rate": 1.0758319970869403e-08, "loss": 0.5919, "step": 9189 }, { "epoch": 0.98, "grad_norm": 1.8219095402040075, "learning_rate": 1.0642991027907668e-08, "loss": 0.554, "step": 9190 }, { "epoch": 0.98, "grad_norm": 1.6130874591753914, "learning_rate": 1.0528282923872868e-08, "loss": 0.5637, "step": 9191 }, { "epoch": 0.98, "grad_norm": 1.6922796809536584, "learning_rate": 1.041419567303914e-08, "loss": 0.5266, "step": 9192 }, { "epoch": 0.98, "grad_norm": 1.658095896085952, "learning_rate": 1.0300729289602352e-08, "loss": 0.5439, "step": 9193 }, { "epoch": 0.98, "grad_norm": 1.860613734703635, "learning_rate": 1.0187883787681207e-08, "loss": 0.5416, "step": 9194 }, { "epoch": 0.98, "grad_norm": 1.8647892648580928, "learning_rate": 1.0075659181317809e-08, "loss": 0.5702, "step": 9195 }, { "epoch": 0.98, "grad_norm": 1.8167866934649248, "learning_rate": 9.964055484476543e-09, "loss": 0.5887, "step": 9196 }, { "epoch": 0.98, "grad_norm": 1.6928135047496435, "learning_rate": 9.853072711044076e-09, "loss": 0.5531, "step": 9197 }, { "epoch": 0.98, "grad_norm": 2.2993108823543214, "learning_rate": 9.742710874830474e-09, "loss": 0.6598, "step": 9198 }, { "epoch": 0.98, "grad_norm": 1.7513647683188107, "learning_rate": 9.632969989569196e-09, "loss": 0.5815, "step": 9199 }, { "epoch": 0.98, "grad_norm": 1.811588863460386, "learning_rate": 9.52385006891432e-09, "loss": 0.5292, "step": 9200 }, { "epoch": 0.98, "grad_norm": 1.698055860167171, "learning_rate": 9.415351126444428e-09, "loss": 0.5371, "step": 9201 }, { "epoch": 0.98, "grad_norm": 1.729614743487326, "learning_rate": 9.307473175660386e-09, "loss": 0.6389, "step": 9202 }, { "epoch": 0.98, "grad_norm": 1.6985245328714023, "learning_rate": 9.200216229985903e-09, "loss": 0.5445, "step": 9203 }, { "epoch": 0.98, "grad_norm": 2.2215268718191434, "learning_rate": 9.09358030276697e-09, "loss": 0.6768, "step": 9204 }, { "epoch": 0.98, "grad_norm": 1.829515314202325, "learning_rate": 8.987565407272414e-09, "loss": 0.5285, "step": 9205 }, { "epoch": 0.98, "grad_norm": 1.8075489493646164, "learning_rate": 8.882171556694463e-09, "loss": 0.6331, "step": 9206 }, { "epoch": 0.98, "grad_norm": 2.011028424476158, "learning_rate": 8.777398764147626e-09, "loss": 0.6361, "step": 9207 }, { "epoch": 0.98, "grad_norm": 1.944340964252433, "learning_rate": 8.673247042668698e-09, "loss": 0.5578, "step": 9208 }, { "epoch": 0.98, "grad_norm": 2.0959620071162943, "learning_rate": 8.56971640521731e-09, "loss": 0.6019, "step": 9209 }, { "epoch": 0.98, "grad_norm": 1.633955859534725, "learning_rate": 8.466806864676491e-09, "loss": 0.5682, "step": 9210 }, { "epoch": 0.98, "grad_norm": 1.8841154478266473, "learning_rate": 8.364518433851554e-09, "loss": 0.5357, "step": 9211 }, { "epoch": 0.98, "grad_norm": 1.693481700311852, "learning_rate": 8.262851125470095e-09, "loss": 0.6052, "step": 9212 }, { "epoch": 0.98, "grad_norm": 1.8956232025153987, "learning_rate": 8.161804952183105e-09, "loss": 0.5976, "step": 9213 }, { "epoch": 0.98, "grad_norm": 1.0466380400102144, "learning_rate": 8.061379926563862e-09, "loss": 0.4624, "step": 9214 }, { "epoch": 0.98, "grad_norm": 1.0346670746550968, "learning_rate": 7.961576061109032e-09, "loss": 0.4789, "step": 9215 }, { "epoch": 0.98, "grad_norm": 1.7768291545901942, "learning_rate": 7.86239336823702e-09, "loss": 0.5749, "step": 9216 }, { "epoch": 0.98, "grad_norm": 1.0471154043915363, "learning_rate": 7.763831860289617e-09, "loss": 0.4949, "step": 9217 }, { "epoch": 0.98, "grad_norm": 1.8818729875778288, "learning_rate": 7.665891549530901e-09, "loss": 0.538, "step": 9218 }, { "epoch": 0.98, "grad_norm": 1.9786084317849342, "learning_rate": 7.568572448147794e-09, "loss": 0.5872, "step": 9219 }, { "epoch": 0.98, "grad_norm": 1.6407971792195373, "learning_rate": 7.471874568250049e-09, "loss": 0.5064, "step": 9220 }, { "epoch": 0.98, "grad_norm": 1.037294205823786, "learning_rate": 7.3757979218708194e-09, "loss": 0.4684, "step": 9221 }, { "epoch": 0.98, "grad_norm": 1.8198282210617251, "learning_rate": 7.280342520963324e-09, "loss": 0.6571, "step": 9222 }, { "epoch": 0.98, "grad_norm": 1.739911696983055, "learning_rate": 7.1855083774075016e-09, "loss": 0.5123, "step": 9223 }, { "epoch": 0.98, "grad_norm": 1.6187991464368183, "learning_rate": 7.091295503002249e-09, "loss": 0.4082, "step": 9224 }, { "epoch": 0.98, "grad_norm": 1.0477313423295864, "learning_rate": 6.997703909471521e-09, "loss": 0.4703, "step": 9225 }, { "epoch": 0.98, "grad_norm": 1.919341665794862, "learning_rate": 6.904733608460445e-09, "loss": 0.635, "step": 9226 }, { "epoch": 0.98, "grad_norm": 1.8062827787531086, "learning_rate": 6.8123846115381035e-09, "loss": 0.5278, "step": 9227 }, { "epoch": 0.98, "grad_norm": 1.664296021821517, "learning_rate": 6.720656930195857e-09, "loss": 0.531, "step": 9228 }, { "epoch": 0.98, "grad_norm": 1.0985324645151289, "learning_rate": 6.629550575847355e-09, "loss": 0.5018, "step": 9229 }, { "epoch": 0.98, "grad_norm": 1.8564638799616306, "learning_rate": 6.539065559829638e-09, "loss": 0.5948, "step": 9230 }, { "epoch": 0.98, "grad_norm": 1.8042517985284456, "learning_rate": 6.4492018934009245e-09, "loss": 0.559, "step": 9231 }, { "epoch": 0.98, "grad_norm": 1.099513016795136, "learning_rate": 6.359959587744491e-09, "loss": 0.4794, "step": 9232 }, { "epoch": 0.98, "grad_norm": 1.8623365048795446, "learning_rate": 6.2713386539636766e-09, "loss": 0.6007, "step": 9233 }, { "epoch": 0.98, "grad_norm": 1.0911969862753002, "learning_rate": 6.183339103086883e-09, "loss": 0.4855, "step": 9234 }, { "epoch": 0.99, "grad_norm": 1.7923490896717373, "learning_rate": 6.095960946064239e-09, "loss": 0.6187, "step": 9235 }, { "epoch": 0.99, "grad_norm": 1.6993962685824693, "learning_rate": 6.0092041937670485e-09, "loss": 0.5946, "step": 9236 }, { "epoch": 0.99, "grad_norm": 1.6960852266918942, "learning_rate": 5.923068856992231e-09, "loss": 0.5684, "step": 9237 }, { "epoch": 0.99, "grad_norm": 2.2067066572739593, "learning_rate": 5.837554946457325e-09, "loss": 0.587, "step": 9238 }, { "epoch": 0.99, "grad_norm": 1.827703044707101, "learning_rate": 5.752662472802706e-09, "loss": 0.533, "step": 9239 }, { "epoch": 0.99, "grad_norm": 1.5908432822102874, "learning_rate": 5.668391446591592e-09, "loss": 0.4937, "step": 9240 }, { "epoch": 0.99, "grad_norm": 1.0922041655031232, "learning_rate": 5.58474187831115e-09, "loss": 0.4761, "step": 9241 }, { "epoch": 0.99, "grad_norm": 1.654887045987208, "learning_rate": 5.501713778369167e-09, "loss": 0.5969, "step": 9242 }, { "epoch": 0.99, "grad_norm": 1.8294831818650488, "learning_rate": 5.41930715709682e-09, "loss": 0.5802, "step": 9243 }, { "epoch": 0.99, "grad_norm": 1.8130047174444293, "learning_rate": 5.3375220247492425e-09, "loss": 0.5447, "step": 9244 }, { "epoch": 0.99, "grad_norm": 1.781060616435354, "learning_rate": 5.256358391502736e-09, "loss": 0.6076, "step": 9245 }, { "epoch": 0.99, "grad_norm": 1.8906502849219382, "learning_rate": 5.175816267455891e-09, "loss": 0.5715, "step": 9246 }, { "epoch": 0.99, "grad_norm": 1.7074129208271134, "learning_rate": 5.095895662632355e-09, "loss": 0.6274, "step": 9247 }, { "epoch": 0.99, "grad_norm": 1.732964027136213, "learning_rate": 5.016596586975286e-09, "loss": 0.5099, "step": 9248 }, { "epoch": 0.99, "grad_norm": 1.6503719655685267, "learning_rate": 4.937919050352902e-09, "loss": 0.5232, "step": 9249 }, { "epoch": 0.99, "grad_norm": 1.5549776216901188, "learning_rate": 4.859863062555148e-09, "loss": 0.5778, "step": 9250 }, { "epoch": 0.99, "grad_norm": 1.8073032918159797, "learning_rate": 4.782428633294811e-09, "loss": 0.5369, "step": 9251 }, { "epoch": 0.99, "grad_norm": 1.6562435221689706, "learning_rate": 4.705615772206961e-09, "loss": 0.685, "step": 9252 }, { "epoch": 0.99, "grad_norm": 1.1320983648225438, "learning_rate": 4.629424488850065e-09, "loss": 0.5065, "step": 9253 }, { "epoch": 0.99, "grad_norm": 1.0819183653849003, "learning_rate": 4.553854792704316e-09, "loss": 0.4609, "step": 9254 }, { "epoch": 0.99, "grad_norm": 1.8807375286238428, "learning_rate": 4.4789066931738566e-09, "loss": 0.5186, "step": 9255 }, { "epoch": 0.99, "grad_norm": 1.7811087452538004, "learning_rate": 4.404580199583452e-09, "loss": 0.6372, "step": 9256 }, { "epoch": 0.99, "grad_norm": 1.0545596327430722, "learning_rate": 4.330875321182371e-09, "loss": 0.4844, "step": 9257 }, { "epoch": 0.99, "grad_norm": 1.851454419293877, "learning_rate": 4.257792067142718e-09, "loss": 0.5512, "step": 9258 }, { "epoch": 0.99, "grad_norm": 1.9980481957027827, "learning_rate": 4.185330446557223e-09, "loss": 0.5831, "step": 9259 }, { "epoch": 0.99, "grad_norm": 1.533925425978261, "learning_rate": 4.11349046844256e-09, "loss": 0.498, "step": 9260 }, { "epoch": 0.99, "grad_norm": 1.9698742052370808, "learning_rate": 4.0422721417388014e-09, "loss": 0.6592, "step": 9261 }, { "epoch": 0.99, "grad_norm": 1.0655396182357215, "learning_rate": 3.97167547530719e-09, "loss": 0.4898, "step": 9262 }, { "epoch": 0.99, "grad_norm": 1.6032421446204665, "learning_rate": 3.9017004779323685e-09, "loss": 0.6412, "step": 9263 }, { "epoch": 0.99, "grad_norm": 1.7582107610672344, "learning_rate": 3.832347158321814e-09, "loss": 0.4863, "step": 9264 }, { "epoch": 0.99, "grad_norm": 1.7619173086650077, "learning_rate": 3.763615525104736e-09, "loss": 0.6372, "step": 9265 }, { "epoch": 0.99, "grad_norm": 1.7072961376565887, "learning_rate": 3.6955055868342915e-09, "loss": 0.5692, "step": 9266 }, { "epoch": 0.99, "grad_norm": 1.7698279393479863, "learning_rate": 3.6280173519848137e-09, "loss": 0.6138, "step": 9267 }, { "epoch": 0.99, "grad_norm": 1.7973832416035913, "learning_rate": 3.56115082895514e-09, "loss": 0.6103, "step": 9268 }, { "epoch": 0.99, "grad_norm": 1.7881825043932131, "learning_rate": 3.494906026064726e-09, "loss": 0.5863, "step": 9269 }, { "epoch": 0.99, "grad_norm": 1.8433061302589238, "learning_rate": 3.429282951556423e-09, "loss": 0.5847, "step": 9270 }, { "epoch": 0.99, "grad_norm": 1.681858283811977, "learning_rate": 3.3642816135970315e-09, "loss": 0.5529, "step": 9271 }, { "epoch": 0.99, "grad_norm": 1.7348585607032116, "learning_rate": 3.2999020202734155e-09, "loss": 0.5459, "step": 9272 }, { "epoch": 0.99, "grad_norm": 1.6840243879735426, "learning_rate": 3.2361441795975e-09, "loss": 0.691, "step": 9273 }, { "epoch": 0.99, "grad_norm": 1.849512636054951, "learning_rate": 3.1730080995023837e-09, "loss": 0.5143, "step": 9274 }, { "epoch": 0.99, "grad_norm": 1.7828004337746992, "learning_rate": 3.1104937878445594e-09, "loss": 0.5469, "step": 9275 }, { "epoch": 0.99, "grad_norm": 1.8780540491548767, "learning_rate": 3.0486012524028054e-09, "loss": 0.5687, "step": 9276 }, { "epoch": 0.99, "grad_norm": 1.053123167459046, "learning_rate": 2.987330500878738e-09, "loss": 0.4854, "step": 9277 }, { "epoch": 0.99, "grad_norm": 1.771753320521045, "learning_rate": 2.9266815408957037e-09, "loss": 0.5488, "step": 9278 }, { "epoch": 0.99, "grad_norm": 1.079643261513288, "learning_rate": 2.8666543800009995e-09, "loss": 0.4795, "step": 9279 }, { "epoch": 0.99, "grad_norm": 1.662918288403987, "learning_rate": 2.80724902566365e-09, "loss": 0.5436, "step": 9280 }, { "epoch": 0.99, "grad_norm": 1.7200630184011663, "learning_rate": 2.748465485276075e-09, "loss": 0.5462, "step": 9281 }, { "epoch": 0.99, "grad_norm": 1.9109819348169428, "learning_rate": 2.6903037661529796e-09, "loss": 0.6035, "step": 9282 }, { "epoch": 0.99, "grad_norm": 1.8735754679378445, "learning_rate": 2.632763875530797e-09, "loss": 0.6586, "step": 9283 }, { "epoch": 0.99, "grad_norm": 1.9375554806550688, "learning_rate": 2.5758458205699113e-09, "loss": 0.5324, "step": 9284 }, { "epoch": 0.99, "grad_norm": 2.1175037840423148, "learning_rate": 2.519549608352434e-09, "loss": 0.6615, "step": 9285 }, { "epoch": 0.99, "grad_norm": 1.721594329660734, "learning_rate": 2.4638752458838733e-09, "loss": 0.5532, "step": 9286 }, { "epoch": 0.99, "grad_norm": 1.7869638337649623, "learning_rate": 2.4088227400914653e-09, "loss": 0.6285, "step": 9287 }, { "epoch": 0.99, "grad_norm": 1.7284794389367688, "learning_rate": 2.354392097825842e-09, "loss": 0.5218, "step": 9288 }, { "epoch": 0.99, "grad_norm": 1.7546674436908962, "learning_rate": 2.3005833258604727e-09, "loss": 0.5805, "step": 9289 }, { "epoch": 0.99, "grad_norm": 1.6806896997223626, "learning_rate": 2.2473964308900033e-09, "loss": 0.6591, "step": 9290 }, { "epoch": 0.99, "grad_norm": 2.0065663059585455, "learning_rate": 2.1948314195330277e-09, "loss": 0.6335, "step": 9291 }, { "epoch": 0.99, "grad_norm": 2.080220074975761, "learning_rate": 2.1428882983304254e-09, "loss": 0.5353, "step": 9292 }, { "epoch": 0.99, "grad_norm": 1.0509503886974718, "learning_rate": 2.0915670737453597e-09, "loss": 0.4694, "step": 9293 }, { "epoch": 0.99, "grad_norm": 1.8302046133941523, "learning_rate": 2.0408677521638333e-09, "loss": 0.5112, "step": 9294 }, { "epoch": 0.99, "grad_norm": 1.8896142477879678, "learning_rate": 1.990790339895243e-09, "loss": 0.5626, "step": 9295 }, { "epoch": 0.99, "grad_norm": 1.8284748158387256, "learning_rate": 1.941334843169607e-09, "loss": 0.5775, "step": 9296 }, { "epoch": 0.99, "grad_norm": 1.0839949469128973, "learning_rate": 1.8925012681419996e-09, "loss": 0.4892, "step": 9297 }, { "epoch": 0.99, "grad_norm": 1.7681055644993422, "learning_rate": 1.8442896208881178e-09, "loss": 0.5678, "step": 9298 }, { "epoch": 0.99, "grad_norm": 1.8826401592495745, "learning_rate": 1.7966999074076063e-09, "loss": 0.5533, "step": 9299 }, { "epoch": 0.99, "grad_norm": 1.9670957400160345, "learning_rate": 1.7497321336218398e-09, "loss": 0.5989, "step": 9300 }, { "epoch": 0.99, "grad_norm": 1.8514487209519472, "learning_rate": 1.7033863053750321e-09, "loss": 0.5861, "step": 9301 }, { "epoch": 0.99, "grad_norm": 2.07476071888447, "learning_rate": 1.657662428434792e-09, "loss": 0.5425, "step": 9302 }, { "epoch": 0.99, "grad_norm": 1.0856874430286956, "learning_rate": 1.612560508489347e-09, "loss": 0.4779, "step": 9303 }, { "epoch": 0.99, "grad_norm": 1.9113260536524916, "learning_rate": 1.56808055115254e-09, "loss": 0.643, "step": 9304 }, { "epoch": 0.99, "grad_norm": 1.8970974417762707, "learning_rate": 1.5242225619577222e-09, "loss": 0.5844, "step": 9305 }, { "epoch": 0.99, "grad_norm": 1.8123537278879494, "learning_rate": 1.4809865463633056e-09, "loss": 0.6098, "step": 9306 }, { "epoch": 0.99, "grad_norm": 1.763560283970267, "learning_rate": 1.4383725097483202e-09, "loss": 0.5606, "step": 9307 }, { "epoch": 0.99, "grad_norm": 1.7378430888252068, "learning_rate": 1.3963804574157468e-09, "loss": 0.5916, "step": 9308 }, { "epoch": 0.99, "grad_norm": 1.623864551385425, "learning_rate": 1.355010394591405e-09, "loss": 0.5612, "step": 9309 }, { "epoch": 0.99, "grad_norm": 1.7660570423698299, "learning_rate": 1.3142623264217335e-09, "loss": 0.569, "step": 9310 }, { "epoch": 0.99, "grad_norm": 1.9110379986464299, "learning_rate": 1.2741362579782312e-09, "loss": 0.6146, "step": 9311 }, { "epoch": 0.99, "grad_norm": 1.7968949598065949, "learning_rate": 1.234632194253571e-09, "loss": 0.536, "step": 9312 }, { "epoch": 0.99, "grad_norm": 1.9308854274093532, "learning_rate": 1.1957501401632653e-09, "loss": 0.6319, "step": 9313 }, { "epoch": 0.99, "grad_norm": 1.854176917234037, "learning_rate": 1.1574901005456662e-09, "loss": 0.6788, "step": 9314 }, { "epoch": 0.99, "grad_norm": 1.6880994529607776, "learning_rate": 1.1198520801614098e-09, "loss": 0.5526, "step": 9315 }, { "epoch": 0.99, "grad_norm": 1.9069659282025901, "learning_rate": 1.0828360836939722e-09, "loss": 0.6822, "step": 9316 }, { "epoch": 0.99, "grad_norm": 1.8945947567368275, "learning_rate": 1.0464421157496684e-09, "loss": 0.6551, "step": 9317 }, { "epoch": 0.99, "grad_norm": 1.8375159710662594, "learning_rate": 1.0106701808559882e-09, "loss": 0.6028, "step": 9318 }, { "epoch": 0.99, "grad_norm": 2.0918426850221885, "learning_rate": 9.755202834654808e-10, "loss": 0.5554, "step": 9319 }, { "epoch": 0.99, "grad_norm": 1.8614451218507844, "learning_rate": 9.409924279513149e-10, "loss": 0.5942, "step": 9320 }, { "epoch": 0.99, "grad_norm": 1.0693286875610644, "learning_rate": 9.070866186094984e-10, "loss": 0.4789, "step": 9321 }, { "epoch": 0.99, "grad_norm": 2.53431146993047, "learning_rate": 8.73802859659989e-10, "loss": 0.5712, "step": 9322 }, { "epoch": 0.99, "grad_norm": 1.750542001730585, "learning_rate": 8.411411552433635e-10, "loss": 0.5343, "step": 9323 }, { "epoch": 0.99, "grad_norm": 1.6879990541000238, "learning_rate": 8.091015094247035e-10, "loss": 0.619, "step": 9324 }, { "epoch": 0.99, "grad_norm": 1.7754866954976878, "learning_rate": 7.776839261902647e-10, "loss": 0.5902, "step": 9325 }, { "epoch": 0.99, "grad_norm": 1.7386900737027648, "learning_rate": 7.468884094491424e-10, "loss": 0.502, "step": 9326 }, { "epoch": 0.99, "grad_norm": 1.7210999475550695, "learning_rate": 7.167149630343817e-10, "loss": 0.5417, "step": 9327 }, { "epoch": 0.99, "grad_norm": 1.593849525234495, "learning_rate": 6.871635906990915e-10, "loss": 0.4633, "step": 9328 }, { "epoch": 1.0, "grad_norm": 1.05933684988125, "learning_rate": 6.582342961214405e-10, "loss": 0.4651, "step": 9329 }, { "epoch": 1.0, "grad_norm": 1.83552744620484, "learning_rate": 6.299270829013271e-10, "loss": 0.6009, "step": 9330 }, { "epoch": 1.0, "grad_norm": 1.6052818743568544, "learning_rate": 6.022419545598235e-10, "loss": 0.5242, "step": 9331 }, { "epoch": 1.0, "grad_norm": 1.7298646397478905, "learning_rate": 5.751789145436171e-10, "loss": 0.5328, "step": 9332 }, { "epoch": 1.0, "grad_norm": 1.7839301958903377, "learning_rate": 5.48737966218904e-10, "loss": 0.5846, "step": 9333 }, { "epoch": 1.0, "grad_norm": 1.9670841112027881, "learning_rate": 5.229191128763855e-10, "loss": 0.5783, "step": 9334 }, { "epoch": 1.0, "grad_norm": 1.8480218105141986, "learning_rate": 4.977223577284917e-10, "loss": 0.6153, "step": 9335 }, { "epoch": 1.0, "grad_norm": 1.056078884357945, "learning_rate": 4.731477039104926e-10, "loss": 0.4573, "step": 9336 }, { "epoch": 1.0, "grad_norm": 1.7164147567394767, "learning_rate": 4.491951544804973e-10, "loss": 0.5902, "step": 9337 }, { "epoch": 1.0, "grad_norm": 1.0798186850707778, "learning_rate": 4.258647124188997e-10, "loss": 0.4772, "step": 9338 }, { "epoch": 1.0, "grad_norm": 1.8429662523641646, "learning_rate": 4.031563806289329e-10, "loss": 0.5065, "step": 9339 }, { "epoch": 1.0, "grad_norm": 1.976713809416174, "learning_rate": 3.8107016193611455e-10, "loss": 0.5524, "step": 9340 }, { "epoch": 1.0, "grad_norm": 1.8178956530808268, "learning_rate": 3.5960605908880176e-10, "loss": 0.5817, "step": 9341 }, { "epoch": 1.0, "grad_norm": 2.4995662826022946, "learning_rate": 3.3876407475763596e-10, "loss": 0.524, "step": 9342 }, { "epoch": 1.0, "grad_norm": 1.8426207401407868, "learning_rate": 3.1854421153609813e-10, "loss": 0.6065, "step": 9343 }, { "epoch": 1.0, "grad_norm": 1.856506824124308, "learning_rate": 2.9894647193995374e-10, "loss": 0.5817, "step": 9344 }, { "epoch": 1.0, "grad_norm": 2.2114273419876294, "learning_rate": 2.799708584083627e-10, "loss": 0.6481, "step": 9345 }, { "epoch": 1.0, "grad_norm": 1.6309883567238133, "learning_rate": 2.6161737330221424e-10, "loss": 0.5097, "step": 9346 }, { "epoch": 1.0, "grad_norm": 2.1113061265708164, "learning_rate": 2.4388601890523724e-10, "loss": 0.6888, "step": 9347 }, { "epoch": 1.0, "grad_norm": 1.1156907345716756, "learning_rate": 2.2677679742399983e-10, "loss": 0.5033, "step": 9348 }, { "epoch": 1.0, "grad_norm": 1.8213900963322873, "learning_rate": 2.1028971098735473e-10, "loss": 0.6051, "step": 9349 }, { "epoch": 1.0, "grad_norm": 1.0649175069695407, "learning_rate": 1.9442476164643897e-10, "loss": 0.4481, "step": 9350 }, { "epoch": 1.0, "grad_norm": 1.1053245943433925, "learning_rate": 1.7918195137578421e-10, "loss": 0.4878, "step": 9351 }, { "epoch": 1.0, "grad_norm": 1.7317680695190314, "learning_rate": 1.6456128207220646e-10, "loss": 0.4415, "step": 9352 }, { "epoch": 1.0, "grad_norm": 1.7182843953078022, "learning_rate": 1.5056275555425104e-10, "loss": 0.5547, "step": 9353 }, { "epoch": 1.0, "grad_norm": 1.706510000238782, "learning_rate": 1.3718637356496812e-10, "loss": 0.5615, "step": 9354 }, { "epoch": 1.0, "grad_norm": 1.7829571488230531, "learning_rate": 1.2443213776802687e-10, "loss": 0.5925, "step": 9355 }, { "epoch": 1.0, "grad_norm": 1.862086576267502, "learning_rate": 1.1230004975049113e-10, "loss": 0.5696, "step": 9356 }, { "epoch": 1.0, "grad_norm": 2.2425320423502035, "learning_rate": 1.007901110217091e-10, "loss": 0.6683, "step": 9357 }, { "epoch": 1.0, "grad_norm": 1.9555628093749013, "learning_rate": 8.99023230149787e-11, "loss": 0.5822, "step": 9358 }, { "epoch": 1.0, "grad_norm": 1.670451562631038, "learning_rate": 7.96366870842169e-11, "loss": 0.5718, "step": 9359 }, { "epoch": 1.0, "grad_norm": 1.0793809993363088, "learning_rate": 6.99932045067353e-11, "loss": 0.4749, "step": 9360 }, { "epoch": 1.0, "grad_norm": 1.5617661520738644, "learning_rate": 6.09718764832401e-11, "loss": 0.4868, "step": 9361 }, { "epoch": 1.0, "grad_norm": 1.7970382571371957, "learning_rate": 5.257270413561166e-11, "loss": 0.5745, "step": 9362 }, { "epoch": 1.0, "grad_norm": 1.935735259027403, "learning_rate": 4.479568850912497e-11, "loss": 0.57, "step": 9363 }, { "epoch": 1.0, "grad_norm": 1.5414739973086415, "learning_rate": 3.764083057189449e-11, "loss": 0.4967, "step": 9364 }, { "epoch": 1.0, "grad_norm": 1.7200148316095687, "learning_rate": 3.110813121376399e-11, "loss": 0.5998, "step": 9365 }, { "epoch": 1.0, "grad_norm": 1.6503419188829231, "learning_rate": 2.519759124741672e-11, "loss": 0.5425, "step": 9366 }, { "epoch": 1.0, "grad_norm": 1.8646801862179851, "learning_rate": 1.990921140893054e-11, "loss": 0.6214, "step": 9367 }, { "epoch": 1.0, "grad_norm": 2.004926196069682, "learning_rate": 1.5242992355557485e-11, "loss": 0.6841, "step": 9368 }, { "epoch": 1.0, "grad_norm": 1.6939179692273179, "learning_rate": 1.1198934669054417e-11, "loss": 0.4939, "step": 9369 }, { "epoch": 1.0, "grad_norm": 1.701334983357359, "learning_rate": 7.777038851797258e-12, "loss": 0.553, "step": 9370 }, { "epoch": 1.0, "grad_norm": 1.7449067728978471, "learning_rate": 4.977305329556536e-12, "loss": 0.5486, "step": 9371 }, { "epoch": 1.0, "grad_norm": 1.0819118941589794, "learning_rate": 2.799734450942282e-12, "loss": 0.4752, "step": 9372 }, { "epoch": 1.0, "grad_norm": 1.0734931004726436, "learning_rate": 1.244326487404024e-12, "loss": 0.4691, "step": 9373 }, { "epoch": 1.0, "grad_norm": 1.8836454954934871, "learning_rate": 3.1108163156545743e-13, "loss": 0.5863, "step": 9374 }, { "epoch": 1.0, "grad_norm": 1.3239623349237652, "learning_rate": 0.0, "loss": 0.4858, "step": 9375 }, { "epoch": 1.0, "step": 9375, "total_flos": 1901337766797312.0, "train_loss": 0.6266841061083476, "train_runtime": 46030.0871, "train_samples_per_second": 26.07, "train_steps_per_second": 0.204 } ], "logging_steps": 1.0, "max_steps": 9375, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "total_flos": 1901337766797312.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }